In [None]:
# 0) Attach Google Drive to the Colab filesystem
from google.colab import drive

# Mount point under which Drive content becomes visible to the notebook.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m105.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3


In [None]:
# 1) Imports
import os, math, time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from tifffile import imread
from torch.cuda.amp import autocast, GradScaler

In [None]:
!pip install imagecodecs

Collecting imagecodecs
  Downloading imagecodecs-2025.3.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Downloading imagecodecs-2025.3.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (45.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.6/45.6 MB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: imagecodecs
Successfully installed imagecodecs-2025.3.30


In [None]:
# 2) Cached dataset that loads each TIFF once into RAM
class CachedLSTDataset(Dataset):
    """In-memory dataset of TIFF patches paired with weather vectors.

    Every TIFF referenced by ``df`` is read once in ``__init__`` and kept in
    RAM. Band 0 of each patch is stored as the target; bands 1-3 are turned
    into an 8-bit H×W×3 image and passed through ``transform`` on access.

    NOTE(review): this class is defined a second time further down the
    notebook; the later definition replaces this one once its cell runs.

    Args:
        df: DataFrame with one row per patch. Must contain ``weather_cols``
            and a column holding the patch file name.
        patches_dir: Directory the file names are relative to.
        weather_cols: Names of the numeric columns forming the weather vector.
        transform: Callable applied to the H×W×3 ``uint8`` image array.
        target_size: ``(height, width)`` the target band is resized to.
        filename_col: Name of the file-name column. When ``None`` (default)
            ``"filename"`` is used if present, otherwise ``"patch_filename"``.
    """

    # Column names tried, in order, when ``filename_col`` is not given.
    _FILENAME_CANDIDATES = ("filename", "patch_filename")

    def __init__(self, df, patches_dir, weather_cols,
                 transform, target_size=(56,56), filename_col=None):
        self.transform    = transform
        self.weather_cols = weather_cols
        self.target_size  = target_size

        self.raw      = []
        self.targets  = []
        self.weathers = []

        if filename_col is None:
            # Fall back to "filename" when nothing matches so a missing column
            # still surfaces as the usual KeyError on first row access.
            filename_col = next(
                (c for c in self._FILENAME_CANDIDATES if c in df.columns),
                "filename",
            )

        for _, row in df.iterrows():
            arr = imread(os.path.join(patches_dir, row[filename_col])).astype(np.float32)
            # keep the full multi-band array
            self.raw.append(arr)
            # pre-store target band (band 0) with a leading channel axis
            t = torch.tensor(arr[0], dtype=torch.float32).unsqueeze(0)
            self.targets.append(t)
            # pre-store weather vector
            w = row[self.weather_cols].values.astype(np.float32)
            self.weathers.append(torch.from_numpy(w))

    def __len__(self):
        """Return the number of cached patches."""
        return len(self.raw)

    def __getitem__(self, idx):
        """Return ``(img, weather, tgt)`` for the patch at ``idx``.

        ``img`` is whatever ``transform`` returns, ``weather`` is the cached
        weather tensor and ``tgt`` has shape ``[1, *target_size]``.
        """
        arr = self.raw[idx]
        # Clip before the uint8 cast: converting out-of-range floats to an
        # unsigned integer is undefined in NumPy and wraps in practice.
        rgb    = np.clip(arr[[1,2,3]], 0, 255)
        img_np = rgb.transpose(1,2,0).astype(np.uint8)
        img    = self.transform(img_np)
        # resize target; interpolate needs a batch axis, dropped again after
        tgt = F.interpolate(self.targets[idx].unsqueeze(0),
                            size=self.target_size,
                            mode='bilinear',
                            align_corners=False
                           ).squeeze(0)               # [1, *target_size]
        weather = self.weathers[idx]                   # [len(weather_cols)]
        return img, weather, tgt

In [None]:
# 1) Meteorological columns fed to the model as the weather vector
weather_cols = [
    "air_temp_C",
    "dew_point_C",
    "relative_humidity_percent",
    "wind_speed_m_s",
    "precipitation_in",
]

# 2) Read the patch/weather table and coerce weather columns to numbers
#    (values that cannot be parsed become NaN)
meteo_raw = pd.read_csv("/content/drive/MyDrive/PatchedOutput/patch_with_meteo.csv")
numeric_weather = {
    col: pd.to_numeric(meteo_raw[col], errors="coerce")
    for col in weather_cols
}

# keep only rows that have every weather value and a patch_filename
df = (
    meteo_raw
    .assign(**numeric_weather)
    .dropna(subset=[*weather_cols, "patch_filename"])
    .reset_index(drop=True)
)
print(f"Using {len(df)} samples after dropna")

# 3) Define cached Dataset
class CachedLSTDataset(Dataset):
    """In-memory dataset of TIFF patches paired with weather vectors.

    Every TIFF referenced by ``df`` is read once in ``__init__`` and kept in
    RAM. Band 0 of each patch is stored as the target; bands 1-3 are turned
    into an 8-bit H×W×3 image and passed through ``transform`` on access.

    Args:
        df: DataFrame with one row per patch. Must contain ``weather_cols``
            and a column holding the patch file name.
        patches_dir: Directory the file names are relative to.
        weather_cols: Names of the numeric columns forming the weather vector.
        transform: Callable applied to the H×W×3 ``uint8`` image array.
        target_size: ``(height, width)`` the target band is resized to.
        filename_col: Name of the file-name column. When ``None`` (default)
            ``"patch_filename"`` is used if present, otherwise ``"filename"``.
    """

    # Column names tried, in order, when ``filename_col`` is not given.
    _FILENAME_CANDIDATES = ("patch_filename", "filename")

    def __init__(self, df, patches_dir, weather_cols,
                 transform, target_size=(56,56), filename_col=None):
        self.transform    = transform
        self.weather_cols = weather_cols
        self.target_size  = target_size

        self.raw      = []
        self.targets  = []
        self.weathers = []

        if filename_col is None:
            # Fall back to "patch_filename" when nothing matches so a missing
            # column still surfaces as the usual KeyError on first row access.
            filename_col = next(
                (c for c in self._FILENAME_CANDIDATES if c in df.columns),
                "patch_filename",
            )

        for _, row in df.iterrows():
            # load once
            arr = imread(os.path.join(patches_dir, row[filename_col])).astype(np.float32)
            self.raw.append(arr)

            # pre-store full-res target (band 0) with a leading channel axis
            t = torch.tensor(arr[0], dtype=torch.float32).unsqueeze(0)
            self.targets.append(t)

            # pre-store weather vector
            w = row[self.weather_cols].values.astype(np.float32)
            self.weathers.append(torch.from_numpy(w))

    def __len__(self):
        """Return the number of cached patches."""
        return len(self.raw)

    def __getitem__(self, idx):
        """Return ``(img, weather, tgt)`` for the patch at ``idx``.

        ``img`` is whatever ``transform`` returns, ``weather`` is the cached
        weather tensor and ``tgt`` has shape ``[1, *target_size]``.
        """
        arr     = self.raw[idx]
        # Clip before the uint8 cast: converting out-of-range floats to an
        # unsigned integer is undefined in NumPy and wraps in practice.
        rgb     = np.clip(arr[[1,2,3]], 0, 255)
        img_np  = rgb.transpose(1,2,0).astype(np.uint8)
        img     = self.transform(img_np)

        # interpolate needs a batch axis; it is dropped again afterwards
        tgt     = F.interpolate(
                    self.targets[idx].unsqueeze(0),
                    size=self.target_size,
                    mode='bilinear',
                    align_corners=False
                  ).squeeze(0)                         # [1, *target_size]

        weather = self.weathers[idx]                   # [len(weather_cols)]
        return img, weather, tgt

# 4) Instantiate dataset & loaders
patches_dir = "/content/drive/MyDrive/PatchedOutput_Cleaned"

# uint8 H×W×3 array -> PIL image -> 224×224 -> float tensor -> normalised
# with the ImageNet channel statistics
transform   = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std =[0.229,0.224,0.225],
    ),
])

dataset = CachedLSTDataset(df, patches_dir, weather_cols,
                           transform, target_size=(56,56))

# 80/20 train/validation split. The generator is seeded so the same samples
# land in each subset on every run; without it the split (and therefore the
# reported validation RMSE) is not reproducible.
SPLIT_SEED = 42
n_train    = int(0.8 * len(dataset))
n_val      = len(dataset) - n_train
train_ds, val_ds = random_split(
    dataset,
    [n_train, n_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Settings shared by both loaders; only shuffling differs between them.
loader_kwargs = dict(batch_size=16,
                     num_workers=4,
                     pin_memory=True,
                     persistent_workers=True)

train_dl = DataLoader(train_ds, shuffle=True,  **loader_kwargs)
val_dl   = DataLoader(val_ds,   shuffle=False, **loader_kwargs)

print(f"Train samples: {len(train_ds)}, Val samples: {len(val_ds)}")

Using 18813 samples after dropna
Train samples: 15050, Val samples: 3763


In [None]:
# 4) Model definition
class CNN_MLP(nn.Module):
    """ResNet-18 image encoder fused with a small weather MLP.

    The image branch yields a 512-d feature vector, the weather branch a
    16-d embedding; both are concatenated and mapped by a two-layer head to
    ``H*W`` values that are reshaped into a single-channel ``H×W`` map.

    Args:
        meteo_dim: Length of the weather vector given to ``forward``.
        H: Height of the predicted map.
        W: Width of the predicted map.
    """

    def __init__(self, meteo_dim, H, W):
        super().__init__()
        # spatial encoder; IMAGENET1K_V1 is the checkpoint the deprecated
        # ``pretrained=True`` flag used to select
        self.cnn = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Freeze everything except layer4 (fc is replaced below anyway).
        # NOTE(review): the frozen stages contain BatchNorm layers whose
        # running statistics still update while the module is in train()
        # mode; requires_grad=False does not stop that — confirm this is
        # intended.
        for name, p in self.cnn.named_parameters():
            if not (name.startswith('layer4') or name.startswith('fc')):
                p.requires_grad = False

        # Trainable stem with the same geometry as the stock conv1 (plus a
        # bias term). It is seeded with the pretrained kernels so the frozen
        # stages behind it start from the features they were trained on
        # instead of the output of a randomly initialised convolution.
        pretrained_stem = self.cnn.conv1
        stem = nn.Conv2d(3,64,7,2,3)
        with torch.no_grad():
            stem.weight.copy_(pretrained_stem.weight)
            stem.bias.zero_()
        self.cnn.conv1 = stem
        self.cnn.fc    = nn.Identity()  # outputs 512

        # weather MLP
        self.mlp = nn.Sequential(
            nn.Linear(meteo_dim,16), nn.ReLU(),
            nn.Linear(16,16),        nn.ReLU()
        )
        # fusion head
        self.head = nn.Sequential(
            nn.Linear(512+16,512), nn.ReLU(),
            nn.Linear(512, H*W)
        )
        self.H, self.W = H, W

    def forward(self, x, m):
        """Predict a ``[B,1,H,W]`` map from images ``x`` and weather ``m``."""
        f = self.cnn(x)                   # [B,512]
        w = self.mlp(m)                   # [B,16]
        h = torch.cat([f,w], dim=1)       # [B,528]
        out = self.head(h)                # [B,H*W]
        return out.view(-1,1,self.H,self.W)  # [B,1,H,W]

In [None]:
# 5) Instantiate model, optimizer, loss, scaler
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
meteo_dim = len(weather_cols)

# The model must emit maps of exactly the size the dataset resizes its
# targets to, so take that size from the dataset instead of repeating the
# literal (and instead of loading a whole batch just to read a shape).
out_h, out_w = dataset.target_size

model  = CNN_MLP(meteo_dim, out_h, out_w).to(device)
opt    = torch.optim.Adam(model.parameters(), lr=1e-4)
lossf  = nn.MSELoss()
# torch.amp.GradScaler replaces the deprecated torch.cuda.amp.GradScaler;
# scaling is only enabled when training actually runs on CUDA.
scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda'))

  scaler = GradScaler()


In [None]:
# 6) Training loop with mixed precision
def _run_epoch(loader, train):
    """Run one pass over ``loader`` and return the RMSE over its dataset.

    Uses the notebook-level ``model``, ``opt``, ``lossf``, ``scaler`` and
    ``device``. With ``train=True`` the model is put in training mode and the
    optimizer is stepped after every batch; with ``train=False`` the model is
    put in eval mode and gradients are disabled.
    """
    model.train(train)
    use_amp = device.type == 'cuda'   # mixed precision only on GPU
    summed_loss = 0.0
    with torch.set_grad_enabled(train):
        for x, m, y in loader:
            x,m,y = x.to(device), m.to(device), y.to(device)
            if train:
                opt.zero_grad()
            # torch.autocast replaces the deprecated torch.cuda.amp.autocast
            with torch.autocast(device_type=device.type, enabled=use_amp):
                pred = model(x, m)
                loss = lossf(pred, y)
            if train:
                scaler.scale(loss).backward()
                scaler.step(opt)
                scaler.update()
            # lossf returns the batch mean, so weight it by the batch size
            summed_loss += loss.item() * x.size(0)
    return math.sqrt(summed_loss / len(loader.dataset))


num_epochs = 10
for epoch in range(1, num_epochs+1):
    train_rmse = _run_epoch(train_dl, train=True)
    val_rmse   = _run_epoch(val_dl,   train=False)

    print(f"Epoch {epoch:02d} ▶ Train RMSE: {train_rmse:.3f} | Val RMSE: {val_rmse:.3f}")

  with autocast():
  with autocast():


Epoch 01 ▶ Train RMSE: 4.406 | Val RMSE: 2.539
Epoch 02 ▶ Train RMSE: 2.019 | Val RMSE: 2.760
Epoch 03 ▶ Train RMSE: 1.801 | Val RMSE: 1.794
Epoch 04 ▶ Train RMSE: 1.695 | Val RMSE: 2.491
Epoch 05 ▶ Train RMSE: 1.709 | Val RMSE: 1.496
Epoch 06 ▶ Train RMSE: 1.496 | Val RMSE: 2.784
Epoch 07 ▶ Train RMSE: 1.462 | Val RMSE: 3.352
Epoch 08 ▶ Train RMSE: 1.380 | Val RMSE: 1.291
Epoch 09 ▶ Train RMSE: 1.316 | Val RMSE: 1.436
Epoch 10 ▶ Train RMSE: 1.247 | Val RMSE: 2.012


In [None]:
!pip install imagecodes

Collecting imagecodes
  Downloading imagecodes-0.0.1-py3-none-any.whl.metadata (576 bytes)
Downloading imagecodes-0.0.1-py3-none-any.whl (1.2 kB)
Installing collected packages: imagecodes
Successfully installed imagecodes-0.0.1
