In [1]:
from datetime import datetime, time, timedelta
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import xarray as xr
from ocf_blosc2 import Blosc2
from torch.utils.data import DataLoader, IterableDataset
from torchinfo import summary
import json
# Default figure size (width, height) applied to every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (20, 12)
# Enable IPython's autoreload extension; mode 2 re-imports all modules before each
# cell executes, so edits to project files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


## Train a model

In [3]:
from dataset import HDF5Dataset

# Pre-processed training split on disk.
TRAIN_HDF5_PATH = "./data/processed_data/processed_train.hdf5"

# NOTE(review): the four positional flags are defined by HDF5Dataset's signature,
# which is not visible from this notebook — check dataset.py for their meaning.
dataset = HDF5Dataset(TRAIN_HDF5_PATH, True, True, True, True)

# Shuffled mini-batches, loaded by 8 worker processes into pinned host memory.
data_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=8,
    pin_memory=True,
)
print(f"train dataset len: {len(dataset)}")

train dataset len: 23440


In [4]:
from submission.model import OurResnet2

# Weights to start from (a previously saved state dict for this architecture).
CHECKPOINT_PATH = "submission/OurResnetCombo-Full-NoWeather-ep12.pt"

# Build the network on the target device, then restore the saved parameters.
model = OurResnet2(image_size=128)
model = model.to(device)
model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device))

# Mean-absolute-error objective, optimised with Adam over all model parameters.
criterion = nn.L1Loss()
optimiser = optim.Adam(model.parameters(), lr=1e-3)

# Manual forward-pass smoke test with random inputs (kept disabled):
# x = torch.randn((1, 12)).to(device)
# y = torch.randn((1, 12, 1, 128, 128)).to(device)
# z = torch.randn((1, 6, 10, 128, 128)).to(device)
# model(x, y, z)

# Last expression of the cell, so the layer table is rendered as the cell output.
# The three shapes correspond to the model's three positional inputs (batch of 1).
summary(
    model,
    input_size=[(1, 12), (1, 12, 1, 128, 128), (1, 6, 10, 128, 128)],
)

Layer (type:depth-idx)                        Output Shape              Param #
OurResnet2                                    [1, 48]                   --
├─VideoResNet: 1-1                            [1, 512]                  --
│    └─BasicStem: 2-1                         [1, 64, 1, 64, 64]        --
│    │    └─Conv3d: 3-1                       [1, 64, 1, 64, 64]        112,896
│    │    └─BatchNorm3d: 3-2                  [1, 64, 1, 64, 64]        128
│    │    └─ReLU: 3-3                         [1, 64, 1, 64, 64]        --
│    └─Sequential: 2-2                        [1, 64, 1, 64, 64]        --
│    │    └─BasicBlock: 3-4                   [1, 64, 1, 64, 64]        221,440
│    │    └─BasicBlock: 3-5                   [1, 64, 1, 64, 64]        221,440
│    └─Sequential: 2-3                        [1, 128, 1, 32, 32]       --
│    │    └─BasicBlock: 3-6                   [1, 128, 1, 32, 32]       672,512
│    │    └─BasicBlock: 3-7                   [1, 128, 1, 32, 32]       88

In [9]:
# Imports used only by this cell; done once up front instead of on every epoch.
from tqdm import tqdm
from validate import main

EPOCHS = 100
MODEL_KEY = "OurResnetCombo-Full-NoWeather"
# Offset added to the epoch counter used in log lines and checkpoint file names.
# NOTE(review): an earlier cell loads `submission/OurResnetCombo-Full-NoWeather-ep12.pt`
# into `model`. With START_EPOCH = 0 this loop writes `-ep1`, `-ep2`, ... and will
# overwrite that very checkpoint when it reaches epoch 12. Set START_EPOCH = 12 to
# continue the numbering instead of restarting it.
START_EPOCH = 0
print(f"Training model key {MODEL_KEY}")

# autocast on CUDA runs eligible ops in float16, where small gradients can underflow
# to zero; GradScaler scales the loss to prevent that. It is a transparent no-op when
# disabled (i.e. when running on the CPU).
grad_scaler = torch.cuda.amp.GradScaler(enabled=(device == "cuda"))

for epoch in range(START_EPOCH, START_EPOCH + EPOCHS):
    # Validate before each training epoch. Switch to eval mode explicitly: no_grad()
    # only disables autograd, it does not stop BatchNorm from updating its running
    # statistics or Dropout from dropping units.
    model.eval()
    with torch.no_grad():
        main(model=model)
    model.train()

    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    count = 0           # number of samples seen so far this epoch
    pbar = tqdm(enumerate(data_loader), total=len(data_loader))
    for i, (pv_features, hrv_features, nwp, _extra, pv_targets) in pbar:
        optimiser.zero_grad()

        # Layout fix-ups: swap dims 1 and 2 of the NWP tensor, and insert a singleton
        # dim at position 2 of the HRV tensor (channels as first dim then number of
        # "frames").
        nwp = nwp.permute(0, 2, 1, 3, 4)
        hrv_features = torch.unsqueeze(hrv_features, 2)

        # The loader uses pinned memory, so host-to-device copies can be asynchronous.
        pv_features = pv_features.to(device, dtype=torch.float, non_blocking=True)
        hrv_features = hrv_features.to(device, dtype=torch.float, non_blocking=True)
        nwp = nwp.to(device, dtype=torch.float, non_blocking=True)
        targets = pv_targets.to(device, dtype=torch.float, non_blocking=True)

        # Only the forward pass and the loss run under autocast.
        with torch.autocast(device_type=device):
            predictions = model(pv_features, hrv_features, nwp)
            loss = criterion(predictions, targets)

        # Scaled backward pass; step() unscales the gradients and skips the update if
        # they contain inf/NaN, update() adapts the scale factor for the next step.
        grad_scaler.scale(loss).backward()
        grad_scaler.step(optimiser)
        grad_scaler.update()

        # Weight the batch-mean loss by batch size so the running average stays a true
        # per-sample mean even when the last batch is smaller.
        size = int(pv_targets.size(0))
        running_loss += loss.item() * size
        count += size

        if i % 50 == 49:
            pbar.set_description(f"Epoch {epoch + 1}, {i + 1}: {running_loss / count}")

    # max(..., 1) avoids a ZeroDivisionError if the loader yielded no batches.
    print(f"Epoch {epoch + 1}: {running_loss / max(count, 1)}")
    torch.save(model.state_dict(), f"submission/{MODEL_KEY}-ep{epoch + 1}.pt")
    print("Saved model!")

[autoreload of submission.run failed: Traceback (most recent call last):
  File "/home/dsingh/miniconda3/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/home/dsingh/miniconda3/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 500, in superreload
    update_generic(old_obj, new_obj)
  File "/home/dsingh/miniconda3/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 397, in update_generic
    update(a, b)
  File "/home/dsingh/miniconda3/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 349, in update_class
    if update_generic(old_obj, new_obj):
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dsingh/miniconda3/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 397, in update_generic
    update(a, b)
  File "/home/dsingh/miniconda3/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 309, in update_function

Training model key OurResnetCombo-Full-NoWeather
MAE: 0.15056399


Epoch 1, 250: 0.08719039228558541:  39%|███▊      | 283/733 [02:31<03:59,  1.88it/s]

In [None]:
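# Save the current model weights to a fixed file name (uncomment to run).
# The training loop above already writes one checkpoint per epoch.
# torch.save(model.state_dict(), "submission/model.pt")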