In [1]:
%load_ext autoreload
%autoreload 3
import sys
import os
sys.path.append(os.path.abspath('../src'))

import yaml
import torch
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
import random
import os
from sklearn.model_selection import train_test_split

from data_utils import ReportingDataset
from train_utils import SubsetSampler, SubsetRandomSampler

# On CUDA >= 10.2, cuBLAS is only deterministic when this workspace setting is
# present; without it, deterministic mode raises a RuntimeError at the first
# CUDA matrix multiplication. It must be set before any CUDA work happens, and
# setdefault keeps whatever value the user may already have exported.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
# Use deterministic kernels where available and raise a RuntimeError for
# operations that only have nondeterministic implementations.
torch.use_deterministic_algorithms(True)

In [2]:
# Read the shared experiment configuration from the YAML file
with open('../config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Pull out the two config sections once, then read the individual settings
training_cfg = config['training']
model_cfg = config['model']

SEED = training_cfg['seed']
BATCH_SIZE = training_cfg['batch_size']
DEVICE = training_cfg['device']

PAST_UNITS = model_cfg['past_units']
MAX_DELAY = model_cfg['max_delay']
STATE = model_cfg['state']

# Experiment switches (forwarded to the dataset, early stopper and weight-file name below)
weeks, random_split, dow = False, True, False

In [3]:
# Load the dengue reporting-delay table and convert it to a float32 matrix
path = "../data/dengue-sp-reporting-delay.csv"
dengdf = pd.read_csv(path, index_col=0)
dengdf = dengdf.values.astype(np.float32)

# Largest single entry of the matrix; handed to the dataset as max_val
max_val = dengdf.max()

dataset = ReportingDataset(
    dengdf,
    max_val=max_val,
    triangle=True,
    past_units=PAST_UNITS,
    max_delay=MAX_DELAY,
    future_obs=0,
    vector_y=False,
    dow=dow,
    return_number_obs=0,
)


In [4]:
# Create a seeded train/validation/test split of the dataset indices for reproducibility
all_idcs = range(len(dataset))
train_idcs, test_idcs = train_test_split(
    all_idcs, test_size=0.25, shuffle=True, random_state=SEED
)
train_idcs, val_idcs = train_test_split(
    train_idcs, test_size=0.25, shuffle=True, random_state=SEED
)

# Validation and test data are each served as one batch covering the whole split
val_batch_size = len(val_idcs)
test_batch_size = len(test_idcs)

# Seeded generator handed to the training sampler
g = torch.Generator().manual_seed(SEED)

train_sampler = SubsetRandomSampler(train_idcs, generator=g)
val_sampler = SubsetRandomSampler(val_idcs)
test_sampler = SubsetSampler(test_idcs)

train_loader = DataLoader(
    dataset, batch_size=BATCH_SIZE, sampler=train_sampler
)
val_loader = DataLoader(
    dataset, batch_size=val_batch_size, sampler=val_sampler, shuffle=False
)
test_loader = DataLoader(
    dataset, batch_size=test_batch_size, sampler=test_sampler, shuffle=False
)


In [5]:
def regen_data():
    """Rebuild the global ``train_loader`` from scratch.

    A new ``torch.Generator`` seeded with ``SEED`` is passed to a new
    ``SubsetRandomSampler`` over ``train_idcs``, and the module-level
    ``train_loader`` is replaced with a ``DataLoader`` using that sampler.
    """
    global train_loader
    fresh_generator = torch.Generator().manual_seed(SEED)
    sampler = SubsetRandomSampler(train_idcs, generator=fresh_generator)
    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=sampler)

def set_seeds(SEED):
    """Seed the Python, NumPy and PyTorch random number generators.

    Besides seeding, this writes ``PYTHONHASHSEED`` into ``os.environ`` and
    switches cuDNN to its deterministic mode.

    Args:
        SEED: Integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(SEED)

    # Global generators of the standard library and NumPy
    random.seed(SEED)
    np.random.seed(SEED)

    # PyTorch generators and cuDNN determinism flag
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

# Seed the Python, NumPy and PyTorch RNGs as soon as this cell runs
set_seeds(SEED)

In [6]:
%reload_ext autoreload
%autoreload 3
from train_utils import train, EarlyStopper
from NowcastPNN import NowcastPNN
set_seeds(SEED) # re-seed the RNGs so repeated training runs are reproducible
regen_data() # rebuild train_loader with a sampler generator freshly seeded from SEED

In [None]:
# Build the early stopper and the network, then train
early_stopper = EarlyStopper(
    patience=30,
    past_units=PAST_UNITS,
    max_delay=MAX_DELAY,
    weeks=weeks,
    future_obs=0,
    random_split=random_split,
    dow=dow,
)
nowcast_pnn = NowcastPNN(
    past_units=PAST_UNITS,
    max_delay=MAX_DELAY,
    conv_channels=[16, 1],
    hidden_units=[16, 8],
    dropout_probs=[0.3, 0.1],
)
train(
    nowcast_pnn,
    num_epochs=200,
    train_loader=train_loader,
    val_loader=val_loader,
    early_stopper=early_stopper,
    loss_fct="nll",
    device=DEVICE,
    dow=dow,
)

# Reload the weights stored on disk for this configuration
# (presumably the best-validation checkpoint written by EarlyStopper — confirm in train_utils).
weights_path = f"../src/outputs/weights/weights-{PAST_UNITS}-{MAX_DELAY}-{'week' if weeks else 'day'}-fut0{'-rec' if not random_split else ''}{'-dow' if dow else ''}"
# map_location="cpu" lets the file deserialize even if it was saved on a device
# that is not available now; load_state_dict then copies the tensors into the
# model's existing parameters. weights_only=True restricts unpickling to
# tensors and plain containers instead of arbitrary Python objects.
state_dict = torch.load(weights_path, map_location="cpu", weights_only=True)
nowcast_pnn.load_state_dict(state_dict)



Epoch 1 - Train loss: 2.49e+05 - Val loss: 1.88e+06 - ES count: 0
Epoch 2 - Train loss: 2.42e+05 - Val loss: 1.81e+06 - ES count: 0
Epoch 3 - Train loss: 2.38e+05 - Val loss: 1.82e+06 - ES count: 1
Epoch 4 - Train loss: 2.33e+05 - Val loss: 1.74e+06 - ES count: 0


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x106eb37d0>>
Traceback (most recent call last):
  File "/Users/jamieharris/Documents/GitHub/Imperial/Dengue-Nowcasting-Thesis/DengeNowcastingVenv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


Epoch 5 - Train loss: 2.29e+05 - Val loss: 1.71e+06 - ES count: 0
Epoch 6 - Train loss: 2.25e+05 - Val loss: 1.73e+06 - ES count: 1
Epoch 7 - Train loss: 2.21e+05 - Val loss: 1.7e+06 - ES count: 0
Epoch 8 - Train loss: 2.18e+05 - Val loss: 1.59e+06 - ES count: 0
Epoch 9 - Train loss: 2.13e+05 - Val loss: 1.69e+06 - ES count: 1
