In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import torch

from torch.utils.data import DataLoader

from utils.config import Training_Config
from utils.loops import train_loop, eval_loop
from utils.lstm import LSTMRegressor
from utils.weather_dataset import WeatherDataset

# Training settings object; later cells read lag_duration, batch_size and
# learning_rate from it and increment its epochs counter after each run.
config = Training_Config()
# Root folder of the dataset files (relative path); every CSV read below is
# located underneath it.
data_folder = '../resource/dataset'

# Load dataset

In [None]:
def _read_split(split):
    """Read one CSV per attribute listed in ``metadata`` for the given split.

    ``split`` is the sub-folder name under ``train-test-split`` ('train' or
    'test'). Returns a dict mapping attribute name to its DataFrame, in
    metadata order; the first CSV column becomes the index.
    """
    return {
        attribute: pd.read_csv(
            f'{data_folder}/train-test-split/{split}/{attribute}.csv', index_col=0
        )
        for attribute in metadata['attributes']
    }


# The 'weather_description' attribute is dropped from the attribute list, so
# no frame is loaded for it.
metadata = pd.read_csv(f'{data_folder}/metadata.csv')
metadata = metadata.loc[metadata['attributes'] != 'weather_description']
raw_city_attributes = pd.read_csv(f'{data_folder}/raw/city_attributes.csv')

train_dfs = _read_split('train')
test_dfs = _read_split('test')

# Keep only the coordinates of the cities that appear as columns of the
# humidity frame, in that same column order.
city_columns = train_dfs['humidity'].columns
city_coords = raw_city_attributes.set_index('City')
city_coords = city_coords.loc[city_columns, ['Latitude', 'Longitude']]

# Both datasets receive the attribute frames in the same (metadata) order.
train_set, test_set = (
    WeatherDataset(config.lag_duration, city_coords, *frames.values())
    for frames in (train_dfs, test_dfs)
)

# Build model and training utilities

In [5]:
def _mean_loss(batch_losses):
    """Return the arithmetic mean of ``batch_losses``, or NaN when it is empty."""
    if len(batch_losses) == 0:
        # An empty loader yields no losses; report NaN instead of dividing by zero.
        return float('nan')
    return sum(batch_losses) / len(batch_losses)


def run_epochs(
    epochs,
    model, loss_fn,
    optimizer,
    train_loader,
    test_loader,
    use_gpu: bool = False,
):
    """Run ``epochs`` rounds of training followed by evaluation.

    Each epoch calls ``train_loop`` on ``train_loader`` and then ``eval_loop``
    on ``test_loader``, printing the mean of the values each loop returns.

    Args:
        epochs: Number of train/eval rounds to run.
        model: Model passed through to ``train_loop`` and ``eval_loop``.
        loss_fn: Loss function passed through to both loops.
        optimizer: Optimizer passed through to ``train_loop``.
        train_loader: Data source for the training pass.
        test_loader: Data source for the evaluation pass.
        use_gpu: Forwarded unchanged to both loops.

    Returns:
        A dict with keys ``'train'`` and ``'eval'``; each holds one entry per
        epoch, namely the sequence returned by the corresponding loop
        (presumably one loss value per batch -- confirm in ``utils.loops``).
    """
    loss = {
        'train': [],
        'eval': []
    }
    for epoch in range(epochs):
        print(f"Epoch {epoch}:")
        train_loss = train_loop(
            model, loss_fn, optimizer, train_loader, use_gpu
        )
        print(f"  Train loss: {_mean_loss(train_loss):<.8f} || ", end='')

        eval_loss = eval_loop(
            model, loss_fn, test_loader, use_gpu
        )
        print(f"Eval loss: {_mean_loss(eval_loss)}")

        # Store the raw per-epoch sequences so callers can aggregate them as needed.
        loss['train'].append(train_loss)
        loss['eval'].append(eval_loss)

    return loss

In [None]:
# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(train_set, config.batch_size, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition, and therefore the reported eval loss, reproducible across epochs.
test_loader = DataLoader(test_set, config.batch_size, shuffle=False)

In [None]:
# Size of the second axis of the first training sample's first element, and
# the number of loaded attribute frames; both are passed positionally to the
# model constructor.
sample_width = train_set[0][0].shape[1]
n_attributes = len(train_dfs)

lstm_regressor = LSTMRegressor(
    sample_width,
    n_attributes,
    fc_hidden_dims=(),
    hidden_size=512,
    num_layers=4,
)

# Move the model to the GPU whenever one is available.
cuda_available = torch.cuda.is_available()
if cuda_available:
    lstm_regressor.cuda()

mse_loss = torch.nn.MSELoss()
adam_opt = torch.optim.Adam(
    lstm_regressor.parameters(),
    lr=config.learning_rate,
)

# Train loop

In [16]:
# Session-wide loss history: each training run appends its per-epoch losses
# under the matching key. Re-running this cell clears the history.
global_loss = {phase: [] for phase in ('train', 'eval')}

In [None]:
# Number of epochs for this run; re-running the cell trains for that many
# more epochs on the same model and optimizer.
epochs = 50

loss = run_epochs(
    epochs=epochs,
    model=lstm_regressor,
    loss_fn=mse_loss,
    optimizer=adam_opt,
    train_loader=train_loader,
    test_loader=test_loader,
    use_gpu=torch.cuda.is_available(),
)

# Append this run's per-epoch losses to the session history and keep the
# epoch count stored in the config in step with the training done so far.
for phase in ('train', 'eval'):
    global_loss[phase].extend(loss[phase])
config.epochs += epochs

In [None]:
# Plot the mean loss of every epoch recorded so far, for both phases.
fig, ax = plt.subplots()
ax.set_title(f"Learning rate: {config.learning_rate}")

for phase, label in (('train', 'Train Loss'), ('eval', 'Eval Loss')):
    # One point per epoch: the average of that epoch's recorded losses.
    # An epoch with no recorded losses is plotted as NaN (a gap) rather than
    # raising ZeroDivisionError.
    epoch_means = [
        sum(batch_losses) / len(batch_losses) if len(batch_losses) else float('nan')
        for batch_losses in global_loss[phase]
    ]
    ax.plot(epoch_means, label=label)

ax.set_xlabel('Epoch')
ax.set_ylabel('Mean loss')
ax.legend();

In [None]:
# Everything needed to resume or inspect this run: model and optimizer
# state, the accumulated loss history and the training configuration.
checkpoint = {
    'model': lstm_regressor.state_dict(),
    'optimizer': adam_opt.state_dict(),
    'loss': global_loss,
    'config': config.to_dict()
}

# torch.save does not create missing directories; make sure the target folder
# exists so a finished training run is not lost to a missing path.
checkpoint_path = Path('../resource/models/lstm_mlp/4layer_2mlp_cp4.tar')
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(checkpoint, checkpoint_path)