In [10]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [11]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

data = pd.read_csv("../../data/clean_weather.csv", index_col=0)
data = data.ffill()

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [12]:
class WeatherDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return torch.from_numpy(self.x[idx]).float(), torch.from_numpy(self.y[idx]).float()

In [13]:
BATCH_SIZE = 32
EPOCHS = 50

train_dataset = WeatherDataset(train_x, train_y)
valid_dataset = WeatherDataset(valid_x, valid_y)

train = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [14]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.dense = nn.Linear(len(PREDICTORS), 1)

    def forward(self, x):
        return self.dense(x)

In [15]:
model = NeuralNetwork()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [16]:
for epoch in range(EPOCHS):
    epoch_loss = 0

    for batch, (x, y) in enumerate(train):
        optimizer.zero_grad()
        pred = model(x)

        loss = loss_fn(pred, y)
        epoch_loss += loss.item()

        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        with torch.no_grad():
            valid_loss = 0
            for batch, (x, y) in enumerate(valid):
                pred = model(x)
                loss = loss_fn(pred, y)
                valid_loss += loss.item()

        print(f"Epoch: {epoch} Train MSE: {epoch_loss/len(train)} Valid MSE: {valid_loss/len(valid)}")

Epoch: 0 Train MSE: 2587.9095252784523 Valid MSE: 1504.641393661499
Epoch: 10 Train MSE: 22.63124664087553 Valid MSE: 21.72279591858387
Epoch: 20 Train MSE: 22.138093909701787 Valid MSE: 21.206778809428215
Epoch: 30 Train MSE: 22.13684495880797 Valid MSE: 20.906649634242058
Epoch: 40 Train MSE: 22.142512514784528 Valid MSE: 21.068805783987045
