In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from pytorch_lightning import seed_everything
seed_everything(42)
# sets seeds for numpy, torch and python.random.

Global seed set to 42


42

Load train, val and test dataset

In [2]:
from torch.utils.data import Dataset

class EnronDataset(Dataset):
    def __init__(self, df, target_col, preprocess=True):
        df = self.preprocess(df) if preprocess else df
        attr_cols = list(df.columns)
        del(attr_cols[attr_cols.index(target_col)])
        self.x = torch.from_numpy(df[attr_cols].to_numpy()).float()
        self.y = torch.from_numpy(df[target_col].to_numpy()).float()

    def preprocess(self, df):
        hour = pd.get_dummies(df['hour'], prefix_sep='_', prefix='hour', drop_first=True)
        day = pd.get_dummies(df['day'], prefix_sep='_', prefix='day', drop_first=True)
        month = pd.get_dummies(df['month'], prefix_sep='_', prefix='month', drop_first=True)
        weekday = pd.get_dummies(df['weekday'], prefix_sep='_', prefix='weekday', drop_first=True)

        # Merge the original df with the categorical computed feature (order id preserved)
        df = pd.concat([df, hour, day, month, weekday], axis=1, )

        # drop the numerical df attributes
        df.drop(
            columns=['hour', 'day', 'month', 'weekday'], 
            axis=1, 
            inplace=True
        )

        return df

    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, i):
        return self.x[i], self.y[i]

In [3]:
import pandas as pd

df_train = pd.read_csv(
    'data/enron_train_hourly.csv',
)
df_val = pd.read_csv(
    'data/enron_val_hourly.csv',
)
df_train_val = pd.read_csv(
    'data/enron_train+val_hourly.csv',
)
df_test_weekday = pd.read_csv(
    'data/enron_wednesday-15-february_hourly.csv',
)
df_test_holyday = pd.read_csv(
    'data/enron_sunday_10_june_hourly.csv',
)

target_col = 'counter'

In [4]:
import os
from torch.utils.data import DataLoader

train_loader = DataLoader(
    EnronDataset(df_train, target_col),
    # num_workers=os.cpu_count(),
    num_workers=0,
    batch_size=len(df_train)
)

val_loader = DataLoader(
    EnronDataset(df_val, target_col),
    # num_workers=os.cpu_count(),
    num_workers=0,
    batch_size=len(df_val)
)

train_val_loader = DataLoader(
    EnronDataset(df_train_val, target_col),
    # num_workers=os.cpu_count(),
    num_workers=0,
    batch_size=len(df_train_val)
)

test_weekday_loader = DataLoader(
    EnronDataset(df_val, target_col),
    # num_workers=os.cpu_count(),
    num_workers=0,
    batch_size=len(df_test_weekday)
)

test_holyday_loader = DataLoader(
    EnronDataset(df_val, target_col),
    # num_workers=os.cpu_count(),
    num_workers=0,
    batch_size=len(df_test_holyday)
)

Define a Regression Neural Network

In [5]:
import pytorch_lightning as pl

class LinearNetwork(pl.LightningModule):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(input_size, hidden_size), 
            nn.ReLU(), 
            nn.Linear(hidden_size, hidden_size), 
            nn.ReLU(), 
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        x = self.mlp(x)
        return x

    def training_step(self, batch, batch_idx):
        # training_step defined the train loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        y = y.view(y.size(0), 1)
        y_pred = self.mlp(x)
        loss = F.mse_loss(y_pred, y)
        # Logging to TensorBoard by default
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y = y.view(y.size(0), 1)
        y_pred = self.mlp(x)
        loss = F.mse_loss(y_pred, y)
        # Logging to TensorBoard by default
        self.log("val_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y = y.view(y.size(0), 1)
        y_pred = self.mlp(x)
        print("test_predictions", y_pred)
        loss = F.mse_loss(y_pred, y)
        # Logging to TensorBoard by default
        self.log("test_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.1)
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9999)
        return [optimizer], [lr_scheduler]

model = LinearNetwork(
    input_size=len(df_train.columns)-1,
    hidden_size=128,
    output_size=1
)

In [6]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

trainer = Trainer(
    deterministic=True, 
    max_epochs=4000,
    # log_every_n_steps=1,
    check_val_every_n_epoch=100,
    callbacks=[EarlyStopping(monitor="val_loss", mode="min")]
)
trainer.fit(
    model=model, 
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /Users/lorenzobacchiani/Desktop/ABS-Simulations-Comparison/predictiveGlobalScaling/workload_predictor/lightning_logs

  | Name | Type       | Params
------------------------------------
0 | mlp  | Sequential | 17.3 K
------------------------------------
17.3 K    Trainable params
0         Non-trainable params
17.3 K    Total params
0.069     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3621x70 and 4x128)

In [None]:
trainer.fit(
    model=model, 
    train_dataloaders=train_val_loader,
)

In [None]:
trainer.test(
    ckpt_path="best", 
    dataloaders=test_weekday_loader,
    verbose=True
)

In [None]:
trainer.test(
    ckpt_path="best", 
    dataloaders=test_holyday_loader,
    verbose=True
)