In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from pytorch_lightning import seed_everything
seed_everything(42, workers=True)
# sets seeds for numpy, torch and python.random.

Global seed set to 42


42

Load train, val and test dataset

In [2]:
import pandas as pd

df_all = pd.read_csv(
    'data/all_emails_categorical_hour.csv',
)
df_train = pd.read_csv(
    'data/train_emails_categorical_hour.csv',
)
df_val = pd.read_csv(
    'data/val_emails_categorical_hour.csv',
)
df_test = pd.read_csv(
    'data/tuesday_27_june_emails_categorical_hour.csv',
)

In [3]:
df_train.head(3)

Unnamed: 0,counter,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,...,month_9,month_10,month_11,month_12,weekday_1,weekday_2,weekday_3,weekday_4,weekday_5,weekday_6
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,75,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [4]:
from torch.utils.data import Dataset

class EnronDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, i):
        return self.x[i], self.y[i]

In [5]:
target_col = 'counter'
train_cols = list(df_all.columns)
del(train_cols[train_cols.index('counter')])

In [6]:
import os
from torch.utils.data import DataLoader

train_loader = DataLoader(
    EnronDataset(
        torch.from_numpy(df_train[train_cols].to_numpy()).float(),
        torch.from_numpy(df_train[target_col].to_numpy()).float(),
    ),
    num_workers=os.cpu_count(),
    batch_size=len(df_train)
)

val_loader = DataLoader(
    EnronDataset(
        torch.from_numpy(df_val[train_cols].to_numpy()).float(),
        torch.from_numpy(df_val[target_col].to_numpy()).float(),
    ),
    num_workers=os.cpu_count(),
    batch_size=len(df_val)
)

test_loader = DataLoader(
    EnronDataset(
        torch.from_numpy(df_test[train_cols].to_numpy()).float(),
        torch.from_numpy(df_test[target_col].to_numpy()).float(),
    ),
    num_workers=os.cpu_count(),
    batch_size=len(df_test)
)

Define a Regression Neural Network

In [7]:
import pytorch_lightning as pl

class LinearNetwork(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(70, 64), 
            nn.ReLU(), 
            nn.Linear(64, 32), 
            nn.ReLU(), 
            nn.Linear(32, 1)
        )

    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        x = self.mlp(x)
        return x

    def training_step(self, batch, batch_idx):
        # training_step defined the train loop.
        # It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        y = y.view(y.size(0), 1)
        y_pred = self.mlp(x)
        loss = F.mse_loss(y_pred, y)
        # Logging to TensorBoard by default
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y = y.view(y.size(0), 1)
        y_pred = self.mlp(x)
        loss = F.mse_loss(y_pred, y)
        # Logging to TensorBoard by default
        self.log("val_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y = y.view(y.size(0), 1)
        y_pred = self.mlp(x)
        print("test_predictions", y_pred)
        loss = F.mse_loss(y_pred, y)
        # Logging to TensorBoard by default
        self.log("test_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.1)
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9999)
        return [optimizer], [lr_scheduler]

model = LinearNetwork()

In [8]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

trainer = Trainer(
    deterministic=True, 
    max_epochs=4000,
    log_every_n_steps=1,
    check_val_every_n_epoch=100,
    callbacks=[EarlyStopping(monitor="val_loss", mode="min")]
)
trainer.fit(
    model=model, 
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type       | Params
------------------------------------
0 | mlp  | Sequential | 6.7 K 
------------------------------------
6.7 K     Trainable params
0         Non-trainable params
6.7 K     Total params
0.027     Total estimated model params size (MB)


Epoch 699: 100%|██████████| 2/2 [03:38<00:00, 109.49s/it, loss=218, v_num=20]    


In [9]:
trainer.test(
    ckpt_path="best", 
    dataloaders=test_loader,
    verbose=True
)

Restoring states from the checkpoint path at /home/thezingaro/Developer/ABS-Simulations-Comparison/predictiveGlobalScaling/workload_predictor/lightning_logs/version_20/checkpoints/epoch=699-step=700.ckpt
Loaded model weights from checkpoint at /home/thezingaro/Developer/ABS-Simulations-Comparison/predictiveGlobalScaling/workload_predictor/lightning_logs/version_20/checkpoints/epoch=699-step=700.ckpt


Testing DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]test_predictions tensor([[  14.8872],
        [   8.1838],
        [  10.0693],
        [  11.8793],
        [   9.1408],
        [  11.1155],
        [  16.5406],
        [  14.8124],
        [  26.9808],
        [  17.7927],
        [  11.7585],
        [  19.3534],
        [   7.3737],
        [  20.7238],
        [  10.7997],
        [  32.6084],
        [1129.9993],
        [   7.6053],
        [   6.0110],
        [   8.0640],
        [   6.4636],
        [   3.1282],
        [   5.5628],
        [   9.0316]])
Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss            54.17729187011719
──────────────────

[{'test_loss': 54.17729187011719}]

In [10]:
preds = torch.tensor([13,12,15,12,15,13,15,22,22,24,21,17,10,14,12,24,1121,7,6,8,7,5,4,5]).float()
targets = torch.tensor([9,4,12,10,9,6,24,36,29,26,12,23,14,21,19,13,1128,1,5,8,6,5,2,1]).float()