# Importing Libraries

In [1]:
from data_preparation import Preparation
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn import metrics
import plotly.express as px

# Predefining Constants (Including Hyperparameters)

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
LEARNING_RATE = 0.00001
EPOCH = 85

# Loading the data and performing a train-test split

In [3]:
df = pd.read_csv("COVIDiSTRESS_April_May_Combined.csv", encoding= 'unicode_escape')

## Uncomment if preparation needs to be on the fly
# prep = Preparation()
# df = prep.clean(df)
# df = prep.selection_alteration(df)
# df = prep.scaling(df)

df = df.drop(["Unnamed: 0", "1", "Uninformative response"], axis="columns")
y = df["PSS10_avg"].to_numpy().reshape(-1, 1)
x = df.drop("PSS10_avg", axis="columns").to_numpy()
x, x_test, y, y_test = train_test_split(x, y, test_size=0.02)
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1)

# Creating a datset batch generator for training and testing

In [4]:
train_dataset = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
train_dataloader = DataLoader(train_dataset, BATCH_SIZE, shuffle=True)
test_dataset = TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test))
test_dataloader = DataLoader(test_dataset, BATCH_SIZE, shuffle=True)
val_dataset = TensorDataset(torch.Tensor(x_val), torch.Tensor(y_val))
val_dataloader = DataLoader(val_dataset, BATCH_SIZE, shuffle=True)

# Model Defination

In [5]:
class StressNN(nn.Module):
    def __init__(self, input_size = 54):
        super(StressNN, self).__init__()
        self.LinBlock = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.LeakyReLU(),
            nn.Linear(256, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 64),
            nn.LeakyReLU(),
            nn.Linear(64, 32),
            nn.LeakyReLU(),
            nn.Linear(32, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 1)
        )
    def forward(self, x):
        x = self.LinBlock(x)
        #x = torch.sigmoid(x)
        return x

# Creating Helper functions to facilitate training and testing

## Train function for mini-batch training

In [6]:
def train(model, loss, maeloss, mseloss, optimizer, data):
    nbat = len(data)
    aloss, aacc, ar2, amloss, amsloss = 0, 0, 0, 0, 0
    for batch, (x, y) in enumerate(data):
        x = x.to(DEVICE)
        y = y.to(DEVICE)
        ypred = model(x)
        bloss = loss(ypred ,y.float())
        with torch.no_grad():
            acc = ((ypred.squeeze()>=0.50).int()==y).sum().item()/BATCH_SIZE
            r2 = metrics.r2_score(y.cpu().numpy(), ypred.cpu().numpy())

            mloss = maeloss(ypred, y.float()).item()
            msloss = mseloss(ypred, y.float()).item()
            amloss += mloss
            amsloss += msloss
            aloss += bloss.item()
            aacc += acc
            ar2 += r2

        optimizer.zero_grad()
        bloss.backward()
        optimizer.step()

        print("Train - crossentropy_loss: %f  mae_loss:%f  r2: %f  mse_loss: %f  accuracy: %f  [%d/%d]"%(bloss.item(), mloss, r2, msloss, acc, batch+1, nbat), end="\r")
    aloss /= nbat
    aacc /= nbat
    ar2 /= nbat
    amloss /= nbat
    amsloss /= nbat
    print("\nTrain - crossentropy_loss: %f  mae_loss: %f  r2: %f  mse_loss: %f  accuracy: %f  [Average]"%(aloss, amloss, ar2, amsloss, aacc))
    return (aloss, amloss, ar2, amsloss, aacc)

## Test function to evaluate the model on unseen data

In [7]:
def test(model, loss, maeloss, mseloss, data):
    nbat = len(data)
    acc, bloss, r2, mloss, msloss = 0, 0, 0, 0, 0

    with torch.no_grad():
        for x, y in data:
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            ypred = model(x)
            bloss += loss(ypred ,y.float())
            mloss += maeloss(ypred ,y.float()).item()
            msloss += mseloss(ypred, y.float()).item()
            acc += ((ypred.squeeze()>=0.50).int()==y).sum().item()/BATCH_SIZE
            r2 += metrics.r2_score(y.cpu().numpy(), ypred.cpu().numpy())
    
    acc /= nbat
    bloss /= nbat
    mloss /= nbat
    msloss /= nbat
    r2 /= nbat
    print("Test - crossentropy_loss: %f  mae_loss: %f  mse_loss: %f  r2: %f  accuracy: %f"%(bloss.item(), mloss, msloss, r2, acc))
    return (bloss.item(), mloss, r2, msloss, acc)

## Training and simultaneous validation loop

In [8]:
def fit(model, loss, mloss, msloss, optimizer, train_loader, test_loader, epochs):
    train_history = {
        "epochs": list(range(1, epochs+1)),
        "crossentropy_loss": list(),
        "r2": list(),
        "mae_loss": list(),
        "mse_loss": list(),
        "accuracy": list()
    }
    test_history = {
        "epochs": list(range(1, epochs+1)),
        "crossentropy_loss": list(),
        "r2": list(),
        "mae_loss": list(),
        "mse_loss": list(),
        "accuracy": list()
    }
    for e in range(epochs):
        print("EPOCH %d/%d"%(e+1, epochs))

        l, m, r2, ms, acc = train(model, loss, mloss, msloss, optimizer, train_loader)
        train_history["crossentropy_loss"].append(l)
        train_history["r2"].append(r2)
        train_history["mae_loss"].append(m)
        train_history["mse_loss"].append(ms)
        train_history["accuracy"].append(acc)

        l, m, r2, ms, acc = test(model, loss, mloss, msloss, test_loader)
        test_history["crossentropy_loss"].append(l)
        test_history["r2"].append(r2)
        test_history["mae_loss"].append(m)
        test_history["mse_loss"].append(ms)
        test_history["accuracy"].append(acc)

        print("\n--------------------\n")
    return {
        "train": train_history,
        "test": test_history
    }

# Creating Model, loss, and optimizer objects

In [9]:
model = StressNN(x.shape[1]).to(DEVICE)
model

StressNN(
  (LinBlock): Sequential(
    (0): Linear(in_features=54, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=64, out_features=32, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Linear(in_features=32, out_features=16, bias=True)
    (9): LeakyReLU(negative_slope=0.01)
    (10): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [10]:
loss = nn.MSELoss()
mloss = nn.L1Loss(reduction="mean")
msloss = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training

In [11]:
history = fit(model, loss, mloss, msloss, optimizer, train_dataloader, val_dataloader, EPOCH)

EPOCH 1/85
Train - crossentropy_loss: 0.011142  mae_loss:0.091064  r2: 0.544516  mse_loss: 0.011142  accuracy: 0.000000  [4769/4769]
Train - crossentropy_loss: 0.015486  mae_loss: 0.098910  r2: 0.258861  mse_loss: 0.015486  accuracy: 0.011526  [Average]
Test - crossentropy_loss: 0.012328  mae_loss: 0.087836  mse_loss: 0.012328  r2: 0.404226  accuracy: 0.010200

--------------------

EPOCH 2/85
Train - crossentropy_loss: 0.016501  mae_loss:0.101127  r2: 0.519552  mse_loss: 0.016501  accuracy: 0.000000  [4769/4769]
Train - crossentropy_loss: 0.011884  mae_loss: 0.086026  r2: 0.424294  mse_loss: 0.011884  accuracy: 0.009436  [Average]
Test - crossentropy_loss: 0.011645  mae_loss: 0.085240  mse_loss: 0.011645  r2: 0.437740  accuracy: 0.009906

--------------------

EPOCH 3/85
Train - crossentropy_loss: 0.023809  mae_loss:0.127767  r2: -0.237820  mse_loss: 0.023809  accuracy: 0.000000  [4769/4769]
Train - crossentropy_loss: 0.011432  mae_loss: 0.084287  r2: 0.446701  mse_loss: 0.011432  acc

## Training Performance

In [12]:
px.line(pd.DataFrame(history["train"]))

## Testing Performance

In [13]:
px.line(pd.DataFrame(history["test"]))

# Saving the model for future predictions and feature importance generation

In [14]:
torch.save(model.state_dict(), "var/model_april_may_mse.pt")