In [1]:
from data_preparation import Preparation
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn import metrics
import plotly.express as px

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
LEARNING_RATE = 0.00001
EPOCH = 85

In [3]:
df = pd.read_csv("COVIDiSTRESS_April_27_clean.csv", encoding= 'unicode_escape')
prep = Preparation()
df = prep.clean(df)
df = prep.selection_alteration(df)
df = prep.scaling(df)
y = df["PSS10_avg"].to_numpy().reshape(-1, 1)
x = df.drop("PSS10_avg", axis="columns").to_numpy()
x, x_test, y, y_test = train_test_split(x, y, test_size=0.02)
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1)



In [4]:
train_dataset = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
train_dataloader = DataLoader(train_dataset, BATCH_SIZE, shuffle=True)
test_dataset = TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test))
test_dataloader = DataLoader(test_dataset, BATCH_SIZE, shuffle=True)
val_dataset = TensorDataset(torch.Tensor(x_val), torch.Tensor(y_val))
val_dataloader = DataLoader(val_dataset, BATCH_SIZE, shuffle=True)

In [5]:
class StressNN(nn.Module):
    def __init__(self, input_size = 56):
        super(StressNN, self).__init__()
        self.LinBlock = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.LeakyReLU(),
            nn.Linear(256, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 64),
            nn.LeakyReLU(),
            nn.Linear(64, 32),
            nn.LeakyReLU(),
            nn.Linear(32, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 1)
        )
    def forward(self, x):
        x = self.LinBlock(x)
        x = torch.sigmoid(x)
        return x

In [6]:
def train(model, loss, maeloss, optimizer, data):
    nbat = len(data)
    aloss, aacc, ar2, amloss = 0, 0, 0, 0
    for batch, (x, y) in enumerate(data):
        x = x.to(DEVICE)
        y = y.to(DEVICE)
        ypred = model(x)
        bloss = loss(ypred ,y.float())
        with torch.no_grad():
            acc = ((ypred.squeeze()>=0.50).int()==y).sum().item()/BATCH_SIZE
            r2 = metrics.r2_score(y.cpu().numpy(), ypred.cpu().numpy())

            mloss = maeloss(ypred, y.float()).item()
            amloss += mloss
            aloss += bloss.item()
            aacc += acc
            ar2 += r2

        optimizer.zero_grad()
        bloss.backward()
        optimizer.step()

        print("Train - crossentropy_loss: %f  mae_loss:%f  r2: %f  accuracy: %f  [%d/%d]"%(bloss.item(), mloss, r2, acc, batch+1, nbat), end="\r")
    aloss /= nbat
    aacc /= nbat
    ar2 /= nbat
    amloss /= nbat
    print("\nTrain - crossentropy_loss: %f  mae_loss: %f  r2: %f  accuracy: %f  [Average]"%(aloss, amloss, ar2, aacc))
    return (aloss, amloss, ar2, aacc)

In [7]:
def test(model, loss, maeloss, data):
    nbat = len(data)
    acc, bloss, r2, mloss = 0, 0, 0, 0

    with torch.no_grad():
        for x, y in data:
            x = x.to(DEVICE)
            y = y.to(DEVICE)
            ypred = model(x)
            bloss += loss(ypred ,y.float())
            mloss += maeloss(ypred ,y.float()).item()
            acc += ((ypred.squeeze()>=0.50).int()==y).sum().item()/BATCH_SIZE
            r2 += metrics.r2_score(y.cpu().numpy(), ypred.cpu().numpy())
    
    acc /= nbat
    bloss /= nbat
    mloss /= nbat
    r2 /= nbat
    print("Test - crossentropy_loss: %f  mae_loss: %f  r2: %f  accuracy: %f"%(bloss.item(), mloss, r2, acc))
    return (bloss.item(), mloss, r2, acc)

In [8]:
def fit(model, loss, mloss, optimizer, train_loader, test_loader, epochs):
    train_history = {
        "epochs": list(range(1, epochs+1)),
        "crossentropy_loss": list(),
        "r2": list(),
        "mae_loss": list(),
        "accuracy": list()
    }
    test_history = {
        "epochs": list(range(1, epochs+1)),
        "crossentropy_loss": list(),
        "r2": list(),
        "mae_loss": list(),
        "accuracy": list()
    }
    for e in range(epochs):
        print("EPOCH %d/%d"%(e+1, epochs))

        l, m, r2, acc = train(model, loss, mloss, optimizer, train_loader)
        train_history["crossentropy_loss"].append(l)
        train_history["r2"].append(r2)
        train_history["mae_loss"].append(m)
        train_history["accuracy"].append(acc)

        l, m, r2, acc = test(model, loss, mloss, test_loader)
        test_history["crossentropy_loss"].append(l)
        test_history["r2"].append(r2)
        test_history["mae_loss"].append(m)
        test_history["accuracy"].append(acc)

        print("\n--------------------\n")
    return {
        "train": train_history,
        "test": test_history
    }

In [9]:
model = StressNN(x.shape[1]).to(DEVICE)
model

StressNN(
  (LinBlock): Sequential(
    (0): Linear(in_features=56, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=64, out_features=32, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Linear(in_features=32, out_features=16, bias=True)
    (9): LeakyReLU(negative_slope=0.01)
    (10): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [10]:
loss = nn.BCELoss()
mloss = nn.L1Loss(reduction="mean")
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [11]:
history = fit(model, loss, mloss, optimizer, train_dataloader, val_dataloader, EPOCH)

EPOCH 1/70
Train - crossentropy_loss: 0.680880  mae_loss:0.082848  r2: 0.264658  accuracy: 0.000000  [2293/2293]
Train - crossentropy_loss: 0.680413  mae_loss: 0.101107  r2: 0.227419  accuracy: 0.010330  [Average]
Test - crossentropy_loss: 0.673565  mae_loss: 0.087962  r2: 0.396084  accuracy: 0.009436

--------------------

EPOCH 2/70
Train - crossentropy_loss: 0.654360  mae_loss:0.112350  r2: 0.496765  accuracy: 0.000000  [2293/2293]
Train - crossentropy_loss: 0.671784  mae_loss: 0.086101  r2: 0.423242  accuracy: 0.009404  [Average]
Test - crossentropy_loss: 0.671899  mae_loss: 0.085265  r2: 0.430694  accuracy: 0.009559

--------------------

EPOCH 3/70
Train - crossentropy_loss: 0.679410  mae_loss:0.076879  r2: 0.430783  accuracy: 0.000000  [2293/2293]
Train - crossentropy_loss: 0.670659  mae_loss: 0.084062  r2: 0.447130  accuracy: 0.009390  [Average]
Test - crossentropy_loss: 0.671195  mae_loss: 0.083660  r2: 0.447409  accuracy: 0.008456

--------------------

EPOCH 4/70
Train - cro

In [12]:
px.line(pd.DataFrame(history["train"]))

In [13]:
px.line(pd.DataFrame(history["test"]))