In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split

device = "cuda" if torch.cuda.is_available() else "mps"

## CALCULATE METRICS

In [2]:
def getConfusionMatrix(labels: np.ndarray, predictions: np.ndarray):
    confusion = dict()
    confusion["TP"] = np.sum(labels & predictions)
    confusion["TN"] = np.sum(~labels & ~predictions)
    confusion["FP"] = np.sum(~labels & predictions)
    confusion["FN"] = np.sum(labels & ~predictions)
    return confusion

def getMetrics(confusion):
    metrics = dict()
    metrics["accuracy"] = (confusion["TP"] + confusion["TN"]) / (confusion["TP"] + confusion["TN"] + confusion["FP"] + confusion["FN"])
    metrics["precision"] = confusion["TP"] / (confusion["TP"] + confusion["FP"])
    metrics["recall"] = confusion["TP"] / (confusion["TP"] + confusion["FN"])
    metrics["f1"] = 2 * (metrics["precision"] * metrics["recall"]) / (metrics["precision"] + metrics["recall"])
    return metrics

def printMetrics(confusion):
    print("Accuracy: \t", confusion["accuracy"])
    print("Precision: \t", confusion["precision"])
    print("Recall: \t", confusion["recall"])
    print("F1: \t\t", confusion["f1"])

## LOAD DATA

In [3]:
covid = pd.read_parquet('covidClean.parquet')

toRemove = ["PATIENT_ID", "USMER", "SYMPTOMS_DATE",
            "MEDICAL_UNIT", "ADMISSION_DATE", "PATIENT_TYPE",
            "DEATH_DATE", "ORIGIN_COUNTRY"]
covid = covid.drop(columns = toRemove)
covid = covid.drop(columns= ["DIED", "INTUBED", "ICU"])


covid.AGE = covid.AGE.astype('float32')

labels = covid.AT_RISK
covid = covid.drop(columns = ["AT_RISK"])

trainX, testX, trainY, testY = train_test_split(covid, labels, test_size = 0.25)

print("Train labels:")
print(trainY.value_counts())
print("\nTest labels:")
print(testY.value_counts())

Train labels:
True     234113
False    223950
Name: AT_RISK, dtype: int64

Test labels:
True     77751
False    74937
Name: AT_RISK, dtype: int64


In [4]:
trainX  = np.array(trainX, dtype=np.float32)
trainX  = torch.tensor(trainX).to(device)

testX   = np.array(testX, dtype=np.float32)
testX   = torch.tensor(testX).to(device)

trainY  = torch.tensor(trainY.values, dtype=torch.float32).reshape(-1, 1).to(device)

In [5]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(14, 32)
        self.actInput = nn.ReLU()
        self.hidden1 = nn.Linear(32, 32)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(32, 32)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(32, 1)
        self.actOutput = nn.Sigmoid()
        
    def forward(self, x):
        x = self.actInput(self.input(x))
        x = self.act1(self.hidden1(x))
        x = self.act2(self.hidden2(x))
        x = self.actOutput(self.output(x))
        return x
    
mlp = MLP().to(device)
print(mlp)

MLP(
  (input): Linear(in_features=14, out_features=32, bias=True)
  (actInput): ReLU()
  (hidden1): Linear(in_features=32, out_features=32, bias=True)
  (act1): ReLU()
  (hidden2): Linear(in_features=32, out_features=32, bias=True)
  (act2): ReLU()
  (output): Linear(in_features=32, out_features=1, bias=True)
  (actOutput): Sigmoid()
)


In [6]:
lossFN = nn.BCELoss().to(device)
optimizer = optim.Adam(mlp.parameters(), lr=0.001)

In [8]:
n_epoch = 25
batchSize = 32

for epoch in range(n_epoch):
     loss = None
     for i in range(0, len(trainX), batchSize):
          x = trainX[i:i+batchSize]
          y = trainY[i:i+batchSize]
          pred = mlp(x)
          loss = lossFN(pred, y)
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()
     print(f"Epoch {epoch+1} loss: {loss}, accuracy: {((pred > 0.5) == y).sum().item() / len(y)}")
torch.save(mlp, "mlp.pt")

Epoch 1 loss: 0.6093527674674988, accuracy: 0.6666666666666666
Epoch 2 loss: 0.6131796836853027, accuracy: 0.6666666666666666
Epoch 3 loss: 0.6226098537445068, accuracy: 0.6666666666666666
Epoch 4 loss: 0.6191810965538025, accuracy: 0.6666666666666666
Epoch 5 loss: 0.6159239411354065, accuracy: 0.6666666666666666
Epoch 6 loss: 0.6168416738510132, accuracy: 0.6666666666666666


KeyboardInterrupt: 

In [None]:
mlp = torch.load("mlp.pt")

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


predTrain = mlp(trainX)
predTest = mlp(testX)

resTrain = (predTrain > 0.5)
resTest = (predTest > 0.5)

print("MLP Train: ")
print("Accuracy: \t", accuracy_score(trainY.cpu().detach().numpy().astype(bool), resTrain.cpu().detach().numpy()))
print("Precision: \t", precision_score(trainY.cpu().detach().numpy().astype(bool), resTrain.cpu().detach().numpy()))
print("Recall: \t", recall_score(trainY.cpu().detach().numpy().astype(bool), resTrain.cpu().detach().numpy()))
print("F1: \t\t", f1_score(trainY.cpu().detach().numpy().astype(bool), resTrain.cpu().detach().numpy()))

print("MLP Test: ")
print("Accuracy: \t", accuracy_score(testY, resTest.cpu().detach().numpy()))
print("Precision: \t", precision_score(testY, resTest.cpu().detach().numpy()))
print("Recall: \t", recall_score(testY, resTest.cpu().detach().numpy()))
print("F1: \t\t", f1_score(testY, resTest.cpu().detach().numpy()))

MLP Train: 
Accuracy: 	 0.6466293937733456
Precision: 	 0.6390760532090913
Recall: 	 0.7081607795371498
F1: 		 0.671847124460735
MLP Test: 
Accuracy: 	 0.6453355862936183
Precision: 	 0.6369703267521072
Recall: 	 0.7083809499351558
F1: 		 0.6707804169275758
