In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix


In [None]:
# Read the exam table from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='covid_exams_final.csv')
print("Dataset loaded successfully.")
# Preview the first five rows in the notebook output.
display(df.head(n=5))


Dataset loaded successfully.


Unnamed: 0,ID_aTENDIMENTO,25HIDROXIVITAMINA_D,ADENOSINA_DEAMINASE,ADENOVIRUS,AGNOR,ALANINA_AMINOTRANSFERASE,ALBUMINA,ALDOSTERONA,ALFA_1_ANTITRIPSINA,ALFA_1_GLICOPROTEÍNA_ÁCIDA,...,VELOCIDADE_DE_HEMOSSEDIMENTAÇÃO,VITAMINA_B_12,VLDL__COLESTEROL,VÍRUS_SINCICIAL_RESPIRATÓRIO,Vírus_sincicial_respiratório_A_HRSVA_e_B_HRSVB,ÁCIDO_FÓLICO,ÁCIDO_VALPRÓICO,ÁCIDO_ÚRICO,AA_NASCIMENTO,IC_SEXO
0,016586ae084a86326b913fe74146b102,0.0,0.0,0,0.0,199,0.0,0.0,0,0,...,0,0.0,0,0.0,0.0,0.0,0.0,0.0,1979,1
1,018c7e14ec0445c170461b744bd7e026,0.0,0.0,0,0.0,78,2.9,0.0,0,0,...,0,0.0,46,0.0,0.0,0.0,0.0,0.0,1974,1
2,0200a7fee74490fcac31fba3c1c3b282,0.0,0.0,0,0.0,17,2.9,0.0,0,0,...,0,0.0,0,0.0,0.0,0.0,0.0,0.0,1985,0
3,02a99cab514cf42129c3bbc01a53b3a3,0.0,0.0,0,0.0,106,2.1,0.0,0,0,...,0,0.0,44,0.0,0.0,0.0,0.0,0.0,1962,0
4,033a501926526b396c8672c83a8834f2,0.0,0.0,0,0.0,37,0.0,0.0,0,0,...,0,0.0,0,0.0,0.0,0.0,0.0,0.0,1958,1


In [None]:

# Target vector: the 'COVID19__PESQUISA_DE_ANTICORPOS_IgG' column.
y_train = df['COVID19__PESQUISA_DE_ANTICORPOS_IgG']
# Feature matrix: every remaining column except the 'ID_aTENDIMENTO' column.
X_train = df.drop(columns=['COVID19__PESQUISA_DE_ANTICORPOS_IgG', 'ID_aTENDIMENTO'])


In [None]:

# Network sizes: two output logits, and one input unit per feature column.
output_dim = 2
input_dim = len(X_train.columns)

class SimpleNet(nn.Module):
    """Fully connected ReLU network that maps feature vectors to class logits.

    The network is a stack of ``num_hidden_layers`` ``Linear -> ReLU`` pairs
    (the first pair maps ``input_dim`` to ``hidden_dim``, the rest map
    ``hidden_dim`` to ``hidden_dim``) followed by a final ``Linear`` layer that
    produces ``output_dim`` raw logits. No softmax is applied.

    With the default arguments the architecture is 14 hidden pairs of width
    256, i.e. 15 ``Linear`` layers in total, stored in ``self.net`` as an
    ``nn.Sequential`` indexed 0..28.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output logits (classes).
        hidden_dim: Width of every hidden layer. Defaults to 256.
        num_hidden_layers: Number of ``Linear -> ReLU`` pairs before the
            output layer. Defaults to 14. ``0`` yields a single linear layer.

    Raises:
        ValueError: If ``num_hidden_layers`` is negative.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=256, num_hidden_layers=14):
        super().__init__()
        if num_hidden_layers < 0:
            raise ValueError("num_hidden_layers must be >= 0")

        # Layer widths from the input through every hidden layer; consecutive
        # entries give the (in_features, out_features) of each hidden Linear.
        widths = [input_dim] + [hidden_dim] * num_hidden_layers

        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
        # Output layer: no activation, so the module returns raw logits.
        layers.append(nn.Linear(widths[-1], output_dim))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by passing ``x`` through ``self.net``."""
        return self.net(x)

# Instantiate the network with the sizes computed above.
model = SimpleNet(input_dim=input_dim, output_dim=output_dim)


In [None]:
# Cross-entropy loss over the class logits, optimised with plain SGD at a
# fixed learning rate of 0.01 on the parameters of `model`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [None]:
# Shuffled 5-fold split with a fixed random_state so fold membership is stable.
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=43)

# Per-fold frames/series, keyed as 'X_train_<n>', 'y_train_<n>', 'X_val_<n>',
# 'y_val_<n>' with n counted from 1.
X_train_folds, y_train_folds = {}, {}
X_val_folds, y_val_folds = {}, {}

for fold_no, (train_index, val_index) in enumerate(kf.split(X_train, y_train), start=1):
    # kf.split yields positional indices, hence .iloc.
    X_train_folds[f'X_train_{fold_no}'] = X_train.iloc[train_index]
    y_train_folds[f'y_train_{fold_no}'] = y_train.iloc[train_index]
    X_val_folds[f'X_val_{fold_no}'] = X_train.iloc[val_index]
    y_val_folds[f'y_val_{fold_no}'] = y_train.iloc[val_index]

    # Report the shape of each part of this fold.
    print(f"Fold {fold_no}:")
    fold_parts = (
        ('X_train', X_train_folds[f'X_train_{fold_no}']),
        ('y_train', y_train_folds[f'y_train_{fold_no}']),
        ('X_val', X_val_folds[f'X_val_{fold_no}']),
        ('y_val', y_val_folds[f'y_val_{fold_no}']),
    )
    for part_name, part in fold_parts:
        print(f"  {part_name}_{fold_no} shape: {part.shape}")


Fold 1:
  X_train_1 shape: (443, 359)
  y_train_1 shape: (443,)
  X_val_1 shape: (111, 359)
  y_val_1 shape: (111,)
Fold 2:
  X_train_2 shape: (443, 359)
  y_train_2 shape: (443,)
  X_val_2 shape: (111, 359)
  y_val_2 shape: (111,)
Fold 3:
  X_train_3 shape: (443, 359)
  y_train_3 shape: (443,)
  X_val_3 shape: (111, 359)
  y_val_3 shape: (111,)
Fold 4:
  X_train_4 shape: (443, 359)
  y_train_4 shape: (443,)
  X_val_4 shape: (111, 359)
  y_val_4 shape: (111,)
Fold 5:
  X_train_5 shape: (444, 359)
  y_train_5 shape: (444,)
  X_val_5 shape: (110, 359)
  y_val_5 shape: (110,)


In [None]:
# Train a fresh SimpleNet on each fold's training part and report the
# confusion-matrix counts on that fold's validation part.
num_epochs = 1000
batch_size = 32
torch_seed = 43  # base seed; offset by the fold index below

for fold in range(k):
    print(f"Starting training for Fold {fold+1}")

    X_train_fold = X_train_folds[f'X_train_{fold+1}']
    y_train_fold = y_train_folds[f'y_train_{fold+1}']
    X_val_fold = X_val_folds[f'X_val_{fold+1}']
    y_val_fold = y_val_folds[f'y_val_{fold+1}']

    # float32 inputs for the Linear layers; int64 class indices for
    # CrossEntropyLoss.
    X_train_tensor = torch.tensor(X_train_fold.values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train_fold.values, dtype=torch.long)
    X_val_tensor = torch.tensor(X_val_fold.values, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val_fold.values, dtype=torch.long)

    # Seed before building the model so the random weight initialisation, and
    # therefore the reported metrics, are repeatable across runs.
    torch.manual_seed(torch_seed + fold)

    # A new model and optimizer per fold so no weights leak between folds.
    model = SimpleNet(input_dim, output_dim)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    model.train()
    for epoch in range(num_epochs):
        # Sequential mini-batches in fixed row order (no per-epoch shuffling).
        for i in range(0, len(X_train_tensor), batch_size):
            inputs = X_train_tensor[i:i + batch_size]
            labels = y_train_tensor[i:i + batch_size]

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    print(f"Finished training for Fold {fold+1}")

    model.eval()
    with torch.no_grad():
        outputs = model(X_val_tensor)
        # Predicted class = index of the largest logit.
        predicted = outputs.argmax(dim=1)
        # labels=[0, 1] forces a 2x2 matrix even when a fold's true/predicted
        # labels contain a single class; without it ravel() would not yield
        # four values and the unpacking would raise ValueError.
        tn, fp, fn, tp = confusion_matrix(
            y_val_tensor.numpy(), predicted.numpy(), labels=[0, 1]
        ).ravel()

        print(f"Fold {fold+1} Validation Metrics:")
        print(f"  True Positives (TP): {tp}")
        print(f"  True Negatives (TN): {tn}")
        print(f"  False Positives (FP): {fp}")
        print(f"  False Negatives (FN): {fn}")

print("K-fold cross-validation complete.")

Starting training for Fold 1
Finished training for Fold 1
Fold 1 Validation Metrics:
  True Positives (TP): 50
  True Negatives (TN): 11
  False Positives (FP): 27
  False Negatives (FN): 23
Starting training for Fold 2
Finished training for Fold 2
Fold 2 Validation Metrics:
  True Positives (TP): 50
  True Negatives (TN): 13
  False Positives (FP): 32
  False Negatives (FN): 16
Starting training for Fold 3
Finished training for Fold 3
Fold 3 Validation Metrics:
  True Positives (TP): 66
  True Negatives (TN): 14
  False Positives (FP): 21
  False Negatives (FN): 10
Starting training for Fold 4
Finished training for Fold 4
Fold 4 Validation Metrics:
  True Positives (TP): 56
  True Negatives (TN): 16
  False Positives (FP): 23
  False Negatives (FN): 16
Starting training for Fold 5
Finished training for Fold 5
Fold 5 Validation Metrics:
  True Positives (TP): 63
  True Negatives (TN): 7
  False Positives (FP): 23
  False Negatives (FN): 17
K-fold cross-validation complete.
