In [1]:
import torch
import pandas as pd
import numpy as np
from Model import Model
from PMDataset import PMDatasetPredict
from tqdm.notebook import tqdm

In [2]:
def predict(
    model: torch.nn.Module,
    dataLoader: torch.utils.data.DataLoader,
    idCSVPath,
    device: torch.device,
):
    """
    Run binary inference over a dataset and attach the predicted class to its ID table.

    Every batch is moved to ``device`` and passed through ``model``; the logits are
    turned into hard 0/1 labels with ``round(sigmoid(logits))``. The labels are then
    stored, in iteration order, in a new ``"class"`` column of the table read from
    ``idCSVPath``.

    Parameters
    ----------
    model: The model used for inference. It is put in eval mode; it is NOT moved to
        ``device`` by this function.

    dataLoader: Dataloader yielding input batches only (no targets). Predictions are
        matched to the CSV rows by position, so it must iterate in the same order as
        the rows of ``idCSVPath`` (i.e. no shuffling).

    idCSVPath: Path to the CSV file holding one row per sample.

    device: Device the input batches are moved to.

    Returns
    -------
    pandas.DataFrame with the contents of ``idCSVPath`` plus a ``"class"`` column
    (dtype uint8) holding the predicted label of each row.

    Raises
    ------
    ValueError: If the number of predictions differs from the number of CSV rows.
    """
    df = pd.read_csv(idCSVPath)
    batchPreds = []

    model.eval()
    with torch.inference_mode():
        # Iterate the loader directly (not enumerate(...)) so tqdm can read its
        # length and show a real progress bar instead of "0it [00:00, ?it/s]".
        for X in tqdm(dataLoader):
            X = X.to(device)

            logits = model(X)
            y_pred = torch.round(torch.sigmoid(logits))
            # Flatten so both (batch, 1) and (batch,) model outputs are accepted,
            # and park the labels on the CPU so they do not accumulate on the device.
            batchPreds.append(y_pred.reshape(-1).to(torch.uint8).cpu())

    # Concatenate once at the end; concatenating inside the loop re-copies every
    # earlier prediction on each batch.
    if batchPreds:
        classes = torch.cat(batchPreds, 0).numpy()
    else:
        classes = np.empty(0, dtype=np.uint8)

    # A pd.Series assignment would align on the index and silently pad/truncate;
    # fail loudly instead when IDs and predictions do not line up.
    if len(classes) != len(df):
        raise ValueError(
            f"Got {len(classes)} predictions for {len(df)} rows in {idCSVPath!r}"
        )

    df["class"] = classes

    return df

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [4]:
import torch.utils
import torch.utils.data

# Dataset built from the file 'PreProcessing/X_test.npy'.
testData = PMDatasetPredict('PreProcessing/X_test.npy')

# shuffle=False keeps the samples in dataset order; predict() pairs each
# prediction with a row of the ID CSV purely by position.
testLoader = torch.utils.data.DataLoader(
    dataset=testData,
    batch_size=128,
    shuffle=False,
)

In [5]:
# NOTE(review): the meaning of the positional arguments (377, 1, 2, 1024) is
# defined by Model's constructor, which is not visible here — confirm against Model.
model = Model(377, 1, 2, 1024)
# Module.to() returns the module itself, so rebinding keeps the same object.
model = model.to(device)


In [6]:
# Run inference once per cross-validation fold and save each fold's predictions.
for i in range(5):
    # map_location remaps the stored tensors onto the active device, so checkpoints
    # saved on a GPU still load when this notebook falls back to the CPU.
    # NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
    stateDict = torch.load(
        f'Models/Baseline/Baseline_FOLD_{i}.pth',
        map_location=device,
    )
    model.load_state_dict(stateDict)

    foldResults = predict(
        model=model,
        dataLoader=testLoader,
        idCSVPath='PreProcessing/testIDNums.csv',
        device=device,
    )
    foldResults.to_csv(f'Models/Baseline/Fold{i}_results.csv', index=False)


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [13]:
# Ensemble the five fold models: the final class of each id is the most frequent
# ('p' or 'e') label among the per-fold predictions.
df = pd.read_csv('PreProcessing/testIDNums.csv', index_col='id')

foldVotes = []
for i in range(5):
    foldClasses = pd.read_csv(f'Models/Baseline/Fold{i}_results.csv', index_col='id')['class']
    # Vectorised 1 -> 'p', anything else -> 'e'. The result is a Series, so it is
    # named via `name=`; assigning `.columns` on a Series does not rename it.
    tempDf = pd.Series(
        np.where(foldClasses == 1, 'p', 'e'),
        index=foldClasses.index,
        name=f'class_Fold{i}',
    )
    foldVotes.append(tempDf)

# Concatenate once, outside the loop, aligning every fold on the id index.
df = pd.concat([df, *foldVotes], axis=1, ignore_index=False)

# mode(axis=1) returns a DataFrame whose first column (label 0) holds the most
# frequent value per row. Setting `.name` on a DataFrame does not rename that
# column, so rename it explicitly to get an `id,class` header in the CSV.
voteColumns = [vote.name for vote in foldVotes]
dfMode = df[voteColumns].mode(axis=1)[[0]].rename(columns={0: 'class'})
dfMode.to_csv('Models/Baseline/Baseline_5_fold_Mode.csv', index=True)


  dfMode.colums=['class']


        class class class class class
id                                   
3116945     e     e     e     e     e
3116946     p     p     p     p     p
3116947     p     p     p     p     p
3116948     p     p     p     p     p
3116949     e     e     e     e     e
...       ...   ...   ...   ...   ...
5194904     p     p     p     p     p
5194905     p     p     p     p     p
5194906     p     p     p     p     p
5194907     e     e     e     e     e
5194908     e     e     e     e     e

[2077964 rows x 5 columns]


         0
id        
3116945  e
3116946  p
3116947  p
3116948  p
3116949  e
...     ..
3117040  e
3117041  p
3117042  e
3117043  p
3117044  p

[100 rows x 1 columns]
