In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import os 
from loaddata import MultiModalLoader
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from model import MultistainModel
import torch.optim as optim
from tqdm import tqdm
from torchmetrics import AUROC
from torch.utils.tensorboard import SummaryWriter




In [2]:
f = "data/datatable.csv"
train_DS = MultiModalLoader(f, "TRAIN",n_mods=3)
test_DS = MultiModalLoader(f, "TEST",n_mods=3)
valid_DS = MultiModalLoader(f, "VALIDATION",n_mods=3)

In [3]:
train_loader = DataLoader(train_DS, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_DS, batch_size=32, shuffle=False)
test_loader = DataLoader(test_DS, batch_size=32, shuffle=False)

In [4]:
model = MultistainModel(n_classes = 2)
#model = torch.compile(premodel)  # TODO not working yet 
criterion = nn.CrossEntropyLoss()  
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # TODO optimal optimizer for this task?
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.9)  # TODO optimal scheduler? 
auroc = AUROC(task="multilabel", num_labels=2)  # TODO do for categorical
activate = nn.Softmax(dim=1)  # TODO for catefgorical needs to be a softmax



In [5]:
n_epochs = 2
for epoch in range(n_epochs):
    model.train()
    train_loss = []
    train_auroc = []
    with tqdm(train_loader, unit="batch") as tepoch:
        for idx,(x, y) in enumerate(tepoch):
            tepoch.set_description(f"Epoch {epoch+1}|Train")

            pred = model(x)
            loss = criterion(pred,y.float())
            loss.backward()
            optimizer.step()
            
            tr_auroc = auroc(activate(pred), y.int())

            train_loss.append(loss.item())
            train_auroc.append(tr_auroc)
            tepoch.set_postfix(train_loss=loss.item(), train_AUROC=tr_auroc.item())
            if idx>0:  # get rid of before running 
                break
    mean_train_loss = np.mean(train_loss)  # TODO save in tensorboard
    mean_train_auroc = np.mean(train_auroc)  # TODO save in tensorboard

    scheduler.step()
    valid_loss = []
    valid_auroc = []
    model.eval()
    with tqdm(valid_loader, unit="batch") as vepoch:
        for idx,(x, y) in enumerate(vepoch):
            vepoch.set_description(f"Epoch {epoch+1}|Valid")
            out = model(x)
            
            loss= criterion(out,y.float())
            val_auroc  = auroc(activate(out), y.int())
            
            valid_loss.append(loss.item())
            valid_auroc.append(val_auroc)
            vepoch.set_postfix(valid_loss=loss.item(), valid_AUROC=val_auroc.item())
            if idx>0: # TODO get rid of before running 
                break

    mean_valid_loss = np.mean(valid_loss)  # TODO save in tensorboard
    mean_valid_auroc = np.mean(valid_auroc)  # TODO save in tensorboard


#safe model!!! and stuff

Epoch 1|Train:   8%|▊         | 1/13 [01:11<14:14, 71.23s/batch, train_AUROC=0.533, train_loss=0.69] 
Epoch 1|Valid:  50%|█████     | 1/2 [00:28<00:28, 28.28s/batch, valid_AUROC=0.486, valid_loss=0.79]
Epoch 2|Train:   8%|▊         | 1/13 [01:06<13:23, 66.99s/batch, train_AUROC=0.472, train_loss=0.949]
Epoch 2|Valid:  50%|█████     | 1/2 [00:22<00:22, 22.30s/batch, valid_AUROC=0.271, valid_loss=1.52]


## Evaluate

In [13]:
test_loss = []
test_aurocs = []
model.eval()
results = []
with tqdm(test_loader, unit="batch") as testepoch:
    for idx,(x, y) in enumerate(testepoch):
        testepoch.set_description(f"Test")
        out = model(x)
        
        loss = criterion(out,y.float())
        test_auroc = auroc(activate(out), y.int())

        results.append( valid_DS.__reverse_transformation__(out) )
        results.append([idx,y,out])
        test_loss.append(loss.item())
        test_aurocs.append(test_auroc.item())
        testepoch.set_postfix(test_loss=loss.item(), test_AUROC=val_auroc.item())


        if idx>0: # TODO get rid of before running 
            break

    mean_test_loss = np.mean(test_loss)  # TODO save in tensorboard
    mean_test_auroc = np.mean(test_aurocs)  # TODO save in tensorboard
    results_df = pd.DataFrame(results,columns=["idx","label","pred"])

Test:  25%|██▌       | 1/4 [00:34<01:43, 34.60s/batch, test_AUROC=0.271, test_loss=1.77] 


In [14]:
results_folder = "results"
os.path.exists(results_folder)
if not os.path.exists(results_folder):
    os.mkdir(results_folder)
f = results_df.to_csv((results_folder+"/result_table.csv"))
#  TODO settings store as file

In [17]:
results

[[0,
  tensor([[0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [1., 0.],
          [0., 1.],
          [1., 0.],
          [1., 0.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [1., 0.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [1., 0.],
          [0., 1.],
          [1., 0.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [0., 1.],
          [1., 0.],
          [0., 1.],
          [1., 0.]], dtype=torch.float64),
  tensor([[-1.2159,  1.8575],
          [-1.1679,  1.8080],
          [-1.1987,  1.8268],
          [-1.2114,  1.8228],
          [-1.1828,  1.8585],
          [-1.2068,  1.8495],
          [-1.1743,  1.8384],
          [-1.1939,  1.8543],
          [-1.2056,  1.8209],
          [-1.1994,  1.8294],
          [-1.1568,  1.8382],
  