In [1]:
import time
import pathlib
import os
import glob

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split
from torchvision import datasets, transforms

import torchmetrics
import timm

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2

from datamodules import RSNAdataset
from models import RACNet
from plotting import show_failures, plot_loss_and_acc
from utils import load_image, LossMeter

  from .autonotebook import tqdm as notebook_tqdm


# HYPERPARAMETERS

In [2]:
PATH = 'data/reduced_dataset/'
MODEL = 'resnet18'
BATCH_SIZE = 3
NUM_EPOCHS = 5
LEARNING_RATE = 0.0001

NUM_WORKERS = 4
NUM_CLASSES = 2

KFOLD= 10 
# Device
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MODEL

# DATASET

In [3]:

folds_xtrain = np.load('./data/folds/new_folds/xtrain.npy', allow_pickle=True)
folds_xtest = np.load('./data/folds/new_folds/xtest.npy', allow_pickle=True)
folds_ytrain = np.load('./data/folds/new_folds/ytrain.npy', allow_pickle=True)
folds_ytest = np.load('./data/folds/new_folds/ytest.npy', allow_pickle=True)

xtrain = folds_xtrain[1]
ytrain = folds_ytrain[1]
xtest = folds_xtest[1]
ytest = folds_ytest[1]

print('-'*30)
print(f"Fold {0}")


train_set = RSNAdataset(
                        './data/reduced_dataset/',
                        xtrain,  
                        ytrain,
                        n_slices=254,
                        img_size=112,
                        transform=None
                            )
    
test_set = RSNAdataset(
                './data/reduced_dataset/',
                xtest,  
                ytest,
                n_slices=254,
                img_size=112,
                transform=None
                    )


train_loader = DataLoader(
            train_set,    
            batch_size=4,
            shuffle=True,
            num_workers= NUM_WORKERS,
        )

test_loader = DataLoader(
            test_set,    
            batch_size=4,
            shuffle=True,
            num_workers=NUM_WORKERS,
        )

------------------------------
Fold 0


In [4]:
model = RACNet(MODEL, NUM_CLASSES)
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = F.cross_entropy
#test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
##test_f1 = torchmetrics.F1Score(task="multiclass", num_classes=2, average='macro')       
#test_auroc = torchmetrics.AUROC(task="multiclass", num_classes=2)

test_pred = []
test_targets = []
preds = []

for idx, batch in enumerate(test_loader):
    
    model.eval()
    with torch.no_grad():
        features = batch['X'].to(DEVICE)
        targets = batch['y'].to(DEVICE)
        
        org = batch['org']
        #print(org)
        
        logits, probs = model(features, org)
        predicted_class = probs.argmax(dim=1)
        
        test_pred.append(predicted_class)
        test_targets.append(targets)
        preds.append(probs)
        print('probs shape:',probs.shape)
        print('targets shape:', targets.shape)
        
        print('------BATCH ENDING-------')

test_pred = torch.cat(test_pred)
test_targets = torch.cat(test_targets)
preds = torch.cat(preds)

print('test preds:', test_pred)
print('test preds:', test_pred.shape)
print('test preds flatten:', test_pred.flatten().shape)

print('test_target:', test_targets)
print('test_target:', test_targets.shape)
print('test_target flatten:', test_targets.flatten().shape)

print('preds :', preds)
print('preds :', preds.shape)
#preds = torch.cat(preds)

#acc = test_acc(probs, targets)
#f1 = test_f1(test_pred, test_targets)
#auroc = test_auroc(preds, test_targets)             
        
#print(f"Testing Time: {(time.time() - test_time)/60:.2f} min | Accuracy: {acc:.2f}% | F1 Score: {f1:.4f} | AUROC: {auroc:.4f}")

reshape input torch.Size([1016, 1, 112, 112])
CNN ouput torch.Size([1016, 512])
reshaped rnn_in torch.Size([4, 254, 512])
RNN ouput torch.Size([4, 254, 64])
mask ouput torch.Size([4, 254, 64])
reshaped masked output torch.Size([4, 16256])
fc ouput torch.Size([4, 32])
logits torch.Size([4, 2])
classifier ouput torch.Size([4, 2])
probs shape: torch.Size([4, 2])
targets shape: torch.Size([4])
------BATCH ENDING-------
reshape input torch.Size([1016, 1, 112, 112])
CNN ouput torch.Size([1016, 512])
reshaped rnn_in torch.Size([4, 254, 512])
RNN ouput torch.Size([4, 254, 64])
mask ouput torch.Size([4, 254, 64])
reshaped masked output torch.Size([4, 16256])
fc ouput torch.Size([4, 32])
logits torch.Size([4, 2])
classifier ouput torch.Size([4, 2])
probs shape: torch.Size([4, 2])
targets shape: torch.Size([4])
------BATCH ENDING-------
reshape input torch.Size([1016, 1, 112, 112])
CNN ouput torch.Size([1016, 512])
reshaped rnn_in torch.Size([4, 254, 512])
RNN ouput torch.Size([4, 254, 64])
mask 

In [9]:
xtrain.shape

(468,)

In [15]:
model = RACNet(NUM_CLASSES)
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = F.cross_entropy


In [None]:
for epoch in range(NUM_EPOCHS):
    #t = time.time()
    model.train()
    train_loss = LossMeter()
    #train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2).to(self.device)
    #train_f1 = torchmetrics.F1Score(task="multiclass", num_classes=2, average='macro').to(self.device)
    #train_auroc = torchmetrics.AUROC(task="multiclass", num_classes=2).to(self.device)
    
    for idx, batch in enumerate(train_loader):
        
        features = batch['X'].to(DEVICE)
        targets = batch['y'].type(torch.cuda.LongTensor).to(DEVICE)
        org = batch['org']

        print('features shape:',features.shape)
        print('targets shape:',targets.shape)
        print('org shape:',org)
        
        ### FORWARD AND BACK PROP
        logits, probs = model(features, org)
        loss = criterion(logits, targets)

        print('logits shape:', logits.shape)
        print('probs shape:', probs.shape)
        
        train_loss.update(loss.detach().item())
        #train_acc.update(probs, targets)
        #train_f1.update(probs, targets)
        #train_auroc.update(probs, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f'-----------loss: {loss}--------------')
    
        #print(f'Epoch: {epoch+1}/{epochs} | Loss: {loss:.5f} | Accuracy: {train_acc:.4f}% | F1 Score: {train_f1:.4f} | AUROC: {train_roc:.4f} | Time: {int(time.time() - t)}')
    
    _loss = train_loss.avg
    #_acc = train_acc.compute
    #_f1 = train_f1.compute
    #_roc = train_auroc.compute
    
    #self.hist['train_loss'].append(_loss)
    ##self.hist['train_acc'].append(_acc)
    #self.hist['train_f1'].append(_f1)
    #self.hist['train_auroc'].append(_roc)
    
    print(f'Epoch: {epoch+1}/{epochs} | Loss: {_loss:.5f}')
    
    
avg_loss = np.mean(self.hist['train_loss'])
#avg_acc = np.mean(self.hist['train_acc'])
#avg_f1 = np.mean(self.hist['train_f1'])
#avg_auroc = np.mean(self.hist['train_auroc'])

#print(f'Epoch Training Time: {(train.time() - start_time)/60} min | Avg Loss: {avg_loss:.5f} | Avg Accuracy: {avg_acc:.4f}% | Avg F1 Score: {avg_f1:.4f} | Avg AUROC: {avg_auroc:.4f}')


# Trainer

In [8]:
class Trainer():
    def __init__(
        self, 
        model, 
        device, 
        optimizer, 
        criterion,
        epochs,
        loss_meter, 
        fold
    ):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.loss_meter = loss_meter
        self.hist = {'test_acc':[],
                     'test_f1':[],
                     'test_roc':[],
                     'train_loss':[],
                     'train_acc':[],
                     'train_f1': [],
                     'train_roc': [],
                    }
        
        self.best_valid_score = -np.inf
        self.best_valid_loss = np.inf
        self.best_f_score = 0
        self.n_patience = 0
        self.fold = fold

        self.record = {'test_loss':[],
                    'test_acc':[],
                     'test_f1':[],
                     'test_roc':[],
                     'train_loss':[],
                     'train_acc':[],
                     'train_f1': [],
                     'train_roc': [],
                    }
        
        
    def fit(self, epochs, train_loader, save_path, patience):
        train_time = time.time()
        
        for epoch in range(epochs):
            t = time.time()
            self.model.train()
            train_loss = self.loss_meter()
            train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2).to(self.device)
            train_f1 = torchmetrics.F1Score(task="multiclass", num_classes=2, average='macro').to(self.device)
            train_auroc = torchmetrics.AUROC(task="multiclass", num_classes=2).to(self.device)
            
            for idx, batch in enumerate(train_loader):
                
                features = batch['X'].to(self.device)
                targets = batch['y'].type(torch.cuda.LongTensor).to(self.device)
                org = batch['org']
                    
                ### FORWARD AND BACK PROP
                logits, probs = self.model(features, org)
                loss = self.criterion(logits, targets)
                
                train_loss.update(loss.detach().item())
                train_acc.update(probs, targets)
                train_f1.update(probs, targets)
                train_auroc.update(probs, targets)
                
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()  
            
                #print(f'Epoch: {epoch+1}/{epochs} | Loss: {loss:.5f} | Accuracy: {train_acc:.4f}% | F1 Score: {train_f1:.4f} | AUROC: {train_roc:.4f} | Time: {int(time.time() - t)}')
            
            _loss = train_loss.avg
            _acc = train_acc.compute
            _f1 = train_f1.compute
            _roc = train_auroc.compute
            
            self.hist['train_loss'].append(_loss)
            self.hist['train_acc'].append(_acc)
            self.hist['train_f1'].append(_f1)
            self.hist['train_auroc'].append(_roc)
            
            print(f'Epoch: {epoch+1}/{epochs} | Loss: {_loss:.5f} | Accuracy: {_acc:.4f}% | F1 Score: {_f1:.4f} | AUROC: {_roc:.4f} | Time: {time.time() - t}')
            
            
        avg_loss = np.mean(self.hist['train_loss'])
        avg_acc = np.mean(self.hist['train_acc'])
        avg_f1 = np.mean(self.hist['train_f1'])
        avg_auroc = np.mean(self.hist['train_auroc'])

        print(f'Epoch Training Time: {(train.time() - start_time)/60} min | Avg Loss: {avg_loss:.5f} | Avg Accuracy: {avg_acc:.4f}% | Avg F1 Score: {avg_f1:.4f} | Avg AUROC: {avg_auroc:.4f}')
        
        
        
        #return avg_loss, avg_acc, avg_f1, avg_auroc
        
        
        #testing------------------
    
    def test(self, test_loader):
        test_time = time.time()
        test_loss = self.loss_meter()
        test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
        test_f1 = torchmetrics.F1Score(task="multiclass", num_classes=2, average='macro')        
        test_auroc = torchmetrics.AUROC(task="multiclass", num_classes=2)
        
        for idx, batch in enumerate(test_loader):
            
            self.model.eval()
            with torch.no_grad():
                        
                for idx, batch in enumerate(test_loader):
                    
                    features = batch['X'].to(self.device)
                    #targets = batch['y'].type(torch.cuda.LongTensor).to(self.device)
                    targets = batch['y'].type(torch.cuda.LongTensor).to(self.device)
                    
                    org = batch['org']
                    
                    logits, probas = self.model(features, org)
                    loss = self.criterion(logits, targets)
                
                    test_loss.update(loss.detach().item())
                    test_acc.update(probs, targets)
                    test_f1.update(probs, targets)
                    test_auroc.update(probs, targets)

                _loss = train_loss.avg
                _acc = train_acc.compute
                _f1 = train_f1.compute
                _roc = train_auroc.compute  

                self.hist['test_loss'].append(_loss)
                self.hist['test_acc'].append(_acc)
                self.hist['test_f1'].append(_f1)
                self.hist['test_auroc'].append(_roc)
                

                #print(f'Total Training Time: {(train.time() - start_time)/60} min | Avg Loss: {avg_loss:.5f} | Avg Accuracy: {avg_acc:.4f}% | Avg F1 Score: {avg_f1:.4f} | Avg AUROC: {avg_auroc:.4f}')


            avg_loss = np.mean(self.hist['test_loss'])
            avg_acc = np.mean(self.hist['test_acc'])
            avg_f1 = np.mean(self.hist['test_f1'])
            avg_auroc = np.mean(self.hist['test_auroc'])

            print(f'Testing Time: {(time.time() - test_time)/60} min | Avg Loss: {avg_loss:.5f} | Avg Accuracy: {avg_acc:.4f}% | Avg F1 Score: {avg_f1:.4f} | Avg AUROC: {avg_auroc:.4f}')
            
        return avg_loss, avg_acc, avg_f1, avg_auroc
                        
                
    def save_model(self, n_epoch, save_path):
            torch.save(
                {
                    "model_state_dict": self.model.state_dict(),
                    "optimizer_state_dict": self.optimizer.state_dict(),
                    "best_valid_score": self.best_valid_score,
                    "best_f1_score": self.best_f_score,
                    "n_epoch": n_epoch,
                },
                save_path,
            )

# Training

In [9]:
def train(path, epochs, n_fold, batch_size, num_worker, device):
    
    fold_acc = []
    fold_loss = []
    fold_auroc = []
    fold_f1 = []

    start_time = time.time()
    for _ in range(n_fold):
        fold = _+1
        folds_xtrain = np.load('./data/folds/xtrain.npy', allow_pickle=True)
        folds_xtest = np.load('./data/folds/xtest.npy', allow_pickle=True)
        folds_ytrain = np.load('./data/folds/ytrain.npy', allow_pickle=True)
        folds_ytest = np.load('./data/folds/ytest.npy', allow_pickle=True)
        
        xtrain = folds_xtrain[_]
        ytrain = folds_ytrain[_]
        xtest = folds_xtest[_]
        ytest = folds_ytest[_]
        
        print('-'*30)
        print(f"Fold {fold}")

        train_set = RSNAdataset(
                        './data/reduced_dataset/',
                        xtrain,  
                        ytrain,
                        n_slices=254,
                        img_size=112,
                        transform=None
                            )
    
        test_set = RSNAdataset(
                        './data/reduced_dataset/',
                        xtest,  
                        ytest,
                        n_slices=254,
                        img_size=112,
                        transform=None
                            )
        
        train_loader = DataLoader(
                    train_set,    
                    batch_size=1,
                    shuffle=True,
                    num_workers=8,
                )
        
        train_loader = DataLoader(
                    train_set,    
                    batch_size=1,
                    shuffle=True,
                    num_workers=8,
                )
            
        model = RACNet(NUM_CLASSES)
        model = model.to(DEVICE)
        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
        criterion = F.cross_entropy
        
        trainer = Trainer(
            model, 
            device, 
            optimizer, 
            criterion,
            epochs,
            LossMeter, 
            fold
        )
        
        #trainer.fit(epochs,
        #            train_loader,
        #            './checkpoints/f"best-model-{fold}.pth',
        #            5)
                        
        #trainer.plot_loss()
        #trainer.plot_score()
        #trainer.plot_fscore()
                
        #test
        loss, test_acc, test_f1, test_auroc = Trainer.test(test_loader)
        #fold_loss.append(loss)
        #fold_acc.append(test_acc)
        #fold_f1.append(test_f1)
        #fold_auroc.append(test_auroc)    
    
    elapsed_time = time.time() - start_time
    '''wandb.log({
         'Avg Test f1 score': np.mean(test_fscore),
         'Avg Train f1 score': np.mean(f_scores)
         })'''
    print('\nTraining complete in {:.0f}m {:.0f}s'.format(elapsed_time // 60, elapsed_time % 60))
    print('Avg loss {:.5f}'.format(np.mean(losses)))
    print('Avg score {:.5f}'.format(np.mean(scores)))
    print('Avg Train f1_score {:.5f}'.format(np.mean(f_scores)))
    print('Avg Test f1_score {:.5f}'.format(np.mean(test_fscore)))

In [None]:
train(PATH, NUM_EPOCHS, KFOLD, BATCH_SIZE, NUM_WORKERS, DEVICE)

In [4]:
folds_xtrain = np.load('./data/folds/xtrain.npy', allow_pickle=True)
folds_xtest = np.load('./data/folds/xtest.npy', allow_pickle=True)
folds_ytrain = np.load('./data/folds/ytrain.npy', allow_pickle=True)
folds_ytest = np.load('./data/folds/ytest.npy', allow_pickle=True)

xtrain = folds_xtrain[4]
ytrain = folds_ytrain[4]
xtest = folds_xtest[4]
ytest = folds_ytest[4]

print('-'*30)
print(f"Fold {'3'}")

------------------------------
Fold 3


In [5]:
train_retriever = RSNAdataset(
    'data/reduced_dataset/',
    xtrain,  
    ytrain,
    n_slices=254,
    img_size=112,
    transform=None
        )

test_retriever = RSNAdataset(
    'data/reduced_dataset/',
    xtest,  
    ytest,
    n_slices=254,
    img_size=112,
    transform=None
        )

train_loader = DataLoader(
            train_retriever,    
            batch_size=1,
            shuffle=True,
            num_workers=8,
        )

train_loader = DataLoader(
            train_retriever,    
            batch_size=1,
            shuffle=True,
            num_workers=8,
        )

# Checking the dataset
for batch in train_loader:  
    print('Image batch dimensions:', batch['X'].shape)
    print('Image Class dimensions:', batch['y'].shape)
    break

# Training

In [None]:
model = RACNet(NUM_CLASSES)
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
train(PATH, NUM_EPOCH, N_kFOLD, BATCH_SIZE, NUM_WORKERS)

# EVALUATION

In [None]:
with torch.set_grad_enabled(False): # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))

In [None]:
model = RecNet()
model.to(device='cuda')
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = F.cross_entropy
model.train()

In [None]:
for i, batch in enumerate(train_loader, 1):
    X = batch[0]['X'].to(device='cuda')
    print('train_loader output',X.shape)
    y = batch[0]['y'].to(device='cuda')
    print('train_loader targets',y.shape)
    org = batch[1][0]
    print('train org', org)
    #count += 1
    
    #optimizer.zero_grad()
    outputs = model(X, org).squeeze(1)
    break
    #print('prob outputs', outputs.shape)
    
    #loss = criterion(outputs, y)
    #loss.backward()

    #train_loss.update(loss.detach().item())
    #train_score.update(targets, outputs.detach())
    
    #self.optimizer.step()
    
    #_loss, _score = train_loss.avg, train_score.avg
    #message = 'Train Step {}/{}, train_loss: {:.5f}, train_score: {:.5f}, train_f1: {:.5f}'
    #self.info_message(message, step, len(train_loader), _loss, _score, ff, end="\r")

    #f_score = ff_score.get_score()
    #return train_loss.avg, train_score.avg, f_score, int(time.time() - t)
    

In [6]:
dict, org = train_retriever[0]

In [7]:
batch = dict['X']
targets = dict['y']

In [8]:
batch.shape
#targets.shape


torch.Size([254, 1, 112, 112])

In [9]:
targets

tensor(1.)

In [10]:
org

[33]

In [None]:
X = batch.to(device='cuda')
X = X.unsqueeze(0)
print(X.shape)
y = targets.to(device='cuda')
print(y.shape)

#self.optimizer.zero_grad()
outputs = model(X, org).squeeze()
#outputs = outputs.squeeze()
print(outputs.shape)
print(outputs)

In [None]:
timm.list_models('*convnext*', pretrained=True)

In [None]:
res

In [26]:
res = timm.create_model('resnet50', pretrained=True, num_classes=0, in_chans=1)
#m = res(torch.randn(2, 3, 224, 224))
res.reset_classifier(0)
o = res(torch.randn(2, 1, 112, 112))
print(f'Pooled shape: {o.shape[1]}')
print(res.fc)
in_features = res(torch.randn(2, 1, 112, 112)).shape[1]
print(in_features)

Pooled shape: 2048
Identity()
2048


In [21]:
images = [torch.tensor(frame, dtype=torch.float32) for frame in images]

In [23]:
images = torch.stack(images)

In [24]:
images.shape

torch.Size([254, 112, 112])

In [None]:
%reload_ext watermark
%watermark -a 'Karanjot Vendal' -v -p torch --iversion