In [4]:
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import glob

import albumentations as A
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset

from albumentations.pytorch import ToTensorV2
from torch.optim import lr_scheduler
import re
import time
import cv2

import monai
from tqdm import tqdm

import torchmetrics

# hyperparameters

In [5]:
NUM_IMAGES_3D = 64
TRAINING_BATCH_SIZE = 2
TEST_BATCH_SIZE = 2
IMAGE_SIZE = 112
N_EPOCHS = 2
do_valid = True
n_workers = 0
type_ = "FLAIR"
device = "cuda"

In [6]:
def load_image(path, size=(IMAGE_SIZE, IMAGE_SIZE)):
    image = cv2.imread(path, 0)
    if image is None:
        return np.zeros(IMAGE_SIZE)
    
    image = cv2.resize(image, size) / 255
    return image.astype('f')

# dataset

In [7]:
class BrainRSNADataset(Dataset):
    def __init__(
        self, patient_path, paths, targets, transform=None, mri_type="FLAIR", is_train=True, ds_type="forgot", do_load=True
    ):
        
        self.patient_path = patient_path
        self.paths = paths   
        self.targets = targets
        self.type = mri_type

        self.transform = transform
        self.is_train = is_train
        self.folder = "train" if self.is_train else "test"
        self.do_load = do_load
        self.ds_type = ds_type        
        
        '''self.target = target
        self.data = data
        self.type = mri_type

        self.transform = transform
        self.is_train = is_train
        self.folder = "train" if self.is_train else "test" '''

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        _id = self.paths[index]
        target = self.targets[index]
        _3d_images = self.load_images_3d(_id)
        _3d_images = torch.tensor(_3d_images).float()
        if self.is_train:
            return {"image": _3d_images, "target": target}
        else:
            return {"image": _3d_images, "target": target}
        
        
        
        '''row = self.data.loc[index]
        case_id = int(row.BraTS21ID)
        target = int(row[self.target])
        _3d_images = self.load_dicom_images_3d(case_id)
        _3d_images = torch.tensor(_3d_images).float()
        if self.is_train:
            return {"image": _3d_images, "target": target}
        else:
            return {"image": _3d_images, "case_id": case_id}'''


    def load_images_3d(
        self,
        case_id,
        num_imgs=NUM_IMAGES_3D,
        img_size=IMAGE_SIZE,
        rotate=0,
    ):
        case_id = str(case_id).zfill(5)

        path = f"./input/reduced_dataset/{case_id}/{self.type}/*.png"
        files = sorted(
            glob.glob(path),
            key=lambda var: [
                int(x) if x.isdigit() else x for x in re.findall(r"[^0-9]|[0-9]+", var)
            ],
        )

        middle = len(files) // 2
        if len(files) <= 64:
            image_stack = [load_image(f) for f in files]
        else:
            p1 = middle - 32 #max(0, middle - num_imgs2)
            p2 = middle + 32 #min(len(files), middle + num_imgs2)
            image_stack = [load_image(f) for f in files[p1:p2]]
            
            
            
        '''num_imgs2 = num_imgs // 2
        p1 = max(0, middle - num_imgs2)
        p2 = min(len(files), middle + num_imgs2)
        image_stack = [load_dicom_image(f, rotate=rotate) for f in files[p1:p2]]'''
        
        img3d = np.stack(image_stack).T
        if img3d.shape[-1] < num_imgs:
            n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
            img3d = np.concatenate((img3d, n_zero), axis=-1)

        if np.min(img3d) < np.max(img3d):
            img3d = img3d - np.min(img3d)
            img3d = img3d / np.max(img3d)

        return np.expand_dims(img3d, 0)

# training

In [9]:
mod = ['FLAIR', 'T1w', 'T1wCE', 'T2w']

dlt = []
empty_fld = [109, 123, 709]
df = pd.read_csv("./input/train_labels.csv")
skf = StratifiedKFold(n_splits=2)
X = df['BraTS21ID'].values
Y = df['MGMT_value'].values

for i in empty_fld:
    j = np.where(X == i)
    dlt.append(j)
    X = np.delete(X, j)
    
Y = np.delete(Y,dlt)

for m in mod:
    '''wandb.init(
    project="Kaggle-LB-1 sanity check",
    notes = "running sanity check",
    name=f"experiment_{m}", 
    config={
    "learning_rate": 0.0001,
    "architecture": "monai resnet10",
    "dataset": "MICAA MRI",
    "epochs": N_EPOCHS,
    "Batch size": TRAINING_BATCH_SIZE
    })'''
    
    print("--------------------deleted the empty patient data--------------------------")
    
    
    for fold, (train_idx, test_idx) in enumerate(skf.split(np.zeros(len(Y)), Y), 1):  
    
        losses = []
        train_f_score = []
        test_fscore = []
        start_time = time.time()
        
        xtrain = X[train_idx]
        ytrain = Y[train_idx]
        xtest = X[test_idx]
        ytest = Y[test_idx]
            
        print(f"train_{m}_{fold}")
        
        train_dataset = BrainRSNADataset(
                                        patient_path='./input/reduced_dataset/',
                                        paths=xtrain, 
                                        targets= ytrain,
                                        mri_type=m,
                                        ds_type=f"train_{m}_{fold}"
                                        )
    
        valid_dataset = BrainRSNADataset(
                                        patient_path='./input/reduced_dataset/',
                                        paths=xtest,
                                        targets=ytest,
                                        mri_type=m,
                                        is_train=False,
                                        ds_type=f"val_{m}_{fold}"
                                        )
        #print("----------------------created dataset successfully-------------------------------")
    
        train_dl = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=TRAINING_BATCH_SIZE,
            shuffle=True,
            num_workers=n_workers,
            drop_last=False,
            pin_memory=True,
        )
    
        validation_dl = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=TEST_BATCH_SIZE,
            shuffle=False,
            num_workers=n_workers,
            pin_memory=True,
        )
        
        
        #print("-----------------loaded the dataloader successfully---------------------------")
    
        # Checking the dataset
        '''for batch in train_dl:  
            print('Image batch dimensions:', batch['image'].shape)
            print('Image Class dimensions:', batch['target'].shape)
            break'''
    
    
        model = monai.networks.nets.resnet10(spatial_dims=3, n_input_channels=1, num_classes=1)
        #print("--------------------------loaded the model succeffully-------------------------")
        optimizer = optim.Adam(model.parameters(), lr=0.0001)
    
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.5, last_epoch=-1, verbose=True)
    
        model.zero_grad()
        model.to(device)
        best_loss = 9999
        best_auc = 0
        criterion = nn.BCEWithLogitsLoss()
        #print("-------------------------------starting the trianing loop-------------------------")
        train_acc = torchmetrics.Accuracy(task="binary")
        train_f1 = torchmetrics.F1Score(task="binary")
        
        test_acc = torchmetrics.Accuracy(task="binary")
        test_f1 = torchmetrics.F1Score(task="binary")     
        
        for counter in range(N_EPOCHS):
    
            #epoch_iterator_train = tqdm(train_dl)
            tr_loss = 0.0
            preds = []
            true_labels = []
            #case_ids = []
            for step, batch in enumerate(train_dl):
                model.train()
                images, targets = batch["image"].to(device), batch["target"].to(device)
                #print("feature shape:", images.shape)
                #print("targets shape:", targets.shape)
    
                outputs = model(images)
                targets = targets  # .view(-1, 1)
                loss = criterion(outputs.squeeze(1), targets.float())
    
                #print("output shape:", outputs.shape)
                #print("targtes shape", targets.shape)
    
                loss.backward()
                optimizer.step()
                model.zero_grad()
                optimizer.zero_grad()
    
                tr_loss += loss.item()
                
                preds.append(outputs.sigmoid().detach().cpu().numpy())
                true_labels.append(targets.cpu().numpy())
                #case_ids.append(batch["case_id"])
            
            preds = np.vstack(preds).T[0].tolist()
            true_labels = np.hstack(true_labels).tolist()
            #case_ids = np.hstack(case_ids).tolist()
            #print("preds shape:", len(preds))
            #print("true_labels shape:", len(true_labels))
            #print("preds:", preds)
            #print("true labels:", true_labels)
            _auc_score = roc_auc_score(true_labels, preds)
            _acc = train_acc(torch.tensor(preds), torch.tensor(true_labels))
            _f1 = train_f1(torch.tensor(preds),torch.tensor(true_labels))
    
            
            '''wandb.log({
                'train loss': tr_loss / (step+1),
                'train AUC': _auc_score, 
                'train F1 score': _f1.item(),
                'train acc': _acc.item()
            })'''
    
            print(
                f"Train EPOCH {counter+1}/{N_EPOCHS}: average loss: {tr_loss/(step+1)}, Acc: {_acc}, F1 score: {_f1}  AUC SCORE = {_auc_score}"
            )
    
            scheduler.step()  # Update learning rate schedule
    
            if do_valid:
                with torch.no_grad():
                    val_loss = 0.0
                    preds = []
                    true_labels = []
                    #case_ids = []
                    #epoch_iterator_val = tqdm(validation_dl)
                    for step, batch in enumerate(validation_dl):
                        model.eval()
                        images, targets = batch["image"].to(device), batch["target"].to(device)
    
                        #print("test features images:", images.shape)
                        #print("test targets:", targets.shape)
    
                        outputs = model(images)
                        targets = targets  # .view(-1, 1)
                        #print("test outputs:", outputs.shape)
                        #print("test targets:", targets.shape)
                        loss = criterion(outputs.squeeze(1), targets.float())
                        val_loss += loss.item()
                        preds.append(outputs.sigmoid().detach().cpu().numpy())
                        true_labels.append(targets.cpu().numpy())
                        #case_ids.append(batch["case_id"])
                preds = np.vstack(preds).T[0].tolist()
                true_labels = np.hstack(true_labels).tolist()
                #case_ids = np.hstack(case_ids).tolist()
                auc_score = roc_auc_score(true_labels, preds)
                acc_ = test_acc(torch.tensor(preds), torch.tensor(true_labels))
                f1_ = test_f1(torch.tensor(preds),torch.tensor(true_labels))
            
            '''wandb.log({
                'test loss': val_loss / (step+1),
                'test AUC': auc_score, 
                'test F1 score': f1_.item(),
                'test acc': acc_.item()
            })'''

            train_acc.reset()
            train_f1.reset()
            test_acc.reset()
            test_f1.reset()
            
            auc_score_adj_best = 0
            for thresh in np.linspace(0, 1, 50):
                auc_score_adj = roc_auc_score(true_labels, list(np.array(preds) > thresh))
                if auc_score_adj > auc_score_adj_best:
                    best_thresh = thresh
                    auc_score_adj_best = auc_score_adj
    
            print(
                f"Test EPOCH {counter+1}/{N_EPOCHS}: Validation average loss: {val_loss/(step+1)}, Accuracy: {acc_} F1 score: {f1_}, + AUC SCORE = {auc_score} + AUC SCORE THRESH {best_thresh} = {auc_score_adj_best}"
            )
            
            if auc_score > best_auc:
                print("Saving the model...")
    
                all_files = os.listdir("weights/")
    
                for f in all_files:
                    if f"resnet10_{m}_fold{fold}" in f:
                        os.remove(f"weights/{f}")
    
                best_auc = auc_score
                torch.save(
                    model.state_dict(),
                    f"weights/resnet10_{m}_fold{fold}.pth",
                )
    
    elapsed_time = time.time() - start_time
    #wandb.finish()
    print("best auc:", best_auc)
    print('\nTraining complete in {:.0f}m {:.0f}s'.format(elapsed_time // 60, elapsed_time % 60))


--------------------deleted the empty patient data--------------------------
train_FLAIR_1
Adjusting learning rate of group 0 to 1.0000e-04.
Train EPOCH 18/2: average loss: 0.7257766565435553, Acc: 0.5292096138000488, F1 score: 0.5861027240753174  AUC SCORE = 0.5460053010223401
Adjusting learning rate of group 0 to 1.0000e-04.
Test EPOCH 1/2: Validation average loss: 0.707089439034462, Accuracy: 0.5085910558700562 F1 score: 0.3914893567562103, + AUC SCORE = 0.5373212086767074 + AUC SCORE THRESH 0.5714285714285714 = 0.5498721227621485
Saving the model...
Train EPOCH 19/2: average loss: 0.7037133703084841, Acc: 0.5292096138000488, F1 score: 0.5910447835922241  AUC SCORE = 0.5333207118515714
Adjusting learning rate of group 0 to 1.0000e-04.
Test EPOCH 2/2: Validation average loss: 0.7183867561286443, Accuracy: 0.5326460599899292 F1 score: 0.4285714328289032, + AUC SCORE = 0.5515297906602254 + AUC SCORE THRESH 0.7551020408163265 = 0.5592497868712702
Saving the model...
train_FLAIR_2
Adjust

KeyboardInterrupt: 

In [3]:

!python ./working/train.py


[34m[1mwandb[0m: Currently logged in as: [33mkaranjot[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/karanjotvendal/.netrc
[34m[1mwandb[0m: wandb version 0.15.8 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.15.7
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/home/karanjotvendal/karanjot/thesis/RSNA/3 Establishing baseline result/1/rsna-resnet10/wandb/run-20230820_191228-fhb71alu[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mexperiment_FLAIR[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/karanjot/Kaggle_LB-1%20sanity%20check[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/karanjot/Kaggle_LB-1%20sanity%20check/runs/fhb71alu[0m
-----------------train_FLAIR_1-------------------
Adjusting