In [8]:
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import glob

import albumentations as A
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset

from albumentations.pytorch import ToTensorV2
from torch.optim import lr_scheduler
import re
import time
import cv2

import monai
from tqdm import tqdm

# hyperparameters

In [6]:
# Configuration shared by the cells below.

# Volume geometry
NUM_IMAGES_3D = 64   # default number of slices per volume (load_images_3d's num_imgs)
IMAGE_SIZE = 112     # default in-plane side length in pixels (load_image's resize target)

# Batching / schedule
TRAINING_BATCH_SIZE = 2
TEST_BATCH_SIZE = 2
N_EPOCHS = 2
n_workers = 0        # DataLoader worker processes; 0 loads in the main process

# Run options
do_valid = True      # whether the training cell evaluates on the held-out fold
type_ = "FLAIR"      # MRI sequence used by the inference cell

# Use the GPU when one is available; fall back to CPU so that `.to(device)`
# does not raise on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
def load_image(path, size=(IMAGE_SIZE, IMAGE_SIZE)):
    """Read one image file as a single-channel float32 array divided by 255.

    Args:
        path: File path handed to ``cv2.imread``.
        size: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A float32 array of shape ``(height, width)``. When OpenCV cannot read
        the file (``cv2.imread`` returns ``None``), an all-zero float32 image
        of that same shape is returned so the caller can still stack it with
        the other slices.
    """
    image = cv2.imread(path, 0)  # flag 0 -> load as grayscale
    if image is None:
        # The placeholder must match the shape/dtype of a real slice; a 1-D
        # array here would make np.stack fail on mixed shapes.
        width, height = size
        return np.zeros((height, width), dtype='f')

    image = cv2.resize(image, size) / 255
    return image.astype('f')

# dataset

In [4]:
class BrainRSNADataset(Dataset):
    """Dataset that builds one fixed-depth 3D volume per patient from PNG slices.

    Each item is a dict ``{"image": float tensor, "target": label}`` where the
    image has shape ``(1, img_size, img_size, num_imgs)``.

    Args:
        patient_path: Root directory containing one sub-folder per patient ID.
        paths: Indexable collection of patient IDs (zero-padded to 5 digits
            when building the folder name).
        targets: Indexable collection of labels, aligned with ``paths``.
        transform: Stored on the instance; not applied by this class.
        mri_type: Name of the per-patient sub-folder to read slices from.
        is_train: Stored on the instance; also sets ``self.folder``.
        ds_type: Free-form tag stored on the instance.
        do_load: Stored on the instance; not read by this class.
    """

    def __init__(
        self, patient_path, paths, targets, transform=None, mri_type="FLAIR", is_train=True, ds_type="forgot", do_load=True
    ):
        self.patient_path = patient_path
        self.paths = paths
        self.targets = targets
        self.type = mri_type

        self.transform = transform
        self.is_train = is_train
        self.folder = "train" if self.is_train else "test"
        self.do_load = do_load
        self.ds_type = ds_type

    def __len__(self):
        """Number of patients in the dataset."""
        return len(self.paths)

    def __getitem__(self, index):
        """Return the volume and label of the patient at ``index``."""
        _id = self.paths[index]
        target = self.targets[index]
        _3d_images = torch.tensor(self.load_images_3d(_id)).float()
        # Train and non-train items share the same layout.
        return {"image": _3d_images, "target": target}

    def load_images_3d(
        self,
        case_id,
        num_imgs=NUM_IMAGES_3D,
        img_size=IMAGE_SIZE,
        rotate=0,
    ):
        """Load, centre-crop/pad along depth, and min-max normalise one volume.

        Args:
            case_id: Patient ID; converted to a 5-character zero-padded string.
            num_imgs: Number of slices in the returned volume.
            img_size: In-plane side length each slice is resized to.
            rotate: Unused; kept for interface compatibility.

        Returns:
            ``numpy.ndarray`` of shape ``(1, img_size, img_size, num_imgs)``.

        Raises:
            FileNotFoundError: If no PNG slice matches the patient/sequence
                folder (previously this surfaced as an opaque ``np.stack``
                error on an empty list).
        """
        case_id = str(case_id).zfill(5)

        # Honour the configured root instead of a hard-coded directory.
        path = os.path.join(self.patient_path, case_id, self.type, "*.png")
        # Natural sort: digit runs compare as integers, so "2" sorts before "10".
        files = sorted(
            glob.glob(path),
            key=lambda var: [
                int(x) if x.isdigit() else x for x in re.findall(r"[^0-9]|[0-9]+", var)
            ],
        )
        if not files:
            raise FileNotFoundError(f"No PNG slices found for pattern {path!r}")

        # Keep the central `num_imgs` slices when there are more than needed.
        if len(files) > num_imgs:
            start = len(files) // 2 - num_imgs // 2
            files = files[start:start + num_imgs]

        # Resize with the same img_size that the zero padding below uses.
        image_stack = [load_image(f, size=(img_size, img_size)) for f in files]

        # (n, H, W) -> (W, H, n): the slice axis becomes the last axis.
        img3d = np.stack(image_stack).T
        missing = num_imgs - img3d.shape[-1]
        if missing > 0:
            # Pad short series with blank slices at the end of the depth axis.
            n_zero = np.zeros((img_size, img_size, missing), dtype=img3d.dtype)
            img3d = np.concatenate((img3d, n_zero), axis=-1)

        # Min-max normalise to [0, 1]; constant volumes are left untouched.
        lowest, highest = np.min(img3d), np.max(img3d)
        if lowest < highest:
            img3d = (img3d - lowest) / (highest - lowest)

        # Add the leading channel axis.
        return np.expand_dims(img3d, 0)

In [41]:
ls ./input/

[0m[01;34mfolds[0m/  [01;34mreduced_dataset[0m/  train_labels.csv


In [42]:
# Preview cell: build the train/validation datasets of the first CV fold only
# (the loop breaks after one iteration) so individual samples can be inspected.

empty_fld = [109, 123, 709]  # patient IDs excluded from the split (presumably empty folders)
df = pd.read_csv("./input/train_labels.csv")
skf = StratifiedKFold(n_splits=10)
X = df['BraTS21ID'].values
Y = df['MGMT_value'].values

# Find every excluded patient on the ORIGINAL arrays and drop the same
# positions from both. Looking indices up on an already-shortened X and then
# applying them to the untouched Y would shift later indices and pair some
# patients with a neighbour's label.
dlt = np.flatnonzero(np.isin(X, empty_fld))
X = np.delete(X, dlt)
Y = np.delete(Y, dlt)
print("--------------------deleted the empty patient data--------------------------")

m = "FLAIR"
for fold, (train_idx, test_idx) in enumerate(skf.split(np.zeros(len(Y)), Y), 1):

    losses = []
    train_f_score = []
    test_fscore = []
    start_time = time.time()  # call the function; a bare `time.time` stores the function object

    xtrain = X[train_idx]
    ytrain = Y[train_idx]
    xtest = X[test_idx]
    ytest = Y[test_idx]

    print(f"train_{m}_{fold}")

    train_dataset = BrainRSNADataset(
        patient_path='./input/reduced_dataset/',
        paths=xtrain,
        targets=ytrain,
        mri_type=m,
        ds_type=f"train_{m}_{fold}",
    )

    valid_dataset = BrainRSNADataset(
        patient_path='./input/reduced_dataset/',
        paths=xtest,
        targets=ytest,
        mri_type=m,
        is_train=False,
        ds_type=f"val_{m}_{fold}",
    )
    break  # only the first fold is needed for inspection

--------------------deleted the empty patient data--------------------------
train_FLAIR_1


In [48]:
# Sanity check: shape of the image tensor for the sample at index 1 of train_dataset.
train_dataset[1]['image'].shape

torch.Size([1, 112, 112, 64])

In [49]:
# Sanity check: target value for the sample at index 1 of train_dataset.
train_dataset[1]['target']

1

# training

In [1]:
# Cross-validated training of a MONAI 3D ResNet-10 on one MRI sequence.
#
# Relies on names defined in earlier cells: BrainRSNADataset,
# TRAINING_BATCH_SIZE, TEST_BATCH_SIZE, N_EPOCHS, do_valid, n_workers, device.
# The disabled (string-literal) wandb logging snippets were removed; they
# referenced metrics that are never computed here.

CHECKPOINT_DIR = "../weights/checkpoints"


def _checkpoint_path(mri_type, fold):
    """Return the checkpoint file path for one MRI type / fold pair."""
    return os.path.join(CHECKPOINT_DIR, f"resnet10_{mri_type}_fold{fold}.pth")


def _best_thresholded_auc(true_labels, preds):
    """Scan 50 thresholds in [0, 1]; return (threshold, AUC) of the best hard-label AUC."""
    best_thresh, best_score = 0.0, 0
    scores = np.array(preds)
    for thresh in np.linspace(0, 1, 50):
        score = roc_auc_score(true_labels, list(scores > thresh))
        if score > best_score:
            best_thresh, best_score = thresh, score
    return best_thresh, best_score


def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return (mean loss, predictions, labels).

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    Predictions are sigmoid probabilities, returned as a flat Python list in
    loader order together with the matching labels.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    pred_batches, label_batches = [], []
    iterator = tqdm(loader)
    with torch.set_grad_enabled(training):
        for step, batch in enumerate(iterator):
            images, targets = batch["image"].to(device), batch["target"].to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(images)
            # Model output is (B, 1); the loss expects (B,) float targets.
            loss = criterion(outputs.squeeze(1), targets.float())

            if training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            iterator.set_postfix(
                batch_loss=(loss.item()), loss=(total_loss / (step + 1))
            )

            pred_batches.append(outputs.sigmoid().detach().cpu().numpy())
            label_batches.append(targets.cpu().numpy())

    preds = np.vstack(pred_batches).T[0].tolist()
    true_labels = np.hstack(label_batches).tolist()
    return total_loss / len(pred_batches), preds, true_labels


empty_fld = [109, 123, 709]  # patient IDs excluded from the split (presumably empty folders)
df = pd.read_csv("./input/train_labels.csv")
skf = StratifiedKFold(n_splits=10)
X = df['BraTS21ID'].values
Y = df['MGMT_value'].values

# Drop the excluded patients from X and Y using indices computed once on the
# original arrays, so IDs and labels stay aligned.
dlt = np.flatnonzero(np.isin(X, empty_fld))
X = np.delete(X, dlt)
Y = np.delete(Y, dlt)
print("--------------------deleted the empty patient data--------------------------")

m = "FLAIR"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
# Pinned host memory only helps (and only works without a warning) on CUDA.
use_pinned_memory = str(device).startswith("cuda")
fold_best_auc = {}
start_time = time.time()

for fold, (train_idx, test_idx) in enumerate(skf.split(np.zeros(len(Y)), Y), 1):

    xtrain = X[train_idx]
    ytrain = Y[train_idx]
    xtest = X[test_idx]
    ytest = Y[test_idx]

    print(f"train_{m}_{fold}")

    train_dataset = BrainRSNADataset(
        patient_path='./input/reduced_dataset/',
        paths=xtrain,
        targets=ytrain,
        mri_type=m,
        ds_type=f"train_{m}_{fold}",
    )

    valid_dataset = BrainRSNADataset(
        patient_path='./input/reduced_dataset/',
        paths=xtest,
        targets=ytest,
        mri_type=m,
        is_train=False,
        ds_type=f"val_{m}_{fold}",
    )
    print("----------------------created dataset successfully-------------------------------")

    train_dl = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TRAINING_BATCH_SIZE,
        shuffle=True,
        num_workers=n_workers,
        drop_last=False,
        pin_memory=use_pinned_memory,
    )

    validation_dl = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=TEST_BATCH_SIZE,
        shuffle=False,
        num_workers=n_workers,
        pin_memory=use_pinned_memory,
    )

    print("-----------------loaded the dataloader successfully---------------------------")

    # Checking the dataset
    for batch in train_dl:
        print('Image batch dimensions:', batch['image'].shape)
        print('Image Class dimensions:', batch['target'].shape)
        break

    model = monai.networks.nets.resnet10(spatial_dims=3, n_input_channels=1, num_classes=1)
    print("--------------------------loaded the model succeffully-------------------------")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    # `verbose` is deprecated in recent PyTorch releases, so it is not passed.
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.5)
    criterion = nn.BCEWithLogitsLoss()

    best_auc = 0
    checkpoint_file = _checkpoint_path(m, fold)
    print("-------------------------------starting the trianing loop-------------------------")
    for counter in range(N_EPOCHS):

        tr_loss, preds, true_labels = _run_epoch(model, train_dl, criterion, optimizer)
        auc_score = roc_auc_score(true_labels, preds)
        best_thresh, auc_score_adj_best = _best_thresholded_auc(true_labels, preds)
        print(
            f"EPOCH {counter}/{N_EPOCHS}: Train average loss: {tr_loss} +  AUC SCORE = {auc_score} + AUC SCORE THRESH {best_thresh} = {auc_score_adj_best}"
        )

        if do_valid:
            # Validate the weights just trained. Reloading a saved checkpoint
            # here would throw away this epoch's progress.
            val_loss, preds, true_labels = _run_epoch(model, validation_dl, criterion)
            auc_score = roc_auc_score(true_labels, preds)
            best_thresh, auc_score_adj_best = _best_thresholded_auc(true_labels, preds)
            print(
                f"EPOCH {counter}/{N_EPOCHS}: Validation average loss: {val_loss} + AUC SCORE = {auc_score} + AUC SCORE THRESH {best_thresh} = {auc_score_adj_best}"
            )

        # Model selection uses a single metric per run: validation AUC when
        # validation is enabled, training AUC otherwise.
        if auc_score > best_auc:
            print("Saving the model...")
            best_auc = auc_score
            # Saving to a fixed per-fold path overwrites the previous best, so
            # no substring-based cleanup is needed (which would also have
            # matched e.g. fold10 when cleaning fold1).
            torch.save(model.state_dict(), checkpoint_file)

        scheduler.step()  # Update learning rate schedule

    fold_best_auc[fold] = best_auc

elapsed_time = time.time() - start_time

print("best auc per fold:", fold_best_auc)
print('\nTraining complete in {:.0f}m {:.0f}s'.format(elapsed_time // 60, elapsed_time % 60))

NameError: name 'pd' is not defined

In [None]:
# NOTE(review): `fold_files` is not assigned in any cell visible here. The
# inference cell indexes it as fold_files[fold] to build checkpoint paths under
# ../input/resnet10rsna/, so it is presumably a list of checkpoint file names.
# Define it before running this cell.
fold_files

In [None]:
# Load the sample submission CSV; later cells fill in its columns and save it as submission.csv.
sample = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

In [None]:
# Fold-ensemble inference: average the sigmoid outputs of several checkpoints
# over every row of `sample`.
#
# Relies on `model`, `device`, `type_`, `sample` and `fold_files` from earlier cells.
tta_true_labels = []
tta_preds = []

# The dataset is built from (patient_path, paths, targets); it has no `data`
# argument and its items carry no "case_id" key. IDs are therefore taken
# straight from `sample` (the loader below does not shuffle, so prediction
# order matches row order), and the labels are placeholders that are never read.
test_ids = sample["BraTS21ID"].values  # assumes sample has a BraTS21ID column -- TODO confirm
test_dataset = BrainRSNADataset(
    # NOTE(review): same root as the training cells; point this at the folder
    # that actually holds the test slices if it differs.
    patient_path="./input/reduced_dataset/",
    paths=test_ids,
    targets=np.zeros(len(sample), dtype=int),
    mri_type=type_,
    is_train=False,
)
test_dl = torch.utils.data.DataLoader(
        test_dataset, batch_size=8, shuffle=False, num_workers=4
    )

n_folds = 5  # number of checkpoints averaged; fold_files must hold at least this many
preds_f = np.zeros(len(sample))
for fold in range(n_folds):
    # map_location lets checkpoints saved on GPU load on a CPU-only machine.
    state_dict = torch.load(f"../input/resnet10rsna/{fold_files[fold]}", map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    preds = []
    epoch_iterator_test = tqdm(test_dl)
    with torch.no_grad():
        for step, batch in enumerate(epoch_iterator_test):
            images = batch["image"].to(device)

            outputs = model(images)
            preds.append(outputs.sigmoid().detach().cpu().numpy())

    # (N, 1) -> (N,); each fold contributes an equal share of the mean.
    preds_f += np.vstack(preds).T[0] / n_folds

ids_f = test_ids.copy()

In [None]:
# Write the collected IDs and fold-averaged predictions into the submission frame.
sample["BraTS21ID"] = ids_f
sample["MGMT_value"] = preds_f

In [None]:
# Order rows by patient ID and rebuild a clean 0..n-1 index.
sample = sample.sort_values(by="BraTS21ID").reset_index(drop=True)

In [None]:
# Save the submission file without the index column.
sample.to_csv("submission.csv", index=False)

In [None]:
# Display the final submission frame.
sample