In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

%pip install monai===0.7.0

%pip install gdown==3.6.4
%pip install segmentation_models_pytorch==0.2.0

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.
import os


Collecting monai===0.7.0
  Downloading monai-0.7.0-202109240007-py3-none-any.whl.metadata (6.1 kB)
Downloading monai-0.7.0-202109240007-py3-none-any.whl (650 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m650.2/650.2 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: monai
Successfully installed monai-0.7.0
Note: you may need to restart the kernel to use updated packages.
Collecting gdown==3.6.4
  Downloading gdown-3.6.4.tar.gz (5.2 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: gdown
  Building wheel for gdown (setup.py) ... [?25ldone
[?25h  Created wheel for gdown: filename=gdown-3.6.4-py3-none-any.whl size=6109 sha256=019562342a774887a94efdded90aac785a826b1534e363bc7aceb1d2386511cb
  Stored in directory: /root/.cache/pip/wheels/73/66/77/99342322fafc3a20e3a83cef3733f122d8a3d2d4be2fa61514
Successfully built gdown
Installing collected packages: gdown
Successfully installe

# Instalize lib

In [None]:
# # import tarfile
# # file = tarfile.open('../input/brats-2021-task1/BraTS2021_Training_Data.tar')

# # file.extractall('./TrainingData')
# # file.close()
# import shutil
# try:
#     shutil.rmtree("/kaggle/working/TrainingData")
# except FileNotFoundError:
#     print("The directory does not exist.")
# except OSError as e:
#     print(f"Error: {e}")

# Segementation

In [None]:
import os
import json

import logging
import pandas as pd 
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm


from monai.data import DataLoader, ImageDataset

from monai.transforms import (
    AddChannel,
    Compose,
    Resize,
    Transform
)


from sklearn.model_selection import GroupKFold

import torch.nn.functional as F

from multiprocessing import Pool


SETTINGS = {
    "DICOM_DATA_DIR":"/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification",
    "TASK1_DIR":"/kaggle/working/TrainingData", 
    "CLASSIFICATION_RAW_JPG":"/kaggle/input/miccaibraintumorjpgdata",
    "SEGMENT_DATA_DIR":"data/processed_segmentation_data", 
    "CLASSIFICATION_DATA_DIR":"data/processed_classification_data",
    "KFOLD_PATH":"/kaggle/input/sub-file-for-tumor/data/train_stratifiedgroupkfold.csv", 
    "SEGMENT_MODEL_DIR":"/kaggle/working/models/densenet121_2d_segment",
    "CLASSIFICATION_MODEL_DIR":"models/eca_nfnet_l0_2d_classification",
    "TEMP_DATA_DIR":"temp",
    "TEST_PREDICTION_FILE":"data/test_prediction.csv"
}

IM_FOLDER_TASK1 = SETTINGS['TASK1_DIR']

RUN_FOLDS = [0]
KFOLD_PATH = SETTINGS['KFOLD_PATH']

SEED = 67
N_PROCESSES = 4

OUT_FOLDER = SETTINGS['SEGMENT_DATA_DIR']

PLANES = ['sagital', 'coronal', 'axial']
MRI_TYPES = ['t1', 't1ce', 't2', 'flair']

import tarfile




# ============ Helper functions ===========
class ScaleRange(Transform):
    def __init__(self, new_max = 255.0):
        super(ScaleRange, self).__init__()
        self.new_max = new_max
        
    def __call__(self, data):
        dmin, dmax = data.min(), data.max()
        return (data - dmin) / (dmax-dmin) * self.new_max

class ConvertToMultiChannelBasedOnBratsClasses(Transform):
    """
    Convert labels to multi channels based on brats classes:
    label 2 is the peritumoral edema
    label 4 is the GD-enhancing tumor
    label 1 is the necrotic and non-enhancing tumor core
    The possible classes are TC (Tumor core), WT (Whole tumor)
    and ET (Enhancing tumor).
    Ehancing Tumor (ET) = enhancing tumor
    Tumor Core (TC) = enhancing tumor + necrotic
    Whole Tumor (WT) = enhancing tumor + necrotic + edema    
    """

    def __call__(self, masks):
        '''This time we only use 2 label: 0 - WT and 1 - ET'''
        result = []

        # merge labels 1, 2 and 4 to construct WT
        result.append(
            np.logical_or(
                np.logical_or(masks == 1, masks == 2), masks == 4
            )
        )
        # label 4 is ET
        result.append(masks == 4)
        
        return np.stack(result, axis=0).astype(np.float32)

def get_non_0_voxels_and_masks(voxels, masks_2channels, ax=0, min_avg=0.01):
    '''Get non-empty slices from the 3D mask
        A 2D slice is considered to be empty if its mean pixel value < min_avg'''
    masks = np.logical_or(masks_2channels[0], masks_2channels[1])
    remain_axes = tuple([i for i in range(len(voxels.shape)) if i != ax])
    ax_mean = masks.mean(axis=remain_axes)
    ax_non_0_inds = ax_mean > min_avg
    if(ax==0):
        return voxels[ax_non_0_inds], masks_2channels[:, ax_non_0_inds, :, :]
    if(ax==1):
        return voxels[:,ax_non_0_inds,:], masks_2channels[:, :, ax_non_0_inds,:]
    if(ax==2):
        return voxels[:,:,ax_non_0_inds], masks_2channels[:,:,:,ax_non_0_inds]
    
def sampling_slices(non_0_voxels, non_0_masks, ax=0, keep_rate=0.1):
    '''Nearby slices are similar to each other, we use sample to only get the different ones'''
    total_slices = non_0_voxels.shape[ax]
    T = max(round(total_slices * keep_rate), 1)
    sampling_inds = np.arange(0, total_slices, T)
    
    if(ax==0):
        return non_0_voxels[sampling_inds], non_0_masks[:, sampling_inds, :, :]
    if(ax==1):
        return non_0_voxels[:, sampling_inds, :], non_0_masks[:, :, sampling_inds, :]
    if(ax==2):
        return non_0_voxels[:, :, sampling_inds], non_0_masks[:, :, :, sampling_inds]
    
    
def process_one_patient(voxels, masks, patient_id):
    '''Perform slicing 2D images and tumor masks for this patient'''
    current_list_patient_id = []
    current_list_plane = []
    current_list_mri_type = []
    current_list_slice_index = []
    current_list_file_path = []
    current_list_segfile_path = []
    
    for ax, plane in enumerate(PLANES):
        non_0_voxels, non_0_masks = get_non_0_voxels_and_masks(voxels, masks, ax=ax)
        if(non_0_voxels.shape[ax]==0):
            print(f'Cannot get any slice in patient: {patient_id}, plane: {plane} due to the masks are too small')
            continue
        sampled_non_0_voxels, sampled_non_0_masks = sampling_slices(non_0_voxels, non_0_masks, ax=ax)

        for j in range(sampled_non_0_voxels.shape[ax]):
            file_path = os.path.join(OUT_FOLDER + '/2D_slice_data/', 
                                     f'BraTS2021_{patient_id:05d}',
                                     f'BraTS2021_{patient_id:05d}_{mri_type}',
                                    f'BraTS2021_{patient_id:05d}_{mri_type}_{plane}_{j:03d}')
            seg_file_path = os.path.join(OUT_FOLDER + '/2D_slice_data/', 
                                     f'BraTS2021_{patient_id:05d}',
                                    f'BraTS2021_{patient_id:05d}_segmask',
                                    f'BraTS2021_{patient_id:05d}_segmask_{plane}_{j:03d}')

            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            os.makedirs(os.path.dirname(seg_file_path), exist_ok=True)
            
            if(ax==0):
                np.save(file_path, sampled_non_0_voxels[j])
                np.save(seg_file_path, sampled_non_0_masks[:,j])
            elif(ax==1):
                np.save(file_path, sampled_non_0_voxels[:,j,:])
                np.save(seg_file_path, sampled_non_0_masks[:,:,j,:])
            elif(ax==2):
                np.save(file_path, sampled_non_0_voxels[:,:,j])
                np.save(seg_file_path, sampled_non_0_masks[:,:,:,j])
            else:
                raise ValueError('No such ax')

            current_list_patient_id.append(patient_id)
            current_list_plane.append(plane)
            current_list_mri_type.append(mri_type)
            current_list_slice_index.append(j)
            current_list_file_path.append(file_path)
            current_list_segfile_path.append(seg_file_path)

    return current_list_patient_id, current_list_plane, current_list_mri_type,  \
            current_list_slice_index, current_list_file_path, current_list_segfile_path


def update(args):
    global list_patient_id, list_plane, list_mri_type, list_slice_index, list_file_path, list_segfile_path
    pbar.update()
    current_list_patient_id, current_list_plane, current_list_mri_type,  \
            current_list_slice_index, current_list_file_path, current_list_segfile_path = args
    
    list_patient_id += current_list_patient_id
    list_plane += current_list_plane
    list_mri_type += current_list_mri_type
    list_slice_index += current_list_slice_index
    list_file_path += current_list_file_path
    list_segfile_path += current_list_segfile_path


def error(e):
    print(e)
        
# =========================================

# ============ Read meta data =============
fold_df = pd.read_csv(KFOLD_PATH)
fold_df['pfolder'] = fold_df.BraTS21ID.map(lambda x: f'BraTS2021_{x:05d}')

PATIENT_DIRS = []
for p in os.listdir(IM_FOLDER_TASK1):
    try:
       
        int(p.split('_')[-1])
        PATIENT_DIRS.append(p)
    except:
        print('Non patient dir:', p)

df = pd.DataFrame(PATIENT_DIRS, columns=['pfolder'])


df['BraTS21ID'] = df['pfolder'].map(lambda x: int(x.split('_')[-1]))
df = df.dropna()

df = df[~df.BraTS21ID.isin(fold_df.BraTS21ID.tolist())]

for t in MRI_TYPES:
    df[f'{t}_data_path'] = df.pfolder.map(lambda x: os.path.join(IM_FOLDER_TASK1, x, x+f'_{t}.nii.gz'))
df['seg_label_path'] = df.pfolder.map(lambda x: os.path.join(IM_FOLDER_TASK1, x, x+f'_seg.nii.gz'))

# =========================================


# ============ Create a nii gz file loader ==========
transforms = Compose([ScaleRange()])

seg_transforms = Compose([ConvertToMultiChannelBasedOnBratsClasses(),
                         ])

mri_type = MRI_TYPES[0]
# Define nifti dataset, data loader
dataset = ImageDataset(image_files=df[f'{mri_type}_data_path'].tolist(),
                             seg_files = df.seg_label_path.tolist(),
                             seg_transform=seg_transforms,
                            transform=transforms
                      )
# =====================================================



# ========== Perform slicing data and mask ============

for mri_type in MRI_TYPES:
    dataset = ImageDataset(image_files=df[f'{mri_type}_data_path'].tolist(),
                                 seg_files = df.seg_label_path.tolist(),
                                   labels = df['BraTS21ID'].tolist(),
                                 seg_transform=seg_transforms,
                                transform=transforms
                          )
    
    os.makedirs(OUT_FOLDER + '/2D_slice_data/', exist_ok=True)

    list_patient_id = []
    list_plane = []
    list_mri_type = []
    list_slice_index = []
    list_file_path = []
    list_segfile_path = []

    pool = Pool(processes=N_PROCESSES)   

    iterations = range(len(dataset))
    pbar = tqdm(iterations)

    for i in iterations:
        voxels, masks, patient_id = dataset[i]
        pool.apply_async(
            process_one_patient,
            args=(voxels, masks, patient_id),
            callback=update,
            error_callback=error,
        )

    pool.close()
    pool.join()
    pbar.close()
    
out_df = pd.DataFrame({
    'BraTS21ID':list_patient_id,
    'mri_type':list_mri_type,
    'plane':list_plane,
    'slice_index':list_slice_index,
    'file_path':list_file_path,
    'segfile_path':list_segfile_path
})

out_df.to_csv(os.path.join(OUT_FOLDER, 'segment_meta.csv'))



  

In [None]:
import numpy as np
import matplotlib.pyplot as plt

img=np.load("/kaggle/input/processed-segementation/processed_segmentation_data/2D_slice_data/BraTS2021_00000/BraTS2021_00000_segmask/BraTS2021_00000_segmask_axial_000.npy")
mask=np.load("/kaggle/input/processed-segementation/processed_segmentation_data/2D_slice_data/BraTS2021_00000/BraTS2021_00000_t1/BraTS2021_00000_t1_axial_000.npy")
print(img.shape)
print(mask.shape)

plt.figure()
plt.imshow(mask)
plt.imshow(img[1,:,:], cmap="gray", alpha=0.5)



# Models

In [None]:

os.makedirs('models/densenet121_2d_segment', exist_ok=True)
url = 'https://drive.google.com/uc?id=12EVeyHI_kQlryAp6554Au4S1pt1ektnY'
output = 'models/densenet121_2d_segment/Fold0_densenet121_2d_segment.pth'
gdown.download(url, output, quiet=False)



# Utility

In [None]:
import pandas as pd
import numpy as np
import json
import re
import torch
import random
import sys
import os
import matplotlib.pyplot as plt
import logging

from tqdm import tqdm
from copy import deepcopy
from typing import List

def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def get_logger(name, path, mode='a'):
    logger = logging.getLogger(name)  

    if not logger.hasHandlers():
        # set log level
        logger.setLevel(logging.INFO)

        # define file handler and set formatter
        file_handler = logging.FileHandler(path, mode=mode)
        formatter    = logging.Formatter('%(asctime)s : %(levelname)s : %(name)s : %(message)s')
        file_handler.setFormatter(formatter)

        # add file handler to logger
        logger.addHandler(file_handler)
    
    return logger
    
    
def log_and_print(logger, obj):
    print(obj)
    logger.info(obj)    
    
def init_progress_dict(metrics):
    progress_dict = dict()
    for metric in metrics:
        progress_dict[f'train_{metric}'] = []
        progress_dict[f'valid_{metric}'] = []
    return progress_dict

def log_to_progress_dict(progress_dict, metric_dict):
    for k, v in metric_dict.items():
        progress_dict[k].append(v)
       
    return progress_dict

def save_progress(progress_dict, out_folder, out_folder_name, fold, show=False):
    metric_names = list(progress_dict.keys())
    epochs = len(progress_dict[metric_names[0]])+1
    
    # plot figure and save the progress chart
    n_cols = 4
    n_rows = int(np.ceil(len(metric_names) / 2 / n_cols))
    
    plt.figure(figsize=(7*n_cols, 7*n_rows))
    
    for i in range(0, len(metric_names), 2):
        plt.subplot(n_rows,n_cols,int(i/2+1))

        plt.plot(range(1, epochs), progress_dict[metric_names[i]])
        plt.plot(range(1, epochs), progress_dict[metric_names[i+1]])
        plt.legend([metric_names[i], metric_names[i+1]])
        plt.xlabel('Epoch')
        plt.title(f'{metric_names[i]} and {metric_names[i+1]}')

    save_name = f'training_progress_{out_folder_name}_fold{fold}'
    plt.savefig(os.path.join(out_folder, save_name+'.jpg'))

    if(show):
        plt.show()

    pd.DataFrame({'epoch':range(1, epochs), **progress_dict}).to_csv(os.path.join(out_folder, save_name+'.csv'), index=False)

    
def check_mem(cuda_device):
    devices_info = os.popen('"/usr/bin/nvidia-smi" --query-gpu=memory.total,memory.used --format=csv,nounits,noheader').read().strip().split("\n")
    total, used = devices_info[int(cuda_device)].split(',')
    print(used)
    return total,used

def occumpy_mem(cuda_device):
    total, used = check_mem(cuda_device)
    total = int(total)
    used = int(used)
    max_mem = int(total * 0.9)
    block_mem = max_mem - used
    x = torch.cuda.FloatTensor(256,1024,block_mem)
    del x

# Training

In [None]:

import os
import logging
import pandas as pd 
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import torch
from torch import nn

from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts, CosineAnnealingLR

import torch.nn.functional as F

import json
import gc
from sklearn.metrics import roc_auc_score, accuracy_score

from segmentation_models_pytorch.unetplusplus.model import UnetPlusPlus
from segmentation_models_pytorch.losses import DiceLoss
from segmentation_models_pytorch.utils.metrics import IoU

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import argparse

# parser = argparse.ArgumentParser(description='Insert some arguments')
# parser.add_argument('--gpu', type=int,
#                     help='GPU ID', default=0)
# parser.add_argument('--batch_size', type=int,
#                     help='Batch size', default=128)
# parser.add_argument('--n_workers', type=int,
#                     help='Number of parrallel workers', default=8)
# args = parser.parse_args()
argv = sys.argv[1:]  # Exclude the first argument (script name)
if '-f' in argv:
    argv = argv[:argv.index('-f')]  # Ignore everything after '-f'

# Set up the parser
parser = argparse.ArgumentParser(description='Insert some arguments')
parser.add_argument('--gpu', type=int, help='GPU ID', default=0)
parser.add_argument('--batch_size', type=int, help='Batch size', default=128)
parser.add_argument('--n_workers', type=int, help='Number of parallel workers', default=8)

# Parse the cleaned arguments
args = parser.parse_args(argv)


# with open('SETTINGS.json', 'r') as f:
SETTINGS = {
    "DICOM_DATA_DIR":"/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification",
    "TASK1_DIR":"/kaggle/working/TrainingData", 
    "CLASSIFICATION_RAW_JPG":"/kaggle/input/miccaibraintumorjpgdata",
    "SEGMENT_DATA_DIR":"/kaggle/input/segmentaed-csv", 
    "CLASSIFICATION_DATA_DIR":"data/processed_classification_data",
    "KFOLD_PATH":"/kaggle/input/sub-file-for-tumor/data/train_stratifiedgroupkfold.csv", 
    "SEGMENT_MODEL_DIR":"/kaggle/working/models/densenet121_2d_segment",
    "CLASSIFICATION_MODEL_DIR":"models/eca_nfnet_l0_2d_classification",
    "TEMP_DATA_DIR":"temp",
    "TEST_PREDICTION_FILE":"data/test_prediction.csv"
        }

FOLDER = SETTINGS['SEGMENT_DATA_DIR']
META_FILE_PATH = os.path.join(SETTINGS['SEGMENT_DATA_DIR'], 'segment_meta_groupkfold.csv')

RUN_FOLDS = [0]
SEED = 67
DIM = (128,128, 3)
# N_WORKERS = args.n_workers
BATCH_SIZE = 5
BASE_LR = 1e-2
NUM_EPOCHS = 100
PATIENT = 10
SAMPLE = None
DEVICE = torch.device(f'cuda:{args.gpu}')

PARENT_OUT_FOLDER = f'models/'    

CANDIDATES = [
    {
        'backbone_name':'densenet121',
        'ver_note':'2d_segment',
        'backbone_pretrained':None,
        'batch_size':BATCH_SIZE,
        'warm_up_epochs':5,
    },
]

import sys
# from utils.general import seed_torch, init_progress_dict, log_to_progress_dict, save_progress, log_and_print, get_logger

# seed every thing
seed_torch(SEED)

# ================= Some helper functions ====================
class BrainSegment2DDataset(torch.utils.data.Dataset):
    
    def __init__(self, csv, transforms=None):
        self.csv = csv.reset_index(drop=True)
        self.augmentations = transforms

    def __len__(self):
        return self.csv.shape[0]

    def __getitem__(self, index):
        row = self.csv.iloc[index]
        image = np.load(row['file_path']+'.npy')
        image = np.stack([image]*3, axis=-1)
        mask = np.load(row['segfile_path']+'.npy')
        mask = np.stack([mask[0], mask[1]], axis=-1)
        
        if self.augmentations:
            augmented = self.augmentations(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']
            mask = mask.permute(2,0,1)
        
        return image, mask
    

def get_train_transforms(candidate):
    dim = candidate.get('dim', DIM)
    return A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
     
            A.Resize(width=dim[1], height=dim[0], always_apply=True),
            A.Normalize(),
            ToTensorV2(p=1.0)
        ]
    )

def get_valid_transforms(candidate):
    dim = candidate.get('dim', DIM)
    return A.Compose(
        [
            A.Resize(width=dim[1], height=dim[0], always_apply=True),
            A.Normalize(),
            ToTensorV2(p=1.0)
        ]
    )

def get_model(candidate):
    model = UnetPlusPlus(
        encoder_name = candidate['backbone_name'],
        encoder_depth = 5,
        encoder_weights = None,
        classes = 2,
        activation = 'sigmoid',
    )

    weight_path = candidate.get('backbone_pretrained')
    if(weight_path is not None):
        print('Load pretrained:', weight_path)
        model.load_state_dict(torch.load(weight_path, map_location='cpu'))
        
    return model


class AverageMeter(object):
    def __init__(self):
        self.reset()
    
    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
    
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
def train_valid_fn(dataloader,model,criterion,iou_metric,optimizer=None,device='cuda:0',
                            scheduler=None,epoch=0, mode='train', scaler=None):
    '''Perform model training'''
    if(mode=='train'):
        model.train()
    elif(mode=='valid'):
        model.eval()
    else:
        raise ValueError('No such mode')
        
    loss_score = AverageMeter()
    iou_score = AverageMeter()
    
    tk0 = tqdm(enumerate(dataloader), total=len(dataloader))
    for i, batch in tk0:
        if(mode=='train'):
            optimizer.zero_grad()
            
        # input, gt
        images, gt_masks = batch
        images = images.to(DEVICE)
        gt_masks = gt_masks.to(DEVICE)

        with torch.cuda.amp.autocast():
            # prediction
            pred_masks = model(images)

            # compute loss
            loss = criterion(y_true=gt_masks, y_pred=pred_masks)
            
            # compute metric
            iou = iou_metric(pred_masks, gt_masks)
        
        if(mode=='train'):
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        
        loss_score.update(loss.detach().cpu().item(), dataloader.batch_size)
        iou_score.update(iou.detach().cpu().item(), dataloader.batch_size)
        
        if(mode=='train'):
            tk0.set_postfix(Loss_Train=loss_score.avg, IOU_Train=iou_score.avg, 
                            Epoch=epoch, LR=optimizer.param_groups[0]['lr'])
        elif(mode=='valid'):
            tk0.set_postfix(Loss_Valid=loss_score.avg, IOU_Valid=iou_score.avg, Epoch=epoch)
        
        del batch, images, gt_masks, pred_masks, loss, iou
        torch.cuda.empty_cache()
        
    if(mode=='train'):
        if(scheduler.__class__.__name__ == 'CosineAnnealingWarmRestarts'):
            scheduler.step(epoch=epoch)
        elif(scheduler.__class__.__name__ == 'ReduceLROnPlateau'):
            scheduler.step(loss_score.avg)
    
    return loss_score.avg, iou_score.avg

def dfs_freeze(module):
    for name, child in module.named_children():
        for param in child.parameters():
            param.requires_grad = False
        dfs_freeze(child)
        
def dfs_unfreeze(module):
    for name, child in module.named_children():
        for param in child.parameters():
            param.requires_grad = True
        dfs_unfreeze(child)
# ===========================================================
  
# ===========================================================
        
    
# ================ Read metadata =================
df = pd.read_csv(META_FILE_PATH)
# ================================================


# ============================ Training ==============================
for candidate in CANDIDATES:
    print(f"######################### Candidate: {candidate['backbone_name']} ############################")
    run_folds = candidate.get('run_folds', RUN_FOLDS)
    
    parent_out_folder = candidate.get('parent_out_folder', PARENT_OUT_FOLDER)
    ver_note = candidate['ver_note']
    out_folder_name = f"{candidate['backbone_name']}_{ver_note}"
    out_folder = os.path.join(parent_out_folder, out_folder_name)

    os.makedirs(out_folder, exist_ok=True)
    
    for valid_fold in run_folds:
        # Read data
        if(SAMPLE):
            df = df.sample(SAMPLE, random_state=SEED)

        train_df = df[df.fold!=valid_fold]
        valid_df = df[df.fold==valid_fold]

        print(f'\n\n================= Fold {valid_fold} ==================')
        print(f'Number of training images: {len(train_df)}. Number of valid images: {len(valid_df)}')
        print("filepath",len(valid_df))
        
        train_dataset = BrainSegment2DDataset(train_df, get_train_transforms(candidate))
        valid_dataset = BrainSegment2DDataset(valid_df, get_valid_transforms(candidate))
        
        batch_size = candidate.get('batch_size', BATCH_SIZE)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
        
        # model
        model = get_model(candidate)
        # freeze layer
        dfs_freeze(model.encoder)
        print(' -------- Start warm up process ----------')
        print('Freeze encoder')
        model.to(DEVICE)
        print()
        
        # Optimizer and scheduler
        base_lr = candidate.get('base_lr', BASE_LR)
        optim = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=BASE_LR)

        num_training_steps = NUM_EPOCHS * len(train_loader)
        lr_scheduler = ReduceLROnPlateau(optimizer=optim)

        # loss
        criterion = DiceLoss(mode='binary', from_logits=False)
        iou_metric = IoU()

        # use amp to accelerate training
        scaler = torch.cuda.amp.GradScaler()

        # Logging
        logger = get_logger(
            name = f'training_log_fold{valid_fold}.txt',
            path=os.path.join(out_folder, f'training_log_fold{valid_fold}.txt')
        )

        best_valid_loss = 9999
        best_valid_ep = 0
        patient = PATIENT

        progress_dict = init_progress_dict(['loss', 'IOU'])

        start_ep = candidate.get('warm_start_ep', 1)
        print('Start ep:', start_ep)

        # warm up epochs
        warm_up_epochs = candidate.get('warm_up_epochs', 1)

        
        for epoch in range(start_ep, NUM_EPOCHS+1):
            if (epoch==warm_up_epochs+1):
                print(' -------- Finish warm up process ----------')
                print('Unfreeze encoder')
                dfs_unfreeze(model.encoder)
                optim = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=BASE_LR)
                lr_scheduler = ReduceLROnPlateau(optimizer=optim)
                
            # =============== Training ==============
            train_loss, train_iou = train_valid_fn(train_loader,model, criterion, iou_metric,
                                                    optimizer=optim,device=DEVICE,
                                                    scheduler=lr_scheduler,epoch=epoch,mode='train',
                                                  scaler=scaler)
            valid_loss, valid_iou = train_valid_fn(valid_loader,model, criterion, iou_metric,
                                                     device=DEVICE, 
                                                     epoch=epoch,mode='valid',
                                                  scaler=scaler)

            current_lr = optim.param_groups[0]['lr']
            log_line = f'Model: {out_folder_name}. Epoch: {epoch}. '
            log_line += f'Train loss:{train_loss} - Valid loss: {valid_loss}. '
            log_line += f'Train IOU:{train_iou} - Valid IOU: {valid_iou}. '
            log_line += f'Lr: {current_lr}.'

            log_and_print(logger, log_line)

            metric_dict = {'train_loss':train_loss,'valid_loss':valid_loss,
                           'train_IOU':train_iou, 'valid_IOU':valid_iou,
                       }

            progress_dict = log_to_progress_dict(progress_dict, metric_dict)

            # plot figure and save the progress chart
            save_progress(progress_dict, out_folder, out_folder_name, valid_fold, show=False)

            if(valid_loss < best_valid_loss):
                best_valid_loss = valid_loss
                best_valid_ep = epoch
                patient = PATIENT # reset patient

                # save model
                name = os.path.join(out_folder, 'Fold%d_%s.pth'%(valid_fold, 
                                                                 out_folder_name, 
                                                                ))
                log_and_print(logger, 'Saving model to: ' + name)
                torch.save(model.state_dict(), name)
            else:
                patient -= 1
                log_and_print(logger, 'Decrease early-stopping patient by 1 due valid loss not decreasing. Patient='+ str(patient))

            if(patient == 0):
                log_and_print(logger, 'Early stopping patient = 0. Early stop')
                break

# ======================================================================




# Load Model and Predict

In [3]:
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from segmentation_models_pytorch.unetplusplus.model import UnetPlusPlus
import cv2



def get_seg_model(candidate,weight_path):
    model = UnetPlusPlus(
        encoder_name = candidate['backbone_name'],
        encoder_depth = 5,
        encoder_weights = None,
        classes = 2,
        activation = 'sigmoid',
    )
#     print(weight_path)
    
    model.load_state_dict(torch.load(weight_path, map_location='cpu'))
        
    return model

SEG_MODEL = {
        'backbone_name':'densenet121',
        'pretranied_weight':'/kaggle/input/models-for-segementation/Fold0_densenet121_2d_segment (4).pth'

    }

seg_model = get_seg_model(SEG_MODEL,SEG_MODEL['pretranied_weight'])

# print(SEG_MODEL['pretranied_weight'])
seg_model.eval()




  model.load_state_dict(torch.load(weight_path, map_location='cpu'))


UnetPlusPlus(
  (encoder): DenseNetEncoder(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace=True)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (denselayer1): _DenseLayer(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (denselayer2): _DenseLayer(
          (norm1): BatchNorm2d(96, eps=1e-05, 

In [None]:


# 3. Define the Preprocessing Pipeline
transform = transforms.Compose([
#     transforms.Resize((224, 224)),  # Adjust size as required
    transforms.ToTensor(),          # Convert to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize (if needed)
])

# 4. Load and Prepare the Input Data (e.g., Image)
image_path = "/kaggle/input/processed-segementation/processed_segmentation_data/2D_slice_data/BraTS2021_00003/BraTS2021_00003_t1/BraTS2021_00003_t1_axial_000.npy"


def preprocess_numpy_image(np_image):
    """Preprocess the NumPy image to match the model's input."""
    # Assume the input NumPy array has shape (H, W, C)
    if np_image.ndim == 3:
        np_image = np_image.transpose(2, 0, 1)  # Convert to (C, H, W)
    elif np_image.ndim == 2:
        np_image = np.expand_dims(np_image, axis=0)  # For grayscale (1, H, W)

    # Convert to float32 and normalize (if needed)
    np_image = np_image.astype(np.float32) / 255.0  # Scale to [0, 1]

    # Normalize: Adjust values based on ImageNet stats (for DenseNet)
    mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
    np_image = (np_image - mean) / std

    # Convert to tensor and add batch dimension
    tensor_image = torch.tensor(np_image, dtype=torch.float32).unsqueeze(0)  # Shape: (1, C, H, W)
    print(tensor_image.shape)
    return tensor_image

# 4. Example NumPy Image
image=np.load(image_path)

image_resized = cv2.resize(image, (128,128))

input_tensor = preprocess_numpy_image(image_resized)


# 5. Make Predictions
# with torch.no_grad():
output = seg_model(input_tensor)


In [1]:
# predicted = torch.argmax(output, dim=1).squeeze(0).numpy()  # Shape: (H, W)
# print(f"Predicted Mask Shape: {predicted.shape}")
# plt.imshow(predicted, cmap="gray")
img=np.load("/kaggle/input/processed-segementation/processed_segmentation_data/2D_slice_data/BraTS2021_00003/BraTS2021_00003_t1/BraTS2021_00003_t1_axial_000.npy")
plt.imshow(img)
plt.title("Predicted Mask")
# plt.figure()

plt.imshow(output[0,1,:,:].detach().numpy(), cmap="gray", alpha=0.45)
plt.imshow(output[0,0,:,:].detach().numpy(), cmap="jet", alpha=0.15)
plt.figure()

mask=np.load("/kaggle/input/processed-segementation/processed_segmentation_data/2D_slice_data/BraTS2021_00003/BraTS2021_00003_segmask/BraTS2021_00003_segmask_axial_000.npy")
# mask.resize(256,256)
# print(mask.shape)

plt.figure()
plt.imshow(img)
plt.imshow(mask[:,:], cmap="gray", alpha=0.5)
plt.title("Ground Truth")



NameError: name 'np' is not defined

# Classification

In [None]:
# import pandas as pd

# import numpy as np
# import matplotlib.pyplot as plt
# import cv2
# import torch
# import json
# import os
# import shutil
# from tqdm import tqdm

# import pydicom
# import glob
# import sys
# import argparse
# import time

# from multiprocessing import Pool

# from segmentation_models_pytorch.unetplusplus.model import UnetPlusPlus
# from segmentation_models_pytorch.losses import DiceLoss
# from segmentation_models_pytorch.utils.metrics import IoU


# argv = sys.argv[1:]  # Exclude the first argument (script name)
# if '-f' in argv:
#     argv = argv[:argv.index('-f')]  # Ignore everything after '-f'

# # Set up the parser
# parser = argparse.ArgumentParser(description='Insert some arguments')
# parser.add_argument('--gpu', type=int, help='GPU ID', default=0)
# parser.add_argument('--batch_size', type=int, help='Batch size', default=128)
# parser.add_argument('--n_workers', type=int, help='Number of parallel workers', default=8)

# # Parse the cleaned arguments
# args = parser.parse_args(argv)


# # with open('SETTINGS.json', 'r') as f:
# SETTINGS = {
#     "DICOM_DATA_DIR":"/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification",
#     "TASK1_DIR":"/kaggle/working/TrainingData", 
#     "CLASSIFICATION_RAW_JPG":"/kaggle/input/miccaibraintumorjpgdata/data",
#     "SEGMENT_DATA_DIR":"/kaggle/input/segmentaed-csv", 
#     "CLASSIFICATION_DATA_DIR":"/kaggle/working/classification/data/",
#     "KFOLD_PATH":"/kaggle/input/sub-file-for-tumor/data/train_stratifiedgroupkfold.csv", 
#     "SEGMENT_MODEL_DIR":"/kaggle/working/models/densenet121_2d_segment",
#     "CLASSIFICATION_MODEL_DIR":"models/eca_nfnet_l0_2d_classification",
#     "TEMP_DATA_DIR":"temp",
#     "TEST_PREDICTION_FILE":"data/test_prediction.csv"
#         }

# IM_FOLDER = SETTINGS['CLASSIFICATION_RAW_JPG']
# OUT_FOLDER = SETTINGS['CLASSIFICATION_DATA_DIR']
# SEGMENT_MODEL_DIR = '/kaggle/input/models-for-segementation/Fold0_densenet121_2d_segment (4).pth'

# DEVICE = torch.device(f'cuda:{args.gpu}')

# MRI_TYPES = ['T1w']

# DIM = (224,224,3)
# SEG_BATCH_SIZE = 32
# N_WORKERS = 4

# CANDIDATES = [
#     {
#         'backbone_name':'densenet121',
# 'model_path':f'{SEGMENT_MODEL_DIR}'
#     },
# ]

# # =============== Some helper functions ================

# def get_model(candidate):
#     model = UnetPlusPlus(
#         encoder_name = candidate['backbone_name'],
#         encoder_depth = 5,
#         encoder_weights = None,
#         classes = 2,
#         activation = 'sigmoid',
#     )

#     weight_path = candidate.get('pretrained_weight')
#     if(weight_path is not None):
#         model.load_state_dict(torch.load(weight_path, map_location='cpu'))
        
#     return model

# import albumentations as A
# from albumentations.pytorch.transforms import ToTensorV2

# def get_transform(candidate, spatial_only=False):
#     dim = candidate.get('dim', DIM)
#     list_trans = [
#                 A.Resize(width=int(dim[1]*1.2), height=int(dim[0]*1.2), always_apply=True),
#                 A.CenterCrop(width=dim[1], height=dim[0], always_apply=True),
#                 A.Normalize(), 
#                 ToTensorV2(p=1.0)
#     ]
#     return A.Compose(list_trans)

# def get_inv_transform(original_w, original_h, candidate):
#     dim = candidate.get('dim', DIM)
#     list_trans = [
#                 A.PadIfNeeded(min_height=int(dim[1]*1.2), min_width=int(dim[1]*1.2), always_apply=True),
#                 A.Resize(width=original_w, height=original_h, always_apply=True),
#     ]
#     return A.Compose(list_trans)

# def normalize_voxels(voxels):
#     _min = voxels.min()
#     _max = voxels.max()
#     new_voxels = (voxels - _min) / (_max-_min) * 255.0
#     return new_voxels

# def check_empty(img, min_avg=0.1):
#     _mean = np.where(img>0, 1, 0).mean()
#     if(_mean > min_avg):
#         return True
#     return False


# def find_largest_countours(contours):
#     max_cnt = max(contours, key=lambda cnt: cv2.contourArea(cnt))
#     return max_cnt

# def has_good_features(image, mask, area_mask_over_image_min_ratio=0.1, max_count_mask_contours=5):
#     _, image_thresh = cv2.threshold(image,1,255,cv2.THRESH_BINARY)
#     image_contours, _ = cv2.findContours(image=image_thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)
#     max_image_cnt = find_largest_countours(image_contours)
    
#     _, mask_thresh = cv2.threshold(mask,0.5,1,cv2.THRESH_BINARY)
#     mask_contours, _ = cv2.findContours(image=mask_thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)
#     count_n_mask_contours = len(mask_contours)
#     if(count_n_mask_contours == 0):
#         return False
#     max_mask_cnt = find_largest_countours(mask_contours)
    
#     area_mask_over_image_ratio = cv2.contourArea(max_mask_cnt) / cv2.contourArea(max_image_cnt)
    
#     if(area_mask_over_image_ratio > area_mask_over_image_min_ratio \
#        and count_n_mask_contours <= max_count_mask_contours):
#         return True
#     else:
#         return False
    
# def batch_predict_mask(data_loader, model):
#     batch_out = []
#     for batch_input in data_loader:
#         batch_input = batch_input.to(DEVICE)

#         batch_out.append(model(batch_input).cpu().detach().numpy())
        
#     batch_out = np.concatenate(batch_out, axis=0)
#     batch_out = (batch_out > 0.5).astype('uint8')
    
#     del batch_input
#     torch.cuda.empty_cache()
    
#     return batch_out


# class BrainSegmentationInferDataset(torch.utils.data.Dataset):
    
#     def __init__(self, all_mri_voxels, transforms):
#         self.all_mri_voxels = all_mri_voxels
#         self.augmentations = transforms

#     def __len__(self):
#         return len(self.all_mri_voxels)

#     def __getitem__(self, index):
#         image = self.all_mri_voxels[index]
#         image = np.stack([image]*3, axis=-1)
        
#         if self.augmentations:
#             augmented = self.augmentations(image=image)
#             image = augmented['image']

#         return image

# def error(e):
#     print(e)
    
# def read_and_preprocess_voxels_update(args):
#     if(args!=[]):
#         voxels, mri_type, images = args
#         global all_transformed_images, corresponding_mri_types, all_images
#         all_transformed_images += [image for image in voxels]
#         corresponding_mri_types += [mri_type]*len(voxels)
#         all_images += images

# def read_and_preprocess_voxels(patient_id, mri_type):
#     paths = glob.glob(os.path.join(IM_FOLDER, patient_id, mri_type, '*.jpg'))
#     paths = sorted(paths, key=lambda x: int(x.replace('.jpg','').split("-")[-1]))
#     positions = []
#     images = []

#     for path in paths:
#     #     print(path)
#     #     img = pydicom.dcmread(str(dcm_path))
#     #     img = img.pixel_array
#         img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
#         if(check_empty(img)):
#             images.append(img)

#     if(len(images) == 0):
#         print("Found no images in case (patient_id, mri, path):", patient_id, mri_type, paths)
#         return []

#     voxels = np.array(images)
#     voxels = normalize_voxels(voxels)  # normalize voxels to range(0,255)
# #     print(len(voxels))
#     return voxels, mri_type, images
        
    
# def sampling_one_image(patient_id, slice_index, image, out, mri_type):

#     mask_0, mask_1 = out[0], out[1]
#     inv_transforms = get_inv_transform(image.shape[1], image.shape[0], candidate)
#     mask_0_original_size = inv_transforms(image=mask_0)['image']
#     mask_1_original_size = inv_transforms(image=mask_1)['image']

#     current_image_has_good_features = has_good_features(image, mask_0_original_size, 
#                                                         area_mask_over_image_min_ratio=0.025)

#     if(not current_image_has_good_features):
#         return None

#     file_path = os.path.join(OUT_FOLDER + '/2D_slice_data/', 
#                                  f'BraTS2021_{patient_id}',
#                                  f'BraTS2021_{patient_id}_{mri_type}',
#                                 f'BraTS2021_{patient_id}_{mri_type}_{slice_index:03d}')
#     os.makedirs(os.path.dirname(file_path), exist_ok=True)

#     mask_0_original_size *= 255  # convert to 255 scale
#     mask_1_original_size *= 255
#     _3channel_data = np.stack([image, mask_0_original_size, mask_1_original_size], axis=-1)

#     np.save(file_path, _3channel_data)

#     return int(patient_id), mri_type, slice_index, file_path+'.npy'


# def sampling_one_image_update(args):
#     global list_patient_id, list_mri_type, list_slice_index, list_file_path
#     if(args is not None):
#         patient_id, mri_type, slice_index, file_path = args
#         list_patient_id.append(patient_id)
#         list_mri_type.append(mri_type)
#         list_slice_index.append(slice_index)
#         list_file_path.append(file_path)
# # =======================================================        
        
    
# # =============== Generate masks combined with image ==============

# #     shutil.rmtree(OUT_FOLDER)  # REMOVE EXISTING DIR. BECARE FULL USING THIS        
               
# candidate = CANDIDATES[0]
# model = get_model(candidate)
# model.load_state_dict(torch.load(candidate['model_path'], map_location='cpu'))
# model.to(DEVICE)

# model.eval()
# print()        
        
# list_patient_id = []
# list_slice_index = []
# list_mri_type = []
# list_file_path = []

# os.makedirs(OUT_FOLDER, exist_ok=True)
# for pi, patient_id in tqdm(enumerate(os.listdir(IM_FOLDER))):
#     fol="BraTS2021_"+patient_id
#     if(os.path.exists(os.path.join(OUT_FOLDER,"2D_slice_data",fol))):
#         continue
#     folders=os.listdir(os.path.join(IM_FOLDER,patient_id))
# #     print(folders)
#     if "T1w" not in folders:
#        continue
#     s1 = time.time()
    
#     all_transformed_images = []
#     corresponding_mri_types = []
#     all_images = []
    
#     pool = Pool(processes=N_WORKERS)   

#     for mri_type in MRI_TYPES:
#         pool.apply_async(
#             read_and_preprocess_voxels,
#             args=(patient_id, mri_type),
#             callback=read_and_preprocess_voxels_update,
#             error_callback=error,
#         )

#     pool.close()
#     pool.join()    
            
#     e1 = time.time()
    
#     s2 = time.time()
    
    
    
    
    
    
    
    
#     transform = get_transform(candidate)  # transform for segmentation input
#     seg_infer_ds = BrainSegmentationInferDataset(all_transformed_images, transform)
#     seg_infer_loader = torch.utils.data.DataLoader(seg_infer_ds, batch_size=SEG_BATCH_SIZE, shuffle=False,
#                         num_workers=N_WORKERS, pin_memory=torch.cuda.is_available())
   
#     batch_out = batch_predict_mask(seg_infer_loader, model)
    
#     e2 = time.time()
    
#     s3 = time.time()

#     # sampling slices by mask area
#     pool = Pool(processes=N_WORKERS)   
    
#     for i in range(len(all_images)):
#         image = all_images[i]
#         out = batch_out[i]
#         mri_type = corresponding_mri_types[i]
        
#         pool.apply_async(
#             sampling_one_image,
#             args=(patient_id, i, image, out, mri_type),
#             callback=sampling_one_image_update,
#             error_callback=error,
#         )
        
#     pool.close()
#     pool.join()   
    
#     del batch_out
#     torch.cuda.empty_cache()
        
#     e3 = time.time()

# #     print(f'Patial time: read time: {e1-s1}. mask pred time: {e2-s2}. sampling time: {e3-s3}')
        
# out_df = pd.DataFrame({
#     'BraTS21ID':list_patient_id,
#     'mri_type':list_mri_type,
#     'slice_index':list_slice_index,
#     'file_path':list_file_path,
# })

# out_df.to_csv(os.path.join(OUT_FOLDER, 'meta_classification.csv'), index=False)

In [None]:
# import shutil

# # Path to the folder you want to zip (inside your Kaggle environment)
# folder_to_zip = "/kaggle/working/classification/data"  # Replace 'my_folder' with your folder name

# # Output path for the zip file
# zip_file_path = "/kaggle/working/my_folder.zip"

# # Create a zip archive of the folder
# shutil.make_archive(zip_file_path.replace('.zip', ''), 'zip', folder_to_zip)

# print(f"Folder zipped successfully at: {zip_file_path}")


In [None]:
import os

import logging
import pandas as pd 
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch import nn

from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts, CosineAnnealingLR

import torch.nn.functional as F

from segmentation_models_pytorch.unetplusplus.model import UnetPlusPlus
from segmentation_models_pytorch.losses import DiceLoss
from segmentation_models_pytorch.utils.metrics import IoU

import pandas as pd
from tqdm import tqdm
import numpy as np
import torch
from torch import nn
import gc
from sklearn.metrics import roc_auc_score, accuracy_score
import json
import argparse

argv = sys.argv[1:]  # Exclude the first argument (script name)
if '-f' in argv:
    argv = argv[:argv.index('-f')]  # Ignore everything after '-f'

# Set up the parser
parser = argparse.ArgumentParser(description='Insert some arguments')
parser.add_argument('--gpu', type=int, help='GPU ID', default=0)
parser.add_argument('--batch_size', type=int, help='Batch size', default=128)
parser.add_argument('--n_workers', type=int, help='Number of parallel workers', default=8)

# Parse the cleaned arguments
args = parser.parse_args(argv)


# with open('SETTINGS.json', 'r') as f:
SETTINGS = {
    "DICOM_DATA_DIR":"/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification",
    "TASK1_DIR":"/kaggle/working/TrainingData", 
    "CLASSIFICATION_RAW_JPG":"/kaggle/input/miccaibraintumorjpgdata/data",
    "SEGMENT_DATA_DIR":"/kaggle/input/segmentaed-csv", 
    "CLASSIFICATION_DATA_DIR":"/kaggle/working/classification/data/",
    "KFOLD_PATH":"/kaggle/input/sub-file-for-tumor/data/train_stratifiedgroupkfold.csv", 
    "SEGMENT_MODEL_DIR":"/kaggle/working/models/densenet121_2d_segment",
    "CLASSIFICATION_MODEL_DIR":"models/eca_nfnet_l0_2d_classification",
    "TEMP_DATA_DIR":"temp",
    "TEST_PREDICTION_FILE":"data/test_prediction.csv"
        }

DATA_FOLDER = SETTINGS['CLASSIFICATION_DATA_DIR']
META_FILE_PATH = '/kaggle/input/pre-processeddata-class/metalfile_classification.csv'
KFOLD_FILE_PATH = SETTINGS['KFOLD_PATH']

RUN_FOLDS = [0]
MRI_TYPES = ['T1w', ]
STRIDE = 5
SEQ_LEN = 35
LSTM_HIDDEN_SIZE = 128
LSTM_LAYERS = 1
SEED = 67
DIM = (224, 224, 3)
N_WORKERS = 4
BATCH_SIZE = 8
BASE_LR = 1e-3
NUM_EPOCHS = 10
PATIENT = 10
SAMPLE = None
DEVICE = torch.device(f'cuda:{args.gpu}')

PARENT_OUT_FOLDER = 'models/'   

CANDIDATES = [
    {
        'backbone_name':'eca_nfnet_l0',
        'ver_note':'2d_classification',
        'backbone_pretrained':None,
        'batch_size':BATCH_SIZE,
        'warm_up_epochs':5,
    },
]


import sys
# from utils.general import seed_torch, init_progress_dict, log_to_progress_dict, save_progress, log_and_print, get_logger

# seed every thing
seed_torch(SEED)


def chunk_slices(list_files):
    list_files = sorted(list_files)
    chunks = []
    n_chunks = max(int(np.ceil((len(list_files) - SEQ_LEN) / STRIDE ) + 1),1)
    for i in range(n_chunks):
        s = i*STRIDE
        e = min(s+SEQ_LEN, len(list_files))
        chunks.append(list_files[s:e])
    return chunks

def expand(row):
    list_files = row['chunk_file_paths']
    return pd.DataFrame({
        'BraTS21ID':[row['BraTS21ID']]*len(list_files),
        'MGMT_value':[row['MGMT_value']]*len(list_files),
        'mri_type':[row['mri_type']]*len(list_files),
        'file_path':list_files,
        'fold':[row['fold']]*len(list_files)
    })

def get_first_value(df, col_name):
    df[col_name] = df[col_name].map(lambda x: list(x)[0])

    
def process_df_mri_type(df_mri):
    df_mri_group = df_mri.groupby('BraTS21ID').agg(list)
    df_mri_group = df_mri_group.reset_index()
    df_mri_group['chunk_file_paths'] = df_mri_group.file_path.map(chunk_slices)
    df_mri_group['chunk_count'] = df_mri_group['chunk_file_paths'].map(lambda x: len(x))
    df_mri_group['chunk_cum_count'] = df_mri_group['chunk_count'].cumsum()
    df_mri_group_expand = df_mri_group.apply(expand, axis=1).tolist()
    df_mri_group_expand = pd.concat(df_mri_group_expand)

    for col_name in ['MGMT_value', 'mri_type', 'fold']:
        get_first_value(df_mri_group_expand, col_name)
        
    return df_mri_group_expand    
    
class BrainClassification2DDataset(torch.utils.data.Dataset):
    
    def __init__(self, csv, transforms=None):
        self.csv = csv.reset_index(drop=True)
        self.augmentations = transforms

    def __len__(self):
        return self.csv.shape[0]

    def __getitem__(self, index):
        row = self.csv.iloc[index]
        list_file_path = row['file_path']
        list_images = []
        label = row['MGMT_value']
        for i, path in enumerate(list_file_path):
            image = np.load(path)
            label = row['MGMT_value']
            list_images.append(image)
                
        images = np.stack(list_images, axis=0)
        if(images.shape[0] < SEQ_LEN):
            n_pad = SEQ_LEN - images.shape[0]
            pad_matrix = np.zeros(shape=(n_pad, images.shape[1], images.shape[2], images.shape[3]))
            images = np.concatenate([images, pad_matrix], axis=0)
            
        if self.augmentations:
            images_dict = dict()
            for i in range(len(images)):
                if(i==0):
                    images_dict['image'] = images[i]
                else:
                    images_dict[f'image{i-1}'] = images[i]
            augmented = self.augmentations(**images_dict)
            
            transformed_images = []
            for i in range(len(images)):
                if(i==0):
                    transformed_images.append(augmented['image'])
                else:
                    transformed_images.append(augmented[f'image{i-1}'])
                    
            transformed_images = np.stack(transformed_images, axis=0)
            return transformed_images, torch.tensor(label)
            
        return images, torch.tensor(label)
    
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

def get_train_transforms(candidate):
    dim = candidate.get('dim', DIM)
    seq_len = candidate.get('seq_len', SEQ_LEN)
    additional_targets = {f'image{i}':'image' for i in range(SEQ_LEN-1)}
    return A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            
            A.Resize(width=dim[1], height=dim[0], always_apply=True),
            A.Normalize(),
            ToTensorV2(p=1.0)
        ],
        additional_targets=additional_targets
    )

def get_valid_transforms(candidate):
    dim = candidate.get('dim', DIM)
    additional_targets = {f'image{i}':'image' for i in range(SEQ_LEN-1)}
    return A.Compose(
        [
            A.Resize(width=dim[1], height=dim[0], always_apply=True),
            A.Normalize(),
            ToTensorV2(p=1.0)
        ],
        additional_targets=additional_targets
    )    

def dfs_freeze(module):
    for name, child in module.named_children():
        for param in child.parameters():
            param.requires_grad = False
        dfs_freeze(child)
        
def dfs_unfreeze(module):
    for name, child in module.named_children():
        for param in child.parameters():
            param.requires_grad = True
        dfs_unfreeze(child)

import timm

class BrainSequenceModelNFNet(nn.Module):
    def __init__(self, backbone_name, backbone_pretrained,
                 lstm_dim=64, lstm_layers=1, lstm_dropout=0., 
                 n_classes=1):
        super(BrainSequenceModelNFNet, self).__init__()
        self.backbone = timm.create_model(backbone_name, pretrained=False)
#         self.backbone.load_state_dict(torch.load(backbone_pretrained))
        
        lstm_inp_dim = self.backbone.head.fc.in_features
        
        self.backbone.head.fc = nn.Identity()
        
        self.lstm = nn.LSTM(lstm_inp_dim, lstm_dim, num_layers=lstm_layers, 
                            batch_first=True, bidirectional=True,
                            dropout=lstm_dropout)
        
        self.clf_head = nn.Linear(lstm_dim*2*SEQ_LEN, n_classes)
        
    def forward(self, x):
        n = x.shape[0]
        seq_length = x.shape[1]
        concat_x = torch.cat([x[i] for i in range(n)], axis=0)
        concat_x = self.backbone(concat_x)
        
        
        stacked_x = torch.stack([concat_x[i*seq_length:i*seq_length+seq_length] for i in range(n)], axis=0)
        
        seq_features, _ = self.lstm(stacked_x)
        seq_features = seq_features.reshape(n,-1)
        
        logits = self.clf_head(seq_features)
        
        return logits


class AverageMeter(object):
    def __init__(self):
        self.reset()
    
    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
    
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
def train_valid_fn(dataloader,model, criterion, scaler, optimizer=None,device='cuda:0',scheduler=None,
                   epoch=0,mode='train', metric='auc'):
    '''Perform model training'''
    if(mode=='train'):
        model.train()
    elif(mode=='valid'):
        model.eval()
    else:
        raise ValueError('No such mode')
        
    loss_score = AverageMeter()
    
    tk0 = tqdm(enumerate(dataloader), total=len(dataloader))
    all_predictions = []
    all_labels = []
    for i, batch in tk0:
        if(mode=='train'):
            optimizer.zero_grad()
            
        # input, gt
        voxels, labels = batch
        voxels = voxels.to(device)
        labels = labels.to(device).float()

        # prediction
        with torch.cuda.amp.autocast():
            logits = model(voxels)
            logits = logits.view(-1)
            probs = logits.sigmoid()
            # compute loss
            loss = criterion(logits, labels)
        
        if(mode=='train'):
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        
        loss_score.update(loss.detach().cpu().item(), dataloader.batch_size)

        # append for metric calculation
        all_predictions.append(probs.detach().cpu().numpy())
        all_labels.append(labels.detach().cpu().numpy())
        
        if(mode=='train'):
            tk0.set_postfix(Loss_Train=loss_score.avg, Epoch=epoch, LR=optimizer.param_groups[0]['lr'])
        elif(mode=='valid'):
            tk0.set_postfix(Loss_Valid=loss_score.avg, Epoch=epoch)
        
        del batch, voxels, labels, logits, probs, loss
        torch.cuda.empty_cache()

    if(mode=='train'):
        if(scheduler.__class__.__name__ == 'CosineAnnealingWarmRestarts'):
            scheduler.step(epoch=epoch)
        elif(scheduler.__class__.__name__ == 'ReduceLROnPlateau'):
            scheduler.step(loss_score.avg)

    all_predictions = np.concatenate(all_predictions)
    all_labels = np.concatenate(all_labels)
    if(metric == 'auc'):
        auc = roc_auc_score(y_true=all_labels, y_score=all_predictions)
        return loss_score.avg, auc 
    
    return loss_score.avg

    
# ============ Read metadata ==============    
df = pd.read_csv(META_FILE_PATH)
kfold_df = pd.read_csv(KFOLD_FILE_PATH)
df = df.merge(kfold_df, on='BraTS21ID')

df_flair = df[df.mri_type=='FLAIR']
df_t1 = df[df.mri_type=='T1w']
df_t1ce = df[df.mri_type=='T1wCE']
df_t2 = df[df.mri_type=='T2w']
# =========================================


# ================================ Training ==================================
for candidate in CANDIDATES:
    print(f"######################### Candidate: {candidate['backbone_name']} ############################")
    run_folds = candidate.get('run_folds', RUN_FOLDS)
    
    parent_out_folder = candidate.get('parent_out_folder', PARENT_OUT_FOLDER)
    ver_note = candidate['ver_note']

    for mri_type in MRI_TYPES:
        out_folder_name = f"{candidate['backbone_name']}_{ver_note}"
        out_folder = os.path.join(parent_out_folder, out_folder_name, mri_type)
        os.makedirs(out_folder, exist_ok=True)
    
        for valid_fold in run_folds:
            # Read data
            if(SAMPLE):
                df = df.sample(SAMPLE, random_state=SEED)
            if(mri_type != 'all'):
                df_mri = df[df.mri_type==mri_type]
            
            # process data
            df_mri = process_df_mri_type(df_mri)
                
            train_df = df_mri[df_mri.fold!=valid_fold]
            valid_df = df_mri[df_mri.fold==valid_fold]

            print(f'\n\n================= Fold {valid_fold}. MRI: {mri_type} ==================')
            print(f'Number of training samples: {len(train_df)}. Number of valid samples: {len(valid_df)}')

            # train and valid transforms
            train_transforms = get_train_transforms(candidate)
            valid_transforms = get_valid_transforms(candidate)

            # create data loader
            train_dataset =  BrainClassification2DDataset(train_df, train_transforms)
            valid_dataset = BrainClassification2DDataset(valid_df, valid_transforms)

            batch_size = candidate.get('batch_size', BATCH_SIZE)
            train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                            num_workers=N_WORKERS, pin_memory=torch.cuda.is_available())
            valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=N_WORKERS, pin_memory=torch.cuda.is_available())


            # Model
            model = BrainSequenceModelNFNet(candidate['backbone_name'], 
                                            candidate['backbone_pretrained'],
                                           lstm_dim=LSTM_HIDDEN_SIZE,lstm_layers=LSTM_LAYERS)
            model.to(DEVICE)
            print()

            warm_start_weight = candidate.get('warm_start_weight')
            if(warm_start_weight):
                print('Load warm start weight:', warm_start_weight)

            # freeze pretrained layers
#             dfs_freeze(model.backbone)
#             print(' -------- Start warm up process ----------')
#             print('Freeze backbone')
#             model = model.to(DEVICE)
#             print()


            # Optimizer and scheduler
            base_lr = candidate.get('base_lr', BASE_LR)
            optim = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=BASE_LR)

            num_training_steps = NUM_EPOCHS * len(train_loader)
            lr_scheduler = ReduceLROnPlateau(optimizer=optim, factor=0.67, patience=3, verbose=True)

            # loss
            criterion = nn.BCEWithLogitsLoss()


            # use amp to accelerate training
            scaler = torch.cuda.amp.GradScaler()

            # Logging
            logger = get_logger(
                name = f'training_log_fold{valid_fold}.txt',
                path=os.path.join(out_folder, f'training_log_fold{valid_fold}.txt')
            )

            best_valid_loss = 9999
            best_valid_ep = 0
            patient = PATIENT

            progress_dict = init_progress_dict(['loss', 'AUC'])

            start_ep = candidate.get('warm_start_ep', 1)
            print('Start ep:', start_ep)

            # warm up epochs
            warm_up_epochs = candidate.get('warm_up_epochs', 0)


            for epoch in range(start_ep, NUM_EPOCHS+1):
                if(epoch==warm_up_epochs+1):
                    print(' -------- Finish warm up process ----------')
                    print('Unfreeze backbone')
                    dfs_unfreeze(model.backbone)
                    optim = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=BASE_LR)
                    lr_scheduler = ReduceLROnPlateau(optimizer=optim)

                # =============== Training ==============
                train_loss, train_auc = train_valid_fn(train_loader,model,criterion, scaler, optimizer=optim,device=DEVICE,
                                            scheduler=lr_scheduler,epoch=epoch,mode='train', metric='auc')
                valid_loss, valid_auc = train_valid_fn(valid_loader,model,criterion, scaler, device=DEVICE,epoch=epoch,mode='valid', metric='auc')

                current_lr = optim.param_groups[0]['lr']
                log_line = f'Model: {out_folder_name}. Epoch: {epoch}. '
                log_line += f'Train loss:{train_loss} - Valid loss: {valid_loss}. '
                log_line += f'Train AUC:{train_auc} - Valid AUC: {valid_auc}. '
                log_line += f'Lr: {current_lr}.'

                log_and_print(logger, log_line)

                metric_dict = {'train_loss':train_loss,'valid_loss':valid_loss,
                               'train_AUC':train_auc, 'valid_AUC':valid_auc,
                           }

                progress_dict = log_to_progress_dict(progress_dict, metric_dict)

                # plot figure and save the progress chart
                save_progress(progress_dict, out_folder, out_folder_name, valid_fold, show=False)

                if(valid_loss < best_valid_loss):
                    best_valid_loss = valid_loss
                    best_valid_ep = epoch
                    patient = PATIENT # reset patient

                    # save model
                    name = os.path.join(out_folder, f'%s_Fold%d_%s.pth'%(mri_type, valid_fold, out_folder_name))
                    log_and_print(logger, 'Saving model to: ' + name)
                    torch.save(model.state_dict(), name)
                else:
                    patient -= 1
                    log_and_print(logger, 'Decrease early-stopping patient by 1 due valid loss not decreasing. Patient='+ str(patient))

                if(patient == 0):
                    log_and_print(logger, 'Early stopping patient = 0. Early stop')
                    break
# =============================================================================

In [None]:
# img=np.load("/kaggle/working/classification/data/2D_slice_data/BraTS2021_00185/BraTS2021_00185_T1w/BraTS2021_00185_T1w_030.npy")
# plt.imshow(img)
# plt.figure()