In [23]:
import os
import sys
import re
import random
import pandas as pd
import numpy as np
import cv2

import time

import sklearn
from sklearn import model_selection as sk_model_selection
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

import matplotlib.pyplot as plt

In [24]:
# --- Experiment configuration ---
# Seed passed as random_state to the stratified K-fold split.
SEED = 42
# Fold indices iterated by the per-fold evaluation loops.
FOLD = range(5)
# MRI sequence names; each one is a sub-directory / file prefix under PREDICTION_PATH.
MRI_TYPES = ['flair', 't1', 't1ce', 't2']
# Root directory of the per-sequence, per-fold prediction CSVs.
PREDICTION_PATH = './predictions/ET_90_90_90'
# CSV holding the 'BraTS21ID' and 'MGMT_value' label columns.
LABEL_PATH = './train_labels.csv'

In [25]:
def get_train_valid_split(label_path):
    """Load the label table and build the 5-fold stratified split.

    Reads ``label_path`` as CSV (IDs parsed as strings, labels as ints),
    removes the rows for cases '00109', '00123' and '00709', and splits the
    remaining rows with a shuffled ``StratifiedKFold`` seeded by ``SEED``.

    Parameters
    ----------
    label_path : str
        Path to a CSV with 'BraTS21ID' and 'MGMT_value' columns.

    Returns
    -------
    tuple
        ``(ids, labels, splits)``: the arrays of the 'BraTS21ID' and
        'MGMT_value' columns after the exclusion, and a list with one
        ``(train_idx, valid_idx)`` pair of index arrays per fold.
    """
    labels_df = pd.read_csv(
        label_path,
        dtype={'BraTS21ID': 'str', 'MGMT_value': 'int'},
    )

    # Keep every row whose ID is not one of the three excluded cases.
    is_excluded = labels_df['BraTS21ID'].isin(['00109', '00123', '00709'])
    labels_df = labels_df.loc[~is_excluded].reset_index(drop=True)

    case_ids = labels_df['BraTS21ID'].values
    targets = labels_df['MGMT_value'].values

    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
    fold_indices = list(splitter.split(case_ids, targets))
    return case_ids, targets, fold_indices


def average_predictions(mri_types,fold):
    """Ensemble one fold by averaging the per-sequence predicted probabilities.

    For every entry of ``mri_types`` the file
    ``{PREDICTION_PATH}/{mri_type}/{mri_type}_fold{fold}.csv`` is read and its
    'MGMT_value' column is averaged row by row across sequences. The averaged
    probabilities are scored against the validation labels of ``fold`` taken
    from ``get_train_valid_split(LABEL_PATH)``.

    NOTE(review): nothing here checks that the rows of the prediction files
    are in the same order as the validation indices of the split, or that the
    files agree with each other on row order — confirm against whatever wrote
    the CSVs.

    Parameters
    ----------
    mri_types : sequence of str
        Sequence names to ensemble; must contain at least one entry.
    fold : int
        Fold index, used both in the file names and to select the split.

    Returns
    -------
    tuple
        ``(df_average, auc, f1, recall, precision)`` where ``df_average`` has
        the columns 'BraTS21ID' and 'MGMT_value' (the averaged probability).
        F1, recall and precision use the labels obtained by thresholding the
        averaged probability at ``> 0.5``.
    """
    first_df = pd.read_csv(f'{PREDICTION_PATH}/{mri_types[0]}/{mri_types[0]}_fold{fold}.csv')
    id_column = first_df['BraTS21ID'].values
    # Work on a private float copy so the accumulation neither mutates the
    # loaded frame nor depends on pandas index alignment (rows are combined
    # by position, the same way majority_vote combines them).
    prob_sum = first_df['MGMT_value'].values.astype(float)

    for mri_type in mri_types[1:]:
        df = pd.read_csv(f'{PREDICTION_PATH}/{mri_type}/{mri_type}_fold{fold}.csv')
        prob_sum += df['MGMT_value'].values

    # Divide by the number of sequences actually ensembled rather than a
    # hard-coded 4, so the mean stays correct for any subset of sequences.
    probs = prob_sum / len(mri_types)
    df_average = pd.DataFrame({'BraTS21ID':id_column,'MGMT_value':probs})

    _,y,SPLIT = get_train_valid_split(LABEL_PATH)
    _,valid_idx = SPLIT[fold]
    y_valid = y[valid_idx]

    auc = roc_auc_score(y_valid, probs)
    preds = [1 if x > 0.5 else 0 for x in probs]
    f1 = f1_score(y_valid, preds)
    # Named `recall`/`precision` (not `re`/`pr`) so the imported `re` module
    # is not shadowed inside this function.
    recall = recall_score(y_valid, preds)
    precision = precision_score(y_valid, preds)

    return df_average,auc,f1,recall,precision

### Average Predictions

In [26]:
def five_fold_result(ensemble_fn=None, mri_types=None, folds=None):
    """Summarise an ensemble's metrics across folds as 'mean (std)' strings.

    Called with no arguments this evaluates ``average_predictions`` on
    ``MRI_TYPES`` for every fold in ``FOLD``. The optional parameters let the
    same summary be reused for another ensemble function instead of
    re-defining this function.

    Parameters
    ----------
    ensemble_fn : callable, optional
        Called as ``ensemble_fn(mri_types, fold)``; must return a 5-tuple whose
        last four items are AUC, F1, recall and precision. Defaults to
        ``average_predictions``.
    mri_types : sequence of str, optional
        Passed through to ``ensemble_fn``. Defaults to ``MRI_TYPES``.
    folds : iterable, optional
        Fold identifiers to evaluate. Defaults to ``FOLD``.

    Returns
    -------
    dict
        Keys 'AUC', 'F1', 'Recall', 'Precision'; each value is the string
        ``'<mean> (<std>)'`` with both numbers rounded to 3 decimals. The
        standard deviation is ``np.std`` with its default arguments.
    """
    # Defaults are resolved at call time so this definition does not depend on
    # the order in which the notebook cells defining them were executed.
    if ensemble_fn is None:
        ensemble_fn = average_predictions
    if mri_types is None:
        mri_types = MRI_TYPES
    if folds is None:
        folds = FOLD

    # Insertion order of this dict fixes the column order of the summary.
    per_fold = {'AUC': [], 'F1': [], 'Recall': [], 'Precision': []}
    for fold in folds:
        _, auc, f1, recall, precision = ensemble_fn(mri_types, fold)
        per_fold['AUC'].append(auc)
        per_fold['F1'].append(f1)
        per_fold['Recall'].append(recall)
        per_fold['Precision'].append(precision)

    return {
        metric: f'{round(np.mean(values),3)} ({round(np.std(values),3)})'
        for metric, values in per_fold.items()
    }

# One-row table of the cross-fold 'mean (std)' metric strings returned by
# five_fold_result(); the trailing expression renders it as the cell output.
avg_df = pd.DataFrame(data=five_fold_result(), index=[0])
avg_df.head()

Unnamed: 0,AUC,F1,Recall,Precision
0,0.605 (0.052),0.651 (0.059),0.732 (0.116),0.592 (0.037)


In [27]:
def majority_vote(mri_types,fold):
    """Ensemble one fold by majority vote over the per-sequence predictions.

    For every entry of ``mri_types`` the file
    ``{PREDICTION_PATH}/{mri_type}/{mri_type}_fold{fold}.csv`` is read and its
    'MGMT_value' column is stored, by row position, as a column named after
    the sequence. Each sequence votes positive when its probability is
    ``> 0.5``. A case is labelled 1 when more than half of the sequences vote
    positive; on an exact tie it is labelled 1 only if the mean probability
    across sequences is ``> 0.5``; otherwise it is labelled 0.

    The votes are scored against the validation labels of ``fold`` taken from
    ``get_train_valid_split(LABEL_PATH)``. The AUC is computed from the hard
    0/1 votes, not from a continuous score.

    NOTE(review): nothing here checks that the rows of the prediction files
    are in the same order as the validation indices of the split — confirm
    against whatever wrote the CSVs.

    Parameters
    ----------
    mri_types : sequence of str
        Sequence names to ensemble; must contain at least one entry.
    fold : int
        Fold index, used both in the file names and to select the split.

    Returns
    -------
    tuple
        ``(full_df, auc, f1, recall, precision)`` where ``full_df`` has the
        'BraTS21ID' column plus one probability column per sequence.
    """
    first_df = pd.read_csv(f'{PREDICTION_PATH}/{mri_types[0]}/{mri_types[0]}_fold{fold}.csv')
    full_df = first_df[['BraTS21ID']].copy()
    full_df[f'{mri_types[0]}'] = first_df['MGMT_value'].values

    for mri_type in mri_types[1:]:
        df = pd.read_csv(f'{PREDICTION_PATH}/{mri_type}/{mri_type}_fold{fold}.csv')
        full_df[f'{mri_type}'] = df['MGMT_value'].values

    # Vote over the sequences the caller passed in (the parameter is no longer
    # overwritten by a hard-coded list), accumulating column by column so the
    # floating-point addition order matches a per-row running sum.
    n_types = len(mri_types)
    prob_sum = np.zeros(len(full_df))
    positive_count = np.zeros(len(full_df), dtype=int)
    for mri_type in mri_types:
        column = full_df[f'{mri_type}'].to_numpy()
        prob_sum += column
        positive_count += column > 0.5

    # Compare 2*count with n_types so "more than half" and "exactly half" need
    # no division; for four sequences this is count > 2 and count == 2.
    clear_majority = 2 * positive_count > n_types
    tie = 2 * positive_count == n_types
    tie_goes_positive = tie & (prob_sum / n_types > 0.5)
    major = np.where(clear_majority | tie_goes_positive, 1, 0)

    _,y,SPLIT = get_train_valid_split(LABEL_PATH)
    _,valid_idx = SPLIT[fold]
    y_valid = y[valid_idx]

    auc = roc_auc_score(y_valid, major)
    f1 = f1_score(y_valid, major)
    # Named `recall`/`precision` (not `re`/`pr`) so the imported `re` module
    # is not shadowed inside this function.
    recall = recall_score(y_valid, major)
    precision = precision_score(y_valid, major)

    return full_df,auc,f1,recall,precision

### Majority Vote

In [28]:
def five_fold_result(ensemble_fn=None, mri_types=None, folds=None):
    """Summarise an ensemble's metrics across folds as 'mean (std)' strings.

    Called with no arguments this evaluates ``majority_vote`` on ``MRI_TYPES``
    for every fold in ``FOLD``. The optional parameters let the same summary
    be reused for another ensemble function instead of re-defining this
    function.

    Parameters
    ----------
    ensemble_fn : callable, optional
        Called as ``ensemble_fn(mri_types, fold)``; must return a 5-tuple whose
        last four items are AUC, F1, recall and precision. Defaults to
        ``majority_vote``.
    mri_types : sequence of str, optional
        Passed through to ``ensemble_fn``. Defaults to ``MRI_TYPES``.
    folds : iterable, optional
        Fold identifiers to evaluate. Defaults to ``FOLD``.

    Returns
    -------
    dict
        Keys 'AUC', 'F1', 'Recall', 'Precision'; each value is the string
        ``'<mean> (<std>)'`` with both numbers rounded to 3 decimals. The
        standard deviation is ``np.std`` with its default arguments.
    """
    # Defaults are resolved at call time so this definition does not depend on
    # the order in which the notebook cells defining them were executed.
    if ensemble_fn is None:
        ensemble_fn = majority_vote
    if mri_types is None:
        mri_types = MRI_TYPES
    if folds is None:
        folds = FOLD

    # Insertion order of this dict fixes the column order of the summary.
    per_fold = {'AUC': [], 'F1': [], 'Recall': [], 'Precision': []}
    for fold in folds:
        _, auc, f1, recall, precision = ensemble_fn(mri_types, fold)
        per_fold['AUC'].append(auc)
        per_fold['F1'].append(f1)
        per_fold['Recall'].append(recall)
        per_fold['Precision'].append(precision)

    return {
        metric: f'{round(np.mean(values),3)} ({round(np.std(values),3)})'
        for metric, values in per_fold.items()
    }

# One-row table of the cross-fold 'mean (std)' metric strings returned by
# five_fold_result(); the trailing expression renders it as the cell output.
maj_df = pd.DataFrame(data=five_fold_result(), index=[0])
maj_df.head()

Unnamed: 0,AUC,F1,Recall,Precision
0,0.575 (0.053),0.639 (0.073),0.719 (0.136),0.582 (0.046)
