In [1]:
import time
from os import listdir, path

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

In [2]:
p = r'output_8bit_new_Filter_Reduced/reduced_dataset'

In [3]:
data = []
for id in listdir(p):
    list = []
    list.append(id)
    f_path = path.join(p,id)
    
    for modality in listdir(f_path):
        m_path = path.join(f_path,modality)
        count = len(listdir(m_path))
        list.append(int(count))
    data.append(list)    

In [4]:
df = pd.DataFrame(data, columns=['patient_id', 'flair', 't1w', 't1wCE', 't2w'])

In [5]:
df.describe()

Unnamed: 0,flair,t1w,t1wCE,t2w
count,585.0,585.0,585.0,585.0
mean,84.692308,69.998291,84.962393,64.261538
std,74.736416,45.096301,40.890791,48.902702
min,0.0,0.0,10.0,0.0
25%,32.0,20.0,59.0,32.0
50%,35.0,96.0,98.0,35.0
75%,188.0,101.0,103.0,101.0
max,254.0,203.0,203.0,250.0


In [6]:
df.to_csv('metadata.csv',index=False)

In [2]:
df = pd.read_csv('metadata.csv')
df

Unnamed: 0,patient_id,flair,t1w,t1wCE,t2w
0,123,0,0,15,14
1,516,193,20,96,100
2,110,14,45,45,14
3,454,16,96,96,16
4,784,18,50,18,17
...,...,...,...,...,...
580,513,91,167,169,110
581,797,16,17,17,17
582,613,184,96,95,100
583,132,65,32,32,32


In [3]:
flair = []
t1w = []
t1wce = []
t2w = []
for idx, row in df.iterrows():
    if row['flair'] == 0:
        flair.append(row['patient_id'])
    if row['t1w'] == 0:
        t1w.append(row['patient_id'])
    if row['t1wCE'] == 0:
        t1wce.append(row['patient_id'])
    if row['t2w'] == 0:
        t2w.append(row['patient_id'])

print(flair)
print(t1w)
print(t1wce)
print(t2w)

[123]
[123]
[]
[109, 709]


In [4]:
print(df['flair'].max())
print(df['t1w'].max())
print(df['t1wCE'].max())
print(df['t2w'].max())

254
203
203
250


In [5]:
print(df['flair'].min())
print(df['t1w'].min())
print(df['t1wCE'].min())
print(df['t2w'].min())

0
0
10
0


# prepare the dataset

In [2]:
data = pd.read_csv('train_labels.csv')
X = data['BraTS21ID'].values
Y = data['MGMT_value'].values

In [3]:
kfold = []
x_train = []
x_test = []
y_train =[]
y_test = []
for _ in range(10):
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, shuffle=True, stratify=Y)
    kfold.append([_+1, xtrain, xtest, ytrain, ytest])
    x_train.append(xtrain)
    x_test.append(xtest)
    y_train.append(ytrain)
    y_test.append(ytest)
np.save('folds/xtrain.npy', x_train)
np.save('folds/xtest.npy', x_test)
np.save('folds/ytrain.npy', y_train)
np.save('folds/ytest.npy', y_test)

In [42]:
train = np.load('folds/xtrain.npy', allow_pickle=True)
train

array([[758,  61, 746, ..., 690, 759, 628],
       [222, 687, 656, ..., 246, 481, 739],
       [376, 112, 615, ..., 819, 113, 140],
       ...,
       [329, 622, 830, ..., 478, 839, 321],
       [714, 383,  19, ..., 169, 301, 740],
       [233,   8, 154, ..., 805, 367, 654]])

In [45]:
test = np.load('folds/ytrain.npy', allow_pickle=True)
test

# Deleting the patient ids of empty folders

In [2]:
data = pd.read_csv('train_labels.csv')
X = data['BraTS21ID'].values
Y = data['MGMT_value'].values

In [3]:
X.shape


(585,)

In [4]:
Y.shape

(585,)

In [5]:
np.where(X==109)

(array([71]),)

In [6]:
np.where(X==123)

(array([81]),)

In [7]:
np.where(X==709)

(array([488]),)

In [8]:
X = np.delete(X, np.where(X == 109))
X = np.delete(X, np.where(X == 123))
X = np.delete(X, np.where(X == 709))

Y = np.delete(Y, [71, 81, 488])

In [9]:
X.shape

(582,)

In [10]:
Y.shape

(582,)

# split the dataset into kfolds

In [11]:
kfold = []
x_train = []
x_test = []
y_train =[]
y_test = []
for _ in range(10):
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, shuffle=True, stratify=Y)
    kfold.append([_+1, xtrain, xtest, ytrain, ytest])
    x_train.append(xtrain)
    x_test.append(xtest)
    y_train.append(ytrain)
    y_test.append(ytest)
np.save('folds/new_folds/xtrain.npy', x_train)
np.save('folds/new_folds/xtest.npy', x_test)
np.save('folds/new_folds/ytrain.npy', y_train)
np.save('folds/new_folds/ytest.npy', y_test)

In [12]:
train = np.load('folds/new_folds/xtrain.npy', allow_pickle=True)
train

array([[  88,  390,  140, ...,  266,  456,   78],
       [  31,  273,  395, ...,  707,  656,  150],
       [  43,  777,  445, ...,  547,  318,  285],
       ...,
       [ 661,  791,  587, ...,  250,  690,  144],
       [ 400,  250,  667, ...,  739,  328,  389],
       [1010,  124,  794, ...,  373,  383,  397]])

(array([2, 3, 4, 5, 6, 7, 8]), array([399, 268, 464, 216,  96, 150, 378]))