In [76]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pydicom
from tensorflow.keras.preprocessing.image import load_img,img_to_array
from sklearn.metrics import f1_score,classification_report,confusion_matrix
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from tensorflow.keras.models import Sequential,save_model
from sklearn.ensemble import VotingClassifier

In [14]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df=pd.read_csv('../input/merge-labels-and-paths/merge_labels_and_paths.csv')
df.drop(['files_num'],axis=1,inplace=True)
df.head()

Unnamed: 0,BraTS21ID,MGMT_value,best_flair_images,best_t1w_images,best_t1wce_images,best_t2w_images
0,0,1,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...
1,2,1,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...
2,3,0,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...
3,5,1,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...
4,6,1,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...


In [64]:
def load_images(list_paths):
    images=[]
    for path in list_paths:
        img=pydicom.dcmread(path)
        img=img_to_array(img.pixel_array)
        img=tf.image.resize(img,(256,256))
        images.append(img)
    return np.array(images)

In [15]:
flair_images=load_images(df['best_flair_images'])
t1w_images=load_images(df['best_t1w_images'])
t1wce_images=load_images(df['best_t1wce_images'])
t2w_images=load_images(df['best_t2w_images'])

In [16]:
print(flair_images.shape,type(flair_images))

(582, 256, 256, 1) <class 'numpy.ndarray'>


In [17]:
labels=df['MGMT_value']
labels = labels.astype(np.int32)
print(labels.shape)
labels.head()

(582,)


0    1
1    1
2    0
3    1
4    1
Name: MGMT_value, dtype: int32

In [39]:
def prepare_train_val_test_input_pipelines(images,labels):
    AUTOTUNE=tf.data.AUTOTUNE
    ds=tf.data.Dataset.from_tensor_slices((images,labels)).shuffle(500)
    size=len(ds)
    #train_set
    train_ds=ds.take(int(size*0.7))
    test_val_ds=ds.skip(int(size*0.7))
    #validation_set
    val_ds=test_val_ds.take(int(0.5*len(test_val_ds)))
    #test_set
    test_ds=test_val_ds.skip(int(0.5*len(test_val_ds)))
    #input_pipelines
    train_ds=train_ds.map(lambda x,y:(x/256,y)).shuffle(200).batch(1).cache().prefetch(AUTOTUNE)
    val_ds=val_ds.map(lambda x,y:(x/256,y)).batch(1).cache().prefetch(AUTOTUNE)
    test_ds=test_ds.map(lambda x,y:(x/256,y)).batch(1).cache().prefetch(AUTOTUNE)
    
    return train_ds,val_ds,test_ds

In [60]:
def train(train_ds,val_ds,epochs=15):
    print('preparing...')
    cnn=Sequential([
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same',input_shape=(256,256,1)),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same'),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same'),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same'),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Flatten(),
        Dense(1,activation='sigmoid')
    ])
    print('compiling...')
    cnn.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
    print('training...')
    cnn.fit(train_ds,validation_data=val_ds,epochs=epochs,verbose=2)
    print('end training...')
    return cnn

In [94]:
def train_v2(train_ds,val_ds,epochs=20):
    print('preparing...')
    cnn=Sequential([
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same',input_shape=(256,256,1)),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Dropout(0.4),
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same'),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Dropout(0.4),
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same'),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Dropout(0.4),
        Conv2D(64,kernel_size=(3,3),activation='relu',padding='same'),
        MaxPooling2D(pool_size=(2,2),strides=(1,1),padding='valid'),
        Dropout(0.4),
        Flatten(),
        Dense(1,activation='sigmoid')
    ])
    print('compiling...')
    cnn.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
    print('training...')
    cnn.fit(train_ds,validation_data=val_ds,epochs=epochs,verbose=2)
    print('end training...')
    return cnn

In [129]:
def score(model,test_ds):
    y_pred=[np.int(np.round(i)) for i in model.predict(test_ds)]
    y_true=[]
    for im,y in test_ds:
        y_true.append(y)
    y_true=np.array(y_true)
    score=f1_score(y_true,y_pred)
    return score

In [40]:
train_flair_ds,val_flair_ds,test_flair_ds=prepare_train_val_test_input_pipelines(flair_images,labels)

In [14]:
#flair_cnn=train(train_flair_ds,val_flair_ds)

In [15]:
#score(flair_cnn,test_flair_ds)

In [96]:
flair_cnn_v2=train_v2(train_flair_ds,val_flair_ds)

preparing...
compiling...
training...
Epoch 1/20
407/407 - 6s - loss: 2.7415 - accuracy: 0.5184 - val_loss: 0.7243 - val_accuracy: 0.5632
Epoch 2/20
407/407 - 6s - loss: 0.7055 - accuracy: 0.5700 - val_loss: 0.6873 - val_accuracy: 0.5862
Epoch 3/20
407/407 - 6s - loss: 0.6801 - accuracy: 0.6093 - val_loss: 0.6726 - val_accuracy: 0.6437
Epoch 4/20
407/407 - 6s - loss: 0.6404 - accuracy: 0.6339 - val_loss: 0.6157 - val_accuracy: 0.7126
Epoch 5/20
407/407 - 6s - loss: 0.5907 - accuracy: 0.7052 - val_loss: 0.6287 - val_accuracy: 0.7126
Epoch 6/20
407/407 - 6s - loss: 0.6072 - accuracy: 0.7371 - val_loss: 0.5350 - val_accuracy: 0.7471
Epoch 7/20
407/407 - 6s - loss: 0.4515 - accuracy: 0.7985 - val_loss: 0.5632 - val_accuracy: 0.7011
Epoch 8/20
407/407 - 6s - loss: 0.7273 - accuracy: 0.8084 - val_loss: 0.5145 - val_accuracy: 0.7356
Epoch 9/20
407/407 - 6s - loss: 0.4318 - accuracy: 0.8059 - val_loss: 0.4326 - val_accuracy: 0.7241
Epoch 10/20
407/407 - 6s - loss: 0.3379 - accuracy: 0.8624 - v

In [97]:
score(flair_cnn_v2,test_flair_ds)

0.8478260869565216

In [20]:
#train_t1w_ds,val_t1w_ds,test_t1w_ds=prepare_train_val_test_input_pipelines(t1w_images,labels)
#t1w_cnn=train(train_t1w_ds,val_t1w_ds,30)
#score(t1w_cnn,test_t1w_ds)

In [112]:
train_t1w_ds,val_t1w_ds,test_t1w_ds=prepare_train_val_test_input_pipelines(t1w_images,labels)
t1w_cnn_v2=train_v2(train_t1w_ds,val_t1w_ds)

preparing...
compiling...
training...
Epoch 1/20
407/407 - 7s - loss: 5.7909 - accuracy: 0.5209 - val_loss: 0.6949 - val_accuracy: 0.4368
Epoch 2/20
407/407 - 6s - loss: 0.7015 - accuracy: 0.5971 - val_loss: 0.6954 - val_accuracy: 0.4368
Epoch 3/20
407/407 - 6s - loss: 0.6919 - accuracy: 0.6093 - val_loss: 0.6919 - val_accuracy: 0.4828
Epoch 4/20
407/407 - 6s - loss: 0.7101 - accuracy: 0.6216 - val_loss: 0.6870 - val_accuracy: 0.4713
Epoch 5/20
407/407 - 6s - loss: 0.6499 - accuracy: 0.6265 - val_loss: 0.6392 - val_accuracy: 0.5977
Epoch 6/20
407/407 - 6s - loss: 0.6032 - accuracy: 0.6708 - val_loss: 0.6028 - val_accuracy: 0.6437
Epoch 7/20
407/407 - 6s - loss: 0.5613 - accuracy: 0.6830 - val_loss: 0.6030 - val_accuracy: 0.6552
Epoch 8/20
407/407 - 6s - loss: 0.4807 - accuracy: 0.7494 - val_loss: 0.5792 - val_accuracy: 0.6667
Epoch 9/20
407/407 - 6s - loss: 0.5002 - accuracy: 0.7617 - val_loss: 0.4985 - val_accuracy: 0.7816
Epoch 10/20
407/407 - 6s - loss: 0.4058 - accuracy: 0.8280 - v

In [115]:
score(t1w_cnn_v2,test_t1w_ds)

0.9574468085106383

In [121]:
train_t1wce_ds,val_t1wce_ds,test_t1wce_ds=prepare_train_val_test_input_pipelines(t1wce_images,labels)
t1wce_cnn_v2=train_v2(train_t1wce_ds,val_t1wce_ds)

preparing...
compiling...
training...
Epoch 1/20
407/407 - 6s - loss: 6.9982 - accuracy: 0.5111 - val_loss: 0.6927 - val_accuracy: 0.5632
Epoch 2/20
407/407 - 6s - loss: 0.6764 - accuracy: 0.5356 - val_loss: 0.6691 - val_accuracy: 0.6207
Epoch 3/20
407/407 - 6s - loss: 0.6439 - accuracy: 0.6167 - val_loss: 0.6534 - val_accuracy: 0.6207
Epoch 4/20
407/407 - 6s - loss: 0.5654 - accuracy: 0.7150 - val_loss: 0.6622 - val_accuracy: 0.6437
Epoch 5/20
407/407 - 6s - loss: 0.6404 - accuracy: 0.7469 - val_loss: 0.5634 - val_accuracy: 0.6667
Epoch 6/20
407/407 - 6s - loss: 0.4632 - accuracy: 0.7838 - val_loss: 0.6982 - val_accuracy: 0.7931
Epoch 7/20
407/407 - 6s - loss: 0.6538 - accuracy: 0.7912 - val_loss: 0.7289 - val_accuracy: 0.6782
Epoch 8/20
407/407 - 6s - loss: 0.3346 - accuracy: 0.8354 - val_loss: 0.6874 - val_accuracy: 0.7701
Epoch 9/20
407/407 - 6s - loss: 0.5428 - accuracy: 0.8329 - val_loss: 0.6157 - val_accuracy: 0.7701
Epoch 10/20
407/407 - 6s - loss: 0.3502 - accuracy: 0.8600 - v

TypeError: 'numpy.float64' object is not callable

In [124]:
print('score test :',score(t1wce_cnn_v2,test_t1wce_ds))

score test : 0.8817204301075269


In [None]:
train_t2w_ds,val_t2w_ds,test_t2w_ds=prepare_train_val_test_input_pipelines(t2w_images,labels)
t2w_cnn_v2=train_v2(train_t2w_ds,val_t2w_ds)
print('test score',score(t2w_cnn,test_t2w_ds))

In [None]:
print('test score',score(t2w_cnn,test_t2w_ds))

In [130]:
def final_prediction(test_flair,test_t1w,test_t1wce,test_t2w):
    y_flair_pred=[np.int(np.round(i)) for i in flair_cnn_v2.predict(test_flair)]
    y_t1w_pred=[np.int(np.round(i)) for i in t1w_cnn_v2.predict(test_t1w)]
    y_t1wce_pred=[np.int(np.round(i)) for i in t1wce_cnn_v2.predict(test_t1wce)]
    y_t2w_pred=[np.int(np.round(i)) for i in t2w_cnn.predict(test_t1w)]
    sum_y=[y_flair_pred[i] + y_t1w_pred[i] + y_t1wce_pred[i] + y_t2w_pred[i] for i in range(len(y_flair_pred)) ]
    y_pred=[np.int(np.round(i/4)) for i in sum_y]
    return y_pred

In [131]:
def final_prediction_2(test_flair,test_t1w,test_t1wce,test_t2w):
    y_flair_pred=flair_cnn_v2.predict(test_flair)
    y_t1w_pred=t1w_cnn_v2.predict(test_t1w)
    y_t1wce_pred=t1wce_cnn_v2.predict(test_t1wce)
    y_t2w_pred=t2w_cnn_v2.predict(test_t1w)
    sum_y=[y_flair_pred[i] + y_t1w_pred[i] + y_t1wce_pred[i] + y_t2w_pred[i] for i in range(len(y_flair_pred)) ]
    y_pred=[np.int(np.round(i/4)) for i in sum_y]
    return y_pred

In [132]:
y_pred=final_prediction_2(test_flair_ds,test_t1w_ds,test_t1wce_ds,test_t2w_ds)

In [133]:
y_true=[]
for im,y in test_flair_ds:
    y_true.append(y)
y_true=np.array(y_true)
score=f1_score(y_true,y_pred)
score

0.711111111111111

In [51]:
print(classification_report(y_true,y_pred))

              precision    recall  f1-score   support

           0       0.49      0.72      0.58        39
           1       0.65      0.41      0.50        49

    accuracy                           0.55        88
   macro avg       0.57      0.56      0.54        88
weighted avg       0.58      0.55      0.54        88



In [None]:
sample_submission_df=pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')
sample_submission_df.head()

In [None]:
def bytes_to_str(byte_filename):
    return str(byte_filename).split('\'')[1]

In [None]:
def best_image(ds_paths):
    
    s=0
    filep=''
    for filename in ds_paths.as_numpy_iterator():
        img=pydicom.dcmread(bytes_to_str(filename))
        sp=np.array(img.pixel_array).sum()
        if(sp>s):
            s=sp
            filep=bytes_to_str(filename)
    #best_img=pydicom.dcmread(filep)
    return filep

In [None]:
def image_selection(test_ds_files):
    i=0
    files_num=[]
    best_flair_images=[]
    best_t1w_images=[]
    best_t1wce_images=[]
    best_t2w_images=[]
    for filename in test_ds_files.as_numpy_iterator():
        path=bytes_to_str(filename)
        FLAIR_PATH=path+'/FLAIR/*'
        T1w_PATH=path+'/T1w/*'
        T1wCE_PATH=path+'/T1wCE/*'
        T2w_PATH=path+'/T2w/*'
        ds_files_FLAIR=tf.data.Dataset.list_files(FLAIR_PATH)
        ds_files_T1w=tf.data.Dataset.list_files(T1w_PATH)
        ds_files_T1wCE=tf.data.Dataset.list_files(T1wCE_PATH)
        ds_files_T2w=tf.data.Dataset.list_files(T2w_PATH)
        file_num=path.split('/')[-1]
        files_num.append(int(file_num))
        best_flair_images.append(best_image(ds_files_FLAIR))
        best_t1w_images.append(best_image(ds_files_T1w))
        best_t1wce_images.append(best_image(ds_files_T1wCE))
        best_t2w_images.append(best_image(ds_files_T2w))
    return files_num,best_flair_images,best_t1w_images,best_t1wce_images,best_t2w_images

In [None]:
ds_test_files=tf.data.Dataset.list_files('../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/*')
files_num,best_flair_images,best_t1w_images,best_t1wce_images,best_t2w_images=image_selection(ds_test_files.batch(1))

In [None]:
dict={
    'files_num':files_num,
    'best_flair_images':best_flair_images,
    'best_t1w_images':best_t1w_images,
    'best_t1wce_images':best_t1wce_images,
    'best_t2w_images':best_t2w_images
}
df=pd.DataFrame(dict)
df.head()
df.to_csv('test_best_images_paths.csv',index=False)

In [None]:
test_best_images_paths=pd.read_csv('./test_best_images_paths.csv')
test_best_images_paths.head()

In [None]:
ds = pydicom.dcmread(test_best_images_paths['best_flair_images'][0])
plt.imshow(ds.pixel_array, cmap=plt.cm.bone) 

In [None]:
ds=pd.merge(sample_submission_df,test_best_images_paths,left_on=['BraTS21ID'],right_on=['files_num'])
ds.head()

In [None]:
ds=ds.dropna(axis=0)
ds.drop(['files_num'],axis=1,inplace=True)
ds.to_csv('test_merge_labels_and_paths.csv',index=False)
ds.info()

In [None]:
sub_flair_images=load_images(df['best_flair_images'])
sub_t1w_images=load_images(df['best_t1w_images'])
sub_t1wce_images=load_images(df['best_t1wce_images'])
sub_t2w_images=load_images(df['best_t2w_images'])

In [None]:
print(sub_flair_images.shape,type(sub_flair_images))

In [None]:
def prepare_submission_input_pipeline(images):
    AUTOTUNE=tf.data.AUTOTUNE
    test_ds=tf.data.Dataset.from_tensor_slices(images)
    test_ds=test_ds.map(lambda x:x/256).batch(1).cache().prefetch(AUTOTUNE)
    return test_ds

In [None]:
submission_pipeline_flair=prepare_submission_input_pipeline(sub_flair_images)
submission_pipeline_t1w=prepare_submission_input_pipeline(sub_t1w_images)
submission_pipeline_t1wce=prepare_submission_input_pipeline(sub_t1wce_images)
submission_pipeline_t2w=prepare_submission_input_pipeline(sub_t2w_images)

In [None]:
y_pred_submittion=final_prediction(submission_pipeline_flair,submission_pipeline_t1w,submission_pipeline_t1wce,submission_pipeline_t2w)

In [None]:
sample_submission_df['MGMT_value']=y_pred_submittion

In [None]:
sample_submission_df.to_csv('submission.csv',index=False)

In [None]:
sample_submission_df.head()