# Importing Libraries

In [1]:
import os
import glob

import pandas as pd
import numpy as np
from pathlib import Path

import random
from tqdm.notebook import tqdm
import pydicom # Handle MRI images

import cv2  # OpenCV - https://docs.opencv.org/master/d6/d00/tutorial_py_root.html

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers

In [2]:
# Print the installed TensorFlow version.
print(tf.__version__)

# Loading Data

In [3]:
# Root directory of the competition data.
data_dir = Path('../input/rsna-miccai-brain-tumor-radiogenomic-classification/')

# MRI series types; get_all_image_paths only accepts one of these names.
mri_types = ["FLAIR", "T1w", "T2w", "T1wCE"]

train_df = pd.read_csv(data_dir / "train_labels.csv")

# The test patient IDs come from the sample submission. Parse the CSV once and
# give test_df its own copy so the two frames stay independent of each other.
sample_submission = pd.read_csv(data_dir / "sample_submission.csv")
test_df = sample_submission.copy()

In [4]:
def load_dicom(path, size = 224):
    '''
    Reads one DICOM file and returns its pixel data as an 8-bit image resized
    to (size, size).

    The pixel data is divided by its maximum (skipped when the maximum is 0 to
    avoid dividing by zero), multiplied by 255 and cast to uint8. The minimum
    is NOT subtracted first, so this is peak scaling rather than a true
    min-max normalisation.

    NOTE(review): if a file holds negative pixel values the uint8 cast wraps
    around -- confirm whether that can occur for this dataset.

    Parameters
    ----------
    path : path of the DICOM file to read.
    size : edge length, in pixels, passed to cv2.resize for both dimensions.

    Returns
    -------
    The array returned by cv2.resize for the scaled uint8 image.
    '''
    # dcmread is the supported reader; pydicom.read_file is a deprecated alias
    # of it that was removed in pydicom 3.0.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    peak = np.max(data)
    if peak != 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return cv2.resize(data, (size, size))

def get_all_image_paths(brats21id, image_type, folder='train'): 
    '''
    Returns an array with the selected image paths of one MRI type for one patient ID.

    The files are ordered by the integer that follows the last "-" in the path
    (with the final four characters, i.e. the extension, removed). Only the
    middle half of that ordering (from 25% to 75%) is kept, taking every third
    file, or every file when the patient has fewer than 10 images.
    '''
    assert image_type in mri_types

    def slice_number(file_path):
        # "<...>-<n>.ext" -> n
        return int(file_path[:-4].split("-")[-1])

    series_dir = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/%s/" % folder,
        str(brats21id).zfill(5),
        image_type,
    )
    slice_paths = glob.glob(os.path.join(series_dir, "*"))
    slice_paths.sort(key=slice_number)

    total = len(slice_paths)
    first = int(total * 0.25)
    last = int(total * 0.75)
    # Very short series are kept dense; longer ones are subsampled.
    step = 1 if total < 10 else 3

    return np.array(slice_paths[first:last:step])

def get_all_images(brats21id, image_type, folder='train', size=225):
    '''
    Loads every image selected by get_all_image_paths for one patient and MRI
    type, each passed through load_dicom with the given size.

    Returns a plain Python list with one image per selected path, in path order.
    '''
    images = []
    for image_path in get_all_image_paths(brats21id, image_type, folder):
        images.append(load_dicom(image_path, size))
    return images

def _pack_patients(per_patient):
    '''
    Packs one image list per patient into a single NumPy array.

    Patients usually have different numbers of slices. Calling np.array on such
    a ragged list raises ValueError on NumPy >= 1.24 (older versions only
    warned), so the ragged case is stored explicitly as a 1-D object array
    whose elements are the per-patient lists. When every patient has the same
    number of slices a regular array is returned, exactly as before.
    '''
    slice_counts = {len(images) for images in per_patient}
    if len(slice_counts) <= 1:
        return np.array(per_patient)

    packed = np.empty(len(per_patient), dtype=object)
    for idx, images in enumerate(per_patient):
        packed[idx] = images
    return packed


def _load_split(df, image_type, folder, image_size):
    '''
    Loads the images of every patient listed in df['BraTS21ID'] from `folder`.

    Returns (images, patient_ids), both ordered like the rows of df.
    '''
    patient_ids = [int(pid) for pid in df['BraTS21ID']]
    per_patient = [
        get_all_images(pid, image_type, folder, image_size)
        for pid in tqdm(patient_ids)
    ]
    return _pack_patients(per_patient), np.array(patient_ids)


def get_all_data_for_train(image_type, image_size=32):
    '''
    Loads the training images of one MRI type for every patient in train_df.

    Parameters
    ----------
    image_type : MRI series name forwarded to get_all_images.
    image_size : size forwarded to get_all_images for each image.

    Returns
    -------
    (X, y, train_ids): the per-patient images, the 'MGMT_value' labels and the
    integer patient IDs, all in train_df row order.
    '''
    X, train_ids = _load_split(train_df, image_type, 'train', image_size)
    y = np.array(train_df['MGMT_value'])
    return X, y, train_ids

def get_all_data_for_test(image_type, image_size=32):
    '''
    Loads the test images of one MRI type for every patient in test_df.

    Parameters
    ----------
    image_type : MRI series name forwarded to get_all_images.
    image_size : size forwarded to get_all_images for each image.

    Returns
    -------
    (X, test_ids): the per-patient images and the integer patient IDs, in
    test_df row order.
    '''
    return _load_split(test_df, image_type, 'test', image_size)


In [5]:
# Load the T1wCE series for both splits, each image resized to 64 pixels per side.
X_train, y_train, trainidt = get_all_data_for_train(image_type='T1wCE', image_size=64)
X_test, testidt = get_all_data_for_test(image_type='T1wCE', image_size=64)

In [6]:
# get_all_data_for_train already returns an ndarray, so this only makes a copy.
X_train= np.array(X_train)
# Shown as the cell output.
X_train.shape

In [7]:
# get_all_data_for_test already returns an ndarray, so this only makes a copy.
X_test= np.array(X_test)
# Shown as the cell output.
X_test.shape

# Loading the Trained model

In [9]:
# Load the previously trained network whose hidden layer is reused below as a
# feature extractor. `keras` is already imported in the import cell at the top
# of the notebook, so it is not imported again here.
# NOTE(review): judging by the file name this is the 64-pixel T1wCE model -- confirm.
model = keras.models.load_model('../input/64-t1wce/final_model_64.h5')

In [10]:
# List the layers of the loaded model; the layer name used below is taken from here.
model.summary()

# Create a new model that outputs the trained model's hidden layer

In [13]:
# Name of the layer of `model` whose output is used as the feature vector.
layer_name = 'dense_2'
# Same input as the trained model, but stopping at that layer instead of the final output.
model2= keras.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

In [14]:
# Check that the truncated model ends at the chosen hidden layer.
model2.summary()

### The RNN needs input of shape (batch, sequence length, features): here the sequence length is the number of images in one 3D DICOM series and the features are the 64-dimensional output of the hidden layer

Get the hidden-layer predictions, collect them in a list, and reshape them as needed to feed the RNN.

In [15]:
# Hidden-layer features for every training image, grouped per patient.
# All images of a patient go through ONE predict() call: calling predict()
# once per image pays Keras' per-call overhead for every single slice.
FC_train = []

for x in X_train:
    if len(x) == 0:
        # No images for this patient: keep an empty entry so FC_train stays
        # aligned with y_train.
        FC_train.append([])
        continue
    # Stack the 2-D images and add a trailing channel axis, giving one batch
    # of shape (n_images, height, width, 1).
    batch = np.expand_dims(np.stack(x), axis=-1)
    feats = model2.predict(batch, verbose=0)
    # Keep the layout the later cells rely on: a plain Python list holding one
    # array with a leading axis of length 1 per image (what a single-image
    # predict() call returns), since the padding cell appends to these lists.
    FC_train.append([feat[np.newaxis, ...] for feat in feats])

In [16]:
# Hidden-layer features for every test image, grouped per patient.
# (Despite the name, FC_val holds the features of X_test.)
# All images of a patient go through ONE predict() call: calling predict()
# once per image pays Keras' per-call overhead for every single slice.
FC_val = []

for x in X_test:
    if len(x) == 0:
        # No images for this patient: keep an empty entry so FC_val stays
        # aligned with the test IDs.
        FC_val.append([])
        continue
    # Stack the 2-D images and add a trailing channel axis, giving one batch
    # of shape (n_images, height, width, 1).
    batch = np.expand_dims(np.stack(x), axis=-1)
    feats = model2.predict(batch, verbose=0)
    # Keep the layout the later cells rely on: a plain Python list holding one
    # array with a leading axis of length 1 per image (what a single-image
    # predict() call returns), since the padding cell appends to these lists.
    FC_val.append([feat[np.newaxis, ...] for feat in feats])

In [19]:
# Sanity check of the nested structure:
# number of patients
print(len(FC_train))
# number of feature vectors (images) for the first patient
print(len(FC_train[0]))
# shape of a single feature vector
print(FC_train[0][0].shape)

In [20]:
# All-zero (1, 64) float32 vector, appended as padding to sequences shorter than the window size
temp = np.zeros(shape=(1, 64), dtype=np.float32)

In [21]:
# Padding with window size 64

In [22]:
# Bring every test sequence to exactly 64 entries: longer sequences keep only
# their first 64 feature vectors, shorter ones are padded at the end with the
# zero vector `temp`.
# A new list is built per patient so the lists inside FC_val are not modified.
new_emb_val = []
for f in FC_val:
    fixed = list(f[:64])
    fixed.extend([temp] * (64 - len(fixed)))
    new_emb_val.append(fixed)
    
            

In [23]:
# Bring every training sequence to exactly 64 entries: longer sequences keep
# only their first 64 feature vectors, shorter ones are padded at the end with
# the zero vector `temp`.
# A new list is built per patient so the lists inside FC_train are not modified.
new_emb_train = []
for f in FC_train:
    fixed = list(f[:64])
    fixed.extend([temp] * (64 - len(fixed)))
    new_emb_train.append(fixed)
    
            

# Converting to arrays and getting the desired shape for the RNN

In [24]:
# After padding, every patient has 64 entries, so the nested lists stack into
# regular arrays (expected shape (n_patients, 64, 1, 64) if each feature
# vector is (1, 64) like `temp` -- TODO confirm with the shapes printed below).
new_emb_val = np.array(new_emb_val)
new_emb_train = np.array(new_emb_train)

In [25]:
# Shape before the length-1 axis is removed.
new_emb_val.shape

In [26]:
# Remove axis 2 (the length-1 axis each feature vector carries) so the arrays
# become (n_patients, time steps, features).
# NOTE: not safe to run twice -- np.squeeze raises if axis 2 no longer has
# length 1, which would be the case after the first run.
new_emb_val = np.squeeze(new_emb_val, axis=2)
new_emb_train = np.squeeze(new_emb_train, axis=2)

In [27]:
# Shape after the length-1 axis has been removed.
new_emb_val.shape

In [None]:
 # Shape of the training sequences fed to the RNN.
 # NOTE(review): this line carries a stray leading space -- consider removing it.
 new_emb_train.shape

In [None]:
# One-hot encode the labels for the 2-unit softmax output of the RNN.
# The ndim check keeps the cell safe to re-run: applying to_categorical to
# labels that are already one-hot would encode them a second time and give
# y_train the wrong shape. num_classes pins the width to the 2 output units.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes=2)

# Creating Model

In [29]:
# Model 1: two stacked bidirectional LSTMs followed by a 2-way softmax
tf.random.set_seed(42)
model_rnn = keras.Sequential()
# Input per sample: 64 time steps with 64 features each.
model_rnn.add(layers.Bidirectional(layers.LSTM(16, return_sequences=True), input_shape=(64, 64)))
# The second recurrent layer returns only its final state.
model_rnn.add(layers.Bidirectional(layers.LSTM(8)))
model_rnn.add(layers.Dense(2))
model_rnn.add(layers.Activation('softmax'))
roc_auc = keras.metrics.AUC(name='roc_auc', curve='ROC')
model_rnn.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=[roc_auc],
)


# Compiling and training model

In [33]:
# Train for 50 epochs on the padded feature sequences. With validation_split,
# Keras holds out the last 20% of the samples (selected before shuffling) as
# the validation set.
model_rnn.fit(new_emb_train, y_train, validation_split=0.2, epochs=50)

# Save the model

In [34]:
# Write the trained RNN to final.h5 in the working directory.
model_rnn.save("final.h5")

# Prediction and Creating submission CSV

In [35]:
# Softmax output for every test patient: one row per patient, one column per class.
# (The original cell ran predict/argmax twice; once is enough.)
y_pred = model_rnn.predict(new_emb_val)

# Hard label = index of the more probable class.
# NOTE(review): the model is monitored with ROC AUC. If the submission is
# scored the same way, the class-1 probability (y_pred[:, 1]) would keep the
# ranking information that argmax discards -- confirm what is intended.
pred = np.argmax(y_pred, axis=1)

# Column 0: test patient IDs, column 1: predicted label.
result = pd.DataFrame(testidt)
result[1] = pred
pred

In [36]:
# Label the two columns of the result frame.
result.columns = ['BraTS21ID', 'MGMT_value']
# Replace the ID column with the one from the sample submission.
result['BraTS21ID'] = sample_submission['BraTS21ID']

# Round each prediction to one decimal place, then write the submission file.
result['MGMT_value'] = result['MGMT_value'].map(lambda value: round(value * 10) / 10)
result.to_csv('submission.csv', index=False)
result

# Other models

In [None]:
# Disabled alternative model (Conv1D + GRU), kept only as a string literal so
# it is never executed. To try it, remove the surrounding triple quotes.
# NOTE(review): it fits on `new_emb`, a name not defined in the visible cells;
# `new_emb_train` is presumably what was meant -- confirm before enabling.
"""model1 = keras.Sequential()
model1.add(keras.layers.Conv1D(filters=32, kernel_size=7, activation='relu', input_shape=(64, 64)) )
model1.add(keras.layers.MaxPool1D(pool_size=5))
model1.add(keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu'))
model1.add(keras.layers.GRU(32, dropout=0.1, recurrent_dropout=0.5))
model1.add(keras.layers.Dense(2))
model1.add(keras.layers.Activation('softmax'))
roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')
model1.compile(
        loss="categorical_crossentropy", optimizer="adam", metrics=[roc_auc])
model1.summary()
model1.fit(new_emb, y_train, validation_split=0.2, epochs=50)
"""