- Data source: https://www.kaggle.com/mateuszbuda/lgg-mri-segmentation

# IMPORT LIBRARIES AND DATASETS

In [None]:
# Project layout and training configuration
import os
from pathlib import Path

# The notebook is expected to run from notebooks/, so the project root
# (which holds data/ and models/) is one level above the working directory.
BASE_DIR = Path.cwd().parent
DATA_DIR = BASE_DIR.joinpath('data')
MODELS_DIR = BASE_DIR.joinpath('models')

# Create both folders up front; folders that already exist are left untouched.
for _project_dir in (DATA_DIR, MODELS_DIR):
    _project_dir.mkdir(exist_ok=True)

# Training configuration constants
BATCH_SIZE, EPOCHS = 16, 50
LEARNING_RATE = 1e-4
IMAGE_SIZE = (256, 256)

# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import zipfile
import cv2
from skimage import io
import tensorflow as tf
from tensorflow.python.keras import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from IPython.display import display
from tensorflow.keras import backend as K
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, roc_auc_score,RocCurveDisplay, precision_score, f1_score
import random

# Use matplotlib's default style and render figures inline in the notebook
# (the %matplotlib line is an IPython magic and only works inside Jupyter/IPython).
plt.style.use('default')
%matplotlib inline

In [None]:
# Google Colab code removed - running locally
# Data should be in the 'data' directory relative to the notebook

In [None]:
# Google Colab directory change removed - running locally
# No directory change is needed: paths are resolved from BASE_DIR (the parent of the working directory)


In [None]:
# Read the image/mask label table from the data directory, using the first
# CSV column as the index, then preview the first ten rows.
labels_csv = DATA_DIR / 'route_label.csv'
brain_df = pd.read_csv(labels_csv, index_col=0)
brain_df.head(10)

# RESNET50 CLASSIFIER MODEL 

https://arxiv.org/pdf/1512.03385.pdf

In [None]:
# Inspect the dataframe dimensions (rows, columns); nothing is dropped here
brain_df.shape

In [None]:
# flow_from_dataframe with class_mode="categorical" expects string class labels,
# so turn every entry of the mask column into its string form.
brain_df['mask'] = brain_df['mask'].map(str)

In [None]:
# Print column dtypes and non-null counts to confirm the mask column conversion
brain_df.info()

In [None]:
# Hold out 15% of the rows as the test set; the fixed seed keeps the split reproducible
from sklearn.model_selection import train_test_split

split_frames = train_test_split(brain_df, random_state=42, test_size=0.15)
train, test = split_frames

In [None]:
# Persist both splits as CSV files in the current working directory
for _csv_name, _split_frame in (('train.csv', train), ('test.csv', test)):
    _split_frame.to_csv(_csv_name)

In [None]:

# Generator for the training dataframe: rescales pixel values by 1/255,
# reserves 15% of the rows it is given as a validation subset, and applies
# light random augmentation (rotation, shifts, shear, zoom, horizontal flip).
augmentation_options = dict(
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.05,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.15,
    **augmentation_options,
)


In [None]:
# Arguments shared by all three dataframe iterators: image paths are resolved
# relative to BASE_DIR, labels come from the 'mask' column as one-hot vectors.
common_flow_args = dict(
    directory=str(BASE_DIR),
    x_col='image_path',
    y_col='mask',
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
)

# Training and validation iterators are two subsets of the same `train` dataframe
train_generator = datagen.flow_from_dataframe(
    dataframe=train,
    subset="training",
    shuffle=True,
    **common_flow_args,
)

valid_generator = datagen.flow_from_dataframe(
    dataframe=train,
    subset="validation",
    shuffle=True,
    **common_flow_args,
)

# Test images are only rescaled, and are not shuffled so that predictions
# stay in the same order as the rows of `test`
test_datagen = ImageDataGenerator(rescale=1./255.)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test,
    shuffle=False,
    **common_flow_args,
)


In [None]:
# ResNet50 backbone with ImageNet weights and without its classification top,
# fed by a 256x256 RGB input tensor
image_input = Input(shape=(256, 256, 3))
basemodel = ResNet50(include_top=False, weights='imagenet', input_tensor=image_input)

In [None]:
# Print the layer-by-layer summary of the ResNet50 backbone
basemodel.summary()

In [None]:
# Freeze the pretrained backbone so that only the new classification head is trained.
# The flag must be set on each layer object: assigning to `layers` (the imported
# tensorflow.keras.layers module) leaves every ResNet50 weight trainable.
for layer in basemodel.layers:
    layer.trainable = False

In [None]:
# Classification head stacked on the backbone output:
# average pooling -> flatten -> 3 x (Dense(256, relu) + Dropout(0.3)) -> 2-way softmax
headmodel = AveragePooling2D(pool_size=(4, 4))(basemodel.output)
headmodel = Flatten(name='flatten')(headmodel)
for _ in range(3):
    headmodel = Dense(256, activation="relu")(headmodel)
    headmodel = Dropout(0.3)(headmodel)
headmodel = Dense(2, activation='softmax')(headmodel)

# Full classifier: backbone input through to the softmax output
model = Model(inputs=basemodel.input, outputs=headmodel)

In [None]:
# Print the summary of the full classifier (backbone + head), including parameter counts
model.summary()

In [None]:
# Configure training: Adam optimizer (default settings), categorical
# cross-entropy for the one-hot labels, accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

In [None]:
# Stop training once val_loss has failed to decrease for 20 consecutive epochs
earlystopping = EarlyStopping(monitor='val_loss', patience=20, mode='min', verbose=1)

# Write the model to disk only when it improves on the best one seen so far
best_model_path = MODELS_DIR / "classifier-resnet-model2.keras"
checkpointer = ModelCheckpoint(filepath=str(best_model_path), save_best_only=True, verbose=1)


In [None]:
# Initial single-epoch training pass. Step counts are derived from BATCH_SIZE
# (the value the generators were built with) rather than a hard-coded 16, so
# they stay correct if the batch size is changed.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=1,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // BATCH_SIZE,
    callbacks=[checkpointer, earlystopping],
)

In [None]:
# Continue training, checkpointing the best model of this round as model3.
checkpointer = ModelCheckpoint(filepath=str(MODELS_DIR / "classifier-resnet-model3.keras"), verbose=1, save_best_only=True)

# Resume from model3 when a previous run of this cell already produced it;
# on a fresh run that file does not exist yet, so fall back to the model2
# checkpoint written by the initial training pass.
resume_path = MODELS_DIR / "classifier-resnet-model3.keras"
if not resume_path.exists():
    resume_path = MODELS_DIR / "classifier-resnet-model2.keras"
model.load_weights(str(resume_path))

# Step counts use BATCH_SIZE so they match the generators' batch size.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=100,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // BATCH_SIZE,
    callbacks=[checkpointer, earlystopping],
)

In [None]:

# Fine-tuning round: restore the model3 checkpoint weights and recompile
# before continuing training.
model.load_weights(str(MODELS_DIR / "classifier-resnet-model3.keras"))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Save the model as model4 only when it improves on the best one seen this round
checkpointer = ModelCheckpoint(filepath=str(MODELS_DIR / "classifier-resnet-model4.keras"), verbose=1, save_best_only=True)

# Cut the learning rate by 10x whenever val_loss stalls for 5 epochs.
# No trailing comma after the call: it would turn lr_reduce into a one-element
# tuple, which is not a valid entry for the callbacks list.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_lr=1e-11)

# Step counts use BATCH_SIZE so they match the generators' batch size.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=100,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // BATCH_SIZE,
    callbacks=[checkpointer, earlystopping, lr_reduce],
)

MINI CHALLENGE #5:
- Change the network architecture by adding or removing dense layers, neurons, or dropout.
- Print the model summary and compare the total number of trainable parameters between the original and the new model.

In [None]:

# Fine-tuning round: restore the model4 checkpoint weights and recompile
# before continuing training.
model.load_weights(str(MODELS_DIR / "classifier-resnet-model4.keras"))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Save the model as model5 only when it improves on the best one seen this round
checkpointer = ModelCheckpoint(filepath=str(MODELS_DIR / "classifier-resnet-model5.keras"), verbose=1, save_best_only=True)

# Cut the learning rate by 10x whenever val_loss stalls for 5 epochs.
# No trailing comma after the call: it would turn lr_reduce into a one-element
# tuple, which is not a valid entry for the callbacks list.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_lr=1e-12)

# Step counts use BATCH_SIZE so they match the generators' batch size.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=100,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // BATCH_SIZE,
    callbacks=[checkpointer, earlystopping, lr_reduce],
)

In [None]:
# Fine-tuning round: restore the model5 checkpoint weights and recompile
# before continuing training.
model.load_weights(str(MODELS_DIR / "classifier-resnet-model5.keras"))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Save the model as model6 only when it improves on the best one seen this round
checkpointer = ModelCheckpoint(filepath=str(MODELS_DIR / "classifier-resnet-model6.keras"), verbose=1, save_best_only=True)

# Cut the learning rate by 10x whenever val_loss stalls for 5 epochs.
# No trailing comma after the call: it would turn lr_reduce into a one-element
# tuple, which is not a valid entry for the callbacks list.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_lr=1e-12)

# Step counts use BATCH_SIZE so they match the generators' batch size.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=100,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // BATCH_SIZE,
    callbacks=[checkpointer, earlystopping, lr_reduce],
)

In [None]:
# Fine-tuning round starting from a saved model file.
# NOTE(review): no visible cell writes classifier-resnet-model7.keras (the
# previous round checkpoints to model6) — confirm where model7 comes from.
# Until then, use model7 when it exists and fall back to model6 otherwise.
resume_path = MODELS_DIR / "classifier-resnet-model7.keras"
if not resume_path.exists():
    resume_path = MODELS_DIR / "classifier-resnet-model6.keras"
model = load_model(str(resume_path))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Save the model as model8 only when it improves on the best one seen this round
checkpointer = ModelCheckpoint(filepath=str(MODELS_DIR / "classifier-resnet-model8.keras"), verbose=1, save_best_only=True)

# Cut the learning rate by 10x whenever val_loss stalls for 5 epochs.
# No trailing comma after the call: it would turn lr_reduce into a one-element
# tuple, which is not a valid entry for the callbacks list.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_lr=1e-12)

# Step counts use BATCH_SIZE so they match the generators' batch size.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=100,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // BATCH_SIZE,
    callbacks=[checkpointer, earlystopping, lr_reduce],
)

In [None]:
# Fine-tuning round: reload the saved model8 file and recompile before
# continuing training.
model = load_model(str(MODELS_DIR / "classifier-resnet-model8.keras"))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Save the model as model9 only when it improves on the best one seen this round
checkpointer = ModelCheckpoint(filepath=str(MODELS_DIR / "classifier-resnet-model9.keras"), verbose=1, save_best_only=True)

# Cut the learning rate by 10x whenever val_loss stalls for 5 epochs.
# No trailing comma after the call: it would turn lr_reduce into a one-element
# tuple, which is not a valid entry for the callbacks list.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_lr=1e-12)

# Step counts use BATCH_SIZE so they match the generators' batch size.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=100,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // BATCH_SIZE,
    callbacks=[checkpointer, earlystopping, lr_reduce],
)

# TRAINED MODEL TESTING

In [None]:
# Load the trained classifier from the models directory
trained_model_path = MODELS_DIR / "classifier-resnet-model9.keras"
model = load_model(str(trained_model_path))


In [None]:
# Predict class probabilities for every test image. No `steps` argument is
# passed, so the whole generator is consumed; the previous `n // 16` step
# count skipped the final partial batch and left those images unevaluated.
test_predict = model.predict(test_generator, verbose=1)

In [None]:
# Turn each probability row into the string form of its highest-scoring class
# index, so predictions are comparable with the string labels in the mask column.
predicted_labels = [str(np.argmax(class_probs)) for class_probs in test_predict]
predict = np.asarray(predicted_labels).astype("object")

In [None]:
# Ground-truth labels, cut to the number of predictions in case the generator
# yielded fewer predictions than there are rows in the test dataframe
n_predictions = len(predict)
original = np.asarray(test['mask'])[:n_predictions]
len(original)

In [None]:
# Report overall accuracy, then F1 / precision / recall with "1" as the positive class
print("accuracy_score: ", accuracy_score(original, predict))

positive_class_metrics = (
    ("f1_score: ", f1_score),
    ("precision_score: ", precision_score),
    ("recall_score: ", recall_score),
)
for metric_label, metric_fn in positive_class_metrics:
    print(metric_label, metric_fn(original, predict, pos_label="1"))

In [None]:
from sklearn.metrics import classification_report

# `original` and `predict` hold the string labels "0" and "1" at this point,
# so the requested labels must be strings too; integer labels do not match
# any of the values being scored.
report = classification_report(original, predict, labels=["0", "1"])
print(report)

In [None]:
# Draw the confusion matrix of true vs. predicted labels as an annotated heatmap
from sklearn.metrics import confusion_matrix

plt.figure(figsize=(7, 7))
cm = confusion_matrix(original, predict)
sns.heatmap(cm, annot=True, fmt="d");

In [None]:
# Convert the string labels to integers for the ROC utilities
original = original.astype("int")
predict = predict.astype("int")

# Build the ROC curve from the predicted probability of class index 1 rather
# than from the hard 0/1 predictions: thresholded labels give a single
# operating point, not a curve. The column index is used as the label in the
# same way as the argmax step that produced `predict`.
positive_scores = test_predict[:len(original), 1]
RocCurveDisplay.from_predictions(original, positive_scores);
