# Cropped ROI Model

### Imports

In [None]:
import os

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import splitfolders
import plotly.graph_objects as go
import seaborn as sns
from sklearn.metrics import confusion_matrix
from cropped_sort_images import sort_images_cropped
from tensorflow.keras import Sequential, layers
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import L2
from plotly.subplots import make_subplots


###  Preprocessing

After the data has been downloaded from Kaggle, set the appropriate paths to the data.

In [None]:
# Read ../data/csv/meta.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer="../data/csv/meta.csv")

In [None]:
# Locations of the source images, the CSV files and the folders that the
# sorting/splitting steps below write to (all relative to this notebook).
IMAGE_PATH = '../data/jpeg'
CSV_PATH = '../data/csv'
TRAIN_IMAGE_PATH = '../data/train'
TEST_IMAGE_PATH = '../data/test'
DATA_SPLIT_PATH = '../data_split'

# Sort the cropped images into the train and test folders.
# NOTE(review): the resulting folder layout is defined by cropped_sort_images;
# presumably one sub-folder per class -- confirm against that module.
sort_images_cropped(IMAGE_PATH, CSV_PATH, TRAIN_IMAGE_PATH, TEST_IMAGE_PATH)

# Split the train data 80/20 into train and validation in a new folder. The
# output location comes from DATA_SPLIT_PATH (not a repeated literal) so it
# always matches the directories the data generators read from.
splitfolders.ratio(TRAIN_IMAGE_PATH, output=DATA_SPLIT_PATH, seed=1337, ratio=(.8, .2))

In [None]:
# Side length in pixels: images are loaded as IMG_SIZE x IMG_SIZE and the
# network input shape is (IMG_SIZE, IMG_SIZE, 3).
IMG_SIZE = 256

In [None]:
# Training data: rescaled and randomly augmented on the fly, streamed in
# shuffled batches of 32 from the train part of the split folder.
augmentation_options = dict(
    rescale=1/255,  # bring pixel values into [0, 1]
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)
train_generator = train_datagen.flow_from_directory(
    f'{DATA_SPLIT_PATH}/train',
    batch_size=32,
    class_mode="binary",
    color_mode="rgb",
    shuffle=True,
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Display the mapping from class (folder) name to the integer label that the
# training generator assigned to it.
train_generator.class_indices

In [None]:
# Validation data: only rescaled (no augmentation), streamed in batches of 32
# from the val part of the split folder.
val_datagen = ImageDataGenerator(rescale=1/255)  # pixel values into [0, 1]
val_generator = val_datagen.flow_from_directory(
    f'{DATA_SPLIT_PATH}/val',
    batch_size=32,
    class_mode="binary",
    color_mode="rgb",
    shuffle=True,
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Test data: only rescaled, served one image per batch and without labels
# (class_mode=None). The ground-truth labels are read separately from
# test_generator.classes when the metrics are computed.
test_datagen = ImageDataGenerator(
    rescale=1/255  # rescale the tensor values to [0,1]
)
test_generator = test_datagen.flow_from_directory(
    directory=TEST_IMAGE_PATH,
    # Fix the label order explicitly: BENIGN -> 0, MALIGNANT -> 1.
    classes=['BENIGN', 'MALIGNANT'],
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode="rgb",
    class_mode=None,
    batch_size=1,
    # Must stay False: test_generator.classes is listed in directory order,
    # so shuffling the yielded images would misalign the predictions with the
    # labels and invalidate every metric computed from them. No seed is
    # needed because nothing random is applied to the test images.
    shuffle=False,
)

#### Implementation of the cropped ROI image classification model

If you already have a pretrained model, you can skip this step.

In [None]:
# Upper bound on the number of training epochs.
EPOCHS = 50
# File the trained model is saved to.
MODEL_FILE = 'cropped_model.h5'

In [None]:
# Load ResNet50 with ImageNet weights and without its classification head;
# global max pooling reduces the final feature maps to one vector per image.
# `classes` is deliberately not passed: Keras only uses it when
# include_top=True and no pretrained weights are loaded, so it has no effect
# in this configuration and would only suggest a 2-way head that is not built.
pretrained_model = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    pooling='max',
)

# Layer stack: the ResNet50 backbone, two L2-regularised ReLU dense layers
# and a single sigmoid unit for the binary label.
model_layers = [
    pretrained_model,
    layers.Dense(units=512, activation='relu', kernel_regularizer=L2(
        0.0001), bias_regularizer=L2(0.0001)),
    layers.Dense(units=128, activation='relu', kernel_regularizer=L2(
        0.0001), bias_regularizer=L2(0.0001)),
    layers.Dense(units=1, activation='sigmoid'),
]



In [None]:
# Chain the layer stack into a sequential Keras model.
model = Sequential(model_layers)

In [None]:
# Configure training: Adam at a learning rate of 0.001, binary cross-entropy
# as the loss, and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=0.001),
)

In [None]:
# Stop training once the validation loss has not improved for 5 epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    verbose=1,
)
# Write the model to MODEL_FILE whenever the validation loss reaches a new
# minimum, so the file always holds the best model seen so far.
best_model_checkpoint = ModelCheckpoint(
    filepath=MODEL_FILE,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
custom_callbacks = [early_stopping, best_model_checkpoint]

In [None]:
# Fit the model on the training generator, validating on the validation
# generator after each epoch; one epoch is a full pass over each generator.
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator),
    epochs=EPOCHS,
    callbacks=custom_callbacks,
    verbose=2,
)


In [None]:
# Plot the per-epoch training/validation curves with plotly: accuracy on the
# secondary y-axis, loss on the primary y-axis (clipped to [0, 2]).
fig = make_subplots(specs=[[{"secondary_y": True}]])
curves = [
    # (key in history.history, legend name, extra add_trace arguments)
    ('accuracy', 'train accuracy', {'secondary_y': True}),
    ('val_accuracy', 'val accuracy', {'secondary_y': True}),
    ('val_loss', 'val loss', {}),
    ('loss', 'loss', {}),
]
for history_key, trace_name, placement in curves:
    fig.add_trace(
        go.Scatter(y=history.history[history_key], name=trace_name),
        **placement,
    )
fig.update_layout(
    title='Accuracy and Loss',
    xaxis_title='Epoch',
    yaxis_title='Accuracy/Loss',
    yaxis_range=[0, 2],
)
fig.show()

#### Testing

In [None]:
import tensorflow as tf

# Restore the saved model from disk, so this section can be run without
# re-training.
SAVED_MODEL_FILE = 'cropped_model.h5'
model = tf.keras.models.load_model(filepath=SAVED_MODEL_FILE)

In [None]:
# Predict on the test data; the sigmoid output gives one score per image.
# NOTE(review): the rows of `preds` follow the order in which test_generator
# yields images. The metric cells below compare them with
# test_generator.classes, which is only valid if the generator does not
# shuffle.
preds = model.predict(test_generator, verbose=1)

In [None]:
# Confusion matrix of the rounded predictions against the true test labels,
# drawn as an annotated heatmap and written to confusion_matrix_test.png.
matrix = confusion_matrix(y_true=test_generator.classes, y_pred=preds.round())
heatmap_axes = sns.heatmap(data=matrix, annot=True, fmt='d', cmap='Blues')
heatmap_axes.set_xlabel('Predicted')
heatmap_axes.set_ylabel('Actual')
plt.savefig('confusion_matrix_test.png')


In [None]:
# Report accuracy, recall, precision and F1 of the rounded predictions.
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

metric_functions = (
    ('Accuracy: ', accuracy_score),
    ('Recall: ', recall_score),
    ('Precision: ', precision_score),
    ('F1 Score: ', f1_score),
)
for metric_label, metric_function in metric_functions:
    print(metric_label, metric_function(test_generator.classes, preds.round()))


In [None]:
# Flatten the 2x2 confusion matrix into its four cells (row-major order:
# tn, fp, fn, tp) and print each count.
tn, fp, fn, tp = confusion_matrix(test_generator.classes, preds.round()).ravel()
for count_label, count in (
    ('True Positives: ', tp),
    ('True Negatives: ', tn),
    ('False Positives: ', fp),
    ('False Negatives: ', fn),
):
    print(count_label, count)


In [None]:
# Per-class precision, recall, F1 and support, returned as a nested dict.
from sklearn.metrics import classification_report

res = classification_report(
    y_true=test_generator.classes,
    y_pred=preds.round(),
    target_names=['BENIGN', 'MALIGNANT'],
    output_dict=True,
)

In [None]:
# Tabulate the report with one row per report entry, then drop the support
# column and the two averaged rows before displaying the table.
df = (
    pd.DataFrame(res)
    .T
    .drop(columns='support')
    .drop(index=['macro avg', 'weighted avg'])
)
df.head()

In [None]:
# ROC curve of the raw prediction scores against the true test labels, with
# the area under the curve shown in the legend; saved to roc_curve.png.
from sklearn.metrics import roc_curve, auc

fpr, tpr, thresholds = roc_curve(test_generator.classes, preds)
roc_auc = auc(fpr, tpr)

lw = 2
plt.figure()
roc_axes = plt.gca()
roc_axes.plot(fpr, tpr, color='darkorange', lw=lw,
              label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line from (0, 0) to (1, 1).
roc_axes.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
roc_axes.set_xlim([0.0, 1.0])
roc_axes.set_ylim([0.0, 1.05])
roc_axes.set_xlabel('False Positive Rate')
roc_axes.set_ylabel('True Positive Rate')
roc_axes.set_title('Receiver operating characteristic')
roc_axes.legend(loc="lower right")
plt.savefig('roc_curve.png')
