<a href="https://colab.research.google.com/github/AllergictoCrustaceans/ML/blob/main/tl_finetune_patch_camelyon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Detect Metastatic Tissue 

Prompt: Given histopathologic scans of lymph node sections, classify whether each image contains metastatic tissue.

Data source: https://patchcamelyon.grand-challenge.org/

ML Type: CNN, Image Classification, Transfer Learning, Fine tuning



***

NOTE: 

This model takes a particularly long time to train because the dataset has over 300k samples, so it may not be practical to train on the free tier of Google Colab. I am uploading the code anyway, even though I have not yet been able to verify how well the model learns.

Meanwhile, I will find a solution. 

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LeakyReLU, Dropout
from tensorflow.keras.layers.experimental.preprocessing import Resizing, Rescaling
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import pandas as pd
import zipfile
import requests
import io

## Load

In [None]:
# Load PatchCamelyon through TFDS. The returned datasets follow the order of
# the names in `split`; `as_supervised=True` requests (image, label) pairs and
# `with_info=True` additionally returns the dataset metadata as `info`.
(train_ds, test_ds, val_ds), info = tfds.load(
    'patch_camelyon',
    split=['train', 'test', 'validation'],
    as_supervised=True,
    with_info=True,
    shuffle_files=True,
)

## Inspect

In [None]:
# Number of label classes, read from the dataset metadata.
num_classes = info.features['label'].num_classes
print(num_classes)

# Show a grid of example images from the training split.
fig = tfds.show_examples(ds=train_ds, ds_info=info)

## Clean

Nothing to clean. 

## Split

It's already split from tfds.load(...)

In [None]:
# checked tensor shapes from info.

## Preprocess

In [None]:
# Side length, in pixels, that every image is resized to.
IMG_SIZE = 96

# Deterministic preprocessing: resize to IMG_SIZE x IMG_SIZE, then multiply
# the pixel values by 1/255.
resize_rescale = tf.keras.Sequential()
resize_rescale.add(Resizing(IMG_SIZE, IMG_SIZE))
resize_rescale.add(Rescaling(1. / 255.0))

# Random augmentation: flips along both axes followed by a random rotation
# (factor 0.2).
data_augmentation = tf.keras.Sequential()
data_augmentation.add(
    tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal_and_vertical'))
data_augmentation.add(
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.2))

In [None]:
# What's important about the order of cache, batch, and prefetch?

# Number of examples per batch produced by prepare().
batch_size = 128
# Let tf.data choose the parallelism / prefetch buffer size dynamically.
AUTOTUNE = tf.data.AUTOTUNE

# Build training pipeline
def prepare(ds, shuffle=False, augment=False):
    """Turn a dataset of (image, label) pairs into batched, prefetched input.

    Args:
        ds: dataset whose elements are (image, label) pairs.
        shuffle: when true, shuffle individual examples through a
            1000-element buffer before batching.
        augment: when true, run `data_augmentation` on every image batch.

    Returns:
        The transformed dataset; its last stage is a prefetch.
    """
    def _resize_and_rescale(image, label):
        # Applied per example, before batching.
        return resize_rescale(image), label

    def _augment(images, labels):
        # Applied per batch, after batching.
        return data_augmentation(images), labels

    pipeline = ds.map(_resize_and_rescale, num_parallel_calls=AUTOTUNE)
    if shuffle:
        pipeline = pipeline.shuffle(1000)

    pipeline = pipeline.batch(batch_size)
    if augment:
        pipeline = pipeline.map(_augment, num_parallel_calls=AUTOTUNE)

    return pipeline.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Only the training split is shuffled and augmented; the evaluation splits
# are just resized, rescaled, batched and prefetched.
train_ds = prepare(train_ds, shuffle=True, augment=True)
test_ds = prepare(test_ds, shuffle=False, augment=False)
val_ds = prepare(val_ds, shuffle=False, augment=False)

In [None]:
# REMINDER: This is binary classification with a single output unit, so the labels do not need to be one-hot encoded.

## Create Plain Model

In [None]:
# Metrics tracked for the plain CNN. Each metric's `name` is the key under
# which it is reported (with a `val_` prefix for validation data).
METRICS = [
    # Confusion-matrix counts.
    tf.keras.metrics.TruePositives(name='tp'),
    tf.keras.metrics.TrueNegatives(name='tn'),
    tf.keras.metrics.FalsePositives(name='fp'),
    tf.keras.metrics.FalseNegatives(name='fn'),
    # Summary scores.
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='AUC'),
]

def build(metrics=METRICS):
    """Create and compile the plain (non-pretrained) CNN classifier.

    The network is three Conv2D + MaxPooling2D stages (16, 64 and 128
    filters, 3x3 kernels, LeakyReLU activations), followed by a 256-unit
    dense layer, dropout of 0.2 and a single sigmoid output unit. It takes
    96x96x3 inputs.

    Args:
        metrics: metrics handed to `compile`; defaults to `METRICS`.

    Returns:
        The model, compiled with the Adam optimizer and binary cross-entropy.
    """
    model = tf.keras.Sequential()

    # First stage also declares the input shape.
    model.add(Conv2D(16, 3, padding='same',
                     activation=LeakyReLU(alpha=0.3),
                     input_shape=(96, 96, 3)))
    model.add(MaxPooling2D())

    # Remaining convolutional stages only differ in their filter count.
    for filters in (64, 128):
        model.add(Conv2D(filters, 3, padding='same',
                         activation=LeakyReLU(alpha=0.3)))
        model.add(MaxPooling2D())

    # Classification head.
    model.add(Flatten())
    model.add(Dense(256, activation=LeakyReLU(alpha=0.3)))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=metrics,
    )
    return model

# Stop once validation AUC has not improved for `patience` epochs and roll
# back to the best weights seen. `patience` has to be smaller than `epochs`;
# with patience == epochs the callback could never trigger.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_AUC',
    verbose=1,
    patience=3,
    mode='max',
    restore_best_weights=True
)

epochs = 10
batch_size = 128

plain_model = build()
# train_ds / val_ds are already batched by prepare(), so `batch_size` must
# not be passed to fit() for tf.data.Dataset inputs.
plain_history = plain_model.fit(
    train_ds,
    epochs=epochs,
    callbacks=[early_stopping],
    validation_data=val_ds,
)

In [None]:
# Evaluate the plain CNN on the held-out test split. `plain_model` is the
# model trained above (`model` is only defined later in the notebook), and it
# reports loss plus eight metrics, so read the values by name rather than
# unpacking a two-element result.
plain_results = plain_model.evaluate(test_ds, return_dict=True)
loss, acc = plain_results['loss'], plain_results['accuracy']
print("Accuracy", acc)

### Plots and Evaluation Metrics

In [None]:
# Per-epoch training/validation curves recorded while fitting the plain CNN.
acc = plain_history.history['accuracy']
val_acc = plain_history.history['val_accuracy']

loss = plain_history.history['loss']
val_loss = plain_history.history['val_loss']

# Two stacked panels: accuracy on top, loss underneath.
plain_fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatically chosen lower bound but cap the axis at 1.
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 1.0])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')
plt.show()

There's a high bias problem. 

### With Transfer Learning

In [None]:
# Input shape fed to the pretrained base (height, width, channels).
IMG_SHAPE = (96, 96, 3)

# ResNet50 with ImageNet weights and without its classification top, used
# as the feature extractor for transfer learning.
tl_model = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

In [None]:
# Pull one batch from the training pipeline and push it through the base
# model to see the shape of the extracted feature maps.
first_batch = next(iter(train_ds))
image_batch, label_batch = first_batch
feature_batch = tl_model(image_batch)
print(feature_batch.shape)

In [None]:
# Freeze the pretrained base so that only the layers added on top of it are
# updated during the first training phase.
tl_model.trainable = False

In [None]:
# Print the layer-by-layer architecture of the (now frozen) base model.
tl_model.summary()

In [None]:
# Pooling layer that averages each feature map over its spatial dimensions;
# applied to the sample feature batch here to check the resulting shape.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)

print(feature_batch_average.shape)

In [None]:
# Single-unit output layer with no activation, i.e. it produces a raw logit
# per image; applied to the pooled sample features to check the shape.
prediction_layer = tf.keras.layers.Dense(units=1)
prediction_batch = prediction_layer(feature_batch_average)

print(prediction_batch.shape)

In [None]:
# Assemble the transfer-learning model: augmentation -> ResNet50
# preprocessing -> frozen base -> global average pooling -> dropout -> logit.
preprocess_input = tf.keras.applications.resnet50.preprocess_input
inputs = tf.keras.Input(shape=(96, 96, 3))
# NOTE(review): train_ds is already augmented by prepare(augment=True), so
# training images pass through `data_augmentation` twice — confirm intended.
x = data_augmentation(inputs)
# prepare() scales pixels to [0, 1], but ResNet50's preprocess_input expects
# values in [0, 255] (it subtracts the ImageNet channel means without
# scaling). Undo the 1/255 scaling before calling it.
x = Rescaling(255.0)(x)
x = preprocess_input(x)
# training=False keeps the base model in inference mode even after some of
# its layers are unfrozen for fine-tuning.
x = tl_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)

model = tf.keras.Model(inputs, outputs)

In [None]:
# `prediction_layer` emits raw logits, so the metrics must be logit-aware:
#   * accuracy thresholds at 0.0 (logit > 0 <=> probability > 0.5); the plain
#     'accuracy' string would threshold the logits at 0.5 instead;
#   * AUC is told to apply a sigmoid first via `from_logits=True`. Naming it
#     'AUC' produces the `val_AUC` value the early-stopping callback monitors.
# The confusion-matrix metrics from the plain-CNN `METRICS` list are not used
# here because they expect predictions in [0, 1].
TL_METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0),
    tf.keras.metrics.AUC(name='AUC', from_logits=True),
]

base_learning_rate = 0.0001
# `learning_rate` replaces the deprecated `lr` keyword.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=TL_METRICS)

In [None]:
# Monitor validation loss: it is always reported by fit(), whereas `val_AUC`
# only exists when an AUC metric named 'AUC' was passed to compile().
# `patience` must be smaller than the number of epochs, otherwise the
# callback can never trigger.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    verbose=1,
    patience=3,
    mode='min',
    restore_best_weights=True
)

epoch=10

# First phase: train only the new classification head on the frozen base.
history = model.fit(train_ds,
                    epochs=epoch,
                    callbacks=[early_stopping],
                    validation_data=(val_ds))

### Fine Tuning

In [None]:
# Make the base model trainable again as the first step of fine-tuning.
tl_model.trainable = True

In [None]:
# Report the depth of the base model to help choose the cut-off below.
print("Number of layers in the base model: ", len(tl_model.layers))

# Index of the first base-model layer that stays trainable.
fine_tune_at = 100

# Every layer below the cut-off keeps its pretrained weights fixed.
for frozen_layer in tl_model.layers[:fine_tune_at]:
    frozen_layer.trainable = False

In [None]:
# Re-compile so the changed `trainable` flags take effect, using a learning
# rate ten times smaller than in the first phase.
# The model outputs logits, so accuracy thresholds at 0.0 (the plain
# 'accuracy' string would threshold logits at 0.5) and AUC applies a sigmoid
# via `from_logits=True`. `learning_rate` replaces the deprecated `lr`.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate / 10),
              metrics=[
                  tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0),
                  tf.keras.metrics.AUC(name='AUC', from_logits=True),
              ])

In [None]:
# Print the full model architecture, including parameter counts.
model.summary()

In [None]:
# Number of variables that will be updated during fine-tuning (shown as the
# cell output).
len(model.trainable_variables)

In [None]:
fine_tune_epochs = 10
# Number of epochs the first phase actually ran (it can be fewer than
# requested when early stopping fired), rather than a hard-coded 10.
initial_epochs = len(history.epoch)
total_epochs = initial_epochs + fine_tune_epochs

# `initial_epoch` is the zero-based index of the first epoch to run, so
# continuing right after the first phase means starting at
# len(history.epoch); history.epoch[-1] would repeat the last epoch index and
# train one epoch more than `fine_tune_epochs`.
history_fine = model.fit(train_ds,
                         epochs=total_epochs,
                         initial_epoch=initial_epochs,
                         validation_data=val_ds)

In [None]:
# Join the curves of the two transfer-learning phases. Start from `history`
# (the frozen-base phase of this model) rather than from whatever `acc` /
# `loss` currently hold — earlier cells bind them to the plain CNN's curves —
# and build new lists so the stored history objects are not mutated.
acc = history.history['accuracy'] + history_fine.history['accuracy']
val_acc = history.history['val_accuracy'] + history_fine.history['val_accuracy']

loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']

In [None]:
# Two stacked panels (accuracy, loss) with a vertical marker at the last
# epoch before fine-tuning started.
fine_tune_fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))
fine_tune_start = [initial_epochs - 1, initial_epochs - 1]

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.set_ylim([0.8, 1])
# The marker spans the full height of the (already fixed) y-range.
acc_ax.plot(fine_tune_start, acc_ax.get_ylim(), label='Start Fine Tuning')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.set_ylim([0, 1.0])
loss_ax.plot(fine_tune_start, loss_ax.get_ylim(), label='Start Fine Tuning')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')
plt.show()

In [None]:
# Evaluate the fine-tuned model on the held-out test split. The prepared test
# pipeline is named `test_ds` (`test_dataset` is never defined). Reading the
# results by name keeps this working however many metrics were compiled in.
test_results = model.evaluate(test_ds, return_dict=True)
loss, accuracy = test_results['loss'], test_results['accuracy']
print('Test accuracy :', accuracy)