In [None]:
# Import libraries
import tensorflow as tf
from tensorflow import keras
from keras import layers
from tensorflow.keras.applications.resnet50 import preprocess_input, ResNet50
import os
import pandas as pd
import numpy as np
import zipfile
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import ConfusionMatrixDisplay, classification_report

In [None]:
# Import and prepare the first split of datasets
path = os.path.join("images/archive/bloodcells_dataset")

BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224

# subset="both" returns two datasets: the 80 % part (`dataset`) and the
# held-out 20 % part (`test_dataset`), as set by validation_split=0.2.
# Keras documents `image_size` as (height, width), so the height goes first;
# this also matches the (IMG_HEIGHT, IMG_WIDTH, 3) input shape of the models.
dataset, test_dataset = keras.utils.image_dataset_from_directory(
    directory=path,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    label_mode="int",
    validation_split=0.2,
    subset="both",
    shuffle=True,
    seed=42,
)

# Get class names
class_names = dataset.class_names
print(class_names)

In [None]:
# Plot 16 images from the dataset as examples:
# a 4x4 grid filled from the first batch, each panel titled with its class name
fig, ax = plt.subplots(nrows=4, ncols=4, figsize=(12, 12))
ax = ax.flat
for images, labels in dataset.take(1):
  for i in range(16):
    panel = ax[i]
    panel.set_title(class_names[labels[i].numpy()])
    # Hide the axis ticks; they carry no information for an image
    panel.set(xticks=[], yticks=[])
    panel.imshow(images[i].numpy().astype("uint8"))

# Check if classes are balanced
# One full pass over the dataset collects the integer label of every image.
labels = np.concatenate([label for image, label in dataset], axis=0)
unique, counts = np.unique(labels, return_counts=True)
# `unique` holds only the class indices that actually occur, so the wedge
# names are looked up from it. Passing all of `class_names` would fail with a
# length mismatch (or mislabel wedges) if a class were absent from this subset.
present_class_names = [class_names[class_index] for class_index in unique]
plt.pie(x=counts, labels=present_class_names, autopct='%.1f%%', textprops={'size': 'smaller'},
        colors=sns.color_palette('pastel', n_colors=len(unique)))
plt.title("Class distribution")
plt.show()

In [None]:
# Split dataset in train and validation set:
# the first 80 % of the batches are used for training, the rest for validation.
# NOTE(review): `dataset` was created with shuffle=True. If it reshuffles on
# every pass, take()/skip() may not yield disjoint splits, letting training
# images leak into validation — TODO confirm against the installed Keras version.
num_elements = len(dataset)
train_size = int(0.8 * num_elements)

train_dataset = dataset.take(train_size)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

val_dataset = dataset.skip(train_size)
val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)

# Check length (number of batches per split)
(len(train_dataset), len(val_dataset), len(test_dataset))




In [None]:
# Check format: print the image and label batch shapes of the first batch of each split
for split_name, split_dataset in (("Train", train_dataset),
                                  ("Validation", val_dataset),
                                  ("Test", test_dataset)):
  for image_batch, labels_batch in split_dataset.take(1):
    print(f"{split_name} data: {image_batch.shape}")
    print(f"{split_name} labels: {labels_batch.shape}")

In [None]:
# Check pixel intensities: largest and smallest value in the first training batch
for image, label in train_dataset.take(1):
  highest_value = tf.reduce_max(image)
  lowest_value = tf.reduce_min(image)
  print(highest_value)
  print(lowest_value)

In [None]:
# Create a lab-book to track the different experiments
# (filled later with one entry per model name)
lab_book = dict()

# Create early stopping callback: training stops once val_accuracy has not
# improved by at least min_delta for `patience` consecutive epochs, and the
# weights of the best epoch are restored.
early_cb = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.005,
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Set up data augmentation layers: random flips along both axes followed by a
# random rotation (per the Keras docs, `factor` is a fraction of a full turn).
random_flip = keras.layers.RandomFlip(mode="horizontal_and_vertical")
random_rotation = keras.layers.RandomRotation(factor=0.4)
data_augmentation = keras.Sequential([random_flip, random_rotation])

In [None]:
# Prepare data for ResNet50 input
def _to_resnet_input(split):
  """Return `split` with ResNet50's `preprocess_input` applied to every image batch.

  Args:
    split: a dataset yielding (images, labels) pairs.

  Returns:
    A dataset yielding (preprocess_input(images), labels); labels pass through
    unchanged. The map runs with an auto-tuned degree of parallelism (element
    order is kept, as tf.data maps are deterministic by default) and the result
    is prefetched.
  """
  return (split
          .map(lambda X, y: (preprocess_input(X), y),
               num_parallel_calls=tf.data.AUTOTUNE)
          .prefetch(tf.data.AUTOTUNE))


# One shared helper keeps the three splits on exactly the same pipeline.
train_dataset_resnet = _to_resnet_input(train_dataset)
val_dataset_resnet = _to_resnet_input(val_dataset)
test_dataset_resnet = _to_resnet_input(test_dataset)

# Check format: print the image and label batch shapes of the first
# preprocessed batch of each split
for split_name, split_dataset in (("Train", train_dataset_resnet),
                                  ("Validation", val_dataset_resnet),
                                  ("Test", test_dataset_resnet)):
  for image_batch, labels_batch in split_dataset.take(1):
    print(f"{split_name} data: {image_batch.shape}")
    print(f"{split_name} labels: {labels_batch.shape}")

In [None]:
# Check pixel intensities: largest and smallest value in the first
# preprocessed training batch
for image, label in train_dataset_resnet.take(1):
  highest_value = tf.reduce_max(image)
  lowest_value = tf.reduce_min(image)
  print(highest_value)
  print(lowest_value)

In [None]:
# Name the model
name = "Model_5"

# Download the pretrained resnet model and save it as a layer
# (include_top=False drops ResNet50's own classification head)
feature_extraction_layer = ResNet50(include_top=False,
                                    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Freeze all layers of base model
feature_extraction_layer.trainable = False

# Build model: augmentation -> frozen ResNet50 -> global average pooling -> softmax head.
# Layers are taken from `keras.layers` (tensorflow.keras), the same namespace the
# augmentation layers and `keras.Sequential` come from, so the model never mixes
# layer classes from two different Keras packages.
# The head width follows the number of classes found in the dataset.
tf.random.set_seed(42)
model_5 = keras.Sequential([data_augmentation,
                            feature_extraction_layer,
                            keras.layers.GlobalAveragePooling2D(),
                            keras.layers.Dense(len(class_names), activation="softmax")
                            ], name=name)

# Compile model (labels are integer class indices, hence the sparse loss)
model_5.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                optimizer=keras.optimizers.Adam(),
                metrics=["accuracy"])

# Fit model
# NOTE: with epochs=1 the early-stopping callback can never trigger; raise
# `epochs` for a real training run.
history_5 = model_5.fit(train_dataset_resnet,
                        validation_data=val_dataset_resnet,
                        epochs=1,
                        callbacks=[early_cb])

# Write lab-book (evaluate() returns [loss, accuracy]; index 1 is the accuracy)
train_accuracy = model_5.evaluate(train_dataset_resnet)[1]
val_accuracy = model_5.evaluate(val_dataset_resnet)[1]
lab_book[name] = {"train_accuracy": train_accuracy, "val_accuracy": val_accuracy}

In [None]:
# Check test set accuracy of model 5 since base model will be changed afterwards
# (evaluate() returns [loss, accuracy]; index 1 is the accuracy)
test_scores = dict()
test_accuracy = model_5.evaluate(test_dataset_resnet)[1]
test_scores.update({"Model_5": test_accuracy})

In [None]:
# Name the model
name = "Model_6"

# Unfreeze all layers of base model
# NOTE: this is the same layer object that model_5 was built on, so any weight
# updates made while training model_6 also change model_5's base network.
feature_extraction_layer.trainable = True

# Build model: augmentation -> trainable ResNet50 -> global average pooling -> new softmax head.
# Layers are taken from `keras.layers` (tensorflow.keras), the same namespace the
# augmentation layers and `keras.Sequential` come from, so the model never mixes
# layer classes from two different Keras packages.
# The head width follows the number of classes found in the dataset.
tf.random.set_seed(42)
model_6 = keras.Sequential([data_augmentation,
                            feature_extraction_layer,
                            keras.layers.GlobalAveragePooling2D(),
                            keras.layers.Dense(len(class_names), activation="softmax")
                            ], name=name)


# Compile model (explicit learning rate of 1e-4 for fine-tuning the whole network)
model_6.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
                optimizer=keras.optimizers.Adam(learning_rate = 0.0001),
                metrics=["accuracy"])

# Fit model
# NOTE: with epochs=1 the early-stopping callback can never trigger; raise
# `epochs` for a real training run.
history_6 = model_6.fit(train_dataset_resnet,
                        validation_data=val_dataset_resnet,
                        epochs=1,
                        callbacks=[early_cb])

# Write lab-book (evaluate() returns [loss, accuracy]; index 1 is the accuracy)
train_accuracy = model_6.evaluate(train_dataset_resnet)[1]
val_accuracy = model_6.evaluate(val_dataset_resnet)[1]
lab_book[name] = {"train_accuracy": train_accuracy, "val_accuracy": val_accuracy}