In [None]:
# Mount Google Drive to access dataset
from google.colab import drive
drive.mount('/content/drive')

# Install required packages (if not already installed)
!apt-get install -y libopenslide0 libopenslide-dev
!pip install openslide-python albumentations tensorflow streamlit matplotlib scikit-learn opencv-python tqdm

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libopenslide-dev is already the newest version (3.4.1+dfsg-5build1).
libopenslide0 is already the newest version (3.4.1+dfsg-5build1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import openslide

import tensorflow as tf
from tensorflow.keras.applications import ResNet50, EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

import albumentations as A

In [None]:
import random

# One shared seed for every source of randomness used in this notebook.
seed = 42

# Seed Python's RNG, NumPy's legacy global RNG and TensorFlow, in that order.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed)

In [None]:
# Locations of the image folder and the label table on the mounted Drive.
_panda_root = "/content/drive/MyDrive/PANDA_dataset"
dataset_path = os.path.join(_panda_root, "train_images")
labels_path = os.path.join(_panda_root, "train.csv")

# Read the label table, then print a preview and its (rows, columns) shape.
labels_df = pd.read_csv(labels_path)
for summary in (labels_df.head(), labels_df.shape):
    print(summary)

# Last expression of the cell: displayed as the cell output.
labels_df.columns

                           image_id data_provider  isup_grade gleason_score
0  0005f7aaab2800f6170c399693a96917    karolinska           0           0+0
1  000920ad0b612851f8e01bcc880d9b3d    karolinska           0           0+0
2  0018ae58b01bdadc8e347995b69f99aa       radboud           4           4+4
3  001c62abd11fa4b57bf7a6c603a11bb9    karolinska           4           4+4
4  001d865e65ef5d2579c190a0e0350d8f    karolinska           0           0+0
(10616, 4)


Index(['image_id', 'data_provider', 'isup_grade', 'gleason_score'], dtype='object')

In [None]:
# Work on a reproducible random subset of the label table (presumably to keep
# image loading and training time manageable -- confirm the intended size).
N_SAMPLES = 3000

# min(...) keeps the cell from raising when the table holds fewer rows than
# requested; the shared `seed` replaces the duplicated literal 42.
labels_df = labels_df.sample(n=min(N_SAMPLES, len(labels_df)), random_state=seed)

In [None]:
# Disabled albumentations pipeline, kept for reference only.
# It is written as real comments rather than a bare string literal, because a
# bare string is evaluated and echoed back as the cell's output.
#
# # Define augmentation pipeline
# transform = A.Compose([
#     A.HorizontalFlip(p=0.5),
#     A.VerticalFlip(p=0.5),
#     A.RandomRotate90(p=0.5),
#     A.ColorJitter(p=0.5)
# ])
#
# def augment_patch(patch):
#     return transform(image=patch)["image"]

'\n# Define augmentation pipeline\ntransform = A.Compose([\n    A.HorizontalFlip(p=0.5),\n    A.VerticalFlip(p=0.5),\n    A.RandomRotate90(p=0.5),\n    A.ColorJitter(p=0.5)\n])\n\ndef augment_patch(patch):\n    return transform(image=patch)["image"]\n'

In [None]:
from tqdm import tqdm

IMG_SIZE = 256   # side length (pixels) each image is resized to


def _read_rgb_square(path, side):
    """Load the image at `path`, convert BGR -> RGB and resize to side x side.

    Returns None when OpenCV cannot decode the file: cv2.imread reports
    failure by returning None instead of raising, and passing that None on
    to cv2.cvtColor would abort the whole loading loop.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        return None
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, (side, side))


X = []
y = []
unreadable = []  # ids whose file exists on disk but could not be decoded

for idx, row in tqdm(labels_df.iterrows(), total=len(labels_df)):
    img_id = row["image_id"]
    img_path = os.path.join(dataset_path, f"{img_id}.png")

    if not os.path.exists(img_path):
        continue  # skip missing

    img = _read_rgb_square(img_path, IMG_SIZE)
    if img is None:
        unreadable.append(img_id)
        continue  # skip corrupt / undecodable files instead of crashing

    X.append(img)
    y.append(row["isup_grade"])

# Build the array directly as float32: dividing the default uint8 array by
# 255.0 would produce float64 and double the memory footprint.
X = np.asarray(X, dtype=np.float32) / 255.0
y = np.array(y)

if unreadable:
    print(f"Skipped {len(unreadable)} unreadable image(s)")
print("Dataset shape:", X.shape, y.shape)

100%|██████████| 3000/3000 [38:05<00:00,  1.31it/s]


Dataset shape: (3000, 256, 256, 3) (3000,)


In [None]:
from sklearn.model_selection import train_test_split

seed = 42 # Define the random state seed

# First hold out 10% of the rows as the test set, stratified on the ISUP grade.
trainval_df, test_df = train_test_split(
    labels_df,
    test_size=0.10,
    random_state=seed,
    stratify=labels_df["isup_grade"],
)

# Then carve 20% of the remaining rows off as the validation set, again stratified.
train_df, val_df = train_test_split(
    trainval_df,
    test_size=0.20,
    random_state=seed,
    stratify=trainval_df["isup_grade"],
)

# Report the sizes of the train / validation / test partitions.
print(*(len(part) for part in (train_df, val_df, test_df)))

2160 540 300


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

IMG_SIZE = (256, 256)
BATCH = 16

# Augmentation for training: pixels are rescaled to [0, 1] and random
# geometric transforms are applied on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True
)

# Validation and test images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)


def _as_generator_frame(frame):
    """Return a copy of `frame` in the form flow_from_dataframe expects.

    * `image_id` becomes a file name ending in ".png". The suffix is added only
      when missing, so re-running this cell does not produce ".png.png".
    * `isup_grade` becomes a string, as required by class_mode="categorical".

    A new frame is returned via `assign`, so the slices produced by
    train_test_split are never written to in place.
    """
    filenames = frame["image_id"].astype(str)
    filenames = filenames.where(filenames.str.endswith(".png"), filenames + ".png")
    return frame.assign(
        image_id=filenames,
        isup_grade=frame["isup_grade"].astype(str),
    )


train_df = _as_generator_frame(train_df)
val_df = _as_generator_frame(val_df)
test_df = _as_generator_frame(test_df)

# One explicit, sorted class list shared by all three generators, so the
# label -> index mapping cannot differ between splits.
CLASS_NAMES = sorted(
    set(train_df["isup_grade"]) | set(val_df["isup_grade"]) | set(test_df["isup_grade"])
)


def _flow(datagen, frame, shuffle, **extra):
    """Create a categorical batch iterator over `frame` with the shared settings."""
    return datagen.flow_from_dataframe(
        frame,
        directory=dataset_path,  # same folder the label table refers to
        x_col="image_id",
        y_col="isup_grade",
        target_size=IMG_SIZE,
        classes=CLASS_NAMES,
        class_mode="categorical",
        batch_size=BATCH,
        shuffle=shuffle,
        **extra,
    )


# Generators: only the training stream is shuffled (with a fixed seed);
# validation and test keep the frame order.
train_gen = _flow(train_datagen, train_df, shuffle=True, seed=seed)
val_gen = _flow(val_datagen, val_df, shuffle=False)
test_gen = _flow(test_datagen, test_df, shuffle=False)


Found 2160 validated image filenames belonging to 6 classes.
Found 540 validated image filenames belonging to 6 classes.
Found 300 validated image filenames belonging to 6 classes.


In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
import tensorflow as tf

def build_resnet(input_shape, num_classes, fine_tune_at=30, lr=1e-4, pixel_scale=255.0):
    """Build a ResNet50 classifier with a frozen ImageNet backbone.

    The ImageNet ResNet50 weights expect 0-255 pixels run through
    `resnet50.preprocess_input` (RGB -> BGR plus per-channel mean
    subtraction). The data generators in this notebook emit pixels in [0, 1],
    so the model first multiplies its input by `pixel_scale` and then applies
    `preprocess_input`; without this the frozen backbone sees inputs far from
    what it was trained on.

    Args:
        input_shape: (height, width, channels) of the incoming images.
        num_classes: number of output classes of the softmax head.
        fine_tune_at: accepted for backward compatibility; not used by this
            function (the backbone is returned fully frozen).
        lr: learning rate of the Adam optimizer the model is compiled with.
        pixel_scale: factor that maps incoming pixel values to the 0-255
            range. Use 255.0 for [0, 1] inputs, 1.0 for raw 0-255 inputs.

    Returns:
        (model, base_model): the compiled classifier and the ResNet50 backbone
        whose layers it is built from. Both share the same layer objects, so
        toggling `layer.trainable` on `base_model.layers` affects `model`
        after it is recompiled.
    """
    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.layers.Rescaling(pixel_scale)(inputs)
    x = tf.keras.applications.resnet50.preprocess_input(x)

    # Building on `input_tensor` keeps the graph flat, so base_model.input is
    # the raw image input and base_model.layers ends with the ResNet layers.
    base_model = ResNet50(weights="imagenet", include_top=False, input_tensor=x)

    # Freeze the whole base model
    base_model.trainable = False

    # Custom classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=base_model.input, outputs=output)

    # Compile for feature extraction
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model, base_model


In [None]:
# Stage 1 (feature extraction): build the classifier and train it with the
# backbone frozen, as returned by build_resnet.
resnet_model, base_model = build_resnet((256,256,3), num_classes=6, lr=1e-3)

# Stop when val_loss has not improved for 3 consecutive epochs and roll the
# model back to the best weights seen.
stage1_early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,
)

history_stage1 = resnet_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    callbacks=[stage1_early_stop],
)


Epoch 1/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 445ms/step - accuracy: 0.2344 - loss: 1.9559 - val_accuracy: 0.2685 - val_loss: 1.7236
Epoch 2/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 395ms/step - accuracy: 0.2824 - loss: 1.7348 - val_accuracy: 0.2685 - val_loss: 1.7167
Epoch 3/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 386ms/step - accuracy: 0.2624 - loss: 1.7274 - val_accuracy: 0.2685 - val_loss: 1.7206
Epoch 4/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 381ms/step - accuracy: 0.2613 - loss: 1.7417 - val_accuracy: 0.2685 - val_loss: 1.7362
Epoch 5/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 397ms/step - accuracy: 0.2709 - loss: 1.7450 - val_accuracy: 0.2685 - val_loss: 1.7154
Epoch 6/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 386ms/step - accuracy: 0.2671 - loss: 1.7326 - val_accuracy: 0.2685 - val_loss: 1.7199
Epoch 7/20

In [None]:
# Unfreeze last layers.
# BatchNormalization layers are deliberately kept frozen: making them
# trainable lets their moving statistics update during fine-tuning, which the
# Keras fine-tuning guidance warns can wreck the pretrained features.
for layer in base_model.layers[-40:]:
    layer.trainable = not isinstance(layer, tf.keras.layers.BatchNormalization)

# Recompile with a smaller LR (a change of `trainable` only takes effect
# after compile is called again).
resnet_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Stage 2 (fine-tuning): early stopping on val_loss plus a 5x learning-rate
# reduction after 3 epochs without improvement.
history_stage2 = resnet_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=3)
    ]
)


In [None]:
from tensorflow.keras.applications import EfficientNetB3

def build_efficientnet(input_shape, num_classes, fine_tune_layers=30, pixel_scale=255.0):
    """Build an EfficientNetB3 classifier that fine-tunes only the backbone's tail.

    Two fixes over the previous version:

    * The old loop set `trainable = True` on the last 30 layers without
      freezing anything first. Every layer is trainable by default, so the
      loop had no effect and the whole backbone was trained. The backbone is
      now frozen first and only the last `fine_tune_layers` layers are
      unfrozen.
      NOTE(review): this assumes partial fine-tuning was the intent -- confirm.
    * Keras EfficientNet models expect 0-255 pixel values (their own
      normalisation is built in), while the notebook's generators emit
      [0, 1]. Inputs are multiplied by `pixel_scale` before the backbone.

    Args:
        input_shape: (height, width, channels) of the incoming images.
        num_classes: number of output classes of the softmax head.
        fine_tune_layers: how many trailing backbone layers to leave
            trainable; 0 keeps the backbone fully frozen.
        pixel_scale: factor that maps incoming pixel values to the 0-255
            range. Use 255.0 for [0, 1] inputs, 1.0 for raw 0-255 inputs.

    Returns:
        The compiled Keras model (Adam, learning rate 1e-5, categorical
        cross-entropy, accuracy metric).
    """
    inputs = tf.keras.Input(shape=input_shape)
    scaled = tf.keras.layers.Rescaling(pixel_scale)(inputs)
    base_model = EfficientNetB3(weights="imagenet", include_top=False, input_tensor=scaled)

    # Freeze everything, then re-enable only the tail of the backbone.
    base_model.trainable = False
    if fine_tune_layers > 0:  # guard: layers[-0:] would select every layer
        for layer in base_model.layers[-fine_tune_layers:]:
            # BatchNormalization stays frozen so its moving statistics are
            # not disturbed during fine-tuning.
            layer.trainable = not isinstance(layer, tf.keras.layers.BatchNormalization)

    # Classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=base_model.input, outputs=output)
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
    return model

effnet_model = build_efficientnet((256, 256, 3), num_classes=6)




In [None]:
# Train ResNet50
# Runs 10 further epochs on the current resnet_model, without any callbacks.
resnet_fit_options = dict(validation_data=val_gen, epochs=10)
history_resnet = resnet_model.fit(train_gen, **resnet_fit_options)


In [None]:
# Train EfficientNet
# Runs 10 epochs on effnet_model with the shared generators, without callbacks.
effnet_fit_options = dict(validation_data=val_gen, epochs=10)
history_effnet = effnet_model.fit(train_gen, **effnet_fit_options)

In [None]:
import matplotlib.pyplot as plt

def plot_history(history, title):
    """Plot training and validation accuracy per epoch.

    Args:
        history: object whose `.history` dict holds the 'accuracy' and
            'val_accuracy' series (as returned by `model.fit`).
        title: text shown as the figure title.
    """
    # (key in history.history, legend label) for each curve, in draw order.
    curves = (
        ('accuracy', 'train_acc'),
        ('val_accuracy', 'val_acc'),
    )
    for metric_key, legend_label in curves:
        plt.plot(history.history[metric_key], label=legend_label)

    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# ResNet50: frozen-backbone stage, then the fine-tuning stage.
plot_history(history_stage1, "ResNet50 Accuracy")
plot_history(history_stage2, "ResNet50 Fine-tuning Accuracy")

# EfficientNet: previously this title was paired with history_stage2, i.e.
# the ResNet fine-tuning curve was shown under the EfficientNet label.
plot_history(history_effnet, "EfficientNet Accuracy")