In [1]:
import pandas as pd
from google.colab import drive

# Make Google Drive visible to the Colab runtime before any project file is read.
drive.mount('/content/drive')

# Master index of the image collection: one row per image file, carrying its
# magnification, path, filename, class, slide id and tumor type.
df = pd.read_csv(
    "/content/drive/MyDrive/Breast Cancer Project/new_master_dataset.csv"
)
df

Mounted at /content/drive


Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-001.png,benign,22549CD,A
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-002.png,benign,22549CD,A
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-003.png,benign,22549CD,A
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-004.png,benign,22549CD,A
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-005.png,benign,22549CD,A
...,...,...,...,...,...,...
7904,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-031.png,malignant,15704,PC
7905,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-032.png,malignant,15704,PC
7906,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-033.png,malignant,15704,PC
7907,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-034.png,malignant,15704,PC


In [2]:
# Bucket the master index by tumor subtype; the grouped object is kept around
# so further per-subtype aggregations can reuse it.
groupd_df = df.groupby(by="tumor_type")

# Number of images per tumor subtype (class balance check).
tumor_type_counts = groupd_df.size()
tumor_type_counts

tumor_type
A      444
DC    3451
F     1014
LC     626
MC     792
PC     560
PT     453
TA     569
dtype: int64

In [3]:
# Load the magnification-100 splits in a fixed order: train, test, validation.
df_train_100, df_test_100, df_val_100 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented_final.csv",
        "/content/drive/MyDrive/Breast Cancer Project/Mag100/test_df_100.csv",
        "/content/drive/MyDrive/Breast Cancer Project/Mag100/val_df_100.csv",
    )
)

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation settings (applied to training images only)
rotation_range = 20
width_shift_range = 0.2
height_shift_range = 0.2
horizontal_flip = True
vertical_flip = True
shear_range = 0.2
zoom_range = 0.2

# Input-pipeline settings shared by the train / test / validation generators
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 16
SEED = 42  # fixes the shuffle order and random transforms so runs are repeatable

# Train for Mag 100
train_data = df_train_100
test_data = df_test_100
val_data = df_val_100

# Pin one class list for all three splits. Left to itself, each generator
# infers its own class -> index mapping from its own dataframe, so a split
# missing a class would silently shift the label indices.
CLASS_NAMES = sorted(train_data['tumor_type'].unique())

# The ImageNet ResNet50 weights expect inputs run through the ResNet50
# `preprocess_input` function rather than a plain 1/255 rescale, so that
# function is used as the per-image preprocessing step. ImageDataGenerator
# applies it after the random augmentations.
train_datagen = ImageDataGenerator(
    rotation_range=rotation_range,
    width_shift_range=width_shift_range,
    height_shift_range=height_shift_range,
    horizontal_flip=horizontal_flip,
    vertical_flip=vertical_flip,
    shear_range=shear_range,
    zoom_range=zoom_range,
    preprocessing_function=preprocess_input,
)

# Test and validation images get the same preprocessing but no augmentation
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


def make_image_flow(datagen, dataframe, shuffle):
    """Create a batched image iterator over the files listed in `dataframe`.

    Args:
        datagen: ImageDataGenerator that defines preprocessing/augmentation.
        dataframe: DataFrame with a 'path' column (image file path) and a
            'tumor_type' column (class label).
        shuffle: Whether to shuffle the rows each epoch.

    Returns:
        The iterator returned by `datagen.flow_from_dataframe`, yielding
        batches of images resized to IMAGE_SIZE with one-hot labels.
    """
    return datagen.flow_from_dataframe(
        dataframe=dataframe,
        x_col='path',
        y_col='tumor_type',
        classes=CLASS_NAMES,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=shuffle,
        seed=SEED,
    )


# Training batches are shuffled; test/validation keep file order so that
# predictions can be lined up with the dataframe rows.
train_generator = make_image_flow(train_datagen, train_data, shuffle=True)
test_generator = make_image_flow(test_datagen, test_data, shuffle=False)
val_generator = make_image_flow(val_datagen, val_data, shuffle=False)

Found 4344 validated image filenames belonging to 8 classes.
Found 598 validated image filenames belonging to 8 classes.
Found 397 validated image filenames belonging to 8 classes.


In [5]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Lambda, Conv2D, MaxPooling2D, Concatenate, BatchNormalization, Flatten, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications.resnet50 import preprocess_input

In [None]:
def resnet50_builder(num_classes=8, input_shape=(224, 224, 3),
                     feature_layer='conv4_block6_out', dropout_rate=0.4):
    """Build a classifier on top of a frozen, truncated ImageNet ResNet50.

    The ResNet50 backbone is cut at `feature_layer`, frozen, and followed by
    a small head: BatchNormalization -> Flatten -> Dropout -> softmax Dense.
    Calling with no arguments reproduces the original fixed configuration.

    Args:
        num_classes: Number of output classes (units of the softmax layer).
        input_shape: Shape of one input image, passed to ResNet50.
        feature_layer: Name of the ResNet50 layer whose output feeds the head.
        dropout_rate: Dropout rate applied before the final Dense layer.

    Returns:
        An uncompiled Keras `Model` mapping input images to class
        probabilities. The backbone layers are non-trainable; the head
        layers ('BN', 'Dropout', 'Predictions') are trainable.
    """
    # Base ResNet50 without the top (classification) layers
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)

    # Truncate the backbone at the chosen layer and freeze it. Setting
    # `trainable` on the sub-model freezes the underlying layers, which the
    # final model below shares.
    feature_ex_model = Model(inputs=base_model.input,
                             outputs=base_model.get_layer(feature_layer).output,
                             name='resnet50_features')
    feature_ex_model.trainable = False

    # Classification head
    bn1 = BatchNormalization(name='BN')(feature_ex_model.output)
    f = Flatten()(bn1)
    dropout = Dropout(dropout_rate, name='Dropout')(f)
    dense = Dense(num_classes, activation='softmax', name='Predictions')(dropout)

    # Combine feature extraction and classification layers into a single model
    model = Model(inputs=feature_ex_model.input, outputs=dense)
    return model

# Build the ResNet-50 model
model = resnet50_builder()

# Stage 1: warm up the new classification head while the backbone is frozen
# (the builder returns the backbone layers with trainable=False).
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_generator, epochs=3, validation_data=val_generator)

# Stage 2: unfreeze for fine-tuning. The backbone's BatchNormalization layers
# are kept frozen so they stay in inference mode; updating their moving
# statistics during fine-tuning can wreck the pretrained features. The head's
# own BatchNormalization layer (named 'BN' by the builder) remains trainable.
for layer in model.layers:
    is_backbone_bn = isinstance(layer, BatchNormalization) and layer.name != 'BN'
    layer.trainable = not is_backbone_bn

# Recompile so the trainable changes take effect. Use a small learning rate:
# the default Adam rate (1e-3) is large enough to overwrite pretrained weights.
FINE_TUNE_LR = 1e-5
model.compile(optimizer=Adam(learning_rate=FINE_TUNE_LR),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Save a checkpoint every 10 epochs. The old `period` argument is deprecated
# (and rejected by newer Keras), so the interval is expressed through
# `save_freq` as a number of batches: 10 epochs * batches per epoch.
CHECKPOINT_EVERY_N_EPOCHS = 10
checkpoint = ModelCheckpoint(
    "/content/drive/MyDrive/resnet50_model_epoch_{epoch:02d}.h5",
    monitor='val_accuracy',
    verbose=1,
    save_best_only=False,
    mode='max',
    save_freq=CHECKPOINT_EVERY_N_EPOCHS * len(train_generator),
)

# Cut the learning rate by 5x when validation loss stalls for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/3
 20/272 [=>............................] - ETA: 15:59 - loss: 126.8588 - accuracy: 0.2313

In [None]:
# Fine-tuning run: 60 more epochs, validated every epoch, with periodic
# checkpoints and learning-rate reduction on plateau. The returned History
# object holds the per-epoch metrics.
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    callbacks=[checkpoint, reduce_lr],
    epochs=60,
)

In [None]:
# Score the model on the held-out test split, making one pass over every batch.
n_test_batches = len(test_generator)
test_loss, test_accuracy = model.evaluate(test_generator, steps=n_test_batches)

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)