In [2]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
df = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/new_master_dataset.csv")
df

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-001.png,benign,22549CD,A
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-002.png,benign,22549CD,A
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-003.png,benign,22549CD,A
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-004.png,benign,22549CD,A
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-005.png,benign,22549CD,A
...,...,...,...,...,...,...
7904,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-031.png,malignant,15704,PC
7905,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-032.png,malignant,15704,PC
7906,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-033.png,malignant,15704,PC
7907,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-034.png,malignant,15704,PC


In [3]:
groupd_df = df.groupby("tumor_type")

# You can then perform various operations on the grouped data, such as getting the count of each tumor type
tumor_type_counts = groupd_df.size()
tumor_type_counts

tumor_type
A      444
DC    3451
F     1014
LC     626
MC     792
PC     560
PT     453
TA     569
dtype: int64

In [4]:
df_train_100 = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/df_mag100_augmented_final.csv")
df_test_100 = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/test_df_100.csv")
df_val_100 = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/val_df_100.csv")

In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

# Define augmentation parameters
rotation_range = 20
width_shift_range = 0.2
height_shift_range = 0.2
horizontal_flip = True
vertical_flip = True
shear_range = 0.2
zoom_range = 0.2

# Train for Mag 100
train_data = df_train_100
test_data = df_test_100
val_data = df_val_100

# Image preprocessing with augmentation for training
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=rotation_range,
    width_shift_range=width_shift_range,
    height_shift_range=height_shift_range,
    horizontal_flip=horizontal_flip,
    vertical_flip=vertical_flip,
    shear_range=shear_range,
    zoom_range=zoom_range,
    rescale=1./255
)

# Image preprocessing without augmentation for testing and validation
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Flow training images in batches using the training generator
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    x_col='path',
    y_col='tumor_type',
    target_size=(700, 460),
    batch_size=32,
    class_mode='categorical',
    shuffle=True  # Shuffle training data
)

# Flow testing images in batches using the testing generator (without augmentation)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data,
    x_col='path',
    y_col='tumor_type',
    target_size=(700, 460),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

# Flow validation images in batches using the validation generator (without augmentation)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    x_col='path',
    y_col='tumor_type',
    target_size=(700, 460),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

Found 4344 validated image filenames belonging to 8 classes.
Found 598 validated image filenames belonging to 8 classes.
Found 397 validated image filenames belonging to 8 classes.


In [6]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Concatenate, Flatten, Dropout, Dense, BatchNormalization, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

In [8]:
# Define ResNet50 architecture

from tensorflow.keras.applications.resnet50 import preprocess_input


def resnet50_builder():
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

    layer_name = 'conv4_block6_out'
    feature_ex_model = Model(inputs=base_model.input,
                             outputs=base_model.get_layer(layer_name).output,
                             name='resnet50_features')
    feature_ex_model.trainable = False


    # Use the preprocess_input function from Keras ResNet50 module
    p1_layer = Lambda(preprocess_input, name='ResNet_Preprocess')
    image_input = Input((700, 460, 3), name='Image_Input')
    p1_tensor = p1_layer(image_input)

    out = feature_ex_model(p1_tensor)
    feature_ex_model = Model(inputs=image_input, outputs=out)

    def naive_inception_module(layer_in, f1, f2, f3):
        # 1x1 conv
        conv1 = Conv2D(f1, (1,1), padding='same', activation='relu')(layer_in)
        # 3x3 conv
        conv3 = Conv2D(f2, (3,3), padding='same', activation='relu')(layer_in)
        # 5x5 conv
        conv5 = Conv2D(f3, (5,5), padding='same', activation='relu')(layer_in)
        # 3x3 max pooling
        pool = MaxPooling2D((3,3), strides=(1,1), padding='same')(layer_in)
        # concatenate filters, assumes filters/channels last
        layer_out = Concatenate()([conv1, conv3, conv5, pool])
        return layer_out

    out = naive_inception_module(feature_ex_model.output, 64, 128, 32)
    num_classes = 8

    bn1 = BatchNormalization(name='BN')(out)
    f = Flatten()(bn1)
    dropout = Dropout(0.4, name='Dropout')(f)
    dense = Dense(num_classes, activation='softmax', name='Predictions')(dropout)

    model = Model(inputs=feature_ex_model.input, outputs=dense)
    return model

# Build the ResNet-50 model
model = resnet50_builder()

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Define callbacks
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
checkpoint = ModelCheckpoint("/content/drive/MyDrive/resnet50_model.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

In [None]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=50,
    validation_data=val_generator,
    validation_steps=len(val_generator),
    callbacks=[reduce_lr, checkpoint]
)

Epoch 1/50
  3/136 [..............................] - ETA: 20:54 - loss: 635.4351 - accuracy: 0.2292

In [None]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator, steps=len(test_generator))
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)