In [2]:
# --------------------------------------
# Breast Cancer Detection Challenge (VGG16)
# --------------------------------------

import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# ======================
# 1️⃣ Dataset directories
# ======================
train_dir = "train"   
test_dir = "test"     

In [3]:
# ======================
# 2️⃣ Data augmentation
# ======================
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    validation_split=0.2  # 80% training, 20% validation
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='binary',
    subset='training',
    shuffle=True
)

val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='binary',
    subset='validation',
    shuffle=True
)

Found 560 images belonging to 2 classes.
Found 140 images belonging to 2 classes.


In [4]:
# ======================
# 3️⃣ Pretrained Model (VGG16)
# ======================
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze base layers first
for layer in base_model.layers:
    layer.trainable = False

# Add custom classification head
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 0us/step


In [5]:
# ======================
# 4️⃣ Compile Model and train 
# ======================
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_vgg16_model.h5', monitor='val_accuracy', save_best_only=True)

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    callbacks=[early_stop, checkpoint]
)

  self._warn_if_super_not_called()


Epoch 1/15
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5128 - loss: 0.9013



[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 2s/step - accuracy: 0.5127 - loss: 0.8999 - val_accuracy: 0.5071 - val_loss: 0.6833
Epoch 2/15
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.4820 - loss: 0.7788



[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 2s/step - accuracy: 0.4820 - loss: 0.7783 - val_accuracy: 0.5714 - val_loss: 0.6889
Epoch 3/15
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 2s/step - accuracy: 0.5571 - loss: 0.7285 - val_accuracy: 0.5000 - val_loss: 0.6925
Epoch 4/15
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 2s/step - accuracy: 0.5433 - loss: 0.7196 - val_accuracy: 0.5000 - val_loss: 0.6934
Epoch 5/15
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step - accuracy: 0.5194 - loss: 0.7227 - val_accuracy: 0.5286 - val_loss: 0.6923
Epoch 6/15
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 2s/step - accuracy: 0.4881 - loss: 0.7218 - val_accuracy: 0.5500 - val_loss: 0.6870
Epoch 7/15
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 2s/step - accuracy: 0.5093 - loss: 0.7216 - val_accuracy: 0.5429 - val_loss: 0.6956


In [7]:
# ======================
# 5️⃣ Predict BEFORE Fine-Tuning
# ======================
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    directory='.',  # assumes 'test/' inside current directory
    classes=[test_dir],
    target_size=(224, 224),
    batch_size=1,
    class_mode=None,
    shuffle=False
)

preds_before = model.predict(test_generator)
results_before = pd.DataFrame({
    'image file': [os.path.basename(f) for f in test_generator.filenames],
    'label': ['M' if p > 0.5 else 'N' for p in preds_before]
})
results_before.to_csv('predictions_VGG16.csv', index=False)
print("✅ Predictions saved to predictions_VGG16.csv")

Found 300 images belonging to 1 classes.
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 194ms/step
✅ Predictions saved to predictions_VGG16.csv


In [8]:
# ======================
# 6️⃣ Fine-Tuning — Unfreeze last few layers of VGG16
# ======================
for layer in base_model.layers[-4:]:
    layer.trainable = True  # unfreeze last 4 layers

# Recompile with a smaller learning rate
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

fine_tune_stop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
fine_tune_checkpoint = ModelCheckpoint('fine_tuned_vgg16_model.h5', monitor='val_accuracy', save_best_only=True)

history_fine = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    callbacks=[fine_tune_stop, fine_tune_checkpoint]
)

Epoch 1/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.4719 - loss: 0.7566



[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 2s/step - accuracy: 0.4716 - loss: 0.7565 - val_accuracy: 0.5000 - val_loss: 0.6894
Epoch 2/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 2s/step - accuracy: 0.5074 - loss: 0.7132 - val_accuracy: 0.4786 - val_loss: 0.6941
Epoch 3/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 2s/step - accuracy: 0.5234 - loss: 0.7007 - val_accuracy: 0.4643 - val_loss: 0.6950
Epoch 4/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5253 - loss: 0.6900



[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 2s/step - accuracy: 0.5251 - loss: 0.6902 - val_accuracy: 0.5214 - val_loss: 0.6917
Epoch 5/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2s/step - accuracy: 0.5129 - loss: 0.7034 - val_accuracy: 0.5143 - val_loss: 0.6914
Epoch 6/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2s/step - accuracy: 0.5407 - loss: 0.6994 - val_accuracy: 0.5214 - val_loss: 0.6944
Epoch 7/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2s/step - accuracy: 0.5607 - loss: 0.6869 - val_accuracy: 0.4786 - val_loss: 0.6961
Epoch 8/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2s/step - accuracy: 0.5637 - loss: 0.6810 - val_accuracy: 0.4786 - val_loss: 0.7015
Epoch 9/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 2s/step - accuracy: 0.5214 - loss: 0.6946 - val_accuracy: 0.5000 - val_loss: 0.6975


In [9]:
# ======================
# 7️⃣ Predict on Test Set
# ======================
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    directory='.',  # Trick: place test/ inside current dir
    classes=[test_dir],
    target_size=(224, 224),
    batch_size=1,
    class_mode=None,
    shuffle=False
)

preds = model.predict(test_generator)
pred_labels = ['M' if p > 0.5 else 'N' for p in preds]

Found 300 images belonging to 1 classes.
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 187ms/step


In [10]:
# ======================
# 8️⃣ Save Predictions
# ======================
results = pd.DataFrame({
    'image file': [os.path.basename(f) for f in test_generator.filenames],
    'label': pred_labels
})

results.to_csv('predictions_VGG16_FT.csv', index=False)
print("✅ Predictions saved to predictions_VGG16_16.csv")

✅ Predictions saved to predictions_VGG16_16.csv
