In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.resnet50 import preprocess_input
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Record the TensorFlow version in the notebook output for reproducibility.
print("TensorFlow Version: " + tf.__version__)

TensorFlow Version: 2.20.0


In [None]:
# --- Model Constants ---
IMG_SIZE = 224        # side length used for generator target_size and the model input_shape
BATCH_SIZE = 32       # batch size passed to every data generator
CHANNELS = 3          # last dimension of the model input_shape
NUM_CLASSES = 1       # units of the final sigmoid Dense layer
BASE_LR = 1e-4        # Adam learning rate for Phase 1 (head training)
FINE_TUNE_LR = 1e-5   # Adam learning rate for Phase 2 (fine-tuning)

# --- UPDATE THESE PATHS ---
# DATA_DIR must end with a path separator because the split paths below are
# built by plain string concatenation.
DATA_DIR = 'C:\\Users\\adith\\Downloads\\Eye-Cancer\\Dataset\\'
TRAIN_CSV = DATA_DIR + 'train\\train.csv'
VALID_CSV = DATA_DIR + 'valid\\valid.csv'
TEST_CSV = DATA_DIR + 'test\\test.csv'

# --- FIXED COLUMN NAMES ---
# Column holding the image file name of each row.
IMAGE_COL = 'filename'
# Name of the derived binary target column.
LABEL_COL = 'is_disease'

# Per-disease indicator columns, spelled as they appear once surrounding
# whitespace has been stripped from the CSV header.
DISEASE_COLUMNS = [
    'bulging_eyes',
    'cataracts',
    'crossed_eyes',
    'uveitis',
]


def create_binary_target(df, disease_columns=None, label_col=None, extra_drop_columns=('normal_eyes',)):
    """Return a new DataFrame with the per-disease flags collapsed into one binary label.

    A row is labelled 1 when the sum of its disease columns is greater than
    zero and 0 otherwise. The disease columns and ``extra_drop_columns`` are
    removed from the result. The caller's DataFrame is never modified, so the
    function can safely be re-run on the same input.

    Args:
        df: DataFrame containing every column in ``disease_columns`` and
            ``extra_drop_columns``.
        disease_columns: Columns summed to decide the label. Defaults to the
            module-level ``DISEASE_COLUMNS``.
        label_col: Name of the binary column to create. Defaults to the
            module-level ``LABEL_COL``.
        extra_drop_columns: Further columns removed from the result.

    Returns:
        A new DataFrame holding the remaining columns plus ``label_col``.

    Raises:
        KeyError: If a disease column or an extra drop column is missing.
    """
    disease_columns = list(DISEASE_COLUMNS if disease_columns is None else disease_columns)
    label_col = LABEL_COL if label_col is None else label_col

    # Any positive flag across the disease columns marks the row as diseased.
    is_diseased = df[disease_columns].sum(axis=1) > 0

    # drop() returns a new frame, so the label is written to the copy only.
    result = df.drop(columns=disease_columns + list(extra_drop_columns))
    result[label_col] = np.where(is_diseased, 1, 0)
    return result

# --- Load and Transform DataFrames ---
def _load_split(csv_path):
    """Read one split's CSV and return it with the string-typed binary label column."""
    split_df = pd.read_csv(csv_path)

    # Strip whitespace from the header so the column lookups below match.
    split_df.columns = split_df.columns.str.strip()

    split_df = create_binary_target(split_df)

    # Convert the binary column to string for Keras's flow_from_dataframe.
    split_df[LABEL_COL] = split_df[LABEL_COL].astype(str)
    return split_df


try:
    train_df = _load_split(TRAIN_CSV)
    valid_df = _load_split(VALID_CSV)
    test_df = _load_split(TEST_CSV)
except FileNotFoundError as e:
    print(f"CRITICAL ERROR: File not found. Please check your file paths in Cell 1: {e}")
    # Re-raise: the following cells cannot run without the DataFrames, and
    # swallowing the error would only surface later as a NameError.
    raise
except KeyError as e:
    print(f"CRITICAL ERROR: A column was not found after cleaning. Please verify the spelling of columns in DISEASE_COLUMNS: {e}")
    raise
else:
    print("DataFrames loaded, cleaned, transformed, and labels prepared successfully!")
    print(f"Train samples: {len(train_df)}. First 5 rows of the transformed data:\n{train_df.head()}")

DataFrames loaded, cleaned, transformed, and labels prepared successfully!
Train samples: 2214. First 5 rows of the transformed data:
                                            filename is_disease
0     15_jpg.rf.b8a6d4c751dd3da5c62861bff997124c.jpg          1
1  0016_0P_-10H_10V_png_jpg.rf.b76aa8767452ebb6f3...          0
2  F_1061_jpg.rf.b777e4afeededd6a5f6a484c2b4029b6...          1
3  images-6_jpeg_jpg.rf.b98a56b0f762e602fccfb9813...          1
4  images-12-_jpeg_jpg.rf.b7f535c63943e5c62b2b97b...          1


In [None]:
# --- Generator configuration ---
# IMG_SIZE, BATCH_SIZE, IMAGE_COL and LABEL_COL are taken from the
# configuration cell above rather than being re-declared here, so the values
# cannot drift apart between cells.

# The image folders ('train', 'valid', 'test') live under the same root as the
# CSV files, so reuse DATA_DIR instead of repeating the hard-coded path.
IMAGE_FOLDER_BASE = DATA_DIR



# Random augmentation settings, applied to the training images only.
_train_augmentation = {
    'rotation_range': 15,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}

# Both generators run the ResNet50 preprocess_input on every image; the
# validation/test generator adds no augmentation on top of it.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, **_train_augmentation)
valid_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
print("Initializing Data Generators...")


def _build_generator(datagen, split_df, split_name, shuffle):
    """Create the binary-label image iterator for one dataset split.

    Args:
        datagen: ImageDataGenerator used to load (and optionally augment) images.
        split_df: DataFrame with the IMAGE_COL and LABEL_COL columns.
        split_name: Sub-folder of IMAGE_FOLDER_BASE holding the split's images.
        shuffle: Whether batches are drawn in random order.

    Returns:
        The iterator returned by ``flow_from_dataframe``.

    Raises:
        FileNotFoundError: If fewer images were accepted than rows are listed
            in ``split_df``.
    """
    image_dir = f'{IMAGE_FOLDER_BASE}{split_name}\\'
    generator = datagen.flow_from_dataframe(
        dataframe=split_df,
        directory=image_dir,
        x_col=IMAGE_COL,
        y_col=LABEL_COL,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=shuffle,
    )

    # flow_from_dataframe ignores invalid filenames instead of raising, so a
    # wrong directory would otherwise go unnoticed. Compare the counts and
    # fail loudly.
    dropped = len(split_df) - generator.samples
    if dropped > 0:
        raise FileNotFoundError(
            f"{dropped} of {len(split_df)} images listed for the '{split_name}' split "
            f"are missing or invalid under {image_dir}"
        )
    return generator


try:
    train_generator = _build_generator(train_datagen, train_df, 'train', shuffle=True)

    # Validation is not shuffled so every epoch is scored on the same images
    # in the same order.
    valid_generator = _build_generator(valid_test_datagen, valid_df, 'valid', shuffle=False)

    # CRITICAL: Keep test images in order for final evaluation
    test_generator = _build_generator(valid_test_datagen, test_df, 'test', shuffle=False)

except FileNotFoundError as e:
    print(f"\nCRITICAL ERROR: Image files not found. Please verify the directory path: {e}")
    print("Ensure the folders 'train', 'valid', and 'test' containing the actual JPG/PNG images are located correctly.")
    # Re-raise so the notebook stops here instead of failing later on an
    # undefined generator.
    raise

Initializing Data Generators...
Found 2214 validated image filenames belonging to 2 classes.
Found 363 validated image filenames belonging to 2 classes.
Found 100 validated image filenames belonging to 2 classes.


In [None]:
# --- Model Constants from previous successful cell ---
IMG_SIZE = 224
CHANNELS = 3
NUM_CLASSES = 1

# --- Load the Pre-trained ResNet50 Base Model ---
print("Building ResNet50 Base Model...")
input_shape = (IMG_SIZE, IMG_SIZE, CHANNELS)
# ImageNet weights, without the original classification top.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)

# Phase 1 trains only the new head, so the whole backbone is frozen.
base_model.trainable = False

# Custom binary classification head placed on top of the backbone:
# pooled features -> 512-unit ReLU layer -> dropout -> sigmoid output.
head_layers = [
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='sigmoid'),
]
model = Sequential([base_model, *head_layers])

print("\nModel Summary (Before Training):")
model.summary()

Building ResNet50 Base Model...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 0us/step

Model Summary (Before Training):


In [None]:
# --- Model Constants from previous successful cell ---
BASE_LR = 1e-4 # Learning rate for Phase 1
FINE_TUNE_LR = 1e-5 # Learning rate for Phase 2

# Callbacks: stop when val_loss has not improved for 10 epochs (restoring the
# best weights), and save the model with the best val_accuracy to disk.
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss', restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint('best_eye_cancer_model.h5', monitor='val_accuracy', save_best_only=True)
]

# Binary cross-entropy matches the single sigmoid output unit of the model.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=BASE_LR),
    loss='binary_crossentropy',
    metrics=['accuracy',
              tf.keras.metrics.SensitivityAtSpecificity(0.9, name='sensitivity_at_90_specificity'),
              tf.keras.metrics.SpecificityAtSensitivity(0.9, name='specificity_at_90_sensitivity')]
)

print("\n--- Starting Phase 1: Training Classification Head (10 Epochs) ---")
# steps_per_epoch / validation_steps are intentionally NOT passed.
# `samples // BATCH_SIZE` is one step short of the generator's real length
# whenever the sample count is not a multiple of the batch size. In the
# recorded run this made every second epoch finish within seconds on leftover
# data, and it dropped the last partial validation batch. Letting Keras use
# the generator's own length runs one full pass over each set per epoch.
history_phase1 = model.fit(
    train_generator,
    epochs=10,
    validation_data=valid_generator,
    callbacks=callbacks
)


--- Starting Phase 1: Training Classification Head (10 Epochs) ---


  self._warn_if_super_not_called()


Epoch 1/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8233 - loss: 0.3786 - sensitivity_at_90_specificity: 0.6368 - specificity_at_90_sensitivity: 0.5509



[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 3s/step - accuracy: 0.9102 - loss: 0.2120 - sensitivity_at_90_specificity: 0.8554 - specificity_at_90_sensitivity: 0.8418 - val_accuracy: 0.9858 - val_loss: 0.0474 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 2/10
[1m 1/69[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:05[0m 970ms/step - accuracy: 0.9375 - loss: 0.1212 - sensitivity_at_90_specificity: 0.9500 - specificity_at_90_sensitivity: 1.0000



[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 151ms/step - accuracy: 0.9375 - loss: 0.1212 - sensitivity_at_90_specificity: 0.9500 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9858 - val_loss: 0.0454 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 3/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9716 - loss: 0.0805 - sensitivity_at_90_specificity: 0.9882 - specificity_at_90_sensitivity: 0.9847



[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 1s/step - accuracy: 0.9757 - loss: 0.0705 - sensitivity_at_90_specificity: 0.9926 - specificity_at_90_sensitivity: 0.9928 - val_accuracy: 0.9915 - val_loss: 0.0247 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 4/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 205ms/step - accuracy: 0.9688 - loss: 0.0449 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9886 - val_loss: 0.0248 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 5/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9866 - loss: 0.0481 - sensitivity_at_90_specificity: 0.9967 - specificity_at_90_sensitivity: 0.9936



[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 1s/step - accuracy: 0.9830 - loss: 0.0473 - sensitivity_at_90_specificity: 0.9949 - specificity_at_90_sensitivity: 0.9976 - val_accuracy: 0.9943 - val_loss: 0.0189 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 6/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 204ms/step - accuracy: 1.0000 - loss: 0.0114 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9943 - val_loss: 0.0194 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 7/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9865 - loss: 0.0357 - sensitivity_at_90_specificity: 0.9997 - specificity_at_90_sensitivity: 1.0000



[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 2s/step - accuracy: 0.9867 - loss: 0.0360 - sensitivity_at_90_specificity: 0.9994 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9972 - val_loss: 0.0132 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 8/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 204ms/step - accuracy: 1.0000 - loss: 0.0081 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9972 - val_loss: 0.0129 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 9/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 2s/step - accuracy: 0.9895 - loss: 0.0280 - sensitivity_at_90_specificity: 0.9983 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9972 - val_loss: 0.0106 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 10/10
[1m69/69

In [None]:
# 1. Unfreeze Top Layers
base_model = model.layers[0] # Get the ResNet50 layer object
base_model.trainable = True

# Number of trailing backbone layers that are candidates for fine-tuning.
NUM_FINE_TUNE_LAYERS = 20

# Freeze all layers except the last NUM_FINE_TUNE_LAYERS to focus fine-tuning
# on the top of the backbone.
for layer in base_model.layers[:-NUM_FINE_TUNE_LAYERS]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even inside the unfrozen tail: a
# trainable BatchNormalization layer switches to batch statistics and updates
# its moving averages, which the Keras fine-tuning guide warns can undo what
# the pre-trained backbone has learned.
for layer in base_model.layers[-NUM_FINE_TUNE_LAYERS:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# 2. Re-compile the model with a tiny learning rate for careful weight adjustment.
# Re-compiling is required for the changed `trainable` flags to take effect.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=FINE_TUNE_LR),
    loss='binary_crossentropy',
    metrics=['accuracy',
              tf.keras.metrics.SensitivityAtSpecificity(0.9, name='sensitivity_at_90_specificity'),
              tf.keras.metrics.SpecificityAtSensitivity(0.9, name='specificity_at_90_sensitivity')]
)

print("\n--- Starting Phase 2: Fine-Tuning Entire Model (Up to 50 Epochs) ---")
# steps_per_epoch / validation_steps are intentionally NOT passed.
# `samples // BATCH_SIZE` is one step short of the generator's real length
# whenever the sample count is not a multiple of the batch size. In the
# recorded run this made every second epoch finish within seconds on leftover
# data, and it dropped the last partial validation batch.
history_phase2 = model.fit(
    train_generator,
    epochs=50,
    validation_data=valid_generator,
    callbacks=callbacks
)


--- Starting Phase 2: Fine-Tuning Entire Model (Up to 50 Epochs) ---
Epoch 1/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 2s/step - accuracy: 0.9913 - loss: 0.0321 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 0.9952 - val_accuracy: 0.9972 - val_loss: 0.0116 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 2/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 216ms/step - accuracy: 1.0000 - loss: 0.0267 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9972 - val_loss: 0.0115 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 3/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 2s/step - accuracy: 0.9950 - loss: 0.0190 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9972 - val_loss: 0.0102 - val_sensitivity_at_90_speci



[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 2s/step - accuracy: 1.0000 - loss: 0.0012 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 1.0000 - val_loss: 7.4405e-04 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 24/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 216ms/step - accuracy: 1.0000 - loss: 0.0012 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9972 - val_loss: 0.0062 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 25/50
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 2s/step - accuracy: 1.0000 - loss: 0.0013 - sensitivity_at_90_specificity: 1.0000 - specificity_at_90_sensitivity: 1.0000 - val_accuracy: 0.9972 - val_loss: 0.0063 - val_sensitivity_at_90_specificity: 1.0000 - val_specificity_at_90_sensitivity: 1.0000
Epoch 26/50
[1m

In [None]:
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# --- Re-using variables from successful steps ---
BATCH_SIZE = 32
IMAGE_COL = 'filename'
LABEL_COL = 'is_disease'

# 1. Load the Best Model (the checkpoint written during training)
try:
    best_model = tf.keras.models.load_model('best_eye_cancer_model.h5')
except OSError:
    print("CRITICAL ERROR: Model file not found. Ensure training finished successfully and 'best_eye_cancer_model.h5' was created.")
    # Re-raise: nothing below can run without the model, and continuing would
    # only fail later with a NameError on `best_model`.
    raise
print("Best trained model loaded successfully.")

# The integer labels depend on each generator's own class mapping. Make sure
# the test mapping equals the one the model was trained with, otherwise the
# report below would compare mismatched label ids.
if test_generator.class_indices != train_generator.class_indices:
    raise ValueError(
        f"Class mapping mismatch: train={train_generator.class_indices}, "
        f"test={test_generator.class_indices}"
    )

# 2. Get True Labels (in file order, because the test generator is not shuffled)
y_true = test_generator.classes

# 3. Get Predicted Probabilities
# No `steps` argument: Keras iterates over the whole generator by default.
print("Generating predictions on the Test Set...")
y_pred_proba = best_model.predict(test_generator)

# 4. Convert probabilities to binary classes using a 0.5 threshold.
# The sigmoid output has shape (N, 1); flatten it to match the 1-D y_true.
y_pred = (y_pred_proba.ravel() > 0.5).astype(int)

# Get the class names ('0' and '1') for the report
class_labels = list(train_generator.class_indices.keys())

print("\n--- Final Model Performance on Test Set ---")

# Detailed Classification Report
print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=class_labels, digits=4))

# Confusion Matrix (rows: true class, columns: predicted class)
print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred))



Best trained model loaded successfully.
Generating predictions on the Test Set...
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step

--- Final Model Performance on Test Set ---
Classification Report:
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        21
           1     1.0000    1.0000    1.0000        79

    accuracy                         1.0000       100
   macro avg     1.0000    1.0000    1.0000       100
weighted avg     1.0000    1.0000    1.0000       100


Confusion Matrix:
[[21  0]
 [ 0 79]]
