<a href="https://colab.research.google.com/github/DarthCipher7/DR/blob/main/DR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
#cell 1

from google.colab import drive
print("Mounting Google Drive...")
drive.mount('/content/drive')
print("Drive mounted successfully.")

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive mounted successfully.


In [3]:
#cell 2

import os
import zipfile

# --- Define File Paths ---
zip_path = '/content/drive/MyDrive/Diabetic_Retinopathy_Project/datasets/diabetic-retinopathy-224x224-2019-data.zip'
extract_path = '/content/dr_data_224'
check_folder = os.path.join(extract_path, 'colored_images')

# --- Unzip Only If Not Already Extracted ---
if os.path.exists(check_folder):
    print(f"Dataset already exists in '{extract_path}'. Skipping unzipping.")
else:
    print(f"Dataset not found. Starting extraction from Google Drive...")
    print(f"Source: {zip_path}")
    os.makedirs(extract_path, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Data unzipped successfully!")

# --- Verify Contents ---
print("\n--- Data Setup Complete ---")
!ls {check_folder}

Found 2931 images belonging to 5 classes.
Found 731 images belonging to 5 classes.


In [6]:
#cell 3

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import class_weight
import os

# --- Parameters ---
DATA_DIR = '/content/dr_data_224/colored_images'
IMG_SIZE = (299, 299)  # Image size for EfficientNetB3
BATCH_SIZE = 16
VALIDATION_SPLIT = 0.2

# --- Create Data Generators with Augmentation ---
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT
)

validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

# --- Flow from Directory ---
train_generator = train_datagen.flow_from_directory(
    directory=DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

validation_generator = validation_datagen.flow_from_directory(
    directory=DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# --- Calculate Class Weights ---
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes
)
class_weights_dict = dict(enumerate(class_weights))

print(f"\nClass weights to handle imbalance: \n{class_weights_dict}")

Found 2931 images belonging to 5 classes.
Found 731 images belonging to 5 classes.
Data Directory: /content/dr_data_224/colored_images
Image Size: (299, 299)
Batch Size: 16

Found 2931 images for training.
Found 731 images for validation.

Number of classes: 5
Class labels: ['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']

Calculated Class Weights to handle imbalance: 
{0: np.float64(1.9804054054054054), 1: np.float64(0.73275), 2: np.float64(0.4059556786703601), 3: np.float64(2.483898305084746), 4: np.float64(3.7819354838709676)}


In [7]:
#cell 4

from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

def build_model(num_classes):
    base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))
    base_model.trainable = False  # Freeze the base

    # Add custom head
    x = base_model.output
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    outputs = Dense(num_classes, activation='softmax', name='predictions')(x)
    model = Model(inputs=base_model.input, outputs=outputs)
    return model

# --- Build and Compile ---
model = build_model(num_classes=train_generator.num_classes)
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
[1m43941136/43941136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step
--- Model Summary (Initial: Only Head is Trainable) ---


In [None]:
#cell 5

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# --- Callbacks ---
model_checkpoint = ModelCheckpoint('best_retinopathy_model_v2.keras', save_best_only=True, monitor='val_accuracy', mode='max', verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)

# --- Stage 1: Train the Head ---
print("\n--- STAGE 1: TRAINING THE HEAD ---")
history_head = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    class_weight=class_weights_dict,
    callbacks=[model_checkpoint, early_stopping, reduce_lr]
)

# --- Stage 2: Fine-Tuning ---
print("\n--- STAGE 2: FINE-TUNING ---")
model.layers[0].trainable = True # Unfreeze the base model
model.compile(
    optimizer=Adam(learning_rate=1e-5), # Re-compile with a very low learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history_fine_tune = model.fit(
    train_generator,
    epochs=50, # Continue for more epochs
    initial_epoch=history_head.epoch[-1] if history_head.epoch else 0,
    validation_data=validation_generator,
    class_weight=class_weights_dict,
    callbacks=[model_checkpoint, early_stopping, reduce_lr]
)


--- Starting Stage 1: Training the model head ---


  self._warn_if_super_not_called()


Epoch 1/20
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 478ms/step - accuracy: 0.2127 - loss: 2.2566 - precision: 0.2091 - recall: 0.1231
Epoch 1: val_accuracy improved from -inf to 0.08071, saving model to best_retinopathy_model_v2.keras
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 621ms/step - accuracy: 0.2127 - loss: 2.2566 - precision: 0.2090 - recall: 0.1230 - val_accuracy: 0.0807 - val_loss: 1.9891 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/20
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385ms/step - accuracy: 0.2186 - loss: 2.0885 - precision: 0.1975 - recall: 0.0775
Epoch 2: val_accuracy did not improve from 0.08071
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 402ms/step - accuracy: 0.2186 - loss: 2.0883 - precision: 0.1974 - recall: 0.0775 - val_accuracy: 0.0807 - val_loss: 1.7657 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 

In [None]:
#cell 6

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# --- Load the best model ---
print("Loading best saved model for final evaluation...")
final_model = tf.keras.models.load_model('best_retinopathy_model_v2.keras')

# --- Make Predictions ---
predictions = final_model.predict(validation_generator)
y_pred = np.argmax(predictions, axis=1)
y_true = validation_generator.classes
class_labels = list(validation_generator.class_indices.keys())

# --- Display Results ---
print(f"\n--- Final Accuracy: {accuracy_score(y_true, y_pred) * 100:.2f}% ---\n")
print("--- Classification Report ---")
print(classification_report(y_true, y_pred, target_names=class_labels))

print("\n--- Confusion Matrix ---")
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Final Confusion Matrix')
plt.show()

Evaluating the final model...
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 165ms/step - accuracy: 0.8112 - loss: 0.5526

Final Validation Loss: 0.6075456738471985
Final Validation Accuracy: 0.7770177721977234


In [None]:
#cell 7

import numpy as np
from google.colab import files
import tensorflow as tf
from tensorflow.keras.utils import load_img, img_to_array
import matplotlib.pyplot as plt

# --- Load the saved model ---
predictor_model = tf.keras.models.load_model('best_retinopathy_model_v2.keras')

# --- Get the class names from the generator (ensure this is run after Cell 3) ---
class_indices = train_generator.class_indices
# Create a reverse mapping from index to label name
class_labels = {v: k for k, v in class_indices.items()}

# --- Upload file ---
uploaded = files.upload()

# --- Process and predict each uploaded file ---
for fn in uploaded.keys():
  # Load and prepare the image
  path = '/content/' + fn
  img = load_img(path, target_size=IMG_SIZE) # Resize to model's expected input
  img_array = img_to_array(img)
  img_array /= 255.0  # Rescale pixel values
  img_batch = np.expand_dims(img_array, axis=0) # Add batch dimension

  # Make prediction
  prediction = predictor_model.predict(img_batch)
  predicted_class_index = np.argmax(prediction[0])
  predicted_class_label = class_labels[predicted_class_index]
  confidence = np.max(prediction[0]) * 100

  # Display results
  plt.figure(figsize=(6, 6))
  plt.imshow(img)
  plt.axis('off')
  plt.title(f"Predicted: {predicted_class_label}\nConfidence: {confidence:.2f}%")
  plt.show()