<a href="https://colab.research.google.com/github/DarthCipher7/DR/blob/main/DR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
### Cell 1: Mount Drive and Unzip Data ###

# Import necessary libraries
import os
import zipfile
from google.colab import drive

# Mount your Google Drive (this will require authentication)
drive.mount('/content/drive')

# Define the path to your zip file
zip_path = '/content/drive/MyDrive/Colab Notebooks/data.zip'

# Define the directory to extract the data to
extract_path = '/content/data'

# Create the extraction directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

# Unzip your data
print("Starting to unzip the data...")
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
print("Data unzipped successfully!")

# You can add this to see the contents of your unzipped folder
print("\nContents of the data folder:")
!ls /content/data

In [None]:
### MODIFIED CELL 2: Load Labels and Prepare Data Generators (with Augmentation and Defined Order) ###

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root directory passed to flow_from_directory (one sub-folder per class name below)
main_data_dir = '/content/data/data/gaussian_filtered_images/gaussian_filtered_images'

# Class sub-folder names. The position in this list becomes the class index
# (first entry -> 0, second -> 1, ...), for both generators below.
# NOTE(review): this list is in alphabetical order, NOT severity order.
# A severity ordering would presumably be
# No_DR, Mild, Moderate, Severe, Proliferate_DR - confirm before changing,
# because reordering changes what every class index means.
CLASS_ORDER = ['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']

# Options common to the training and validation ImageDataGenerators:
# scale pixels to [0, 1] and hold out 20% of the images for validation.
_common_datagen_options = dict(
    rescale=1./255,
    validation_split=0.2,
)

# Options common to both flow_from_directory calls
_common_flow_options = dict(
    directory=main_data_dir,
    target_size=(300, 300),   # Same image size for training and validation
    batch_size=32,
    class_mode='categorical',
    classes=CLASS_ORDER,      # Enforce the class -> index mapping
)

# Training images additionally receive random augmentation
train_datagen = ImageDataGenerator(
    rotation_range=20,        # Random rotations
    width_shift_range=0.1,    # Random horizontal shifts
    height_shift_range=0.1,   # Random vertical shifts
    shear_range=0.1,          # Random shear
    zoom_range=0.1,           # Random zoom
    horizontal_flip=True,     # Random left/right flips
    fill_mode='nearest',
    **_common_datagen_options,
)

# Validation images are only rescaled - no augmentation
validation_datagen = ImageDataGenerator(**_common_datagen_options)

# --- Build the two directory iterators from the shared options ---

train_generator = train_datagen.flow_from_directory(
    subset='training',
    shuffle=True,
    **_common_flow_options,
)

validation_generator = validation_datagen.flow_from_directory(
    subset='validation',
    shuffle=False,  # Keep a fixed order so predictions line up with .classes
    **_common_flow_options,
)


In [None]:
### ADDED CELL 2a: Calculate Class Weights to Handle Imbalance ###

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Class names ordered by the integer index the generator assigned to them,
# so class_labels[i] is the name of class index i. Sorting by index (rather
# than alphabetically) keeps this list aligned with the weights printed below
# even if the class order used by the generator is not alphabetical.
label_to_index = train_generator.class_indices
class_labels = sorted(label_to_index, key=label_to_index.get)

# Integer class index of every training image
training_classes = train_generator.classes

# Only classes that actually occur in the training subset can be weighted
present_classes = np.unique(training_classes)

# 'balanced' mode weights each class inversely proportional to its frequency
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=training_classes
)

# model.fit() expects {class_index: weight}; use native int/float so the
# mapping prints cleanly and does not depend on NumPy scalar types.
class_weight_dict = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, class_weights)
}

print("Class labels are:", class_labels)
print("Calculated class weights are:", class_weights)
print("Class weight dictionary for the model:", class_weight_dict)


In [None]:
### UPGRADED CELL 3: Build the Model with EfficientNetB3 ###

from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.layers import Input, Rescaling
from tensorflow.keras.optimizers import Adam

# --- Step 1: Load the EfficientNetB3 base model ---
# ImageNet-pretrained convolutional backbone without its classification head
base_model = EfficientNetB3(
    weights='imagenet',
    include_top=False,
    input_shape=(300, 300, 3)
)

# --- Step 2: Allow the whole base model to be fine-tuned ---
# All backbone weights are updated during training, not just the new head
base_model.trainable = True

# --- Step 3: Build the final model ---
# The Keras EfficientNet models ship with their own input preprocessing and
# expect raw pixel values in the [0, 255] range. The data generators and the
# prediction cell in this notebook feed images already divided by 255, so the
# values are scaled back up here before they reach the backbone. Without this
# the pretrained network would see inputs 255x smaller than it was trained on.
model = Sequential([
    Input(shape=(300, 300, 3)),     # Explicit input so the model is built immediately
    Rescaling(255.0),               # [0, 1] -> [0, 255], as EfficientNet expects
    base_model,
    GlobalAveragePooling2D(),       # Collapse spatial dimensions to one vector per image
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax')  # Output layer with 5 units for 5 classes
])


In [None]:
# --- Step 4: Compile the model ---
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# All three callbacks below react to the same validation metric
MONITORED_METRIC = 'val_loss'

# Initial learning rate of 1e-4; ReduceLROnPlateau (below) lowers it when
# the monitored metric stops improving.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Show the layer-by-layer architecture
model.summary()


### UPGRADED CELL 4: Train the Model with an Advanced Callback ###

# Halt training after 5 epochs without improvement and roll back to the best weights
early_stopping = EarlyStopping(
    monitor=MONITORED_METRIC,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to disk only when the monitored metric improves
model_checkpoint = ModelCheckpoint(
    filepath='best_retinopathy_efficientnet_model.keras',
    monitor=MONITORED_METRIC,
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 2 epochs without improvement, never below 1e-6
reduce_lr = ReduceLROnPlateau(
    monitor=MONITORED_METRIC,
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

# Callbacks are passed to fit() in this order
training_callbacks = [early_stopping, model_checkpoint, reduce_lr]

# --- Train with the callbacks and the per-class loss weights ---
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=30,
    class_weight=class_weight_dict,
    callbacks=training_callbacks,
)


In [None]:
### Cell 5: Evaluate the Model ###

import matplotlib.pyplot as plt

# Learning curves: accuracy in the left panel, loss in the right panel.
# Each entry is (key in history.history, human-readable name).
curve_specs = [
    ('accuracy', 'Accuracy'),
    ('loss', 'Loss'),
]

plt.figure(figsize=(12, 4))
for panel_number, (metric_key, metric_name) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, panel_number)
    plt.plot(history.history[metric_key], label=f'Training {metric_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.title(f'Training and Validation {metric_name}')
    plt.legend()

plt.show()

# Load the saved model file from disk
final_model = tf.keras.models.load_model('best_retinopathy_efficientnet_model.keras')

# Score it on the validation generator; evaluate() returns [loss, accuracy]
# for the loss/metrics the model was compiled with
print("Evaluating the final model...")
final_scores = final_model.evaluate(validation_generator)
final_val_loss, final_val_accuracy = final_scores[0], final_scores[1]

print(f"\nFinal Validation Loss: {final_val_loss}")
print(f"Final Validation Accuracy: {final_val_accuracy}")


In [None]:
### ADDED CELL 6: Generate and Visualize the Confusion Matrix ###

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

# Load the best saved model BEFORE it is used for prediction, so this cell
# always reports on the checkpoint file rather than on whatever object
# happens to be bound to `final_model` in the kernel.
final_model = tf.keras.models.load_model('best_retinopathy_efficientnet_model.keras')

# Predictions are compared position-by-position with generator.classes, so
# the generator must yield images in a fixed order starting from the first batch.
validation_generator.shuffle = False
validation_generator.reset()

# Predict class probabilities, then take the most likely class per image
Y_pred = final_model.predict(validation_generator)
y_pred = np.argmax(Y_pred, axis=1)

# Ground-truth class index of every validation image
y_true = validation_generator.classes

# Class names ordered by their integer index, so class_labels[i] names class i
label_to_index = validation_generator.class_indices
class_labels = sorted(label_to_index, key=label_to_index.get)

# Pass the full set of class indices explicitly. Otherwise scikit-learn only
# uses labels that occur in y_true/y_pred, and a class that is absent from
# both would shrink the matrix and misalign (or reject) the label names.
label_ids = np.arange(len(class_labels))

# --- Generate the Confusion Matrix ---
cm = confusion_matrix(y_true, y_pred, labels=label_ids)

# --- Plot the Confusion Matrix ---
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels)

plt.title('Confusion Matrix', fontsize=16)
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.show()


# --- Print a Detailed Classification Report ---
# Precision, recall, and f1-score for each class
print("\n" + "="*60)
print("Classification Report")
print("="*60 + "\n")
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_labels))



In [None]:
# Score the loaded model on the validation generator.
# evaluate() returns [loss, accuracy] for the compiled loss/metrics.
print("Evaluating the final model...")
final_scores = final_model.evaluate(validation_generator)
final_val_loss, final_val_accuracy = final_scores[0], final_scores[1]

print(f"\nFinal Validation Loss: {final_val_loss}")
print(f"Final Validation Accuracy: {final_val_accuracy}")



In [None]:
#cell 8

import io

import numpy as np
from google.colab import files
import tensorflow as tf
from tensorflow.keras.utils import load_img, img_to_array
import matplotlib.pyplot as plt

# Input size the images are resized to; must match the model's input shape
IMG_SIZE = (300, 300)

# --- Reuse the model already loaded earlier in the notebook ---
predictor_model = final_model

# --- Map class index -> class name using the training generator's mapping ---
class_indices = train_generator.class_indices
class_labels = {v: k for k, v in class_indices.items()}

# --- Upload file ---
print("Please upload a retinal image for prediction:")
uploaded = files.upload()

if not uploaded:
  print("No files were uploaded; nothing to predict.")

# --- Process and predict each uploaded file ---
# files.upload() returns {filename: file contents as bytes}. Decoding the
# bytes directly avoids assuming which directory (or exact filename) Colab
# used when it wrote the upload to disk.
for fn, file_bytes in uploaded.items():
  # Decode, resize, and scale to [0, 1] - the same scaling used in training
  img = load_img(io.BytesIO(file_bytes), target_size=IMG_SIZE)
  img_array = img_to_array(img)
  img_array /= 255.0
  img_batch = np.expand_dims(img_array, axis=0)  # add the batch dimension

  # Make prediction: most likely class and its probability as a percentage
  prediction = predictor_model.predict(img_batch)
  predicted_class_index = int(np.argmax(prediction[0]))
  predicted_class_label = class_labels[predicted_class_index]
  confidence = np.max(prediction[0]) * 100

  # Display results
  plt.figure(figsize=(6, 7))
  plt.imshow(img)
  plt.axis('off')
  plt.title(f"Predicted Class: {predicted_class_label}\nConfidence: {confidence:.2f}%", fontsize=14)
  plt.show()