In [1]:
import cv2
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
import skimage.measure
import skimage.feature

# Define dataset paths
dataset_path = "/Users/abhishek/projects/aipi540/retinal-fundus-disorder-detection/notebooks/Retinal Fundus Images"
test_path = os.path.join(dataset_path, "test")

# Combined function: Extract features and classify images
def classify_fundus_image(image_path):
    """Extracts features and performs rule-based classification on retinal fundus images."""

    # Load image
    image = cv2.imread(image_path)
    if image is None:
        return "Error - Unable to Load", None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Feature Extraction
    edges = cv2.Canny(blurred, 10, 80)
    edge_density = np.sum(edges) / edges.size

    _, bright_regions = cv2.threshold(blurred, 130, 255, cv2.THRESH_BINARY)
    bright_pixel_count = cv2.countNonZero(bright_regions)

    mean_intensity = np.mean(gray)

    _, optic_disc = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
    optic_disc_size = np.sum(optic_disc) / gray.size

    vessel_skeleton = cv2.bitwise_and(edges, edges, mask=bright_regions)
    vessel_tortuosity = np.sum(vessel_skeleton) / bright_pixel_count if bright_pixel_count > 0 else 0

    glcm = skimage.feature.graycomatrix(gray, distances=[5], angles=[0], levels=256, symmetric=True, normed=True)
    contrast = skimage.feature.graycoprops(glcm, 'contrast')[0, 0]
    entropy = skimage.measure.shannon_entropy(gray)

    r_var = np.var(image[:, :, 2])
    g_var = np.var(image[:, :, 1])
    b_var = np.var(image[:, :, 0])

    features = [mean_intensity, bright_pixel_count, edge_density, optic_disc_size, 
                vessel_tortuosity, contrast, entropy, r_var, g_var, b_var]

    # 🔹 Updated Classification Rules
    if mean_intensity > 125 and bright_pixel_count > 5000 and edge_density < 25:
        return "Dry AMD", features
    elif bright_pixel_count > 2500 and mean_intensity > 115 and edge_density < 30:
        return "Wet AMD", features
    elif 2000 < bright_pixel_count < 10000 and edge_density > 4 and vessel_tortuosity > 0.25:
        return "Mild DR", features
    elif 1500 < bright_pixel_count < 8000 and 6 < edge_density < 35 and contrast > 1.5:
        return "Moderate DR", features
    elif 800 < bright_pixel_count < 6000 and edge_density > 10 and entropy > 4.5:
        return "Severe DR", features
    elif edge_density > 12 and bright_pixel_count < 4000 and vessel_tortuosity > 0.3:
        return "Proliferate DR", features
    elif mean_intensity < 105 and entropy < 4 and edge_density < 6:
        return "Cataract", features
    elif optic_disc_size > 0.018 and mean_intensity < 120 and g_var < 400:
        return "Glaucoma", features
    elif bright_pixel_count < 2200 and edge_density > 8 and r_var > 900:
        return "Pathological Myopia", features
    elif edge_density < 18 and mean_intensity > 125:
        return "Normal Fundus", features
    else:
        return "Uncertain - Further Review Needed", features


# Process all test images and save results
results = []
feature_data = []

for category in os.listdir(test_path):
    category_path = os.path.join(test_path, category)
    if not os.path.isdir(category_path):
        continue

    for image_name in os.listdir(category_path):
        image_path = os.path.join(category_path, image_name)

        # Classify image and extract features in one step
        predicted_label, features = classify_fundus_image(image_path)

        results.append([image_name, category, predicted_label])

        if features is not None:
            feature_data.append([category] + features)

results_df = pd.DataFrame(results, columns=["Image Name", "Actual Category", "Predicted Category"])

print("Updated predictions saved.")

# Print new predicted category distribution
print("Predicted Category Counts:\n", results_df["Predicted Category"].value_counts())

# Generate classification report
y_true = results_df["Actual Category"]
y_pred = results_df["Predicted Category"]

# Ensure all labels are included
all_labels = sorted(set(y_true.unique()) | set(y_pred.unique()))

report = classification_report(y_true, y_pred, labels=all_labels, target_names=all_labels, zero_division=1)
print("Updated Classification Report:\n", report)

Updated predictions saved.
Predicted Category Counts:
 Predicted Category
Uncertain - Further Review Needed    942
Wet AMD                              172
Dry AMD                              122
Name: count, dtype: int64
Updated Classification Report:
                                    precision    recall  f1-score   support

                         Cataract       1.00      0.00      0.00       112
                          Dry AMD       0.00      0.00      0.00        54
                         Glaucoma       1.00      0.00      0.00       156
         Hypertensive Retinopathy       1.00      0.00      0.00        94
                          Mild DR       1.00      0.00      0.00       102
                      Moderate DR       1.00      0.00      0.00       216
                    Normal Fundus       1.00      0.00      0.00       179
              Pathological Myopia       1.00      0.00      0.00       102
                   Proliferate DR       1.00      0.00      0.00     