In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
ZIP_PATH = "/content/drive/MyDrive/B. Disease Grading.zip"


In [None]:
!unzip -q "$ZIP_PATH" -d "/content"
print("Extraction completed!")


Extraction completed!


In [None]:
BASE = "/content/B. Disease Grading"

TRAIN_IMG = BASE + "/1. Original Images/a. Training Set"
TEST_IMG  = BASE + "/1. Original Images/b. Testing Set"

TRAIN_CSV = BASE + "/2. Groundtruths/a. IDRiD_Disease Grading_Training Labels.csv"
TEST_CSV  = BASE + "/2. Groundtruths/b. IDRiD_Disease Grading_Testing Labels.csv"

print("Train images folder:", TRAIN_IMG)
print("Test images folder:", TEST_IMG)
print("Train CSV:", TRAIN_CSV)
print("Test CSV:", TEST_CSV)


Train images folder: /content/B. Disease Grading/1. Original Images/a. Training Set
Test images folder: /content/B. Disease Grading/1. Original Images/b. Testing Set
Train CSV: /content/B. Disease Grading/2. Groundtruths/a. IDRiD_Disease Grading_Training Labels.csv
Test CSV: /content/B. Disease Grading/2. Groundtruths/b. IDRiD_Disease Grading_Testing Labels.csv


In [None]:
!pip install opencv-python scikit-image scikit-learn pywt tqdm matplotlib seaborn --quiet


[31mERROR: Could not find a version that satisfies the requirement pywt (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pywt[0m[31m
[0m

In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib

from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
class UpgradedDRClassifier:
    def __init__(self):
        self.scaler = StandardScaler()
        self.clf = None
        self.pca = None

    # ------------------------------------------------------------
    # PREPROCESSING
    # ------------------------------------------------------------
    def preprocess(self, img):
        # Resize
        img = cv2.resize(img, (512, 512))

        # -------------------------------------
        # 1. Illumination Normalization
        # -------------------------------------
        img_float = img.astype(np.float32)

        # Remove uneven illumination
        background = cv2.GaussianBlur(img_float, (75, 75), 0)
        img_norm = cv2.divide(img_float, background + 1e-6, scale=255)

        img_norm = np.clip(img_norm, 0, 255).astype(np.uint8)

        # -------------------------------------
        # 2. Histogram Equalization (per channel)
        # -------------------------------------
        ycrcb = cv2.cvtColor(img_norm, cv2.COLOR_BGR2YCrCb)
        y, cr, cb = cv2.split(ycrcb)

        y_eq = cv2.equalizeHist(y)
        img_eq = cv2.cvtColor(cv2.merge([y_eq, cr, cb]), cv2.COLOR_YCrCb2BGR)

        # -------------------------------------
        # 3. CLAHE (Local Contrast)
        # -------------------------------------
        lab = cv2.cvtColor(img_eq, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)

        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)

        enhanced = cv2.cvtColor(cv2.merge([l, a, b]), cv2.COLOR_LAB2BGR)

        return enhanced

    # ------------------------------------------------------------
    # MACULA / FOVEA DETECTION (BRIGHTNESS VALLEY SEARCH)
    # ------------------------------------------------------------
    def detect_macula(self, img):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (45, 45), 0)

        # macula = darkest point near center; use weighted center search
        h, w = gray.shape
        cx, cy = w // 2, h // 2

        crop = blur[cy-120:cy+120, cx-120:cx+120]
        min_loc = np.unravel_index(np.argmin(crop), crop.shape)

        mac_y = cy - 120 + min_loc[0]
        mac_x = cx - 120 + min_loc[1]

        return (mac_x, mac_y)

    # ------------------------------------------------------------
    # MACULA REGION EXUDATE DENSITY
    # ------------------------------------------------------------
    def macula_exudate_density(self, ex_mask, macula_coord):
        x, y = macula_coord
        r = 60  # radius around macula

        h, w = ex_mask.shape
        mask = np.zeros((h, w), np.uint8)

        cv2.circle(mask, (x, y), r, 255, -1)

        macula_region = cv2.bitwise_and(ex_mask, mask)
        density = np.sum(macula_region > 0) / (np.pi * r * r)

        return density

    # ------------------------------------------------------------
    # VESSEL DETECTION (simplified)
    # ------------------------------------------------------------
    def vessel_mask(self, img):
        green = img[:, :, 1]
        green = cv2.createCLAHE(2.0, (8, 8)).apply(green)
        bg = cv2.morphologyEx(green, cv2.MORPH_OPEN, np.ones((5, 5), np.uint8))
        vessel = cv2.subtract(green, bg)
        _, mask = cv2.threshold(vessel, 15, 255, cv2.THRESH_BINARY)
        return mask

    # ------------------------------------------------------------
    # LESION DETECTION (MA, HE, EX)
    # ------------------------------------------------------------
    def detect_ma(self, img):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, np.ones((7, 7), np.uint8))
        _, mask = cv2.threshold(tophat, 10, 255, cv2.THRESH_BINARY)
        cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        count, area = 0, 0
        for c in cnts:
            a = cv2.contourArea(c)
            if 5 < a < 120:
                count += 1
                area += a
        return count, area, mask

    def detect_he(self, img):
        red = img[:, :, 2]
        bg = cv2.morphologyEx(red, cv2.MORPH_OPEN, np.ones((7, 7), np.uint8))
        diff = cv2.subtract(red, bg)
        _, mask = cv2.threshold(diff, 20, 255, cv2.THRESH_BINARY)
        cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        count, area = 0, 0
        for c in cnts:
            a = cv2.contourArea(c)
            if a > 40:
                count += 1
                area += a
        return count, area, mask

    def detect_ex(self, img):
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        lower = np.array([20, 40, 150])
        upper = np.array([35, 255, 255])
        mask = cv2.inRange(hsv, lower, upper)
        cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        count, area = 0, 0
        for c in cnts:
            a = cv2.contourArea(c)
            if a > 40:
                count += 1
                area += a
        return count, area, mask

    # ------------------------------------------------------------
    # GLCM TEXTURE FEATURES
    # ------------------------------------------------------------
    def glcm_features(self, gray):
        glcm = graycomatrix(gray, distances=[2], angles=[0],
                            symmetric=True, normed=True)

        feats = [
            graycoprops(glcm, 'contrast')[0][0],
            graycoprops(glcm, 'homogeneity')[0][0],
            graycoprops(glcm, 'energy')[0][0],
            graycoprops(glcm, 'correlation')[0][0],
            graycoprops(glcm, 'dissimilarity')[0][0]
        ]

        return feats

    # ------------------------------------------------------------
    # LBP FEATURES
    # ------------------------------------------------------------
    def lbp_features(self, gray):
        radius = 2
        points = radius * 8
        lbp = local_binary_pattern(gray, points, radius, method='uniform')
        hist, _ = np.histogram(lbp.ravel(), bins=16, range=(0, 16), density=True)
        return hist.tolist()

    # ------------------------------------------------------------
    # MAIN FEATURE VECTOR
    # ------------------------------------------------------------
    def extract_features(self, img_path):
        img = cv2.imread(img_path)
        enh = self.preprocess(img)
        gray = cv2.cvtColor(enh, cv2.COLOR_BGR2GRAY)

        # --- Lesions ---
        ma_c, ma_a, ma_mask = self.detect_ma(enh)
        he_c, he_a, he_mask = self.detect_he(enh)
        ex_c, ex_a, ex_mask = self.detect_ex(enh)

        # --- Macula features ---
        mac = self.detect_macula(enh)
        mac_ex_density = self.macula_exudate_density(ex_mask, mac)

        # --- Texture ---
        glcm = self.glcm_features(gray)
        lbp = self.lbp_features(gray)

        # --- Vessel density ---
        vessel = self.vessel_mask(enh)
        vessel_density = np.sum(vessel > 0) / (512 * 512)

        # --- Final Feature Vector ---
        features = np.array([
            ma_c, ma_a,
            he_c, he_a,
            ex_c, ex_a,
            mac_ex_density,
            vessel_density,
            *glcm,
            *lbp
        ])

        return features

    # ------------------------------------------------------------
    # TRAIN RANDOM FOREST
    # ------------------------------------------------------------
    def train_rf(self, X_train, y_train):
        self.clf = RandomForestClassifier(
            n_estimators=300,
            max_depth=15,
            min_samples_leaf=3,
            min_samples_split=4,
            class_weight=None,
            random_state=42
        )
        self.clf.fit(X_train, y_train)
        print("Random Forest training completed!")

    # ------------------------------------------------------------
    # PREDICT
    # ------------------------------------------------------------
    def predict(self, X):
        return self.clf.predict(X)

In [None]:
# Load CSVs
train_df = pd.read_csv(TRAIN_CSV)
test_df  = pd.read_csv(TEST_CSV)

# Normalize column names if needed
train_df.rename(columns={"Image name": "image", "Retinopathy grade": "level"}, inplace=True)
test_df.rename(columns={"Image name": "image", "Retinopathy grade": "level"}, inplace=True)

# Keep only needed columns
train_df = train_df[["image", "level"]]
test_df  = test_df[["image", "level"]]

# Ensure .jpg extension exists
train_df["image"] = train_df["image"].apply(lambda x: x if x.lower().endswith(".jpg") else x + ".jpg")
test_df["image"]  = test_df["image"].apply(lambda x: x if x.lower().endswith(".jpg")  else x + ".jpg")

print("Train samples:", len(train_df))
print("Test samples:", len(test_df))

train_df.head(), test_df.head()



Train samples: 413
Test samples: 103


(           image  level
 0  IDRiD_001.jpg      3
 1  IDRiD_002.jpg      3
 2  IDRiD_003.jpg      2
 3  IDRiD_004.jpg      3
 4  IDRiD_005.jpg      4,
            image  level
 0  IDRiD_001.jpg      4
 1  IDRiD_002.jpg      4
 2  IDRiD_003.jpg      4
 3  IDRiD_004.jpg      4
 4  IDRiD_005.jpg      4)

In [None]:
clf = UpgradedDRClassifier()


In [None]:
X_train = []
y_train = []

print("Extracting TRAIN features...")

for i, row in tqdm(train_df.iterrows(), total=len(train_df)):
    img_path = os.path.join(TRAIN_IMG, row['image'])

    if not os.path.exists(img_path):
        print("Missing:", img_path)
        continue

    feats = clf.extract_features(img_path)
    X_train.append(feats)
    y_train.append(row['level'])

X_train = np.array(X_train)
y_train = np.array(y_train)

print("Training feature shape:", X_train.shape)


Extracting TRAIN features...


100%|██████████| 413/413 [02:08<00:00,  3.21it/s]

Training feature shape: (413, 29)





In [None]:
X_test = []
y_test = []

print("Extracting TEST features...")

for i, row in tqdm(test_df.iterrows(), total=len(test_df)):
    img_path = os.path.join(TEST_IMG, row['image'])

    if not os.path.exists(img_path):
        print("Missing:", img_path)
        continue

    feats = clf.extract_features(img_path)
    X_test.append(feats)
    y_test.append(row['level'])

X_test = np.array(X_test)
y_test = np.array(y_test)

print("Testing feature shape:", X_test.shape)


Extracting TEST features...


100%|██████████| 103/103 [01:01<00:00,  1.68it/s]

Testing feature shape: (103, 29)





In [None]:
clf.scaler.fit(X_train)

X_train_scaled = clf.scaler.transform(X_train)
X_test_scaled  = clf.scaler.transform(X_test)

# Scale using ONLY the BALANCED dataset


In [None]:
# ----------------------------------------
# SMOTE Oversampling (to fix class imbalance)
# ----------------------------------------

from imblearn.over_sampling import SMOTE

sm = SMOTE(k_neighbors=3, random_state=42)
X_train_bal, y_train_bal = sm.fit_resample(X_train_scaled, y_train)

print("Before SMOTE:", X_train_scaled.shape, np.bincount(y_train))
print("After SMOTE:",  X_train_bal.shape,  np.bincount(y_train_bal))

# ----------------------------------------
# 3. SCALE AGAIN (USING BALANCED DATA)
# ----------------------------------------

clf.scaler.fit(X_train_bal)

X_train_bal_scaled = clf.scaler.transform(X_train_bal)
X_test_scaled      = clf.scaler.transform(X_test)


Before SMOTE: (413, 29) [134  20 136  74  49]
After SMOTE: (680, 29) [136 136 136 136 136]


In [None]:

from sklearn.decomposition import PCA

pca = PCA(n_components=15, random_state=42)
pca.fit(X_train_bal_scaled)

X_train_pca = pca.transform(X_train_bal_scaled)
X_test_pca  = pca.transform(X_test_scaled)

print("PCA shapes:", X_train_pca.shape, X_test_pca.shape)


PCA shapes: (680, 15) (103, 15)


In [None]:
from sklearn.preprocessing import StandardScaler

feat_scaler = StandardScaler()
feat_scaler.fit(X_train_pca)

X_train_final = feat_scaler.transform(X_train_pca)
X_test_final  = feat_scaler.transform(X_test_pca)


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

svm_temp = SVC(
    kernel='rbf',
    C=8,
    gamma='scale',
    class_weight='balanced',
    random_state=42
)

scores = cross_val_score(
    svm_temp,
    X_train_pca,          # PCA-transformed balanced data
    y_train_bal,
    cv=5,
    scoring="accuracy"
)

print("SVM CV scores:", scores)
print("Mean SVM CV accuracy:", scores.mean())


SVM CV scores: [0.49264706 0.48529412 0.61029412 0.61764706 0.61029412]
Mean SVM CV accuracy: 0.5632352941176471


In [None]:
svm_clf = SVC(
    kernel='rbf',
    C=8,
    gamma='scale',
    class_weight='balanced',
    random_state=42
)

svm_clf.fit(X_train_pca, y_train_bal)
print("Final SVM trained!")


Final SVM trained!


In [None]:
from sklearn.metrics import accuracy_score, classification_report

train_pred = svm_clf.predict(X_train_pca)
print("Training Accuracy:", accuracy_score(y_train_bal, train_pred))
print("\nTraining Report:\n")
print(classification_report(y_train_bal, train_pred))


Training Accuracy: 0.3352941176470588

Training Report:

              precision    recall  f1-score   support

           0       0.32      0.57      0.41       136
           1       0.00      0.00      0.00       136
           2       0.34      0.61      0.44       136
           3       0.28      0.35      0.31       136
           4       0.68      0.15      0.25       136

    accuracy                           0.34       680
   macro avg       0.32      0.34      0.28       680
weighted avg       0.32      0.34      0.28       680



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
test_pred = svm_clf.predict(X_test_pca)
print("Test Accuracy:", accuracy_score(y_test, test_pred))
print("\nTesting Report:\n")
print(classification_report(y_test, test_pred))


Test Accuracy: 0.3106796116504854

Testing Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        34
           1       0.00      0.00      0.00         5
           2       0.31      1.00      0.47        32
           3       0.00      0.00      0.00        19
           4       0.00      0.00      0.00        13

    accuracy                           0.31       103
   macro avg       0.06      0.20      0.09       103
weighted avg       0.10      0.31      0.15       103



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.svm import SVC

svm_clf = SVC(
    kernel='rbf',
    C=8,                   # good for noisy DR data
    gamma='scale',
    class_weight='balanced',   # important
    probability=False,
    random_state=42
)

svm_clf.fit(X_train_pca, y_train_bal)
print("SVM (RBF) training completed!")


SVM (RBF) training completed!


In [None]:
from sklearn.metrics import classification_report, accuracy_score

train_pred = svm_clf.predict(X_train_pca)

print("Training Accuracy:", accuracy_score(y_train_bal, train_pred))
print("\nTraining Classification Report:\n")
print(classification_report(y_train_bal, train_pred))


Training Accuracy: 0.7823529411764706

Training Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.74      0.78       136
           1       0.81      0.98      0.89       136
           2       0.74      0.58      0.65       136
           3       0.73      0.75      0.74       136
           4       0.79      0.87      0.83       136

    accuracy                           0.78       680
   macro avg       0.78      0.78      0.78       680
weighted avg       0.78      0.78      0.78       680



In [None]:
test_pred = svm_clf.predict(X_test_pca)

print("Testing Accuracy:", accuracy_score(y_test, test_pred))
print("\nTesting Classification Report:\n")
print(classification_report(y_test, test_pred))


Testing Accuracy: 0.3300970873786408

Testing Classification Report:

              precision    recall  f1-score   support

           0       0.33      1.00      0.50        34
           1       0.00      0.00      0.00         5
           2       0.00      0.00      0.00        32
           3       0.00      0.00      0.00        19
           4       0.00      0.00      0.00        13

    accuracy                           0.33       103
   macro avg       0.07      0.20      0.10       103
weighted avg       0.11      0.33      0.16       103



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
