In [1]:
import os
import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torchvision import models, transforms
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, random_split

# Paths and Parameters
data_path = "/kaggle/input/original-eye-disease-deep-learning/Original Dataset"
selected_folders = ["Diabetic Retinopathy", "Glaucoma", "Healthy", "Macular Scar", "Myopia"]
img_size = (256, 256)
clahe_tile_size = (8, 8)
clahe_clip_limit = 5.0

# Preprocessing functions
def crop_image(img):
    """Crop black borders around the retina."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    x, y, w, h = cv2.boundingRect(thresh)
    return img[y:y+h, x:x+w]

def apply_clahe(img):
    """Apply CLAHE to the L-channel of the image."""
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=clahe_clip_limit, tileGridSize=clahe_tile_size)
    cl = clahe.apply(l)
    lab = cv2.merge((cl, a, b))
    return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

def apply_gaussian_filter(img, kernel_size=(5, 5), sigma=1):
    """Apply Gaussian filter to reduce noise."""
    return cv2.GaussianBlur(img, kernel_size, sigma)

# Dataset Class
class EyeDiseaseDataset(Dataset):
    def __init__(self, root_dir, classes, transform=None):
        self.root_dir = root_dir
        self.classes = classes
        self.transform = transform
        self.data = []
        for label, cls in enumerate(classes):
            cls_dir = os.path.join(root_dir, cls)
            for img_name in os.listdir(cls_dir):
                self.data.append((os.path.join(cls_dir, img_name), label))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        img = cv2.imread(img_path)
        img = crop_image(img)
        img = apply_clahe(img)
        img = apply_gaussian_filter(img)
        img = cv2.resize(img, img_size)
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if self.transform:
            img = self.transform(img)
        return img, label

# Transformations (Preprocessing)
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Paths and Classes
classes = ["Diabetic Retinopathy", "Glaucoma", "Healthy", "Macular Scar", "Myopia"]

# Load Full Dataset
full_dataset = EyeDiseaseDataset(data_path, classes, transform)

# Split Dataset into Train, Validation, and Test Sets
train_size = int(0.7 * len(full_dataset))
val_size = int(0.15 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Load Pre-trained Models
vgg16 = models.vgg16(pretrained=True)
resnet50 = models.resnet50(pretrained=True)
densenet201 = models.densenet201(pretrained=True)

# Adjust for feature extraction
vgg16.classifier = nn.Sequential(*list(vgg16.classifier.children())[:-1])
resnet50.fc = nn.Identity()
densenet201.classifier = nn.Identity()

# Define Feature Extraction Function
def extract_features(model, loader):
    features = []
    labels = []
    model.eval()
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to("cuda")
            model = model.to("cuda")
            outputs = model(inputs)
            features.append(outputs.cpu().numpy())
            labels.append(targets.numpy())
    return np.vstack(features), np.hstack(labels)

# Extract Features from All Models
vgg16 = vgg16.to("cuda")
resnet50 = resnet50.to("cuda")
densenet201 = densenet201.to("cuda")

train_features_vgg, train_labels = extract_features(vgg16, train_loader)
val_features_vgg, val_labels = extract_features(vgg16, val_loader)
test_features_vgg, test_labels = extract_features(vgg16, test_loader)

train_features_resnet, _ = extract_features(resnet50, train_loader)
val_features_resnet, _ = extract_features(resnet50, val_loader)
test_features_resnet, _ = extract_features(resnet50, test_loader)

train_features_dense, _ = extract_features(densenet201, train_loader)
val_features_dense, _ = extract_features(densenet201, val_loader)
test_features_dense, _ = extract_features(densenet201, test_loader)

# Concatenate Features
train_combined = np.hstack([train_features_vgg, train_features_resnet, train_features_dense])
val_combined = np.hstack([val_features_vgg, val_features_resnet, val_features_dense])
test_combined = np.hstack([test_features_vgg, test_features_resnet, test_features_dense])

# Apply mRMR for Feature Selection
mrmr_selector = SelectKBest(mutual_info_classif, k=400)
train_selected = mrmr_selector.fit_transform(train_combined, train_labels)
val_selected = mrmr_selector.transform(val_combined)
test_selected = mrmr_selector.transform(test_combined)

# Train SVM Classifier
svm_classifier = SVC(kernel="linear", probability=True)
svm_classifier.fit(train_selected, train_labels)

# Evaluate on Test Set
test_predictions = svm_classifier.predict(test_selected)
print("Classification Report:")
print(classification_report(test_labels, test_predictions))
print("Confusion Matrix:")
print(confusion_matrix(test_labels, test_predictions))
print("Accuracy Score:", accuracy_score(test_labels, test_predictions))


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 230MB/s]
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 63.2MB/s]
Downloading: "https://download.pytorch.org/models/densenet201-c1103571.pth" to /root/.cache/torch/hub/checkpoints/densenet201-c1103571.pth
100%|██████████| 77.4M/77.4M [00:00<00:00, 156MB/s]


Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.87      0.82       245
           1       0.50      0.53      0.52       196
           2       0.61      0.52      0.56       160
           3       0.29      0.22      0.25        58
           4       0.48      0.47      0.47        66

    accuracy                           0.61       725
   macro avg       0.53      0.52      0.53       725
weighted avg       0.60      0.61      0.60       725

Confusion Matrix:
[[213  13   3  10   6]
 [ 16 104  42  16  18]
 [ 11  53  83   6   7]
 [ 20  15   7  13   3]
 [ 12  21   2   0  31]]
Accuracy Score: 0.6124137931034482
