In [14]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split

In [2]:
file_path = "data/full_df.csv" 
training_images_dir = "data/ODIR-5K/ODIR-5K/Training Images" 
testing_images_dir = "data/ODIR-5K/ODIR-5K/Testing Images"   


data = pd.read_csv(file_path)


In [3]:
# Parse 'target' column into one-hot encoded labels
data['target'] = data['target'].apply(lambda x: list(map(int, x.strip("[]").split(", "))))
labels = np.array(data['target'].tolist())


In [4]:
# Correct and assign image paths
data['Left-Fundus'] = data['Left-Fundus'].apply(lambda x: os.path.join(training_images_dir, os.path.basename(x)))
data['Right-Fundus'] = data['Right-Fundus'].apply(lambda x: os.path.join(training_images_dir, os.path.basename(x)))

In [5]:
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

print(f"Number of training samples: {len(train_data)}")
print(f"Number of testing samples: {len(test_data)}")


Number of training samples: 5113
Number of testing samples: 1279


In [6]:
IMG_HEIGHT, IMG_WIDTH = 224, 224

In [7]:
# Define custom dataset
class FundusDataset(Dataset):
    def __init__(self, left_paths, right_paths, labels, transform=None):
        self.left_paths = left_paths
        self.right_paths = right_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Load and preprocess left and right images
        left_img = cv2.imread(self.left_paths[idx])
        right_img = cv2.imread(self.right_paths[idx])

        # Resize and concatenate
        left_img = cv2.resize(left_img, (IMG_WIDTH, IMG_HEIGHT))
        right_img = cv2.resize(right_img, (IMG_WIDTH, IMG_HEIGHT))
        combined_img = np.concatenate((left_img, right_img), axis=2)  # Concatenate along the channel axis

        # Apply transformations if any
        if self.transform:
            combined_img = self.transform(combined_img)

        # Convert label to tensor
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return combined_img, label

# Transformations
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert to PyTorch tensor
    transforms.Normalize(mean=[0.5] * 6, std=[0.5] * 6)  # Normalize 6-channel input
])

In [8]:
train_dataset = FundusDataset(
    train_data['Left-Fundus'].tolist(),
    train_data['Right-Fundus'].tolist(),
    train_data['target'].tolist(),
    transform=transform
)
test_dataset = FundusDataset(
    test_data['Left-Fundus'].tolist(),
    test_data['Right-Fundus'].tolist(),
    test_data['target'].tolist(),
    transform=transform
)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)


In [9]:
class FundusCNN(nn.Module):
    def __init__(self, num_classes):
        super(FundusCNN, self).__init__()
        self.conv1 = nn.Conv2d(6, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.flatten = nn.Flatten()

        # Calculate the flattened size dynamically
        self.fc1_input_size = self._get_flattened_size()
        self.fc1 = nn.Linear(self.fc1_input_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def _get_flattened_size(self):
        dummy_input = torch.zeros(1, 6, IMG_HEIGHT, IMG_WIDTH)  # Example input
        x = self.pool(nn.functional.relu(self.conv1(dummy_input)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        return int(np.prod(x.size()))

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = self.flatten(x)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FundusCNN(num_classes=labels.shape[1]).to(device)

In [11]:
# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()

In [12]:
def train_model(model, train_loader, optimizer, criterion, device):
    model.train()
    for epoch in range(10):  # Number of epochs
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward + Optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

In [13]:
def test_model(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            total_loss += loss.item()
            preds = torch.round(torch.sigmoid(outputs))
            correct_predictions += (preds == labels).all(axis=1).sum().item()
            total_samples += labels.size(0)

    avg_loss = total_loss / len(test_loader)
    accuracy = correct_predictions / total_samples
    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy

In [14]:
train_model(model, train_loader, optimizer, criterion, device)
print("\nTesting the model on test data...")
test_loss, test_accuracy = test_model(model, test_loader, criterion, device)

Epoch 1, Loss: 0.34432710241526365
Epoch 2, Loss: 0.287399728409946
Epoch 3, Loss: 0.2781493427231908
Epoch 4, Loss: 0.26793810185045003
Epoch 5, Loss: 0.260829190351069
Epoch 6, Loss: 0.24905990529805422
Epoch 7, Loss: 0.24293717946857213
Epoch 8, Loss: 0.2309240188449621
Epoch 9, Loss: 0.22767015937715768
Epoch 10, Loss: 0.21496192179620266

Testing the model on test data...
Test Loss: 0.2882, Test Accuracy: 0.3972


In [14]:
class DeeperCNN(nn.Module):
    def __init__(self, num_classes):
        super(DeeperCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(6, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 28 * 28, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DeeperCNN(num_classes=labels.shape[1]).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, optimizer, criterion, device)
test_model(model, test_loader, criterion, device)


Epoch 1, Loss: 0.3121997078880668
Epoch 2, Loss: 0.2930171752348542
Epoch 3, Loss: 0.2841109404340386
Epoch 4, Loss: 0.27648793971166014
Epoch 5, Loss: 0.27058696998283266
Epoch 6, Loss: 0.25906306179240346
Epoch 7, Loss: 0.2496647596359253
Epoch 8, Loss: 0.2366360009647906
Epoch 9, Loss: 0.22007160931825637
Epoch 10, Loss: 0.20899307113140822
Test Loss: 0.2597, Test Accuracy: 0.2987


(0.25967072434723376, 0.2986708365910868)

In [None]:
import torch

torch.cuda.empty_cache()

torch.cuda.reset_accumulated_memory_stats()
torch.cuda.reset_peak_memory_stats()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_max_memory_cached()

print("GPU memory cleared!")


In [15]:
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn
import torch.optim as optim

#weights = ResNet18_Weights.IMAGENET1K_V1  
weights=ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

model.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust for 6 input channels
model.fc = nn.Linear(model.fc.in_features, labels.shape[1])  # Adjust output for the number of classes


model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, optimizer, criterion, device)
test_model(model, test_loader, criterion, device)


Epoch 1, Loss: 0.3069136343896389
Epoch 2, Loss: 0.28414420792832973
Epoch 3, Loss: 0.2710399144329131
Epoch 4, Loss: 0.26306209033355116
Epoch 5, Loss: 0.2545126880519092
Epoch 6, Loss: 0.247240727301687
Epoch 7, Loss: 0.23901394717395305
Epoch 8, Loss: 0.22515542106702924
Epoch 9, Loss: 0.208184705208987
Epoch 10, Loss: 0.18488518092781306
Test Loss: 0.2768, Test Accuracy: 0.4996


(0.27677437514066694, 0.49960906958561374)

In [19]:
def train_model(model, train_loader, optimizer, criterion, device):
    model.train()
    for epoch in range(20):  
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward + Optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn
import torch.optim as optim

#weights = ResNet18_Weights.IMAGENET1K_V1  
weights=ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

model.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust for 6 input channels
model.fc = nn.Linear(model.fc.in_features, labels.shape[1])  # Adjust output for the number of classes


model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, optimizer, criterion, device)
test_model(model, test_loader, criterion, device)


Epoch 1, Loss: 0.30285149198025463
Epoch 2, Loss: 0.27429342502728105
Epoch 3, Loss: 0.2642074725590646
Epoch 4, Loss: 0.25768773378804327
Epoch 5, Loss: 0.24753322498872876
Epoch 6, Loss: 0.23777932738885282
Epoch 7, Loss: 0.222606410831213
Epoch 8, Loss: 0.20503963716328144
Epoch 9, Loss: 0.18162675183266402
Epoch 10, Loss: 0.15661577391438186
Epoch 11, Loss: 0.1363362907897681
Epoch 12, Loss: 0.11960528686176986
Epoch 13, Loss: 0.10484163574874401
Epoch 14, Loss: 0.09567877377849072
Epoch 15, Loss: 0.08764436475466937
Epoch 16, Loss: 0.08058427629293874
Epoch 17, Loss: 0.07104553657118232
Epoch 18, Loss: 0.06579594750655815
Epoch 19, Loss: 0.0588947803596966
Epoch 20, Loss: 0.0557667943648994
Test Loss: 0.4041, Test Accuracy: 0.6106


(0.40413015596568586, 0.6106333072713057)

In [14]:
def train_model(model, train_loader, optimizer, criterion, device):
    model.train()
    for epoch in range(50):  
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward + Optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")
        
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn
import torch.optim as optim

train_loader = DataLoader(train_dataset, batch_size=32, num_workers=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, num_workers=8, shuffle=False)

#weights = ResNet18_Weights.IMAGENET1K_V1  
weights=ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

model.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust for 6 input channels
model.fc = nn.Linear(model.fc.in_features, labels.shape[1])  # Adjust output for the number of classes


model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, optimizer, criterion, device)
test_model(model, test_loader, criterion, device)


Epoch 1, Loss: 0.3088725393638015
Epoch 2, Loss: 0.2852587480098009
Epoch 3, Loss: 0.2717562666162848
Epoch 4, Loss: 0.26330678081139924
Epoch 5, Loss: 0.2546147517859936
Epoch 6, Loss: 0.24655858837068081
Epoch 7, Loss: 0.23700620606541634
Epoch 8, Loss: 0.22391622764989733
Epoch 9, Loss: 0.20341461785137654
Epoch 10, Loss: 0.1803715586196631
Epoch 11, Loss: 0.15449490454047918
Epoch 12, Loss: 0.13474518165457994
Epoch 13, Loss: 0.11722873304970563
Epoch 14, Loss: 0.10039609172381461
Epoch 15, Loss: 0.08879498860333115
Epoch 16, Loss: 0.08236858196323738
Epoch 17, Loss: 0.0751825044164434
Epoch 18, Loss: 0.06541310301981866
Epoch 19, Loss: 0.05990673466585576
Epoch 20, Loss: 0.056420024135150015
Epoch 21, Loss: 0.05113081146264449
Epoch 22, Loss: 0.0488740139931906
Epoch 23, Loss: 0.046250163397053255
Epoch 24, Loss: 0.04393439340637997
Epoch 25, Loss: 0.043226481712190436
Epoch 26, Loss: 0.04213545227539726
Epoch 27, Loss: 0.04080969077767804
Epoch 28, Loss: 0.04086752092116512
Epoch

(0.6939314991235733, 0.6231430805316653)

In [24]:
from torchvision.models import resnet18
from torchvision import transforms


def train_model(model, train_loader, optimizer, criterion, device):
    model.train()
    for epoch in range(10):  
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),  # Randomly rotate the image
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Random crop and resize
    transforms.Normalize(mean=[0.5] * 6, std=[0.5] * 6)  # Normalize 6-channel input
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 6, std=[0.5] * 6)  # No augmentation for testing
])

train_dataset = FundusDataset(
    train_data['Left-Fundus'].tolist(),
    train_data['Right-Fundus'].tolist(),
    train_data['target'].tolist(),
    transform=transform_train
)
test_dataset = FundusDataset(
    test_data['Left-Fundus'].tolist(),
    test_data['Right-Fundus'].tolist(),
    test_data['target'].tolist(),
    transform=transform_test
)

train_loader = DataLoader(train_dataset, batch_size=32, num_workers=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, num_workers=8, shuffle=False)

model = resnet18(pretrained=True)
model.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust input channels to 6
model.fc = nn.Linear(model.fc.in_features, labels.shape[1])  # Adjust output classes
model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, optimizer, criterion, device)

test_loss, test_accuracy = test_model(model, test_loader, criterion, device)


Epoch 1, Loss: 0.305388648994267
Epoch 2, Loss: 0.2893676109611988
Epoch 3, Loss: 0.27777685802429913
Epoch 4, Loss: 0.27069378094747665
Epoch 5, Loss: 0.26514550345018506
Epoch 6, Loss: 0.2609480190090835
Epoch 7, Loss: 0.2572805553674698
Epoch 8, Loss: 0.25450759632512926
Epoch 9, Loss: 0.2523126413114369
Epoch 10, Loss: 0.25011427691206334
Test Loss: 0.2797, Test Accuracy: 0.4550


In [17]:
from torchvision.models import resnet50, ResNet50_Weights
import torch.nn as nn
import torch.optim as optim

weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights)

model.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust for 6 input channels
model.fc = nn.Linear(model.fc.in_features, labels.shape[1])  # Adjust output for the number of classes

model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, optimizer, criterion, device)
test_model(model, test_loader, criterion, device)


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /home/spandan/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:08<00:00, 12.2MB/s]


Epoch 1, Loss: 0.30010922681540253
Epoch 2, Loss: 0.2792366216890514
Epoch 3, Loss: 0.2634122113697231
Epoch 4, Loss: 0.25498043056577446
Epoch 5, Loss: 0.24627583706751466
Epoch 6, Loss: 0.23434898676350713
Epoch 7, Loss: 0.22154965419322253
Epoch 8, Loss: 0.20625313641503454
Epoch 9, Loss: 0.1855567626655102
Epoch 10, Loss: 0.16920666061341763
Test Loss: 0.2521, Test Accuracy: 0.4957


(0.2521042112261057, 0.49569976544175137)

In [25]:
from sklearn.ensemble import RandomForestClassifier
import torch.nn as nn
import torch
from torchvision.models import resnet18, ResNet18_Weights
import torch.optim as optim
# Load pretrained ResNet18 and adjust for 6 input channels
weights=ResNet18_Weights.DEFAULT
resnet = resnet18(weights=weights)
# Modify the first convolutional layer
resnet.conv1 = nn.Conv2d(
    6, 64, kernel_size=7, stride=2, padding=3, bias=False
)  # Change input channels to 6
resnet.fc = nn.Identity()  # Remove the classification head for feature extraction
resnet = resnet.to(device)

# Extract features function
def extract_features(loader):
    features, labels = [], []
    resnet.eval()
    with torch.no_grad():
        for inputs, lbls in loader:
            inputs = inputs.to(device)
            feat = resnet(inputs).cpu().numpy()
            features.append(feat)
            labels.append(lbls.numpy())
    return np.vstack(features), np.vstack(labels)

train_loader = DataLoader(train_dataset, batch_size=32, num_workers=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, num_workers=8, shuffle=False)
# Extract features for training and testing sets
train_features, train_labels = extract_features(train_loader)
test_features, test_labels = extract_features(test_loader)

# Train a Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(train_features, train_labels)

# Test the Random Forest classifier
predictions = clf.predict(test_features)
accuracy = (predictions == test_labels).mean()
print("Random Forest Accuracy with ResNet Features:", accuracy)


Random Forest Accuracy with ResNet Features: 0.8808639562157936


In [26]:
from torchvision.models import efficientnet_b0

model = efficientnet_b0(pretrained=True)
model.features[0][0] = nn.Conv2d(6, 32, kernel_size=3, stride=2, padding=1, bias=False)  # Adjust input channels
model.classifier[1] = nn.Linear(model.classifier[1].in_features, labels.shape[1])  # Adjust output classes
model = model.to(device)
train_loader = DataLoader(train_dataset, batch_size=32, num_workers=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, num_workers=8, shuffle=False)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_model(model, train_loader, optimizer, criterion, device)
test_model(model, test_loader, criterion, device)




Epoch 1, Loss: 0.2894620593637228
Epoch 2, Loss: 0.2562329418957233
Epoch 3, Loss: 0.24414443271234632
Epoch 4, Loss: 0.23514638980850577
Epoch 5, Loss: 0.22638313770294188
Epoch 6, Loss: 0.2196933708153665
Epoch 7, Loss: 0.21165706971660256
Epoch 8, Loss: 0.20746300062164663
Epoch 9, Loss: 0.19929902590811252
Epoch 10, Loss: 0.19423500169068575
Test Loss: 0.2531, Test Accuracy: 0.4934


(0.2530756704509258, 0.49335418295543393)

In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from torchvision import transforms
from transformers import ViTForImageClassification, ViTConfig

from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

file_path = "data/full_df.csv"
training_images_dir = "data/ODIR-5K/ODIR-5K/Training Images"

data = pd.read_csv(file_path)

data['Left-Fundus'] = data['Left-Fundus'].apply(lambda x: os.path.join(training_images_dir, os.path.basename(x)))
data['Right-Fundus'] = data['Right-Fundus'].apply(lambda x: os.path.join(training_images_dir, os.path.basename(x)))

data['target'] = data['target'].apply(lambda x: list(map(int, x.strip("[]").split(", "))))
labels = torch.tensor(data['target'].tolist())

class FundusDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        left_img_path = self.df.iloc[idx]['Left-Fundus']
        right_img_path = self.df.iloc[idx]['Right-Fundus']
        label = torch.tensor(self.df.iloc[idx]['target'])

        try:
            left_img = Image.open(left_img_path).convert("RGB")
        except:
            left_img = Image.new("RGB", (224, 224))

        try:
            right_img = Image.open(right_img_path).convert("RGB")
        except:
            right_img = Image.new("RGB", (224, 224))

        if self.transform:
            left_img = self.transform(left_img)
            right_img = self.transform(right_img)

        # Concatenate left and right images along the channel dimension
        combined_img = torch.cat([left_img, right_img], dim=0)

        return combined_img, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_split = int(0.8 * len(data))
train_data = data[:train_split]
test_data  = data[train_split:]

train_dataset = FundusDataset(train_data, transform=transform)
test_dataset  = FundusDataset(test_data, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,  num_workers=8)
test_loader  = DataLoader(test_dataset,  batch_size=32, shuffle=False, num_workers=8)

config = ViTConfig.from_pretrained('google/vit-base-patch16-224')
config.num_labels = labels.shape[1]
config.image_size = 224  
config.num_channels = 6  

model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    config=config,
    ignore_mismatched_sizes=True
)

model.vit.embeddings.patch_embeddings.projection = nn.Conv2d(
    in_channels=6,
    out_channels=768,
    kernel_size=16,
    stride=16,
    bias=False
)

model = model.to(device)


criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


def train_model(model, train_loader, optimizer, criterion, device, epochs=10):

    model.train()
    
    for epoch in range(epochs):
        running_loss = 0.0
        
        all_preds = []
        all_labels = []
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(inputs).logits
            loss = criterion(outputs, labels.float())
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            
            # Convert logits to 0/1 predictions
            preds = (torch.sigmoid(outputs) > 0.5).float()
            all_preds.append(preds.detach().cpu())
            all_labels.append(labels.detach().cpu())
        
        epoch_loss = running_loss / len(train_loader)
        
        all_preds  = torch.cat(all_preds, dim=0).numpy()
        all_labels = torch.cat(all_labels, dim=0).numpy()
        
        precision_micro = precision_score(all_labels, all_preds, average='micro', zero_division=0)
        precision_macro = precision_score(all_labels, all_preds, average='macro', zero_division=0)
        recall_micro    = recall_score(all_labels, all_preds, average='micro', zero_division=0)
        recall_macro    = recall_score(all_labels, all_preds, average='macro', zero_division=0)
        f1_micro        = f1_score(all_labels, all_preds, average='micro', zero_division=0)
        f1_macro        = f1_score(all_labels, all_preds, average='macro', zero_division=0)
        
        # Exact match ratio (sample-level accuracy for all labels)
        exact_matches = np.all(all_labels == all_preds, axis=1).sum()
        exact_match_ratio = exact_matches / all_labels.shape[0]
        
        # Label-based accuracy (micro-style): how many individual labels are correct
        correct_labels = (all_preds == all_labels).sum()
        total_labels   = all_labels.size  # number of elements in all_labels
        label_accuracy = correct_labels / total_labels
        
        # Print training metrics for the epoch
        print(f"\nEpoch [{epoch+1}/{epochs}]")
        print(f"  Training Loss: {epoch_loss:.4f}")
        print(f"  Precision (Micro/Macro): {precision_micro:.4f} / {precision_macro:.4f}")
        print(f"  Recall (Micro/Macro):    {recall_micro:.4f} / {recall_macro:.4f}")
        print(f"  F1-score (Micro/Macro):  {f1_micro:.4f} / {f1_macro:.4f}")
        print(f"  Exact Match Ratio:       {exact_match_ratio:.4f}")
        print(f"  Label-based Accuracy:    {label_accuracy:.4f}")


def test_model(model, test_loader, criterion, device):

    model.eval()
    total_loss = 0.0
    
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).logits
            loss = criterion(outputs, labels.float())
            total_loss += loss.item()
            
            preds = (torch.sigmoid(outputs) > 0.5).float()
            all_preds.append(preds.detach().cpu())
            all_labels.append(labels.detach().cpu())
    
    avg_loss = total_loss / len(test_loader)
    
    all_preds  = torch.cat(all_preds, dim=0).numpy()
    all_labels = torch.cat(all_labels, dim=0).numpy()
    
    precision_micro = precision_score(all_labels, all_preds, average='micro', zero_division=0)
    precision_macro = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall_micro    = recall_score(all_labels, all_preds, average='micro', zero_division=0)
    recall_macro    = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    f1_micro        = f1_score(all_labels, all_preds, average='micro', zero_division=0)
    f1_macro        = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    
    exact_matches = np.all(all_labels == all_preds, axis=1).sum()
    exact_match_ratio = exact_matches / all_labels.shape[0]
    
    correct_labels = (all_preds == all_labels).sum()
    total_labels   = all_labels.size
    label_accuracy = correct_labels / total_labels
    
    per_label_precision = precision_score(all_labels, all_preds, average=None, zero_division=0)
    per_label_recall    = recall_score(all_labels, all_preds, average=None, zero_division=0)
    
    print("\nTest Set Evaluation:")
    print(f"  Test Loss: {avg_loss:.4f}")
    print(f"  Precision (Micro/Macro): {precision_micro:.4f} / {precision_macro:.4f}")
    print(f"  Recall (Micro/Macro):    {recall_micro:.4f} / {recall_macro:.4f}")
    print(f"  F1-score (Micro/Macro):  {f1_micro:.4f} / {f1_macro:.4f}")
    print(f"  Exact Match Ratio:       {exact_match_ratio:.4f}")
    print(f"  Label-based Accuracy:    {label_accuracy:.4f}")
    print(f"  Per-label Precision:     {per_label_precision}")
    print(f"  Per-label Recall:        {per_label_recall}")


train_model(model, train_loader, optimizer, criterion, device, epochs=10)
test_model(model, test_loader, criterion, device)


2025-01-26 21:16:10.614386: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737906370.637228  187327 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737906370.651472  187327 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-26 21:16:10.717136: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and a


Epoch [1/10]
  Training Loss: 0.3299
  Precision (Micro/Macro): 0.3208 / 0.1073
  Recall (Micro/Macro):    0.0434 / 0.0168
  F1-score (Micro/Macro):  0.0765 / 0.0276
  Exact Match Ratio:       0.0377
  Label-based Accuracy:    0.8689

Epoch [2/10]
  Training Loss: 0.3199
  Precision (Micro/Macro): 0.4545 / 0.0568
  Recall (Micro/Macro):    0.0196 / 0.0059
  F1-score (Micro/Macro):  0.0375 / 0.0106
  Exact Match Ratio:       0.0196
  Label-based Accuracy:    0.8745

Epoch [3/10]
  Training Loss: 0.3175
  Precision (Micro/Macro): 0.4484 / 0.0560
  Recall (Micro/Macro):    0.0348 / 0.0105
  F1-score (Micro/Macro):  0.0646 / 0.0176
  Exact Match Ratio:       0.0348
  Label-based Accuracy:    0.8740

Epoch [4/10]
  Training Loss: 0.3138
  Precision (Micro/Macro): 0.4476 / 0.0560
  Recall (Micro/Macro):    0.0334 / 0.0100
  F1-score (Micro/Macro):  0.0622 / 0.0170
  Exact Match Ratio:       0.0334
  Label-based Accuracy:    0.8740

Epoch [5/10]
  Training Loss: 0.3135
  Precision (Micro/Mac

In [13]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import resnet18
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Paths and dataset
file_path = "data/full_df.csv"  # Path to your CSV file
training_images_dir = "data/ODIR-5K/ODIR-5K/Training Images"
testing_images_dir = "data/ODIR-5K/ODIR-5K/Testing Images"

# Load dataset
data = pd.read_csv(file_path)

# Parse 'target' column into one-hot encoded labels
data['target'] = data['target'].apply(lambda x: list(map(int, x.strip("[]").split(", "))))
labels = np.array(data['target'].tolist())

# Assign image paths
data['Left-Fundus'] = data['Left-Fundus'].apply(lambda x: os.path.join(training_images_dir, os.path.basename(x)))
data['Right-Fundus'] = data['Right-Fundus'].apply(lambda x: os.path.join(training_images_dir, os.path.basename(x)))

# Group images by PatientID (Add a unique identifier if not present)
if 'PatientID' not in data.columns:
    data['PatientID'] = data.index // 2  # Assuming two images per patient

# Filter patients with valid images
valid_patients = []
for patient_id in data['PatientID'].unique():
    patient_data = data[data['PatientID'] == patient_id]
    valid = True
    for _, row in patient_data.iterrows():
        left_img_path = row['Left-Fundus']
        right_img_path = row['Right-Fundus']
        if not os.path.exists(left_img_path) or not os.path.exists(right_img_path):
            valid = False
            break
    if valid:
        valid_patients.append(patient_id)

data = data[data['PatientID'].isin(valid_patients)]
print(f"Number of valid patients: {len(valid_patients)}")

# Define image size and transformations
IMG_HEIGHT, IMG_WIDTH = 224, 224
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 6, std=[0.5] * 6),
])

# Define custom dataset
class FundusSequenceDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        self.valid_patient_ids = self.data['PatientID'].unique()

    def __len__(self):
        return len(self.valid_patient_ids)

    def __getitem__(self, idx):
        patient_id = self.valid_patient_ids[idx]
        patient_data = self.data[self.data['PatientID'] == patient_id]
        images = []
        labels = None

        for _, row in patient_data.iterrows():
            left_img_path = row['Left-Fundus']
            right_img_path = row['Right-Fundus']

            # Load images
            left_img = cv2.imread(left_img_path)
            right_img = cv2.imread(right_img_path)

            if left_img is None or right_img is None:
                print(f"Skipping corrupted images: {left_img_path}, {right_img_path}")
                continue

            # Resize and concatenate
            left_img = cv2.resize(left_img, (IMG_WIDTH, IMG_HEIGHT))
            right_img = cv2.resize(right_img, (IMG_WIDTH, IMG_HEIGHT))
            combined_img = np.concatenate((left_img, right_img), axis=2)

            # Apply transformations
            if self.transform:
                combined_img = self.transform(combined_img)
            images.append(combined_img)

            # Use the same label for all images in the sequence
            labels = torch.tensor(row['target'], dtype=torch.float32)

        if not images:
            print(f"Skipping PatientID {patient_id} due to no valid images.")
            return None  # Mark invalid patient to skip later

        images = torch.stack(images)
        return images, labels


# Split dataset by PatientID
train_ids, test_ids = train_test_split(data['PatientID'].unique(), test_size=0.2, random_state=42)
train_data = data[data['PatientID'].isin(train_ids)]
test_data = data[data['PatientID'].isin(test_ids)]

train_dataset = FundusSequenceDataset(train_data, transform=transform)
test_dataset = FundusSequenceDataset(test_data, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)  # Sequence per batch
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# CNN Feature Extractor
class CNNFeatureExtractor(nn.Module):
    def __init__(self):
        super(CNNFeatureExtractor, self).__init__()
        self.resnet = resnet18(pretrained=True)
        self.resnet.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Adjust for 6 channels
        nn.init.kaiming_normal_(self.resnet.conv1.weight[:, :3, :, :], mode="fan_out", nonlinearity="relu")
        nn.init.zeros_(self.resnet.conv1.weight[:, 3:, :, :])
        self.resnet.fc = nn.Identity()  # Remove classification head

    def forward(self, x):
        return self.resnet(x)

# CNN + RNN Model
class CNNRNNModel(nn.Module):
    def __init__(self, cnn_feature_size, rnn_hidden_size, num_classes):
        super(CNNRNNModel, self).__init__()
        self.cnn = CNNFeatureExtractor()
        self.rnn = nn.LSTM(cnn_feature_size, rnn_hidden_size, batch_first=True)
        self.fc = nn.Linear(rnn_hidden_size, num_classes)

    def forward(self, x):
        batch_size, seq_len, _, _, _ = x.size()
        x = x.view(batch_size * seq_len, 6, 224, 224)  # Flatten batch and sequence
        features = self.cnn(x)
        features = features.view(batch_size, seq_len, -1)  # Reshape for RNN
        _, (hidden, _) = self.rnn(features)
        return self.fc(hidden[-1])  # Use the last hidden state

# Model parameters
cnn_feature_size = 512  # ResNet18 outputs 512 features
rnn_hidden_size = 256
num_classes = labels.shape[1]

# Instantiate model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNNRNNModel(cnn_feature_size, rnn_hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, optimizer, criterion, device, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

# Testing loop
def test_model(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            total_loss += loss.item()
            preds = torch.round(torch.sigmoid(outputs))
            correct_predictions += (preds == labels).all(axis=1).sum().item()
            total_samples += labels.size(0)

    avg_loss = total_loss / len(test_loader)
    accuracy = correct_predictions / total_samples
    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy

# Train and evaluate
train_model(model, train_loader, optimizer, criterion, device, num_epochs=10)
test_loss, test_accuracy = test_model(model, test_loader, criterion, device)


Number of valid patients: 3196




Epoch 1, Loss: 0.3068171671864246
Epoch 2, Loss: 0.30360736502611396
Epoch 3, Loss: 0.3037444114515992
Epoch 4, Loss: 0.3025095158071492
Epoch 5, Loss: 0.3032484085376321
Epoch 6, Loss: 0.3024349377486208
Epoch 7, Loss: 0.30247978627366917
Epoch 8, Loss: 0.3026329233647792
Epoch 9, Loss: 0.30207242571540377
Epoch 10, Loss: 0.30236809604826015
Test Loss: 0.2993, Test Accuracy: 0.0000


In [5]:
print(f"Training Patients: {len(train_data['PatientID'].unique())}")
print(f"Testing Patients: {len(test_data['PatientID'].unique())}")


Training Patients: 2556
Testing Patients: 640


In [6]:
missing_files = []
for path in data['Left-Fundus'].tolist() + data['Right-Fundus'].tolist():
    if not os.path.exists(path):
        missing_files.append(path)

print(f"Number of missing files: {len(missing_files)}")
if missing_files:
    print("Missing files:", missing_files[:10])  # Print a few missing file paths for debugging


Number of missing files: 0


In [9]:
for patient_id in data['PatientID'].unique():
    patient_data = data[data['PatientID'] == patient_id]
    left_images = patient_data['Left-Fundus'].tolist()
    right_images = patient_data['Right-Fundus'].tolist()
    print(f"Patient {patient_id}: {len(left_images)} left, {len(right_images)} right images")


Patient 0: 2 left, 2 right images
Patient 1: 2 left, 2 right images
Patient 2: 2 left, 2 right images
Patient 3: 2 left, 2 right images
Patient 4: 2 left, 2 right images
Patient 5: 2 left, 2 right images
Patient 6: 2 left, 2 right images
Patient 7: 2 left, 2 right images
Patient 8: 2 left, 2 right images
Patient 9: 2 left, 2 right images
Patient 10: 2 left, 2 right images
Patient 11: 2 left, 2 right images
Patient 12: 2 left, 2 right images
Patient 13: 2 left, 2 right images
Patient 14: 2 left, 2 right images
Patient 15: 2 left, 2 right images
Patient 16: 2 left, 2 right images
Patient 17: 2 left, 2 right images
Patient 18: 2 left, 2 right images
Patient 19: 2 left, 2 right images
Patient 20: 2 left, 2 right images
Patient 21: 2 left, 2 right images
Patient 22: 2 left, 2 right images
Patient 23: 2 left, 2 right images
Patient 24: 2 left, 2 right images
Patient 25: 2 left, 2 right images
Patient 26: 2 left, 2 right images
Patient 27: 2 left, 2 right images
Patient 28: 2 left, 2 right im

In [10]:
for i in range(len(train_dataset)):
    try:
        images, labels = train_dataset[i]
        print(f"Patient {i}: Sequence size {images.size()}, Labels: {labels}")
    except Exception as e:
        print(f"Error for Patient {i}: {e}")


Error for Patient 0: stack expects a non-empty TensorList
Patient 1: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([0., 1., 0., 0., 0., 0., 0., 0.])
Patient 2: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([0., 1., 0., 0., 0., 0., 0., 0.])
Patient 3: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([1., 0., 0., 0., 0., 0., 0., 0.])
Patient 4: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([1., 0., 0., 0., 0., 0., 0., 0.])
Patient 5: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([0., 0., 0., 0., 0., 0., 1., 0.])
Patient 6: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([0., 0., 0., 0., 0., 0., 0., 1.])
Patient 7: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([0., 0., 0., 0., 0., 0., 0., 1.])
Patient 8: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([0., 1., 0., 0., 0., 0., 0., 0.])
Patient 9: Sequence size torch.Size([2, 6, 224, 224]), Labels: tensor([0., 0., 0., 0., 0., 1., 0., 0.])
Patien

KeyboardInterrupt: 