In [1]:
import os
import glob
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from sklearn.preprocessing import MultiLabelBinarizer
import timm
from torchcam.methods import GradCAM
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [64]:
# ---------------- CONFIG ----------------
IMG_SIZE = 299
BATCH_SIZE = 8
EPOCHS = 10
LR = 1e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ROOT_DIR = r'D:/Kaif/Hackathon25/CDAC/1000images/1000images'
MODEL_SAVE_PATH = "best_fundus_efficientnetb3.pth"
CSV_OUTPUT_PATH = "fundus_predictions.csv"
CAM_OUTPUT_DIR = "cam_outputs"
os.makedirs(CAM_OUTPUT_DIR, exist_ok=True)

In [65]:
# ---------------- LABEL SETUP ----------------
class_names = sorted([d for d in os.listdir(ROOT_DIR) if os.path.isdir(os.path.join(ROOT_DIR, d))])
mlb = MultiLabelBinarizer(classes=class_names)
mlb.fit([class_names])

0,1,2
,classes,"['0.0.Normal', '0.1.Tessellated fundus', ...]"
,sparse_output,False


In [5]:
print(class_names)

['0.0.Normal', '0.1.Tessellated fundus', '0.2.Large optic cup', '0.3.DR1', '1.0.DR2', '1.1.DR3', '10.0.Possible glaucoma', '10.1.Optic atrophy', '11.Severe hypertensive retinopathy', '12.Disc swelling and elevation', '13.Dragged Disc', '14.Congenital disc abnormality', '15.0.Retinitis pigmentosa', '15.1.Bietti crystalline dystrophy', '16.Peripheral retinal degeneration and break', '17.Myelinated nerve fiber', '18.Vitreous particles', '19.Fundus neoplasm', '2.0.BRVO', '2.1.CRVO', '20.Massive hard exudates', '21.Yellow-white spots-flecks', '22.Cotton-wool spots', '23.Vessel tortuosity', '24.Chorioretinal atrophy-coloboma', '25.Preretinal hemorrhage', '26.Fibrosis', '27.Laser Spots', '28.Silicon oil in eye', '29.0.Blur fundus without PDR', '29.1.Blur fundus with suspected PDR', '3.RAO', '4.Rhegmatogenous RD', '5.0.CSCR', '5.1.VKH disease', '6.Maculopathy', '7.ERM', '8.MH', '9.Pathological myopia']


In [66]:
# ---------------- DATASET ----------------
class FundusDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform
        for cls in class_names:
            folder = os.path.join(root_dir, cls)
            imgs = glob.glob(os.path.join(folder, "*.JPG"))
            self.image_paths.extend(imgs)
            self.labels.extend([[cls]] * len(imgs))
        self.encoded_labels = mlb.transform(self.labels)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img = Image.open(img_path).convert("RGB")
        label = torch.FloatTensor(self.encoded_labels[idx])
        if self.transform:
            img = self.transform(img)
        return img, label, os.path.basename(img_path)

In [67]:
# ---------------- TRANSFORMS ----------------
train_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

In [68]:
# ---------------- DATALOADERS ----------------
dataset = FundusDataset(ROOT_DIR, transform=train_tfms)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_set, val_set = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE)

In [69]:
len(dataset)

997

In [70]:
# ---------------- MODEL ----------------
model = timm.create_model("efficientnet_b3", pretrained=True, num_classes=len(class_names))
model = model.to(DEVICE)

In [71]:
# ---------------- CAM ----------------
cam_extractor = GradCAM(model, target_layer="blocks.5")  # You can change this to "conv_head" if needed

In [72]:
# ---------------- TRAINING ----------------
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

In [73]:
def train_model():
    best_loss = float('inf')
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            raw_outputs = model(imgs)
            outputs = torch.sigmoid(raw_outputs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Train Loss: {total_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
                raw_outputs = model(imgs)
                outputs = torch.sigmoid(raw_outputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        print(f"Validation Loss: {val_loss:.4f}")
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            print(" Saved best model!")

In [74]:
# ---------------- PREDICTION + CSV ----------------
def export_predictions(model, dataloader, csv_path):
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(dataloader, desc="Exporting Predictions"):
            imgs = imgs.to(DEVICE)
            outputs = torch.sigmoid(model(imgs))
            preds = (outputs > 0.5).int().cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.numpy())

    df_preds = pd.DataFrame(all_preds, columns=[f"Pred_{c}" for c in class_names])
    df_true = pd.DataFrame(all_labels, columns=[f"True_{c}" for c in class_names])
    df_combined = pd.concat([df_true, df_preds], axis=1)
    df_combined.to_csv(csv_path, index=False)
    print(f" Predictions saved to {csv_path}")

In [75]:
# ---------------- GRAD-CAM DISPLAY ----------------
def visualize_cam(image_tensor, class_idx):
    model.eval()
    image_tensor = image_tensor.unsqueeze(0).to(DEVICE)
    image_tensor.requires_grad_()  # ✅ Important for CAM

    scores = model(image_tensor)
    cam = cam_extractor(class_idx, scores)

    img_np = image_tensor.cpu().squeeze().permute(1, 2, 0).numpy()
    heatmap = cam[0].squeeze().cpu().numpy()

    fig, ax = plt.subplots(1, 2, figsize=(10, 4))
    ax[0].imshow((img_np * 0.5 + 0.5))
    ax[0].set_title("Original")
    ax[1].imshow((img_np * 0.5 + 0.5))
    ax[1].imshow(heatmap, cmap='jet', alpha=0.5)
    ax[1].set_title(f"Grad-CAM: {class_names[class_idx]}")
    plt.tight_layout()
    plt.show()

In [76]:
train_model()

Epoch 1/10:   0%|          | 0/100 [00:00<?, ?it/s]


ValueError: too many values to unpack (expected 2)

In [None]:
model.load_state_dict(torch.load(MODEL_SAVE_PATH))

In [None]:
export_predictions(model, val_loader, CSV_OUTPUT_PATH)

In [None]:
sample_img, sample_label, sample_name = dataset[0]
sample_label_bin = sample_label.int().tolist()
visualize_cam(sample_img, sample_label_bin, filename=sample_name)

In [2]:
import os
import glob
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from sklearn.preprocessing import MultiLabelBinarizer
import timm
from torchcam.methods import GradCAM
import matplotlib.pyplot as plt

# ---------------- CONFIG ----------------
IMG_SIZE = 299
BATCH_SIZE = 8
EPOCHS = 5
LR = 1e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ROOT_DIR = r'D:/Kaif/Hackathon25/CDAC/1000images/1000images'
MODEL_SAVE_PATH = "best_fundus_efficientnetb3.pth"
CSV_OUTPUT_PATH = "fundus_predictions.csv"

# ---------------- LABEL SETUP ----------------
class_names = sorted([d for d in os.listdir(ROOT_DIR) if os.path.isdir(os.path.join(ROOT_DIR, d))])
mlb = MultiLabelBinarizer()
mlb.fit([class_names])  # Fixed fitting: use list of class names

# ---------------- TRANSFORMS ----------------
# Separate transforms for train/val
train_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

val_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

# ---------------- DATASET ----------------
class FundusDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform
        
        for cls in class_names:
            folder = os.path.join(root_dir, cls)
            # Include both .JPG and .jpg extensions
            imgs = glob.glob(os.path.join(folder, "*.JPG")) + glob.glob(os.path.join(folder, "*.jpg"))
            self.image_paths.extend(imgs)
            self.labels.extend([[cls]] * len(imgs))
        
        self.encoded_labels = mlb.transform(self.labels)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        label = torch.FloatTensor(self.encoded_labels[idx])
        if self.transform:
            img = self.transform(img)
        return img, label

# ---------------- DATALOADERS ----------------
dataset = FundusDataset(ROOT_DIR, transform=None)  # No initial transform
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_set, val_set = random_split(dataset, [train_size, val_size])

# Apply transforms to subsets
train_set.dataset.transform = train_tfms  # Apply training transforms to train set
val_set.dataset.transform = val_tfms      # Apply validation transforms to val set

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE)

# ---------------- MODEL ----------------
model = timm.create_model("efficientnet_b3", pretrained=True, num_classes=len(class_names))
model = model.to(DEVICE)

# ---------------- CAM ----------------
cam_extractor = GradCAM(model, target_layer="conv_head")  # More effective target layer

# ---------------- TRAINING ----------------
criterion = nn.BCEWithLogitsLoss()  # Better numerical stability
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

def train_model():
    best_loss = float('inf')
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            
            outputs = model(imgs)  # Raw logits
            loss = criterion(outputs, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        avg_train_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        
        avg_val_loss = val_loss / len(val_loader)
        print(f"Validation Loss: {avg_val_loss:.4f}")
        
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            print("✅ Saved best model!")

# ---------------- PREDICTION + CSV ----------------
def export_predictions(model, dataloader, csv_path):
    model.eval()
    all_preds = []
    all_labels = []
    all_paths = []

    with torch.no_grad():
        for imgs, labels in tqdm(dataloader, desc="Exporting Predictions"):
            imgs = imgs.to(DEVICE)
            outputs = model(imgs)
            probs = torch.sigmoid(outputs).cpu().numpy()
            all_preds.extend(probs)
            all_labels.extend(labels.numpy())
            
            # Track image paths for reference
            batch_indices = dataloader.dataset.indices  # Get indices for current batch
            for idx in batch_indices:
                all_paths.append(dataset.image_paths[idx])

    # Create comprehensive DataFrame
    pred_cols = [f"Pred_{c}" for c in class_names]
    true_cols = [f"True_{c}" for c in class_names]
    
    df_preds = pd.DataFrame(all_preds, columns=pred_cols)
    df_true = pd.DataFrame(all_labels, columns=true_cols)
    df_paths = pd.DataFrame({"Image_Path": all_paths})
    
    df_combined = pd.concat([df_paths, df_true, df_preds], axis=1)
    df_combined.to_csv(csv_path, index=False)
    print(f"✅ Predictions saved to {csv_path}")

# ---------------- GRAD-CAM DISPLAY ----------------
def visualize_cam(image_tensor, class_idx):
    model.eval()
    image_tensor = image_tensor.unsqueeze(0).to(DEVICE)
    
    # Generate CAM
    with torch.no_grad():
        scores = model(image_tensor)
    
    # Use class_idx for CAM extraction
    cam = cam_extractor(class_idx, scores)[0].squeeze().cpu().numpy()
    
    # Process image
    img_np = image_tensor.cpu().squeeze().permute(1, 2, 0).numpy()
    img_np = (img_np * 0.5) + 0.5  # Reverse normalization
    
    # Visualize
    fig, ax = plt.subplots(1, 2, figsize=(10, 4))
    ax[0].imshow(img_np)
    ax[0].set_title("Original")
    ax[0].axis('off')
    
    ax[1].imshow(img_np)
    ax[1].imshow(cam, cmap='jet', alpha=0.5)
    ax[1].set_title(f"Grad-CAM: {class_names[class_idx]}")
    ax[1].axis('off')
    
    plt.tight_layout()
    plt.show()


In [3]:
print(f"Using device: {DEVICE}")
print(f"Found {len(dataset)} images across {len(class_names)} classes")

print("Starting training...")
train_model()

Using device: cpu
Found 1994 images across 39 classes
Starting training...


Epoch 1/5: 100%|██████████| 200/200 [25:06<00:00,  7.53s/it]


Epoch 1, Train Loss: 0.1538


RuntimeError: cannot register a hook on a tensor that doesn't require gradient

In [None]:
print("Loading best model for prediction & CAM...")
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=DEVICE))
export_predictions(model, val_loader, CSV_OUTPUT_PATH)

In [None]:
print("Visualizing Grad-CAM on one sample...")
# Get a sample from validation set
sample_idx = val_set.indices[0]
sample_img, sample_label = dataset[sample_idx]
class_to_vis = torch.argmax(sample_label).item()
visualize_cam(sample_img, class_to_vis)