In [13]:
pip install torch torchvision pandas

Note: you may need to restart the kernel to use updated packages.


In [14]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.


In [1]:
import torch
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# Set your dataset directory and output file
# Both locations can be overridden through environment variables so the notebook
# runs on other machines without editing this cell; the defaults are unchanged.
image_dir = os.environ.get(
    "DR_IMAGE_DIR", r"F:\Big_data_dataset\EAMDR_data\dr_unified_v2\train"
)
output_csv = os.environ.get("DR_OUTPUT_CSV", "train_features_dr_unified.csv")

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [None]:
# Load pretrained ResNet50 and remove the classification head
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` enum; IMAGENET1K_V1 is the checkpoint the legacy flag selects.
# Older torchvision releases have no weights enum, so keep the legacy call
# as a fallback.
if hasattr(models, "ResNet50_Weights"):
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    resnet = models.resnet50(pretrained=True)

# Keep every child module except the last one, so the network emits features
# instead of class scores.
model = torch.nn.Sequential(*list(resnet.children())[:-1])
model.to(device)
model.eval()  # inference mode: freezes batch-norm statistics, disables dropout
print("ResNet50 loaded")



ResNet50 loaded


In [4]:
# Image transformation
# Per-channel mean/std conventionally used with torchvision's
# ImageNet-pretrained models.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Steps run in order: resize to the 224x224 input size used for ResNet,
# convert the PIL image to a tensor, then normalise each channel.
preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
transform = transforms.Compose(preprocess_steps)
print("Transform ready")


Transform ready


In [19]:
# Actual feature extraction logic
# Walks image_dir/<label>/<file>, pushes every readable image through the
# headless network and collects one (feature vector, label, path) triple per
# image. The three lists are only ever appended to together, so they always
# have the same length.
all_features, all_labels, all_paths = [], [], []

for label in sorted(os.listdir(image_dir)):
    class_path = os.path.join(image_dir, label)
    if not os.path.isdir(class_path):
        continue

    # Parse the label once per class directory. Doing this per image *after*
    # the feature had been appended could leave the lists with different
    # lengths when a directory name is not an integer.
    try:
        class_id = int(label)
    except ValueError:
        print(f"Skipping {class_path} → directory name is not an integer label")
        continue

    print(f"Processing class {label}")
    # os.listdir returns entries in arbitrary order; sort so the row order of
    # the resulting table is reproducible between runs.
    for file_name in tqdm(sorted(os.listdir(class_path)), desc=f"Class {label}"):
        file_path = os.path.join(class_path, file_name)
        try:
            # The context manager closes the underlying file handle promptly
            # instead of leaving it to garbage collection.
            with Image.open(file_path) as img:
                img_tensor = transform(img.convert('RGB')).unsqueeze(0).to(device)

            with torch.no_grad():
                features = model(img_tensor).squeeze().cpu().numpy()
        except Exception as e:
            # Best effort: report unreadable/corrupt files and keep going.
            print(f"Failed on {file_path} → {e}")
            continue

        # Record the triple only after every step succeeded.
        all_features.append(features)
        all_labels.append(class_id)
        all_paths.append(file_path)

🔍 Processing class 0


Class 0: 100%|██████████| 55162/55162 [36:48<00:00, 24.97it/s]


🔍 Processing class 1


Class 1: 100%|██████████| 55590/55590 [40:33<00:00, 22.85it/s]


🔍 Processing class 2


Class 2: 100%|██████████| 48475/48475 [36:47<00:00, 21.96it/s]


🔍 Processing class 3


Class 3: 100%|██████████| 42616/42616 [32:29<00:00, 21.86it/s]


🔍 Processing class 4


Class 4: 100%|██████████| 49715/49715 [40:17<00:00, 20.56it/s]


In [20]:
# Save features
# One row per image: the feature columns first, followed by the class label
# and the path of the source file.
df = pd.DataFrame(all_features).assign(label=all_labels, path=all_paths)

# index=False keeps the row index out of the CSV.
df.to_csv(output_csv, index=False)
print(f"✅ Feature CSV saved to: {output_csv}")

✅ Feature CSV saved to: train_features_dr_unified.csv
