In [2]:
import torch
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

In [3]:
# Set base directory
base_dir = r"F:\augmented_resized_V2"
splits = ["train", "val", "test"]

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("✅ Using device:", device)

✅ Using device: cuda


In [4]:
# Load pretrained ResNet50 (remove classification layer)
resnet = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*list(resnet.children())[:-1])
model.to(device)
model.eval()



Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [5]:
# Define transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [6]:

# Loop through each split folder
for split in splits:
    image_dir = os.path.join(base_dir, split)
    output_csv = f"{split}_features_augmented_resized_V2.csv"

    all_features, all_labels, all_paths = [], [], []

    for label in sorted(os.listdir(image_dir)):
        class_path = os.path.join(image_dir, label)
        if not os.path.isdir(class_path):
            continue

        print(f"🔍 Processing {split} class {label}")
        for file_name in tqdm(os.listdir(class_path), desc=f"{split.upper()} Class {label}"):
            file_path = os.path.join(class_path, file_name)
            try:
                img = Image.open(file_path).convert('RGB')
                img_tensor = transform(img).unsqueeze(0).to(device)

                with torch.no_grad():
                    features = model(img_tensor).squeeze().cpu().numpy()

                all_features.append(features)
                all_labels.append(int(label))
                all_paths.append(file_path)

            except Exception as e:
                print(f"⚠️ Failed on {file_path} → {e}")

🔍 Processing train class 0


TRAIN Class 0: 100%|██████████| 55162/55162 [30:24<00:00, 30.23it/s]


🔍 Processing train class 1


TRAIN Class 1: 100%|██████████| 60079/60079 [35:14<00:00, 28.41it/s]


🔍 Processing val class 0


VAL Class 0: 100%|██████████| 6895/6895 [02:50<00:00, 40.51it/s]


🔍 Processing val class 1


VAL Class 1: 100%|██████████| 1840/1840 [00:44<00:00, 40.99it/s]


🔍 Processing test class 0


TEST Class 0: 100%|██████████| 6896/6896 [02:55<00:00, 39.28it/s]


🔍 Processing test class 1


TEST Class 1: 100%|██████████| 1862/1862 [00:45<00:00, 40.69it/s]


In [8]:
  # Save as CSV
df = pd.DataFrame(all_features)
df['label'] = all_labels
df['path'] = all_paths
df.to_csv(output_csv, index=False)
print(f"✅ Saved features to {output_csv}")

✅ Saved features to test_features_augmented_resized_V2.csv
