In [None]:
import sys
import os

sys.path.append(os.path.abspath("../"))
import torch
from torchvision.models import resnet50
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
import os
from src.preprocessing.resize_crop import preprocess_image
from src.preprocessing.image_loader import load_images_from_folder

# Source images are read from DATA_DIR; extracted features go to FEATURES_DIR.
DATA_DIR = "../data/selected_5_classes"
FEATURES_DIR = "../data/features"

# Class names handed to the image loader.
selected_classes = [
    "golden_retriever",
    "pug",
    "beagle",
    "german_shepherd",
    "chihuahua",
]

# Create the output directory up front; exist_ok makes re-running the cell harmless.
os.makedirs(FEATURES_DIR, exist_ok=True)

# NOTE(review): presumably `labels` holds integer class indices and
# `class_to_idx` maps class name -> index; confirm against load_images_from_folder.
images, labels, class_to_idx = load_images_from_folder(DATA_DIR, selected_classes)

class DogDataset(Dataset):
    """Dataset pairing each stored image with the label at the same position.

    Preprocessing is applied lazily: ``preprocess_image`` runs each time an
    item is fetched, not when the dataset is constructed.
    """

    def __init__(self, images, labels):
        # Parallel sequences: labels[i] is the label of images[i].
        self.images, self.labels = images, labels

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(preprocessed image, label)`` for position ``idx``."""
        return preprocess_image(self.images[idx]), self.labels[idx]

# shuffle=False keeps batches in the same order as `images`/`labels`, so the
# extracted feature rows line up with the original sample order.
dataset = DogDataset(images, labels)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=False,
)

# Load an ImageNet-pretrained ResNet-50. `pretrained=True` is deprecated since
# torchvision 0.13 in favour of `weights=`; IMAGENET1K_V1 is the checkpoint the
# legacy flag selected, so the extracted features are unchanged.
try:
    from torchvision.models import ResNet50_Weights
except ImportError:
    # Older torchvision without the weights enums: keep the legacy flag.
    model = resnet50(pretrained=True)
else:
    model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

# Keep every child module except the last one (the FC classifier), so the
# network outputs pooled features instead of class logits.
model = torch.nn.Sequential(*list(model.children())[:-1])

# Prefer the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()  # inference mode for the feature-extraction pass


# Push every batch through the truncated network and collect the results as
# NumPy data on the host.
features_list, labels_list = [], []
with torch.no_grad():  # pure inference: no gradient tracking needed
    for batch_imgs, batch_lbls in dataloader:
        outputs = model(batch_imgs.to(device))
        # Collapse everything after the batch dimension into one vector per image.
        flat = torch.flatten(outputs, start_dim=1)
        features_list.append(flat.cpu().numpy())
        labels_list.extend(batch_lbls.numpy())


# Stack the per-batch feature arrays row-wise and turn the collected labels
# into a single array, then persist both under FEATURES_DIR.
X = np.concatenate(features_list, axis=0)
y = np.array(labels_list)
for filename, array in (("X_features.npy", X), ("y_labels.npy", y)):
    np.save(os.path.join(FEATURES_DIR, filename), array)


import json

# Store the mapping returned by the image loader next to the saved arrays so
# the labels can be interpreted later.
class_map_path = os.path.join(FEATURES_DIR, "class_names.json")
with open(class_map_path, "w") as out_file:
    json.dump(class_to_idx, out_file)

# Quick sanity check of what was written.
print(f"Saved features: {X.shape}, labels: {y.shape}")




Saved features: (849, 2048), labels: (849,)
