In [None]:
import cv2 # Library for dealing with images, the biggest and the fastest, written in cpp, wrapped in python.
import os # Library for dealing with the operating system, getting folders, files, and much much more.
import numpy as np # Numerical Python, library for doing most of the mathematical operations needed in machine learning.
import torch
from torchvision import transforms

def _model_device(model):
    """Return the device of the model's first parameter, or CPU if it has none."""
    parameters = getattr(model, "parameters", None)
    if callable(parameters):
        first_param = next(iter(parameters()), None)
        if first_param is not None:
            return first_param.device
    return torch.device("cpu")


def load_dataset(data_dir, model):
    """Extract one feature vector per image from a class-per-folder dataset.

    Every immediate sub-directory of ``data_dir`` is treated as one class.
    Class names are sorted, and the label of an image is the index of its
    class in that sorted list. Each image is read with OpenCV, converted from
    BGR to RGB, resized to 224x224, normalized, and passed through ``model``
    as a batch of one; the flattened output is stored as its feature vector.

    Args:
        data_dir: Path of the directory that contains one folder per class.
        model: Callable applied to a ``(1, 3, 224, 224)`` float tensor. If it
            exposes ``parameters()``, the input is moved to the device of its
            first parameter. If it is in training mode, it is switched to
            eval mode for the duration of the call and restored afterwards.

    Returns:
        A tuple ``(features, labels, classes_names)`` where ``features`` is a
        NumPy array with one flattened model output per successfully loaded
        image, ``labels`` is a NumPy array with the matching class indices,
        and ``classes_names`` is the sorted list of class folder names.
        Both arrays are empty when no image could be loaded.
    """
    features = []
    labels = []

    # Close the scandir iterator deterministically instead of leaving it to GC.
    with os.scandir(data_dir) as entries:
        classes_names = sorted(entry.name for entry in entries if entry.is_dir())

    # The mean/std values are the ones conventionally used with
    # ImageNet-pretrained torchvision models.
    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Inputs must live on the same device as the model's weights.
    device = _model_device(model)

    # no_grad() does not disable dropout or batch-norm statistic updates, so
    # run in eval mode and give the caller back the mode they had.
    was_training = bool(getattr(model, "training", False))
    if was_training:
        model.eval()

    try:
        for class_index, class_name in enumerate(classes_names):
            class_path = os.path.join(data_dir, class_name)
            print(f"Loading Class {class_index}: {class_name}")

            # Sorted so the row order of the result is reproducible.
            for img_name in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_name)

                try:
                    img_np_arr = cv2.imread(img_path)

                    # imread returns None for anything it cannot decode
                    # (non-image files, sub-directories); skip those quietly.
                    if img_np_arr is None:
                        continue

                    # OpenCV loads images as BGR; the pipeline expects RGB.
                    img_rgb = cv2.cvtColor(img_np_arr, cv2.COLOR_BGR2RGB)
                    img_t = preprocess(img_rgb)
                    input_batch = img_t.unsqueeze(0).to(device)

                    with torch.no_grad():
                        output_tensor = model(input_batch)
                        feature_vector = output_tensor.flatten().cpu().numpy()

                    features.append(feature_vector)
                    labels.append(class_index)

                except Exception as e:
                    # Best-effort loading: report the bad file and keep going.
                    print(f"Error loading {img_name}: {e}")
    finally:
        if was_training:
            model.train()

    return np.array(features), np.array(labels), classes_names
    


