PROBLEM 1: 

Image features are the interesting or meaningful areas of an image, such as the corners of a house or the peaks of a mountain. They are the areas that carry the most useful information for describing the image. An image feature vector is the numerical representation of these image features, placed in a vector. Typically, many of these interest points have to be found with an algorithm such as SIFT rather than by manually looking through an image, since some points are not as intuitive as, say, the corners of a house.

In [1]:
#PROBLEM 2

import os
import numpy as np
import torch
import torchvision
from torchvision import transforms
import cv2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from PIL import Image

class ImageDatasetLoader:
    """Collect image paths and labels for three fixed two-class tasks.

    Images are looked up as ``<category>_<index>.jpg`` directly inside
    ``img_dir``, with ``index`` running from 0 to ``images_per_class - 1``.

    Args:
        img_dir: Directory that contains the image files.
        images_per_class: How many indexed files to look for per category.
            Defaults to 10, the size of the original dataset.
    """

    def __init__(self, img_dir, images_per_class=10):
        # three 2-class tasks
        self.tasks = [
            {
                'name': 'Dog vs Cat',
                'classes': {
                    'dog': 1,  # Class A
                    'cat': 2   # Class B
                }
            },
            {
                'name': 'Mango vs Banana',
                'classes': {
                    'mango': 3,  # Class C
                    'banana': 4  # Class D
                }
            },
            {
                'name': 'Goldfish vs Orca',
                'classes': {
                    'goldfish': 5,  # Class E
                    'orca': 6       # Class F
                }
            }
        ]
        self.img_dir = img_dir
        self.images_per_class = images_per_class

    def load_datasets(self):
        """Build one dataset dictionary per two-class task.

        Files that do not exist are reported with a printed message and
        skipped, so a task may end up with fewer images than expected.

        Returns:
            A list with one dictionary per task, each holding:
            - 'name': the task name
            - 'image_paths': paths of the images that were found
            - 'labels': the integer class label of each found image
        """
        all_task_datasets = []

        for task in self.tasks:
            image_paths = []
            labels = []

            for category, label in task['classes'].items():
                for i in range(self.images_per_class):
                    filename = f"{category}_{i}.jpg"
                    full_path = os.path.join(self.img_dir, filename)

                    if os.path.exists(full_path):
                        image_paths.append(full_path)
                        labels.append(label)
                    else:
                        print(f"{filename} not found")

            all_task_datasets.append({
                'name': task['name'],
                'image_paths': image_paths,
                'labels': labels
            })

        return all_task_datasets

#Load the datasets
import copy

img_dir = "image_set/"
dataset_loader = ImageDatasetLoader(img_dir)

task_datasets = dataset_loader.load_datasets()

# Give every feature-extraction method its own deep copy of the datasets.
# Plain assignment would only create extra names for the same list, so a key
# added through one name (for example by the CNN extractor) would appear in
# all of them.
task_datasetsHAND = copy.deepcopy(task_datasets)
task_datasetsCNN = copy.deepcopy(task_datasets)
task_datasetsDINO = copy.deepcopy(task_datasets)

#Print
for task in task_datasets:
    print(f"\nTask: {task['name']}")
    print("Image Paths:", task['image_paths'])
    print("Labels:", task['labels'])
    print("Total Images:", len(task['image_paths']))


Task: Dog vs Cat
Image Paths: ['image_set/dog_0.jpg', 'image_set/dog_1.jpg', 'image_set/dog_2.jpg', 'image_set/dog_3.jpg', 'image_set/dog_4.jpg', 'image_set/dog_5.jpg', 'image_set/dog_6.jpg', 'image_set/dog_7.jpg', 'image_set/dog_8.jpg', 'image_set/dog_9.jpg', 'image_set/cat_0.jpg', 'image_set/cat_1.jpg', 'image_set/cat_2.jpg', 'image_set/cat_3.jpg', 'image_set/cat_4.jpg', 'image_set/cat_5.jpg', 'image_set/cat_6.jpg', 'image_set/cat_7.jpg', 'image_set/cat_8.jpg', 'image_set/cat_9.jpg']
Labels: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
Total Images: 20

Task: Mango vs Banana
Image Paths: ['image_set/mango_0.jpg', 'image_set/mango_1.jpg', 'image_set/mango_2.jpg', 'image_set/mango_3.jpg', 'image_set/mango_4.jpg', 'image_set/mango_5.jpg', 'image_set/mango_6.jpg', 'image_set/mango_7.jpg', 'image_set/mango_8.jpg', 'image_set/mango_9.jpg', 'image_set/banana_0.jpg', 'image_set/banana_1.jpg', 'image_set/banana_2.jpg', 'image_set/banana_3.jpg', 'image_set/banana_4.jpg', 'imag

Printed above, for each task, is the list of image paths in the dataset followed by a list with the class label of each respective image. This is done for A vs B, C vs D, and E vs F.

Classes 1, 2, 3, 4, 5, and 6 correspond with A, B, C, D, E, F respectively where
A = dog, 
B = cat, 
C = mango, 
D = banana, 
E = goldfish, 
F = orca

In [2]:
#PROBLEM 3

# This code finds all the feature vectors using DINOv2.

import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import numpy as np

# Ask PyTorch to release cached GPU memory it is not currently using before
# the DINOv2 model is loaded.
torch.cuda.empty_cache() 

class DinoV2FeatureExtractor:
    """Extract one feature vector per image with a DINOv2 model from torch.hub.

    Args:
        model_name: Name of the DINOv2 entry point passed to ``torch.hub.load``.
    """

    # Vector length produced by the default 'dinov2_vits14' model. Used only
    # when the loaded model does not expose an ``embed_dim`` attribute.
    _DEFAULT_FEATURE_DIM = 384

    #initializing
    def __init__(self, model_name='dinov2_vits14'):

        try:
            self.model = torch.hub.load('facebookresearch/dinov2', model_name)
            self.model.eval()
        except Exception as e:
            print(f"Error loading DinoV2 model: {e}")
            raise

        # Length of the vectors this model produces. The failure fallback in
        # extract_features() uses it so every returned vector has the same
        # length and the results can be stacked into one 2-D array.
        self.feature_dim = getattr(self.model, 'embed_dim', self._DEFAULT_FEATURE_DIM)

        # this is the standard transformation for DinoV2
        print("---")
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

    def extract_features(self, image_path):
        """Extract the feature vector of a single image.

        The image is converted to RGB, transformed, and passed through the
        last intermediate layer of the model; the token outputs are averaged
        into a single vector.

        Args:
            image_path: Path to the image file.

        Returns:
            A 1-D numpy array with the feature vector. If anything fails, the
            error is printed and a zero vector of length ``self.feature_dim``
            is returned instead (best-effort behaviour).
        """
        try:
            # Open and transform the image
            image = Image.open(image_path).convert('RGB')
            input_tensor = self.transform(image).unsqueeze(0)

            #Extract features
            with torch.no_grad():
                features = self.model.get_intermediate_layers(input_tensor, n=1)[0]
                # Average over the token dimension, then drop the batch axis.
                features = features.mean(1).squeeze()

            return features.numpy()

        except Exception as e:
            print(f"failed for {image_path}: {e}")
            # Same length as a real feature vector. float32 is assumed to
            # match the model output dtype (PyTorch's default) - TODO confirm.
            return np.zeros(self.feature_dim, dtype=np.float32)


#Extract DinoV2 features for all tasks

def extract_dinov2_features(task_datasetsDINO):
    """Compute DINOv2 feature vectors for every image of every task.

    Args:
        task_datasetsDINO: List of task dictionaries with the keys 'name',
            'image_paths' and 'labels'.

    Returns:
        A new list with one dictionary per task holding 'name', 'features'
        (the per-image vectors stacked with ``np.array``) and 'labels'.
    """
    # One extractor (and therefore one model load) is shared by all tasks.
    dino = DinoV2FeatureExtractor()

    def _features_for(entry):
        # Run the extractor over the task's images, in order.
        vectors = [dino.extract_features(path) for path in entry['image_paths']]
        return {
            'name': entry['name'],
            'features': np.array(vectors),
            'labels': entry['labels']
        }

    return [_features_for(entry) for entry in task_datasetsDINO]

dinov2_task_features = extract_dinov2_features(task_datasetsDINO)

# Printing
for task_features in dinov2_task_features:
    print(f"\nTask: {task_features['name']}")
    print("Feature shape:", task_features['features'].shape)
    print("Labels shape:", len(task_features['labels']))

# The code below writes all the feature vectors (for all 60 images) to a txt file

with open('dinov2_features.txt', 'w') as file:
    for task_features in dinov2_task_features:
        file.write(f"\nTask: {task_features['name']}\n")
        file.write("Feature Vectors:\n")

        for i, (feature, label) in enumerate(zip(task_features['features'], task_features['labels'])):
            # Plain str() on an array abbreviates it with "..." once it has
            # more than 1000 elements. Raising the threshold above the vector
            # size keeps the saved vector complete; the layout for shorter
            # vectors is unchanged.
            feature_text = np.array2string(feature, threshold=feature.size + 1)
            file.write(f"Image {i}:\n")
            file.write(f"  Label: {label}\n")
            file.write(f"  Feature Vector: {feature_text}\n")
            file.write(f"  Vector Length: {len(feature)}\n")
            file.write("\n")


Using cache found in C:\Users\nihal/.cache\torch\hub\facebookresearch_dinov2_main


---

Task: Dog vs Cat
Feature shape: (20, 384)
Labels shape: 20

Task: Mango vs Banana
Feature shape: (20, 384)
Labels shape: 20

Task: Goldfish vs Orca
Feature shape: (20, 384)
Labels shape: 20


In [None]:
#PROBLEM 3

# This code finds all the feature vectors using a CNN method (ResNet).

import os
import numpy as np
import torch
import torchvision
from torchvision import transforms
from PIL import Image


# Ask PyTorch to release cached GPU memory it is not currently using before
# the ResNet model is loaded.
torch.cuda.empty_cache()

class ResNetFeatureExtractor:
    """Turn images into feature vectors with a headless torchvision ResNet.

    Args:
        model_name: Either 'resnet18' or 'resnet50'.
        pretrained: Forwarded to the torchvision model constructor.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """

    def __init__(self, model_name='resnet18', pretrained=True):
        # Reject unknown names before touching torchvision.
        if model_name not in ('resnet18', 'resnet50'):
            raise ValueError("Unsupported ResNet model. Choose 'resnet18' or 'resnet50'.")

        # The supported names match the torchvision constructor names.
        backbone = getattr(torchvision.models, model_name)(pretrained=pretrained)

        # Keep every child module except the last one (the classification layer).
        trunk = list(backbone.children())[:-1]
        self.model = torch.nn.Sequential(*trunk)
        self.model.eval()  # evaluation mode

        # Resize to the default ResNet input size, then normalise.
        resize = transforms.Resize((224, 224))
        to_tensor = transforms.ToTensor()
        normalise = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
        self.transform = transforms.Compose([resize, to_tensor, normalise])

    def extract_features(self, image_paths):
        """Return the stacked feature vectors for the given image paths.

        Args:
            image_paths: Iterable of paths to image files.

        Returns:
            ``np.array`` built from one squeezed model output per image, in
            the same order as ``image_paths``.
        """
        vectors = []

        # Gradients are not needed for feature extraction.
        with torch.no_grad():
            for path in image_paths:
                rgb = Image.open(path).convert('RGB')
                batch = self.transform(rgb).unsqueeze(0)
                vectors.append(self.model(batch).squeeze().numpy())

        return np.array(vectors)
    
#  Extracting features for all tasks in the dataset and save to a single file
def extract_features_for_tasks(task_datasetsCNN, model_name='resnet18', output_file='cnn_features.txt'):
    """Extract ResNet features for every task and save them to one text file.

    Each task dictionary is updated in place with a 'feature_vectors' entry,
    and the name and feature shape of each task are printed.

    Args:
        task_datasetsCNN: List of task dictionaries with the keys 'name',
            'image_paths' and 'labels'.
        model_name: ResNet variant passed to ``ResNetFeatureExtractor``.
        output_file: Path of the text file the vectors are written to.

    Returns:
        The same ``task_datasetsCNN`` list, with 'feature_vectors' added to
        every task.
    """
    feature_extractor = ResNetFeatureExtractor(model_name=model_name)

    # Writing all features to txt file
    with open(output_file, 'w') as f:
        for task in task_datasetsCNN:
            feature_vectors = feature_extractor.extract_features(task['image_paths'])
            task['feature_vectors'] = feature_vectors
            print(f"\nTask: {task['name']}")
            print("Feature Vector Shape:", feature_vectors.shape)

            f.write(f"Task: {task['name']}\n")
            f.write("Feature Vectors:\n")

            for i, (feature_vector, label) in enumerate(zip(feature_vectors, task['labels'])):
                # Formatting options are passed per call instead of through
                # np.set_printoptions, so numpy's global print settings stay
                # untouched. The threshold is raised above the vector size so
                # vectors longer than 1000 elements (e.g. resnet50) are
                # written in full rather than abbreviated with "...".
                vector_text = np.array2string(
                    feature_vector,
                    max_line_width=np.inf,
                    precision=7,
                    suppress_small=True,
                    separator=', ',
                    threshold=feature_vector.size + 1,
                )
                f.write(f"Image {i}:\n")
                f.write(f"  Label: {label}\n")
                f.write("  Feature Vector: ")
                f.write(vector_text + "\n\n")

    return task_datasetsCNN

# Extract features
# Uses the function defaults: the 'resnet18' model, written to 'cnn_features.txt'.
CNN_task_features = extract_features_for_tasks(task_datasetsCNN)

AttributeError: 'ResNetFeatureExtractor' object has no attribute 'model'

In [None]:
#PROBLEM 3
import glob
import cv2
import matplotlib.pyplot as plt


# Load the images
# images = glob.glob('image_set/*.jpg')
# print(f'Loaded {len(images)} images.')

# for fname in images:
#     img = cv2.imread(fname)
#     print('Processing image %s...' % fname)
#     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     sift =cv2.SIFT_create()
#     keypoints = sift.detect(gray, None)
#     # Draw keypoints on the image
#     image_with_keypoints = cv2.drawKeypoints(img, keypoints, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
#     plt.figure(figsize=(10, 5))
#     plt.title('SIFT Keypoints')
#     plt.imshow(cv2.cvtColor(image_with_keypoints, cv2.COLOR_BGR2RGB))
#     plt.show()



In [None]:
#PROBLEM 4

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

def classify_with_svm(task_features, dataset_type="generic"):
    """
    Train and evaluate a linear SVM for each classification task.

    For every task the data is split 70/30 into train and test sets, the
    features are standardized using statistics from the training set only,
    and the accuracy and classification report for the test set are printed.
    Tasks without features or labels are reported and skipped.

    Args:
        task_features (list): List of dictionaries containing task features and labels.
        dataset_type (str): Type of dataset ('DinoV2', 'CNN', or 'generic') to determine key structure.
    Returns:
        None
    """
    print(f"\nSVM Classification Results for {dataset_type} Features:")

    for task in task_features:
        print(f"\nTask: {task.get('name', 'Unknown Task')}")

        # Dynamically handle feature extraction based on dataset type
        if dataset_type == "DinoV2":
            features = task.get('features', None)
        elif dataset_type == "CNN":
            features = task.get('feature_vectors', None)
        else:
            features = task.get('features', task.get('feature_vectors', None))  # Generic handling

        if features is None:
            print("Error: Features not found in task data!")
            continue

        labels = task.get('labels', None)
        if labels is None:
            print("Error: Labels not found in task data!")
            continue

        # Split first, so nothing about the test samples can influence the
        # preprocessing that the classifier is trained with.
        X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

        # Standardize the features (important for SVM performance). The scaler
        # is fitted on the training split only and then applied to the test
        # split; fitting on all data would leak test statistics into training.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Train the SVM
        svm = SVC(kernel='linear', random_state=42)
        svm.fit(X_train, y_train)

        # Predict on the test set
        y_pred = svm.predict(X_test)

        # Evaluate the model
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {accuracy:.2f}")
        print("Classification Report:")
        # Pass the class list explicitly and in sorted order so each display
        # name lines up with its class, even when a class is absent from the
        # small test split.
        class_labels = sorted(set(labels))
        print(classification_report(
            y_test,
            y_pred,
            labels=class_labels,
            target_names=[str(label) for label in class_labels],
        ))


# Call the function for DinoV2 features
# (these task dictionaries store their vectors under the 'features' key)
print("DinoV2 Results:")
classify_with_svm(dinov2_task_features, dataset_type="DinoV2")

# Call the function for CNN features
# (these task dictionaries store their vectors under the 'feature_vectors' key)
print("\nCNN Results:")
classify_with_svm(CNN_task_features, dataset_type="CNN")



CNN Results:

SVM Classification Results for CNN Features:

Task: Dog vs Cat
Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         4
           2       1.00      1.00      1.00         2

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6


Task: Mango vs Banana
Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         2

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6


Task: Goldfish vs Orca
Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           5       1.00      1.00      1.00         