In [1]:
!pip install torch torchvision



In [2]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import os
import numpy as np
from torch.utils.data import DataLoader, Dataset

In [3]:
def load_and_preprocess_image(img_path, transform):
    """Open an image file, convert it to RGB and run ``transform`` on it.

    Args:
        img_path: Path of the image file to open.
        transform: Callable applied to the RGB ``PIL.Image``.

    Returns:
        Whatever ``transform`` returns for the RGB image.
    """
    # Image.open is lazy and holds on to the underlying file handle. The
    # context manager closes it deterministically; convert() has already
    # produced an independent in-memory copy by then.
    with Image.open(img_path) as raw_image:
        rgb_image = raw_image.convert("RGB")
    return transform(rgb_image)

In [4]:
# Preprocessing pipeline applied to every image before it is fed to a model.
transform = transforms.Compose(
    [
        # With an int size, the shorter image side is scaled to 256 px.
        transforms.Resize(size=256),
        # Keep the central 224x224 patch.
        transforms.CenterCrop(size=224),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel normalisation with the statistics commonly used for
        # torchvision's ImageNet-pretrained models.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [5]:
# Pretrained classifiers to compare, keyed by the name used in the results.
# `pretrained=True` is deprecated in torchvision (since 0.13) and emits a
# warning; the explicit IMAGENET1K_V1 weight enums select the same checkpoints
# that the old flag loaded.
models_list = {
    'AlexNet': models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1),
    'VGG16': models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1),
    'ResNet50': models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1),
}



In [6]:
# Lookup table from the class id stored in a label file to the class index
# the models are scored against. NOTE: despite the variable name, the lookup
# direction used by get_annotation() is label id -> model class index.
# NOTE(review): keys look like COCO category ids and values like ImageNet-1k
# indices — confirm against the dataset's labelling convention.
imageNet_to_coco_map = {
    2: 444,   # bicycle
    38: 21,   # kite  NOTE(review): index 21 is presumably the bird, not the toy — confirm
    24: 340,  # zebra
    27: 414,  # backpack
    76: 508,  # keyboard
    73: 620,  # laptop
    78: 651,  # microwave
    74: 673,  # mouse
    15: 703,  # bench
    14: 704,  # parking meter
    82: 760,  # refrigerator
    80: 859,  # toaster
    28: 879,  # umbrella
    86: 883,  # vase
    10: 920,  # traffic light
    56: 937,  # broccoli
    55: 950,  # orange
    52: 954,  # banana
    59: 963,  # pizza
    47: 968,  # cup
}

def get_annotation(img_path):
    """Return the mapped class index of the first annotation for an image.

    The label file is located by swapping "images" for "labels" in the
    directory part of ``img_path`` and replacing the file extension with
    ``.txt``. Only the first whitespace-separated token of that file is used
    and interpreted as an integer class id.

    Args:
        img_path: Path to the image file.

    Returns:
        The value ``imageNet_to_coco_map`` holds for that class id, or
        ``None`` when the label file is empty or the id is not in the map.

    Raises:
        FileNotFoundError: If the label file does not exist.
        ValueError: If the first token is not an integer.
    """
    directory, filename = os.path.split(img_path)
    # splitext copes with extensions of any length (".jpeg", ".png", none);
    # slicing off a fixed three characters does not.
    stem, _ext = os.path.splitext(filename)
    # Only the directory is rewritten so a file name that happens to contain
    # "images" is left intact.
    ann_path = os.path.join(directory.replace("images", "labels"), stem + ".txt")
    with open(ann_path, 'r') as file:
        # split() without arguments tolerates tabs, newlines and repeated
        # spaces, and yields [] for an empty file.
        tokens = file.read().split()
    if not tokens:
        return None
    return imageNet_to_coco_map.get(int(tokens[0]))

In [7]:
def match(double_list, value):
    """Report whether ``value`` equals any element of ``double_list[0]``.

    Only the first inner sequence is inspected; any further ones are ignored.

    Args:
        double_list: Sequence whose first element is the sequence to search.
        value: Item to look for.

    Returns:
        ``True`` if some element compares equal to ``value``, else ``False``.
    """
    first_row = double_list[0]
    return any(value == candidate for candidate in first_row)

In [8]:
def predict_and_evaluate(models, img_dir, transform, device):
    """Measure top-1 and top-5 accuracy of each model on the files in a folder.

    Args:
        models: Mapping of display name -> model. Each model is moved to
            ``device`` and put in eval mode. (Inside this function the name
            shadows the ``torchvision.models`` import; it is kept so existing
            keyword callers continue to work.)
        img_dir: Directory to scan; every regular file in it is treated as an
            image.
        transform: Preprocessing handed to ``load_and_preprocess_image``.
        device: Device that models and input tensors are moved to.

    Returns:
        Dict of model name -> ``{'Top-1 Accuracy': float, 'Top-5 Accuracy': float}``.
        An image whose ground truth is ``None`` counts as a miss.

    Raises:
        ValueError: If ``img_dir`` contains no files, since accuracy would be
            undefined (previously this surfaced as a ZeroDivisionError).
    """
    img_paths = sorted(
        path
        for path in (os.path.join(img_dir, name) for name in os.listdir(img_dir))
        if os.path.isfile(path)
    )
    if not img_paths:
        raise ValueError(f"No image files found in '{img_dir}'")
    total = len(img_paths)

    # Labels do not depend on the model, so read each label file once rather
    # than once per model.
    ground_truths = [get_annotation(path) for path in img_paths]

    results = {}
    for model_name, model in models.items():
        model.to(device)
        model.eval()

        top1_correct = 0
        top5_correct = 0

        with torch.no_grad():
            for img_path, ground_truth in zip(img_paths, ground_truths):
                image = load_and_preprocess_image(img_path, transform)
                batch = image.unsqueeze(0).to(device)  # add the batch dimension

                outputs = model(batch)

                # Indices of the five highest scores, best first.
                top5 = outputs.topk(5, dim=1, largest=True, sorted=True).indices[0].tolist()

                top1_correct += int(top5[0] == ground_truth)
                top5_correct += int(ground_truth in top5)

        results[model_name] = {
            'Top-1 Accuracy': top1_correct / total,
            'Top-5 Accuracy': top5_correct / total,
        }

    return results

In [9]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Folder holding the evaluation images.
img_dir = 'dataset/images/'
results = predict_and_evaluate(
    models=models_list,
    img_dir=img_dir,
    transform=transform,
    device=device,
)

print(results)

{'AlexNet': {'Top-1 Accuracy': 0.62, 'Top-5 Accuracy': 0.76}, 'VGG16': {'Top-1 Accuracy': 0.74, 'Top-5 Accuracy': 0.86}, 'ResNet50': {'Top-1 Accuracy': 0.74, 'Top-5 Accuracy': 0.86}}
