# Lab 1

Dominykas Misius, 2213772

**Model:** ResNet50

**Dataset:** Open Images V7

**Classes:** Cat, Dog, Fruit

In [None]:
!pip install -q fiftyone

from google.colab import drive
import os
import json

# Everything that must survive a Colab runtime reset is stored on Google Drive,
# so the drive has to be mounted before any path below is used.
drive.mount('/content/drive')

# Working folder for this lab; created on first run, reused afterwards.
BASE_DIR = '/content/drive/MyDrive/GMM_Lab1'
os.makedirs(BASE_DIR, exist_ok=True)

# JSON file holding the per-image ground-truth labels.
LABELS_PATH = os.path.join(BASE_DIR, 'ground_truth_labels.json')

print(f"Base dir: {BASE_DIR}")

## 2. Data Download from OpenImages V7

Downloading 1000 validation images using FiftyOne. Labels are saved to a JSON file.

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz

# Open Images classes evaluated in this lab.
CHOSEN_CLASSES = ["Cat", "Dog", "Fruit"]

print("Loading dataset...")
# The zoo name is "open-images-v7" so that the data actually matches the
# Open Images V7 dataset named in the notebook text.
dataset = foz.load_zoo_dataset(
    "open-images-v7",
    split="validation",
    max_samples=1000,
    classes=CHOSEN_CLASSES,
    label_types=["classifications"],
)

# Folder of the first sample; assumes all samples share one directory.
DATA_DIR = os.path.dirname(next(iter(dataset)).filepath)
print(f"Images at: {DATA_DIR}")

# File names of the samples that were really loaded; used to validate the cache.
dataset_filenames = {
    os.path.basename(path) for path in dataset.values("filepath")
}

# Reuse the labels saved on disk only if they describe exactly these images.
# A file left over from a different download (other classes, other dataset
# version, other sample count) would otherwise silently mislabel the images.
ground_truth = None
if os.path.exists(LABELS_PATH):
    with open(LABELS_PATH, 'r', encoding='utf-8') as f:
        cached_labels = json.load(f)
    if set(cached_labels) == dataset_filenames:
        print("Labels file found, loading from disk.")
        ground_truth = cached_labels
    else:
        print("Labels file does not match the downloaded images, rebuilding.")

if ground_truth is None:
    chosen = set(CHOSEN_CLASSES)
    ground_truth = {}
    for sample in dataset:
        filename = os.path.basename(sample.filepath)
        positives = sample.positive_labels
        if positives is None:
            found = set()
        else:
            found = {
                c.label for c in positives.classifications if c.label in chosen
            }
        # Sorted so the saved JSON is identical from run to run.
        ground_truth[filename] = sorted(found)

    with open(LABELS_PATH, 'w', encoding='utf-8') as f:
        json.dump(ground_truth, f, indent=2)
    print(f"Labels saved to: {LABELS_PATH}")

# Summary: number of images carrying each chosen class as a positive label.
print(f"\nTotal images: {len(ground_truth)}")
for class_name in CHOSEN_CLASSES:
    count = sum(1 for labels in ground_truth.values() if class_name in labels)
    print(f"  {class_name}: {count}")

## 3. Dataset Class and DataLoader

Standard ImageNet normalization is applied to every image.

In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm.notebook import tqdm
import ipywidgets as widgets
from IPython.display import display

# Use the first CUDA GPU when one is present; otherwise fall back to the CPU so
# the notebook still runs (slowly) on a runtime without a GPU instead of
# failing as soon as the model or a batch is moved to the device.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# For each Open Images class name, the list of model output indices that are
# counted as that class. Every entry is one contiguous index block, written as
# (first index, exclusive stop).
# NOTE(review): presumably the ImageNet-1k cat / dog-breed / fruit index
# ranges -- verify against the ImageNet class list.
IMAGENET_MAPPING = {
    name: list(range(first, stop))
    for name, first, stop in (
        ("Cat", 281, 286),
        ("Dog", 151, 269),
        ("Fruit", 948, 958),
    )
}

# Per-channel statistics used for the standard ImageNet normalization.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image before it reaches the network:
# resize, take the central 224x224 crop, convert to a tensor, normalize.
_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
]
IMAGENET_TRANSFORM = transforms.Compose(_preprocessing_steps)


class OpenImagesDataset(Dataset):
    """Folder of images paired with multi-label ground truth.

    Each item is ``(image_tensor, binary_labels)``: the image after
    ``IMAGENET_TRANSFORM`` and a float32 vector with one entry per class in
    ``chosen_classes`` (1.0 if the class is among the image's labels).

    Only images that have an entry in ``labels_dict`` are included. Files in
    ``data_dir`` without ground truth are skipped rather than scored as
    all-negative, which would distort the evaluation metrics.

    Args:
        data_dir: Directory that is scanned (non-recursively) for images.
        labels_dict: Maps an image file name to the list of its class names.
        chosen_classes: Class names, in the order of the label vector.
    """

    # File extensions (compared case-insensitively) treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_dir, labels_dict, chosen_classes):
        image_files = sorted(
            f for f in os.listdir(data_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        self.image_paths = [
            os.path.join(data_dir, f) for f in image_files if f in labels_dict
        ]
        self.labels_dict = labels_dict
        self.chosen_classes = chosen_classes

        skipped = len(image_files) - len(self.image_paths)
        if skipped:
            print(f"Skipped {skipped} images without ground-truth labels")
        print(f"Dataset: {len(self.image_paths)} images from {data_dir}")

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        try:
            # Context manager closes the file handle as soon as it is decoded.
            with Image.open(img_path) as img:
                image_tensor = IMAGENET_TRANSFORM(img.convert("RGB"))
        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the
            # whole run, so it is reported and replaced by a blank image.
            # The blank image is still returned with the file's real labels.
            print(f"Error reading {img_path}: {e}")
            image_tensor = torch.zeros(3, 224, 224)

        filename = os.path.basename(img_path)
        gt_labels = self.labels_dict.get(filename, [])
        binary_labels = torch.tensor(
            [1.0 if cls in gt_labels else 0.0 for cls in self.chosen_classes],
            dtype=torch.float32
        )
        return image_tensor, binary_labels


# Evaluation set: the columns of each label vector follow the key order of
# IMAGENET_MAPPING.
val_dataset = OpenImagesDataset(DATA_DIR, ground_truth, list(IMAGENET_MAPPING))

# shuffle=False keeps batches in dataset order.
val_loader = DataLoader(
    val_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True
)

print(f"DataLoader: {len(val_loader)} batches")

## 4. Model Loading and Inference

Load ResNet50, run all images through it, store the probabilities.

In [None]:
print("Loading pretrained ResNet50...")
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
model = model.to(DEVICE)
model.eval()  # inference mode for layers such as batch norm / dropout

print(f"Running inference on {len(val_dataset)} images...")
all_probs = []   # one (batch, n_outputs) probability array per batch
all_labels = []  # one (batch, n_classes) label array per batch
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in tqdm(val_loader, desc="Inference"):
        logits = model(images.to(DEVICE, non_blocking=True))
        # Softmax over the class dimension turns logits into probabilities.
        all_probs.append(logits.softmax(dim=1).cpu().numpy())
        all_labels.append(labels.numpy())

# Stack the per-batch arrays row-wise into one array per quantity.
probs = np.concatenate(all_probs, axis=0)
true_labels = np.concatenate(all_labels, axis=0)
print(f"Done. Probs: {probs.shape}, Labels: {true_labels.shape}")

## 5. Metrics with Variable Threshold

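An image is marked positive for a class if the model's probability for that class (the highest softmax probability among the ImageNet IDs mapped to it) is at least the threshold T.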

- **Accuracy** — how many images were classified correctly
- **Precision** — of all images the model said were positive, how many actually were
- **Recall** — of all images that actually were positive, how many did the model find
- **F1** — balance between precision and recall

In [None]:
# Class order used for the columns of `true_labels`.
CLASS_NAMES = list(IMAGENET_MAPPING)


def compute_and_display_metrics(threshold):
    """Print per-class binary metrics for the given decision threshold.

    For every class in ``CLASS_NAMES`` the class score of an image is the
    largest entry of ``probs`` among the indices listed for that class in
    ``IMAGENET_MAPPING``. The image is predicted positive when that score is
    at least ``threshold``. Predictions are compared with the matching column
    of ``true_labels`` and the counts, accuracy, precision, recall and F1 are
    printed.

    Args:
        threshold: Minimum class score for a positive prediction.
    """
    print(f"Threshold T = {threshold:.2f}\n")

    for column, cls_name in enumerate(CLASS_NAMES):
        # Best probability over all model outputs mapped to this class.
        scores = probs[:, IMAGENET_MAPPING[cls_name]].max(axis=1)
        predicted = (scores >= threshold).astype(int)
        true_binary = true_labels[:, column].astype(int)

        acc = accuracy_score(true_binary, predicted)
        # zero_division=0: report 0 instead of warning when a ratio is 0/0.
        prec, rec, f1 = (
            scorer(true_binary, predicted, zero_division=0)
            for scorer in (precision_score, recall_score, f1_score)
        )

        print(
            f"  --- {cls_name} ---",
            f"  GT positives:  {true_binary.sum()}",
            f"  Predicted pos: {predicted.sum()}",
            f"  Accuracy:      {acc:.4f}",
            f"  Precision:     {prec:.4f}",
            f"  Recall:        {rec:.4f}",
            f"  F1:            {f1:.4f}\n",
            sep="\n",
        )

# Slider for the decision threshold T in [0, 1]. With continuous_update=False
# the metrics are recomputed when the slider is released, not while dragging.
threshold_slider = widgets.FloatSlider(
    value=0.5,
    min=0.0,
    max=1.0,
    step=0.01,
    description='T:',
    style={'description_width': 'initial'},
    continuous_update=False,
)

# Re-run the metric report every time the slider value changes.
widgets.interact(compute_and_display_metrics, threshold=threshold_slider)

## 6. Custom Images

In [None]:
import io
import urllib.request
from google.colab import files
from IPython.display import Image as IPImage

# Human-readable names for the model's output indices. They are read from the
# metadata shipped with the weights, so the table always matches the loaded
# model and no network request (which could hang or change) is needed.
imagenet_class_names = list(models.ResNet50_Weights.DEFAULT.meta["categories"])

print("Upload an image:")
uploaded_files = files.upload()

if not uploaded_files:
    print("No images uploaded.")
else:
    for filename, file_bytes in uploaded_files.items():
        print(f"\n--- {filename} ---")

        # Decode first: a file that is not a readable image is reported and
        # skipped instead of aborting the remaining uploads.
        try:
            pil_image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
        except OSError as err:
            print(f"  Skipping: could not decode image ({err})")
            continue

        display(IPImage(data=file_bytes, width=300))

        # Same preprocessing as the evaluation set, plus a batch dimension.
        img_tensor = IMAGENET_TRANSFORM(pil_image).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            probs_single = torch.softmax(model(img_tensor), dim=1).cpu().numpy()[0]

        print("\n  Top-5 predictions:")
        # argsort is ascending, so reverse it and keep the five largest.
        for rank, idx in enumerate(probs_single.argsort()[::-1][:5], 1):
            print(f"    {rank}. {imagenet_class_names[idx]:<30s}  {probs_single[idx]:.4f}")

        print("\n  Our class probabilities:")
        for cls_name, imagenet_ids in IMAGENET_MAPPING.items():
            # Class score = best probability among the outputs mapped to it.
            group_probs = probs_single[imagenet_ids]
            best_pos = group_probs.argmax()
            cls_prob = group_probs[best_pos]
            best_id = imagenet_ids[best_pos]
            print(f"    {cls_name:<8s}  {cls_prob:.4f}  (best match: {imagenet_class_names[best_id]})")