Hubertas Vindžigalskis, LSP: 2213817, ["Traffic light", "Sandal", "Strawberry"]

# Pasiruošimas


In [None]:
!pip install openimages
import os, glob
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from openimages.download import download_dataset
from PIL import Image
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive; DATA_ROOT below lives under this mount.
drive.mount('/content/drive')

# Directory that is checked for, and receives, the downloaded class folders.
DATA_ROOT = "/content/drive/MyDrive/OpenImages"
# Forwarded as the `limit` argument of download_dataset
# (presumably a cap on images per class - confirm against the openimages docs).
SAMPLE_LIMIT = 380
# Class names requested from download_dataset; dataset_exists looks for the
# lower-cased form of each name as an entry under DATA_ROOT.
TARGET_LABELS = ["Traffic light", "Sandal", "Strawberry"]

def dataset_exists(root, labels):
    """Return True when every label has a lower-cased entry under *root*.

    Args:
        root: Directory that is expected to contain one entry per label.
        labels: Iterable of class names; each is lower-cased before lookup.

    Returns:
        True if all entries exist (also for an empty *labels*), else False.
    """
    for label in labels:
        candidate = os.path.join(root, label.lower())
        if not os.path.exists(candidate):
            # One missing entry is enough to report the dataset as absent.
            return False
    return True

# Download only when at least one class entry is missing under DATA_ROOT.
if dataset_exists(DATA_ROOT, TARGET_LABELS):
    print("Images already downloaded for all classes, skipping download.")
else:
    download_dataset(DATA_ROOT, TARGET_LABELS, limit=SAMPLE_LIMIT)

# Procesoriaus ir modelio paruošimas

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# `pretrained=True` is deprecated in torchvision (>= 0.13) in favour of the
# explicit `weights=` enum; IMAGENET1K_V1 is the checkpoint the old flag loaded.
model = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1).to(device)
# The model is only used for inference below, so put it in evaluation mode.
model.eval()

# Dataset paruošimas

In [None]:
# Derive the class folders from TARGET_LABELS instead of globbing DATA_ROOT:
# glob returns entries in arbitrary order and would also pick up any unrelated
# file or folder stored there, while the evaluation code expects a fixed
# label -> index assignment. Folder names are the lower-cased labels, the same
# convention dataset_exists relies on.
folder_names = [lbl.lower() for lbl in TARGET_LABELS]
class_dirs = [os.path.join(DATA_ROOT, name) for name in folder_names]
# Sorted so the file order is reproducible between runs.
file_paths = [sorted(glob.glob(os.path.join(folder, "images", "*"))) for folder in class_dirs]
all_files = [fp for sublist in file_paths for fp in sublist]

# Index i corresponds to TARGET_LABELS[i].
idx_to_class = dict(enumerate(folder_names))
class_to_idx = {name: i for i, name in idx_to_class.items()}
print(idx_to_class)
print(class_to_idx)

In [6]:
class Dataset(torch.utils.data.Dataset):
    """Image dataset that derives each label from the file's grandparent folder.

    The base class is referenced by its fully qualified name because this class
    reuses the name ``Dataset``: with ``class Dataset(Dataset)`` every re-run of
    the notebook cell would subclass the previous definition of this class
    instead of the torch base class.

    Args:
        files: Sequence of image paths laid out as ``<class>/<subdir>/<file>``.
        transform: Callable applied to the RGB ``PIL.Image`` of each sample.
    """

    def __init__(self, files, transform):
        self.files = files
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.files)

    def __getitem__(self, index):
        """Return ``(transformed_image, class_index)`` for the given position.

        Raises:
            KeyError: If the grandparent folder name is not in ``class_to_idx``.
        """
        fpath = self.files[index]
        # convert() loads the pixel data and returns a new image, so the file
        # handle can be released as soon as the conversion is done.
        with Image.open(fpath) as raw:
            img = raw.convert('RGB')
        img = self.transform(img)

        # Extract label from path: <class>/<subdir>/<file> -> <class>
        lbl = os.path.basename(os.path.dirname(os.path.dirname(fpath)))
        return img, class_to_idx[lbl]

In [7]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel (the constants look like the usual ImageNet
# statistics - confirm against the torchvision model documentation).
img_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

In [8]:
# Wrap the collected file list and serve it in shuffled batches of 32
# using two worker processes.
data = Dataset(files=all_files, transform=img_transform)
loader = DataLoader(
    dataset=data,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

# Inference ciklas

In [9]:
gt_all = []      # Ground-truth labels
pred_tl = []     # Predictions for "Traffic light" (using index 920)
pred_sd = []     # Predictions for "Sandal" (using index 774)
pred_sb = []     # Predictions for "Strawberry" (using index 949)

# Pure inference: disable autograd so no computation graph is kept per batch.
with torch.no_grad():
    for images, labels in loader:
        outputs = model(images.to(device))
        # Softmax over the class dimension of the whole batch at once;
        # row i equals the per-sample softmax of outputs[i].
        probs = torch.softmax(outputs, dim=1).cpu().numpy()
        pred_tl.extend(probs[:, 920])
        pred_sd.extend(probs[:, 774])
        pred_sb.extend(probs[:, 949])
        gt_all.extend(labels.numpy())

# Konfuzijos matrica ir matavimai

In [12]:
def display_confusion_matrix(class_idx, matrix):
    """Print a 2x2 confusion table for one class.

    Args:
        class_idx: Key into the module-level ``idx_to_class`` mapping.
        matrix: Mapping with the keys 'TP', 'FP', 'FN' and 'TN'.
    """
    print("Class: ", idx_to_class[class_idx])

    # Each section: separator line, header line, then the two counts to show.
    sections = (
        ("-------------------", "|   TP   |   FP   |", 'TP', 'FP'),
        ("|--------|--------|", "|   FN   |   TN   |", 'FN', 'TN'),
    )
    for rule, header, left, right in sections:
        print(rule)
        print(header)
        print("| {0:^6} | {1:^6} |".format(matrix[left], matrix[right]))

def compute_confusion_matrix(gt, pred, cls, thresh=0.5):
    """Build, print and return the one-vs-rest confusion counts for *cls*.

    Args:
        gt: Ground-truth class indices, one per sample.
        pred: Scores for *cls*, one per sample, aligned with *gt*.
        cls: Class index treated as the positive class.
        thresh: A score greater than or equal to this counts as positive.

    Returns:
        Dict with the keys 'TP', 'TN', 'FP' and 'FN'.
    """
    truth = np.array(gt)
    is_cls = truth == cls
    not_cls = truth != cls
    flagged = np.array(pred) >= thresh
    unflagged = np.logical_not(flagged)

    matrix = {
        'TP': np.sum(is_cls & flagged),
        'TN': np.sum(not_cls & unflagged),
        'FP': np.sum(not_cls & flagged),
        'FN': np.sum(is_cls & unflagged),
    }
    display_confusion_matrix(cls, matrix)
    return matrix

def calculate_metrics(TP, TN, FP, FN):
    """Compute accuracy, recall, precision and F1 from confusion counts.

    Every ratio falls back to 0 when its denominator is zero, so an empty
    confusion matrix yields all-zero metrics instead of a division error.

    Args:
        TP: Number of true positives.
        TN: Number of true negatives.
        FP: Number of false positives.
        FN: Number of false negatives.

    Returns:
        Dict with the keys 'accuracy', 'recall', 'precision' and 'f1'.
    """
    total = TP + FP + TN + FN
    # Guarded like the other metrics: no samples means no defined accuracy.
    accuracy = (TP + TN) / total if total else 0
    recall = TP / (TP + FN) if (TP + FN) else 0
    precision = TP / (TP + FP) if (TP + FP) else 0
    f1 = 2 * (recall * precision) / (recall + precision) if (recall + precision) else 0
    return {'accuracy': accuracy, 'recall': recall, 'precision': precision, 'f1': f1}

def show_metrics(mets, cid):
    """Print the four metrics of one class, followed by a blank line.

    Args:
        mets: Mapping with the keys 'accuracy', 'recall', 'precision', 'f1'.
        cid: Key into the module-level ``idx_to_class`` mapping.
    """
    print("Class ", idx_to_class[cid], " metrics:")
    for key in ('accuracy', 'recall', 'precision', 'f1'):
        print("  " + key + " : ", mets[key])
    print()

def show_overall(mets):
    """Print the four aggregate metrics under an "All  metrics:" heading.

    Args:
        mets: Mapping with the keys 'accuracy', 'recall', 'precision', 'f1'.
    """
    print("All  metrics:")
    for key in ('accuracy', 'recall', 'precision', 'f1'):
        print(f"  {key} : ", mets[key])

# Atliekame skaičiavimus

In [None]:
# Resolve each class index from its folder name instead of hard-coding 0/1/2,
# so the scores are always compared against, and printed under, the class they
# belong to. The keys of class_to_idx are the lower-cased class folder names.
idx_tl = class_to_idx["traffic light"]
idx_sd = class_to_idx["sandal"]
idx_sb = class_to_idx["strawberry"]

conf_tl = compute_confusion_matrix(gt_all, pred_tl, idx_tl, thresh=0.05)  # For "Traffic light"
conf_sd = compute_confusion_matrix(gt_all, pred_sd, idx_sd, thresh=0.05)  # For "Sandal"
conf_sb = compute_confusion_matrix(gt_all, pred_sb, idx_sb, thresh=0.05)  # For "Strawberry"

metrics_tl = calculate_metrics(conf_tl['TP'], conf_tl['TN'], conf_tl['FP'], conf_tl['FN'])
metrics_sd = calculate_metrics(conf_sd['TP'], conf_sd['TN'], conf_sd['FP'], conf_sd['FN'])
metrics_sb = calculate_metrics(conf_sb['TP'], conf_sb['TN'], conf_sb['FP'], conf_sb['FN'])

# Overall metrics are computed from the summed one-vs-rest counts of all classes.
combined_conf = {k: conf_tl[k] + conf_sb[k] + conf_sd[k] for k in ['TP','TN','FP','FN']}
metrics_all = calculate_metrics(combined_conf['TP'], combined_conf['TN'], combined_conf['FP'], combined_conf['FN'])

# Each metrics dict is shown with the index of the class it was computed for
# (previously the sandal and traffic-light results were printed under each
# other's index).
show_metrics(metrics_tl, idx_tl)
show_metrics(metrics_sd, idx_sd)
show_metrics(metrics_sb, idx_sb)
show_overall(metrics_all)