Hubertas Vindžigalskis, LSP: 2213817, ["Traffic light", "Sandal", "Castle"]

# Pasiruošimas


In [11]:
!pip install openimages
import os, glob
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from openimages.download import download_dataset
from PIL import Image
from google.colab import drive



In [12]:
drive.mount('/content/drive')

DATA_ROOT = "/content/drive/MyDrive/colab_content"
SI_ROOT = DATA_ROOT + "/SampleImages"
OI_ROOT = DATA_ROOT + "/OpenImages"
SAMPLE_LIMIT = 380
TARGET_LABELS = ["Traffic light", "Sandal", "Strawberry"]

def dataset_exists(root, labels):
    return all(os.path.exists(os.path.join(root, lbl.lower())) for lbl in labels)

if not dataset_exists(OI_ROOT, TARGET_LABELS):
    download_dataset(OI_ROOT, TARGET_LABELS, limit=SAMPLE_LIMIT)
else:
    print("Images already downloaded for all classes, skipping download.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Images already downloaded for all classes, skipping download.


# Procesoriaus ir modelio paruošimas

In [13]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

model = models.vgg19(pretrained=True).to(device)
model.eval()

cpu


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

# Dataset paruošimas

In [14]:
class_dirs = glob.glob(os.path.join(OI_ROOT, '*'))
folder_names = [os.path.basename(folder) for folder in class_dirs]
file_paths = [glob.glob(os.path.join(folder, "images", "*")) for folder in class_dirs]
all_files = [fp for sublist in file_paths for fp in sublist]
all_sample_files = glob.glob(os.path.join(SI_ROOT, "*"))

idx_to_class = {i: name for i, name in enumerate(folder_names)}
class_to_idx = {name: i for i, name in idx_to_class.items()}
print(idx_to_class)
print(class_to_idx)

{0: 'traffic light', 1: 'sandal', 2: 'strawberry'}
{'traffic light': 0, 'sandal': 1, 'strawberry': 2}


In [15]:
class Dataset(Dataset):
    def __init__(self, files, transform):
        self.files = files
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        fpath = self.files[index]
        img = Image.open(fpath)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img = self.transform(img)

        # Extract label from path
        if (os.path.basename(os.path.dirname(fpath)) == "images"):
            label = os.path.basename(os.path.dirname(os.path.dirname(fpath)))
        else:
            fname = os.path.basename(fpath)
            basename = os.path.splitext(fname)[0]
            label = basename.split('_')[0].lower()
            label = label.replace('-', ' ')
        return img, class_to_idx[label]

In [16]:
img_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

In [17]:
data = Dataset(all_files, img_transform)
loader = DataLoader(data, batch_size=32, shuffle=True, num_workers=2)

# data = Dataset(all_sample_files, img_transform)
# loader = DataLoader(data, batch_size=1, shuffle=True, num_workers=1)

# Inference ciklas

In [18]:
gt_all = []      # Ground-truth labels
pred_tl = []     # Predictions for "Traffic light" (using index 920)
pred_sd = []     # Predictions for "Sandal" (using index 774)
pred_sb = []     # Predictions for "Strawberry" (using index 483)

for images, labels in loader:
    outputs = model(images.to(device))
    for i in range(outputs.size(0)):
        # Softmax produces a probability distribution over multiple classes
        # Sigmoid works on a single class at a time
        probs = torch.softmax(outputs[i], dim=0).detach().cpu().numpy()
        pred_tl.append(probs[920])
        pred_sd.append(probs[774])
        pred_sb.append(probs[949])
        print("I920: ", probs[920], "; I774: ", probs[774], "; I949: ", probs[949])
    gt_all.extend(labels.numpy())

I920:  1.4030778e-06 ; I774:  0.00020131991 ; I949:  0.0027390278
I920:  0.13521665 ; I774:  6.0557625e-08 ; I949:  4.5308377e-07
I920:  0.96915406 ; I774:  5.044912e-07 ; I949:  8.659935e-07
I920:  6.1427286e-08 ; I774:  1.5791713e-06 ; I949:  0.0009351486
I920:  2.9398453e-07 ; I774:  9.109366e-07 ; I949:  0.95515436
I920:  6.725721e-05 ; I774:  0.007058654 ; I949:  9.454764e-06
I920:  1.4794127e-05 ; I774:  3.7223163e-05 ; I949:  0.28584936
I920:  4.248711e-09 ; I774:  2.7942962e-10 ; I949:  0.9970702
I920:  0.024385504 ; I774:  1.5903997e-07 ; I949:  3.727185e-08
I920:  5.469028e-07 ; I774:  1.0680128e-06 ; I949:  0.0068947617
I920:  1.5717232e-07 ; I774:  2.7349545e-06 ; I949:  0.3100424
I920:  4.5166784e-05 ; I774:  3.1373795e-06 ; I949:  0.7110865
I920:  0.41974765 ; I774:  9.556017e-06 ; I949:  2.357821e-06
I920:  0.77336663 ; I774:  6.0408024e-06 ; I949:  4.745289e-06
I920:  0.50076663 ; I774:  7.3392336e-08 ; I949:  3.61805e-08
I920:  1.974955e-05 ; I774:  8.176652e-07 ; I949

# Konfuzijos matrica ir matavimai

In [19]:
def display_confusion_matrix(class_idx, matrix):
    print("Class: ", idx_to_class[class_idx])
    print("-------------------")
    print("|   TP   |   FP   |")
    print("| {0:^6} | {1:^6} |".format(matrix['TP'], matrix['FP']))
    print("|--------|--------|")
    print("|   FN   |   TN   |")
    print("| {0:^6} | {1:^6} |".format(matrix['FN'], matrix['TN']))

def compute_confusion_matrix(gt, pred, cls, thresh = 0.5):
    binary_pred = (np.array(pred) >= thresh).astype(int)
    matrix = {
        'TP': np.sum((np.array(gt) == cls) & (binary_pred == 1)),
        'TN': np.sum((np.array(gt) != cls) & (binary_pred == 0)),
        'FP': np.sum((np.array(gt) != cls) & (binary_pred == 1)),
        'FN': np.sum((np.array(gt) == cls) & (binary_pred == 0)),
    }
    display_confusion_matrix(cls, matrix)
    return matrix

def calculate_metrics(TP, TN, FP, FN):
    accuracy = (TP + TN) / (TP + FP + TN + FN)
    recall = TP / (TP + FN) if (TP + FN) else 0
    precision = TP / (TP + FP) if (TP + FP) else 0
    f1 = 2 * (recall * precision) / (recall + precision) if (recall + precision) else 0
    return {'accuracy': accuracy, 'recall': recall, 'precision': precision, 'f1': f1}

def show_metrics(mets, cid):
    print("Class ", idx_to_class[cid], " metrics:")
    print("  accuracy : ", mets['accuracy'])
    print("  recall : ", mets['recall'])
    print("  precision : ", mets['precision'])
    print("  f1 : ", mets['f1'])
    print()

def show_overall(mets):
    print("All  metrics:")
    print("  accuracy : ", mets['accuracy'])
    print("  recall : ", mets['recall'])
    print("  precision : ", mets['precision'])
    print("  f1 : ", mets['f1'])

# Atliekame skaičiavimus

In [28]:
conf_tl = compute_confusion_matrix(gt_all, pred_tl, 0, thresh=0.05)  # For "Traffic light"
conf_sd = compute_confusion_matrix(gt_all, pred_sd, 1, thresh=0.05)  # For "Sandal"
conf_sb = compute_confusion_matrix(gt_all, pred_sb, 2, thresh=0.05)  # For "Strawberry"

metrics_tl = calculate_metrics(conf_tl['TP'], conf_tl['TN'], conf_tl['FP'], conf_tl['FN'])
metrics_sb = calculate_metrics(conf_sb['TP'], conf_sb['TN'], conf_sb['FP'], conf_sb['FN'])
metrics_sd = calculate_metrics(conf_sd['TP'], conf_sd['TN'], conf_sd['FP'], conf_sd['FN'])

combined_conf = {k: conf_tl[k] + conf_sb[k] + conf_sd[k] for k in ['TP','TN','FP','FN']}
metrics_all = calculate_metrics(combined_conf['TP'], combined_conf['TN'], combined_conf['FP'], combined_conf['FN'])

show_metrics(metrics_sd, 0)
show_metrics(metrics_tl, 1)
show_metrics(metrics_sb, 2)
show_overall(metrics_all)

Class:  traffic light
-------------------
|   TP   |   FP   |
|  344   |   0    |
|--------|--------|
|   FN   |   TN   |
|   36   |  681   |
Class:  sandal
-------------------
|   TP   |   FP   |
|  155   |   1    |
|--------|--------|
|   FN   |   TN   |
|  146   |  759   |
Class:  strawberry
-------------------
|   TP   |   FP   |
|  255   |   0    |
|--------|--------|
|   FN   |   TN   |
|  125   |  681   |
Class  traffic light  metrics:
  accuracy :  0.8614514608859567
  recall :  0.5149501661129569
  precision :  0.9935897435897436
  f1 :  0.6783369803063457

Class  sandal  metrics:
  accuracy :  0.9660697455230914
  recall :  0.9052631578947369
  precision :  1.0
  f1 :  0.9502762430939227

Class  strawberry  metrics:
  accuracy :  0.882186616399623
  recall :  0.6710526315789473
  precision :  1.0
  f1 :  0.8031496062992126

All  metrics:
  accuracy :  0.9032359409362237
  recall :  0.7106503298774741
  precision :  0.9986754966887417
  f1 :  0.8303964757709251
