Hubertas Vindžigalskis, LSP: 2213817, ["Traffic light", "Sandal", "Strawberry"]

# Pasiruošimas


In [14]:
!pip install openimages
import os, glob
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from openimages.download import download_dataset
from PIL import Image
from google.colab import drive



In [15]:
# Everything the notebook reads or writes lives under this Google Drive folder.
DATA_ROOT = "/content/drive/MyDrive/colab_content"
SI_ROOT = f"{DATA_ROOT}/SampleImages"   # hand-picked sample images
OI_ROOT = f"{DATA_ROOT}/OpenImages"     # destination of the Open Images download

# Passed as `limit` to download_dataset below.
SAMPLE_LIMIT = 380
TARGET_LABELS = ["Traffic light", "Sandal", "Strawberry"]

# Mount Drive so the paths above resolve.
drive.mount('/content/drive')

def dataset_exists(root, labels):
    """Return True when every label has a lower-cased entry under ``root``.

    Args:
        root: directory that holds one sub-folder per class.
        labels: iterable of class labels; each is lower-cased before lookup.

    Returns:
        True if all ``root/<label.lower()>`` paths exist (also for an empty
        ``labels``), False as soon as one is missing.
    """
    for label in labels:
        class_path = os.path.join(root, label.lower())
        if not os.path.exists(class_path):
            return False
    return True

# Download only when at least one class folder is missing under OI_ROOT.
if dataset_exists(OI_ROOT, TARGET_LABELS):
    print("Images already downloaded for all classes, skipping download.")
else:
    download_dataset(OI_ROOT, TARGET_LABELS, limit=SAMPLE_LIMIT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Images already downloaded for all classes, skipping download.


# Procesoriaus ir modelio paruošimas

In [16]:
# Run on the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# `pretrained=True` is deprecated in torchvision (0.13+) in favour of the
# explicit `weights=` argument; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` used to load.
model = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1).to(device)

# Switch to inference mode (disables dropout); as the last expression of the
# cell this also displays the model architecture.
model.eval()

cuda:0


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

# Dataset paruošimas

In [17]:
# Build the class list from TARGET_LABELS instead of a directory listing:
# glob returns entries in arbitrary order, while the evaluation cells assume
# index 0/1/2 follow the TARGET_LABELS order. Class folders use the
# lower-cased label, the same convention dataset_exists() checks for.
folder_names = [label.lower() for label in TARGET_LABELS]
class_dirs = [os.path.join(OI_ROOT, name) for name in folder_names]

# Sort the file lists so the dataset contents are stable between runs.
file_paths = [sorted(glob.glob(os.path.join(folder, "images", "*"))) for folder in class_dirs]
all_files = [fp for sublist in file_paths for fp in sublist]
all_sample_files = sorted(glob.glob(os.path.join(SI_ROOT, "*")))

# Two-way mapping between integer class index and class name.
idx_to_class = dict(enumerate(folder_names))
class_to_idx = {name: i for i, name in idx_to_class.items()}
print(idx_to_class)
print(class_to_idx)

{0: 'traffic light', 1: 'sandal', 2: 'strawberry'}
{'traffic light': 0, 'sandal': 1, 'strawberry': 2}


In [18]:
class Dataset(torch.utils.data.Dataset):
    """Image dataset whose labels are derived from each file's path.

    The base class is referenced by its fully qualified name because this
    class reuses the name ``Dataset``; writing ``class Dataset(Dataset)``
    would make a re-run of the cell inherit from the previous definition of
    this very class.

    Args:
        files: sequence of image file paths.
        transform: callable applied to the RGB PIL image; its result is
            returned as the sample.
    """

    def __init__(self, files, transform):
        self.files = files
        self.transform = transform

    def __len__(self):
        return len(self.files)

    @staticmethod
    def _label_from_path(fpath):
        """Return the class name encoded in ``fpath``.

        * ``<class>/images/<file>``: the ``<class>`` folder name, unchanged.
        * any other location: the file-name part before the first ``_``,
          lower-cased, with ``-`` replaced by a space.
        """
        parent = os.path.dirname(fpath)
        if os.path.basename(parent) == "images":
            return os.path.basename(os.path.dirname(parent))
        stem = os.path.splitext(os.path.basename(fpath))[0]
        return stem.split('_')[0].lower().replace('-', ' ')

    def __getitem__(self, index):
        """Return ``(transformed_image, class_index)`` for sample ``index``.

        Raises:
            KeyError: if the label parsed from the path is not a key of the
                module-level ``class_to_idx`` mapping.
        """
        fpath = self.files[index]
        # The context manager closes the underlying file as soon as the
        # pixel data has been copied into an RGB image.
        with Image.open(fpath) as source:
            img = source.convert('RGB')
        img = self.transform(img)
        return img, class_to_idx[self._label_from_path(fpath)]

In [19]:
# Per-channel mean / std handed to Normalize; these are the values torchvision
# documents for its ImageNet-pretrained models.
CHANNEL_MEAN = (0.485, 0.456, 0.406)
CHANNEL_STD = (0.229, 0.224, 0.225)

# Preprocessing applied to every image: fixed 224x224 size, tensor conversion,
# then channel-wise normalization.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD),
]
img_transform = transforms.Compose(preprocessing_steps)

In [20]:
# Evaluate on the downloaded Open Images files, 32 images per batch.
data = Dataset(files=all_files, transform=img_transform)
loader = DataLoader(dataset=data, batch_size=32, shuffle=True, num_workers=2)

# To use custom images, uncomment this section:
# data = Dataset(all_sample_files, img_transform)
# loader = DataLoader(data, batch_size=1, shuffle=True, num_workers=1)

# Inference ciklas

In [21]:
# Output indices of the three target classes in the model's 1000-way output.
IDX_TRAFFIC_LIGHT = 920
IDX_SANDAL = 774
IDX_STRAWBERRY = 949

gt_all = []      # Ground-truth labels
pred_tl = []     # Scores for "Traffic light" (output index 920)
pred_sd = []     # Scores for "Sandal" (output index 774)
pred_sb = []     # Scores for "Strawberry" (output index 949)

# Inference only: no_grad() stops autograd from recording a graph for every
# batch, which would otherwise cost memory and time for no benefit.
with torch.no_grad():
    for images, labels in loader:
        outputs = model(images.to(device))
        # Softmax produces a probability distribution over multiple classes;
        # sigmoid scores a single class at a time, which is what the
        # per-class thresholding later on needs.
        probs = torch.sigmoid(outputs).cpu().numpy()
        # Take the column of each target class for the whole batch at once.
        pred_tl.extend(probs[:, IDX_TRAFFIC_LIGHT])
        pred_sd.extend(probs[:, IDX_SANDAL])
        pred_sb.extend(probs[:, IDX_STRAWBERRY])
        gt_all.extend(labels.numpy())

# Konfuzijos matrica ir matavimai

In [22]:
def display_confusion_matrix(class_idx, matrix):
    """Print a 2x2 confusion matrix as an ASCII table.

    Args:
        class_idx: accepted for call-site symmetry; not used by the function.
        matrix: mapping with the keys 'TP', 'FP', 'FN' and 'TN'.
    """
    border = "-------------------"
    value_row = "| {0:^6} | {1:^6} |"
    table = [
        border,
        "|   TP   |   FP   |",
        value_row.format(matrix['TP'], matrix['FP']),
        "|--------|--------|",
        "|   FN   |   TN   |",
        value_row.format(matrix['FN'], matrix['TN']),
        border,
    ]
    print("\n".join(table))

def compute_confusion_matrix(gt, pred, cls, thresh = 0.5):
    """Count one-vs-rest confusion-matrix cells for class ``cls``.

    Args:
        gt: sequence of ground-truth class indices.
        pred: sequence of scores for ``cls``, aligned with ``gt``.
        cls: class index treated as the positive class.
        thresh: a score ``>= thresh`` counts as a positive prediction.

    Returns:
        dict with the counts under the keys 'TP', 'TN', 'FP' and 'FN'.
    """
    is_target = np.array(gt) == cls
    said_yes = np.array(pred) >= thresh
    said_no = ~said_yes

    counts = {}
    counts['TP'] = np.sum(is_target & said_yes)
    counts['TN'] = np.sum(~is_target & said_no)
    counts['FP'] = np.sum(~is_target & said_yes)
    counts['FN'] = np.sum(is_target & said_no)
    return counts

def calculate_metrics(TP, TN, FP, FN):
    """Derive accuracy, recall, precision and F1 from confusion-matrix counts.

    Every ratio whose denominator is zero is reported as 0. This includes
    accuracy, so an empty evaluation set yields all-zero metrics instead of a
    division by zero.

    Args:
        TP: number of true positives.
        TN: number of true negatives.
        FP: number of false positives.
        FN: number of false negatives.

    Returns:
        dict with the keys 'accuracy', 'recall', 'precision' and 'f1'.
    """
    def _ratio(numerator, denominator):
        # Shared zero-denominator guard for all four metrics.
        return numerator / denominator if denominator else 0

    accuracy = _ratio(TP + TN, TP + FP + TN + FN)
    recall = _ratio(TP, TP + FN)
    precision = _ratio(TP, TP + FP)
    f1 = _ratio(2 * (recall * precision), recall + precision)

    return {'accuracy': accuracy, 'recall': recall, 'precision': precision, 'f1': f1}

def show_metrics(mets, cid):
    """Print the four metrics of one class, followed by a blank line.

    Args:
        mets: mapping with the keys 'accuracy', 'recall', 'precision', 'f1'.
        cid: class index; accepted but not used.
    """
    show_overall(mets)
    print()

def show_overall(mets):
    """Print accuracy, recall, precision and f1 from ``mets``, one per line."""
    for metric_name in ('accuracy', 'recall', 'precision', 'f1'):
        print("  {} : ".format(metric_name), mets[metric_name])

# Atliekame skaičiavimus

In [23]:
# Decision threshold applied to every per-class sigmoid score.
SCORE_THRESHOLD = 0.9

# Class name -> scores collected for that class during inference. Ground-truth
# indices are looked up by name through class_to_idx instead of assuming
# 0/1/2, so scores can never be compared against another class's labels;
# an unknown name fails loudly with a KeyError.
scores_by_class = {
    'traffic light': pred_tl,
    'sandal': pred_sd,
    'strawberry': pred_sb,
}

conf_by_class = {}
metrics_by_class = {}
for name, scores in scores_by_class.items():
    conf = compute_confusion_matrix(gt_all, scores, class_to_idx[name], thresh=SCORE_THRESHOLD)
    conf_by_class[name] = conf
    metrics_by_class[name] = calculate_metrics(conf['TP'], conf['TN'], conf['FP'], conf['FN'])

# Per-class names kept for any later cell that refers to them.
conf_tl, conf_sd, conf_sb = (conf_by_class[name] for name in scores_by_class)
metrics_tl, metrics_sd, metrics_sb = (metrics_by_class[name] for name in scores_by_class)

# Overall metrics are computed from the cell-wise sum of the per-class matrices.
combined_conf = {k: sum(conf[k] for conf in conf_by_class.values()) for k in ['TP','TN','FP','FN']}
metrics_all = calculate_metrics(combined_conf['TP'], combined_conf['TN'], combined_conf['FP'], combined_conf['FN'])

for name in scores_by_class:
    cid = class_to_idx[name]
    print("Class ", name, " metrics:")
    display_confusion_matrix(cid, conf_by_class[name])
    show_metrics(metrics_by_class[name], cid)
print("All  metrics:")
show_overall(metrics_all)

Class  traffic light  metrics:
-------------------
|   TP   |   FP   |
|  380   |  147   |
|--------|--------|
|   FN   |   TN   |
|   0    |  534   |
-------------------
  accuracy :  0.8614514608859567
  recall :  1.0
  precision :  0.7210626185958254
  f1 :  0.8379272326350606

Class  sandal  metrics:
-------------------
|   TP   |   FP   |
|  296   |  197   |
|--------|--------|
|   FN   |   TN   |
|   5    |  563   |
-------------------
  accuracy :  0.8096135721017907
  recall :  0.9833887043189369
  precision :  0.6004056795131846
  f1 :  0.7455919395465996

Class  strawberry  metrics:
-------------------
|   TP   |   FP   |
|  379   |   41   |
|--------|--------|
|   FN   |   TN   |
|   1    |  640   |
-------------------
  accuracy :  0.9604147031102733
  recall :  0.9973684210526316
  precision :  0.9023809523809524
  f1 :  0.9475

All  metrics:
  accuracy :  0.8771599120326736
  recall :  0.9943449575871819
  precision :  0.7326388888888888
  f1 :  0.8436625349860056
