# GMM PROJECT 1 
*Lukas Elenbergas, 1712238*

Variant: **Car, Cat, Guitar**

***Task***: Using an existing pre-trained model, calculate the accuracy, precision, recall and F1 scores for 1000 images from the OpenImages dataset, loading them with an efficient data loader. The program should allow the classification threshold to be changed, and the scores must be recalculated whenever the threshold changes.
 
The data classes must be picked from [OpenImages V6](https://storage.googleapis.com/openimages/web/index.html) classification task dataset.

In [1]:
# ---------------------------------------------------
# IMPORTS
# ---------------------------------------------------
# FiftyOne package install for the Colab environment
'''!pip install fiftyone'''
import fiftyone as fo
import fiftyone.zoo as foz
import os
import torch
import numpy as np
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
import torchvision.models as models
from torch.utils.data import Dataset
from PIL import Image
# ---------------------------------------------------
# ENVIRONMENT VARIABLES
# ---------------------------------------------------
# Make CUDA kernel launches synchronous (debugging aid: errors surface at the failing call)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# Folder holding the stand-alone images used for single-image predictions
IMAGE_PATH = 'C:\\Projects\\GMM\\Images\\'
# Text file with one class name per line; read into `all_labels` further below
CLASS_PATH = 'C:\\Projects\\GMM\\Other\\imagenet_classes_modified.txt'
# ---------------------------------------------------

In [2]:
# ---------------------------------------------------
# HELPER FUNCTION
# ---------------------------------------------------
def ground_truth_array_generation(labels):
    """Encode the presence of the three tracked classes as a 0/1 vector.

    Args:
        labels: Any container supporting ``in`` (e.g. a list of label
            strings) that is checked for 'Car', 'Cat' and 'Guitar'.

    Returns:
        A NumPy array of length 3 in the fixed order [Car, Cat, Guitar],
        holding 1 where the class is present in ``labels`` and 0 otherwise.
    """
    tracked_classes = ('Car', 'Cat', 'Guitar')
    return np.array([1 if name in labels else 0 for name in tracked_classes])
# ---------------------------------------------------

In [3]:
# ---------------------------------------------------
# FIFTYONE DATASET
# ---------------------------------------------------
def create_fo_dataset(
        dataset_name: str,
        dataset_type: str,
        dataset_samp: int,
):
    """Load an OpenImages V6 subset from the FiftyOne zoo and annotate it.

    Args:
        dataset_name: Name under which FiftyOne stores the dataset.
        dataset_type: Value passed as the split argument of
            ``foz.load_zoo_dataset`` (e.g. 'test').
        dataset_samp: Maximum number of samples to load.

    Returns:
        The loaded dataset. Every sample gains a ``labels`` list with its
        positive classification labels and a ``ground_truth`` array
        produced by ``ground_truth_array_generation``.
    """
    zoo_options = dict(
        label_types='classifications',
        classes=['Car', 'Cat', 'Guitar'],
        only_matching=True,
        max_samples=dataset_samp,
        dataset_name=dataset_name,
    )
    dataset = foz.load_zoo_dataset('open-images-v6', dataset_type, **zoo_options)

    # Flatten every positive label of an image into one list so the
    # ground truth vector can be derived from simple membership checks
    for sample in dataset:
        sample['labels'] = [
            classification.label
            for classification in sample.positive_labels.classifications
        ]
        sample['ground_truth'] = ground_truth_array_generation(sample.labels)
        sample.save()

    return dataset
# ---------------------------------------------------

In [4]:
# ---------------------------------------------------
# TORCH DATASET
# ---------------------------------------------------
class ClassificationDataset(Dataset):
    """Torch ``Dataset`` wrapper around a FiftyOne sample collection.

    Items are dictionaries with the preprocessed image tensor under
    'image' and the stored label field value under 'ground_truth'.
    """

    def __init__(self, data, label_field='ground_truth'):
        """Remember the samples, cache their file paths, build preprocessing.

        Args:
            data: Sample collection; must offer ``values('filepath')`` and
                lookup by file path.
            label_field: Name of the sample field returned as ground truth.
        """
        self.samples = data
        self.label_field = label_field
        self.img_paths = self.samples.values('filepath')
        # Tensor conversion -> 1024x1024 center crop -> 256x256 resize -> normalization
        preprocessing_steps = [
            transforms.ToTensor(),
            transforms.CenterCrop((1024, 1024)),
            transforms.Resize(
                (256, 256),
                interpolation=F.InterpolationMode.BILINEAR,
                antialias=True,
            ),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
        self.transforms = transforms.Compose(preprocessing_steps)

    def __getitem__(self, idx):
        """Return the preprocessed image and ground truth at position ``idx``."""
        path = self.img_paths[idx]
        record = self.samples[path]
        # Always decode to three channels so Normalize sees an RGB tensor
        rgb_image = Image.open(path).convert('RGB')
        target = record[self.label_field]
        tensor = self.transforms(rgb_image)

        return {'image': tensor, 'ground_truth': target}

    def __len__(self):
        """Return the number of cached image paths."""
        return len(self.img_paths)

    def sample_path(self, idx):
        """Return the file path of the image at position ``idx``."""
        return self.img_paths[idx]
# ---------------------------------------------------

In [5]:
# ---------------------------------------------------
# PREP
# ---------------------------------------------------
# MODEL AND DEVICE INIT
# ---------------------------------------------------
# Run on the GPU when torch can see one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# VGG-19 with pre-trained weights, moved onto the selected device
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of `weights=` - confirm against the installed version
model = models.vgg19(pretrained=True).to(device)
# Put the model into evaluation mode; it is only used for inference below
model.eval()
# ---------------------------------------------------
# DATASET INIT
# ---------------------------------------------------
# Set to True to discard the cached FiftyOne dataset and build it again
reset_required = False
# Only delete when the dataset is actually registered: on a first run there is
# nothing to delete yet and an unconditional delete would fail
if reset_required and 'Testing_1' in fo.list_datasets():
    fo.delete_dataset('Testing_1')
# Reuse the stored dataset when available, otherwise create it (1000 test samples)
if 'Testing_1' in fo.list_datasets():
    fo_dataset = fo.load_dataset('Testing_1')
else:
    fo_dataset = create_fo_dataset('Testing_1', 'test', 1000)
# Torch-style view over the FiftyOne dataset used by the evaluation loop
testing_dataset = ClassificationDataset(fo_dataset)
# ---------------------------------------------------
# TRANSFORMATIONS FOR IMAGES
# ---------------------------------------------------
# Preprocessing for stand-alone images (applied to a PIL image before the model):
# resize to 224x224, convert to a tensor, then normalize each channel.
# Note this pipeline is separate from the one built inside ClassificationDataset.
all_transforms = transforms.Compose([
    transforms.Resize((224, 224), antialias=True),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
# ---------------------------------------------------
# GETTING THE ACTUAL LABELS FROM THE TEXT FILE
# ---------------------------------------------------
# One label per line; the list position is used as the class index when
# translating model outputs into readable labels
with open(CLASS_PATH, 'r') as fid:
    all_labels = [ln.strip() for ln in fid]
# ---------------------------------------------------
# FUNCTION TO RETURN THE READABLE LABELS
# ---------------------------------------------------
def readable_predictions(predicted_classes):
    """Translate predicted class indices into their readable labels.

    Args:
        predicted_classes: Integer class indices as a NumPy array (or
            anything ``np.asarray`` accepts). Zero-dimensional, one-element
            and multi-element inputs are all supported.

    Returns:
        A list with the ``all_labels`` entry for every index, in input
        order, or ``["none"]`` when no index was supplied.
    """
    # Flatten first: a 0-d array (single squeezed prediction), a shape-(1,)
    # array and a longer vector then all follow the same code path. Indexing
    # the label list directly with a shape-(1,) array would raise TypeError.
    class_indices = np.atleast_1d(np.asarray(predicted_classes)).ravel()
    if class_indices.size == 0:
        return ["none"]
    return [all_labels[index] for index in class_indices]
# ---------------------------------------------------
# FUNCTION TO RETURN INDICES OF PREDICTED CLASSES
# ---------------------------------------------------
def get_indices(thresholded_output):
    """Return the positions of the non-zero entries of a thresholded output.

    Args:
        thresholded_output: Integer tensor holding 1 for every class that
            passed the threshold and 0 elsewhere.

    Returns:
        A squeezed NumPy array of class indices: empty when nothing passed,
        zero-dimensional for a single hit, one-dimensional otherwise.
    """
    hit_count = thresholded_output.count_nonzero()
    # The hit_count largest values of a 0/1 tensor are exactly its ones,
    # so their top-k indices are the predicted classes
    top_hits = torch.topk(thresholded_output, hit_count)
    return top_hits.indices.cpu().numpy().squeeze()
# ---------------------------------------------------



In [6]:
# ---------------------------------------------------
# THRESHOLD AND METRICS
# ---------------------------------------------------
# Softmax probability a class must reach to count as predicted
threshold = 0.5
# Confusion counts accumulated over every image and every tracked class
TP = 0
TN = 0
FP = 0
FN = 0
# ---------------------------------------------------
# CALCULATING STATS
# ---------------------------------------------------
# Pure inference: disabling autograd avoids building a gradient graph per image
with torch.no_grad():
    for data_item in testing_dataset:
        ground_truth = data_item['ground_truth']
        # Getting the output tensor from the model and thresholding it
        input_image = data_item['image'].unsqueeze(0).to(device)
        output = model(input_image)
        output = torch.softmax(output, dim=1)
        thresholded_output = (output >= threshold).int()
        # Getting the indices of predicted classes
        predicted_classes = get_indices(thresholded_output)
        # Getting the actual labels of predicted classes
        predicted_labels = readable_predictions(predicted_classes)
        # Formatting the labels into an array comparable to the ground truth
        predicted_ground_truth = ground_truth_array_generation(predicted_labels)
        # Calculating TP, TN, FP, FN
        TP += np.sum((ground_truth == 1) & (predicted_ground_truth == 1))
        TN += np.sum((ground_truth == 0) & (predicted_ground_truth == 0))
        FP += np.sum((ground_truth == 0) & (predicted_ground_truth == 1))
        FN += np.sum((ground_truth == 1) & (predicted_ground_truth == 0))


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# A zero denominator happens with an empty dataset or when the threshold is so
# high that nothing is predicted; report 0.0 instead of failing or yielding NaN
metrics = {}
metrics['accuracy'] = _safe_ratio(TP + TN, TP + FP + TN + FN)
metrics['recall'] = _safe_ratio(TP, TP + FN)
metrics['precision'] = _safe_ratio(TP, TP + FP)
metrics['F1'] = _safe_ratio(
    2 * (metrics['precision'] * metrics['recall']),
    metrics['precision'] + metrics['recall'],
)

print(metrics)
# ---------------------------------------------------

{'accuracy': 0.807, 'recall': 0.42273180458624127, 'precision': 1.0, 'F1': 0.5942536790469516}


In [7]:
# ---------------------------------------------------
# IMAGE PREP BEFORE PASSING TO MODEL
# ---------------------------------------------------
# Decode to three channels so grayscale, palette or RGBA files still match the
# 3-channel normalization (same conversion the dataset class applies)
image = Image.open(IMAGE_PATH + '8.jpg').convert('RGB')
image = all_transforms(image).unsqueeze(0).to(device)
# ---------------------------------------------------
# LOCAL THRESHOLD
# ---------------------------------------------------
local_threshold = 0.5
# ---------------------------------------------------
# PREDICTING THE CLASS USING THE PRETRAINED MODEL
# ---------------------------------------------------
# Inference only, so no gradient graph is needed
with torch.no_grad():
    output = model(image)
output = torch.softmax(output, dim=1)
# Inclusive comparison, matching the `>=` used by the evaluation loop
thresholded_output = (output >= local_threshold).int()
predicted_classes = get_indices(thresholded_output)
predicted_labels = readable_predictions(predicted_classes)
print(predicted_labels)
# ---------------------------------------------------

['Car']
