# **Deep Learning Methods. First task.**
Author: Armantas Pikšrys

LSP: 2016018

Pretrained model: resnet50

## Download images

In [48]:
!pip install openimages

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [49]:
import os
from openimages.download import download_dataset

In [50]:
data_dir = "data"
image_amount = 350
classes = ["Orange", "Vase", "Ice cream"]

In [51]:
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

In [52]:
print("Downloading is starting...")
download_dataset(data_dir, classes, limit = image_amount)

Downloading is starting...


100%|██████████| 350/350 [00:21<00:00, 16.60it/s]
100%|██████████| 350/350 [00:20<00:00, 16.74it/s]
100%|██████████| 350/350 [00:20<00:00, 17.01it/s]


{'orange': {'images_dir': 'data/orange/images'},
 'vase': {'images_dir': 'data/vase/images'},
 'ice cream': {'images_dir': 'data/ice cream/images'}}

## Custom Dataset 

In [84]:
import torch
import torchvision
from torchvision import datasets
from torchvision.io import read_image
from torch.utils.data import Dataset
import numpy as np
from glob import glob
import PIL
from PIL import Image
import matplotlib.pyplot as plt
from skimage.color import label2rgb

In [85]:
# Creation of Custom dataset

class CustomDataset(Dataset):
    def __init__(self, images_dir, transforms = None):
        self.images_dir = images_dir
        self.transforms = transforms

        self.class1_files = glob(images_dir + "/{}/images/*.jpg".format(classes[0].lower()))
        self.class2_files = glob(images_dir + "/{}/images/*.jpg".format(classes[1].lower()))
        self.class3_files = glob(images_dir + "/{}/images/*.jpg".format(classes[2].lower()))
        
        self.class1 = len(self.class1_files)
        self.class2 = len(self.class2_files)
        self.class3 = len(self.class3_files)

        self.files = self.class1_files + self.class2_files + self.class3_files

        # Set labels for each file
        self.labels = np.zeros(len(self.files))
        self.labels[self.class1:(self.class1 + self.class2)] = 1
        self.labels[(self.class1 + self.class2):] = 2

        self.labels = [int(x) for x in self.labels]

        # Shuffle
        self.order =  [x for x in np.random.permutation(len(self.labels))]

        self.files = [self.files[x] for x in self.order]
        self.labels = [self.labels[x] for x in self.order]


    def __len__(self):
        return (len(self.labels))


    def __getitem__(self, i):
        file_path = self.files[i]
        image = Image.open(file_path)
        img_tensor = image.convert('RGB')
        
        if self.transforms is not None:
           img_tensor = self.transforms(img_tensor)

        label = torch.tensor(self.labels[i])

        return (img_tensor, label)   

In [86]:
# Test dataset
dataset = CustomDataset("./data")
print(dataset.files)


['./data/ice cream/images/235a6a82093ad673.jpg', './data/orange/images/324594b0908bdf40.jpg', './data/ice cream/images/40db28f2e872ff65.jpg', './data/vase/images/34649748b91c50de.jpg', './data/vase/images/5fcfcf4464f1cc86.jpg', './data/orange/images/5273e8ca9183c2ba.jpg', './data/ice cream/images/2b9f2464d2624ef7.jpg', './data/ice cream/images/393d88d7b3d2e77d.jpg', './data/orange/images/529bf35cd6a22af0.jpg', './data/vase/images/62e945e8a330d104.jpg', './data/orange/images/3b14f373ea906af1.jpg', './data/ice cream/images/32be9c522e345093.jpg', './data/orange/images/34bc2d81d7544c4e.jpg', './data/orange/images/0686675712ef0233.jpg', './data/vase/images/0715fb7241b39e00.jpg', './data/vase/images/22eea896111b28b6.jpg', './data/vase/images/0d710bbb9a378312.jpg', './data/ice cream/images/0a0b258b4323d159.jpg', './data/vase/images/5db0b7ee9c34818a.jpg', './data/orange/images/2bd7c6482f41ac8d.jpg', './data/ice cream/images/07b5b83dd3799710.jpg', './data/vase/images/4414801cb8ad7f47.jpg', './d

## Load model

In [87]:
from torchvision import models

In [88]:
# Get pretrained weights
weights = models.ResNet50_Weights.DEFAULT

model = models.resnet50(weights = weights)

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.to(device)

# Set to eval mode
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Load Data using DataLoader

In [89]:
from torch.utils.data import DataLoader
from torchvision import transforms

In [90]:
# Predefined transformation taken from resnet50 weights
image_transform = weights.transforms()

# Define the dataset
dataset = CustomDataset("./data", transforms = image_transform)

batch_size = 64
num_workers = 5

# Define the data loader for the test set
data_loader = DataLoader(dataset, batch_size = batch_size, num_workers = num_workers)

## Evaluate data

In [91]:
model_classes = weights.meta["categories"]

evaluation_classes_ids = [model_classes.index(evaluation_class.lower()) for evaluation_class in classes]

evaluation_results = []

with torch.no_grad():
    for inputs, labels in data_loader:
        # Move inputs and labels to the GPU
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        probs = torch.sigmoid(outputs)

        for img_id, class_probs in enumerate(probs):
            selected_probs = [class_probs[index].item() for index in evaluation_classes_ids]
            actual_class = labels[img_id].item()
            evaluation_results.append((selected_probs, actual_class))

print(evaluation_classes_ids)
print(evaluation_results)

[950, 883, 928]
[([0.9743726253509521, 0.9849783182144165, 0.5124088525772095], 1), ([0.5800768733024597, 0.884231448173523, 0.5292780995368958], 1), ([0.9978398084640503, 0.4543288052082062, 0.44341161847114563], 0), ([0.9992528557777405, 0.361001580953598, 0.47084513306617737], 0), ([0.9932804703712463, 0.4721100330352783, 0.7313205599784851], 0), ([0.617060124874115, 0.9936366081237793, 0.4561808407306671], 1), ([0.5283852219581604, 0.8351368308067322, 0.9991395473480225], 2), ([0.9965320825576782, 0.34465956687927246, 0.4086706042289734], 0), ([0.9898654818534851, 0.5269505977630615, 0.49930277466773987], 0), ([0.4529173970222473, 0.6796741485595703, 0.38792064785957336], 1), ([0.6140051484107971, 0.6461824178695679, 0.9990560412406921], 2), ([0.6636143326759338, 0.9998268485069275, 0.350982129573822], 1), ([0.9955785870552063, 0.3017370402812958, 0.1986137479543686], 0), ([0.5744042992591858, 0.993988037109375, 0.5049141645431519], 1), ([0.6418386697769165, 0.9976275563240051, 0.3

## Analyze with thresholds

In [92]:
thresholds = [0.8, 0.8, 0.8]
binary_evaluation_results = []

def convert_to_binary_list(scores, thresholds):
    binary_list = []
    for i in range(len(scores)):
        if scores[i] > thresholds[i]:
            binary_list.append(1)
        else:
            binary_list.append(0)
    return binary_list

for index, (pred_score, label) in enumerate(evaluation_results):
    binary_evaluation_results.append((convert_to_binary_list(pred_score, thresholds), label))

print(binary_evaluation_results)

[([1, 1, 0], 1), ([0, 1, 0], 1), ([1, 0, 0], 0), ([1, 0, 0], 0), ([1, 0, 0], 0), ([0, 1, 0], 1), ([0, 1, 1], 2), ([1, 0, 0], 0), ([1, 0, 0], 0), ([0, 0, 0], 1), ([0, 0, 1], 2), ([0, 1, 0], 1), ([1, 0, 0], 0), ([0, 1, 0], 1), ([0, 1, 0], 1), ([0, 1, 0], 1), ([1, 0, 0], 0), ([0, 1, 0], 1), ([0, 1, 0], 1), ([1, 0, 1], 2), ([1, 1, 0], 0), ([0, 1, 0], 1), ([1, 0, 0], 0), ([1, 0, 0], 0), ([1, 1, 1], 2), ([0, 1, 0], 1), ([0, 1, 0], 1), ([0, 0, 1], 2), ([0, 1, 1], 1), ([1, 0, 0], 0), ([0, 0, 1], 2), ([1, 0, 0], 0), ([0, 0, 1], 2), ([0, 1, 0], 1), ([0, 0, 1], 2), ([1, 0, 0], 0), ([0, 0, 1], 2), ([0, 1, 1], 1), ([0, 1, 0], 1), ([0, 1, 0], 1), ([1, 0, 1], 0), ([0, 0, 1], 2), ([0, 1, 0], 1), ([0, 0, 1], 0), ([0, 0, 1], 2), ([0, 1, 0], 1), ([0, 1, 0], 1), ([0, 0, 0], 1), ([0, 0, 1], 2), ([0, 1, 0], 1), ([0, 1, 0], 1), ([1, 0, 0], 0), ([0, 0, 1], 2), ([1, 0, 0], 0), ([1, 0, 0], 0), ([0, 0, 1], 2), ([0, 1, 0], 1), ([1, 0, 1], 2), ([0, 1, 0], 1), ([1, 0, 0], 0), ([0, 1, 0], 1), ([1, 0, 0], 0), ([0, 0,

## Calculate Statistics

In [93]:
# First we need to calculate the confusion matrix

def get_confusion_matrix_stats(predictions):
    tp, fp, tn, fn = 0, 0, 0, 0

    for pred_classes, actual_class in predictions:
        for index, prediction in enumerate(pred_classes):
            if prediction == 1 and index == actual_class:
                tp += 1
            
            if prediction == 1 and index != actual_class:
                fp += 1

            if prediction == 0 and index == actual_class:
                fn += 1

            if prediction == 0 and index != actual_class:
                tn += 1

    return tp, fp, tn, fn

true_positives, false_positives, true_negatives, false_negatives = get_confusion_matrix_stats(binary_evaluation_results)

print("True positives: ", true_positives)
print("False positives: ", false_positives)
print("True negatives: ", true_negatives)
print("False negatives: ", false_negatives)

True positives:  1003
False positives:  147
True negatives:  1953
False negatives:  47


In [94]:
# Calculate accuracy
accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives)

# Calculate precision
precision = true_positives / (true_positives + false_positives)

# Calculate recall
recall = true_positives / (true_positives + false_negatives)

# Calculate f1 score
f1_score = 2 * ((precision * recall) / (precision + recall))

print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score : ", f1_score)

Accuracy:  0.9384126984126984
Precision:  0.8721739130434782
Recall:  0.9552380952380952
F1 Score :  0.9118181818181819


## Evaluate single image

In [96]:
file_path = './data/test/images/GettyImages-90053856-588b7aff5f9b5874ee534b04.jpg'

image = Image.open(file_path)
img_tensor = image.convert('RGB')
img_tensor = image_transform(img_tensor)
img_tensor = img_tensor.to(device)

output = model(img_tensor.unsqueeze(0))
probs = torch.sigmoid(output)

single_results = []

for img_id, class_probs in enumerate(probs):
    selected_probs = [class_probs[index].item() for index in evaluation_classes_ids]
    single_results.append(selected_probs)

print(single_results)


[[0.6925175189971924, 0.7017097473144531, 0.9988380074501038]]
