1 - as laboratorinis darbas (individualus)

Nojus Džiaugys s2110552

Pasirinktos klases: Spider, Sushi, Table

Modelis: resnet50

version 1.0

# Setup
## Imports

In [53]:
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.transforms import transforms
from tqdm import tqdm as tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

## Image download
Should only be run once

In [14]:
import os
from openimages.download import download_dataset

data_dir_base = "data"
number_for_samples = 1000
classes = ["Bee", "Cat", "Dog"]

i = 1
while os.path.exists(f"{data_dir_base}{i}"):
    i += 1    
os.makedirs(f"{data_dir_base}{i}")

print("Download started...")
download_dataset(f"{data_dir_base}{i}", classes, limit=number_for_samples)

Download started...


2024-03-05  14:26:54 INFO Downloading 989 train images for class 'bee'
 43%|████▎     | 424/989 [00:15<00:21, 26.66it/s]


KeyboardInterrupt: 

## Use cuda instead of cpu

In [114]:
def use_device():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    return device

## global variables

In [123]:
BATCH_SIZE = 128
CLASS_COUNT = 3
DATA_DIR = "data1"

## Model setup
this set up the resnet50 pretrained model

In [116]:
# get the device used for calculations
device = use_device()

# by recommendation, this uses default weights
default_weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=default_weights)

num_features_in_fc_layer = model.fc.in_features

preprocess = default_weights.transforms()

model.fc = nn.Linear(num_features_in_fc_layer, CLASS_COUNT)
# model is sent to calculation device
model = model.to(device)

Using device: cuda:0


another resnet50 setup

In [149]:
model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_resnet50', pretrained=True)
utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_convnets_processing_utils')

num_features_in_fc_layer = model.fc.in_features
model.fc = nn.Linear(num_features_in_fc_layer, CLASS_COUNT)

device = use_device()
model.eval().to(device)

Using cache found in C:\Users\ndzia/.cache\torch\hub\NVIDIA_DeepLearningExamples_torchhub


Using device: cuda:0


Using cache found in C:\Users\ndzia/.cache\torch\hub\NVIDIA_DeepLearningExamples_torchhub


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layers): Sequential(
    (0): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d

Third way to setup resnet50  model

In [155]:
model = torchvision.models.resnet50(pretrained = True).to(use_device())
model.eval()

Using device: cuda:0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# Helper functions
## Calculate F1

In [156]:
def calculate_metrics(true_classes, predicted_classes, class_count, threshold):
    # sets up counters for confusion matrix quarters. Tn is not used for f1
    tp_count = []
    fp_count = []
    fn_count = []

    for _ in range(class_count):
        tp_count.append(0)
        fp_count.append(0)
        fn_count.append(0)
    
    # calculates, confusion matrix quarter counters
    for i in range(len(true_classes)):
        if  predicted_classes[i] == true_classes[i]:
            tp_count[true_classes[i]] += 1
        else:
            fp_count[predicted_classes[i]] += 1
            fn_count[true_classes[i]] += 1

    # calculates precision, recall and f1 for each class
    precision = []
    recall = []
    f1 = []
    
    for i in range(len(class_count)):
        tp, fp, fn = tp_count[i], fp_count[i], fn_count[i]

        precision.append(tp / (tp + fp) if (tp + fp) > 0 else 0)
        recall.append(tp / (tp + fn) if (tp + fn) > 0 else 0)

        p, r = precision[i], recall[i]
        f1.append(2 * p * r / (p + r) if (p + r) > 0 else 0)

    # returns average f1
    return sum(f1) / len(f1)


## Data loading

In [118]:
std = [0.2391, 0.2312, 0.2300]
mean = [0.4412, 0.4161, 0.3800]

image_transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[:3, :, :]),  # Keep only the first three channels (RGB)
    transforms.Normalize(torch.Tensor(mean), torch.Tensor(std))
])

In [139]:
dataset = torchvision.datasets.ImageFolder(root = DATA_DIR, transform = image_transformer)

dataset_loader = torch.utils.data.DataLoader(dataset, batch_size = BATCH_SIZE, shuffle = False)

# Main calculation

In [151]:
def main_calculation(model, dataset_loader, threshold):
    
    device = use_device()
    
    all_probabilities = []
    all_labels = []
    all_outputs = []
    all_probabilities = []



    with torch.no_grad():
        # for each image, unpairs image and label
        for image_batch, label_batch in dataset_loader:
            # sends image and label to device
            image_batch = image_batch.to(device)
            label_batch = label_batch.to(device)

            # MAIN PART calculates output and probability
            outputs = model(image_batch)

            #translates to probabilities
            probabilities = F.softmax(outputs, dim=1)

            # adds outputs, probabilities, labels to lists
            all_outputs.extend(outputs.cpu().numpy())
            all_probabilities.extend(probabilities.cpu().numpy())
            all_labels.extend(label_batch.cpu().numpy())
    
    # magic line
    all_predictions = [np.argmax(all_probabilities[i]) for i in range(len(all_probabilities))]
    print(f"All labels | outputs | probabilities | predictions:")
    # for item in zip(all_labels, all_outputs,  all_predictions, all_probabilities):
    #     print(f"L: {item[0]}, O: {item[1]}")
    #     print(f"P: {item[2]}, P: {item[3]}\n")
    print(all_labels, "\n", all_predictions)
    #calculates accuracy
    accuracy = accuracy_score(all_labels, all_predictions)

    # calculates f1
    f1 = calculate_f1_score(all_labels, all_predictions, CLASS_COUNT)

    #calculates precision and recall with macro average
    recall = recall_score(all_labels, all_predictions, average="macro", zero_division=0)
    precision = precision_score(all_labels, all_predictions, average="macro", zero_division=0)


    # returns accuracy, precision, recall and f1 of the model
    print(f"With Threshold: {threshold}\nAccuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")



## Main calculation call

In [None]:
main_calculation(model, dataset_loader, 0.30)

# Version 1.1

## Setup

In [70]:
# Reikalingi paketai
import torch, torchvision
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
import numpy as np
import pprint


### Commons

#### Adding a device

In [3]:
def use_device():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    return device

#### Global variables

In [89]:
CHOSEN_CLASSES = sorted(["Jellyfish", "Snail", "Isopod"])
THRESHOLD = 0.7
BATCH_SIZE = 10
DATA_DIR = "data3"

### Download images

should only be run once, or when new classes get added

In [40]:
import os
from openimages.download import download_dataset

data_dir_base = "data"
number_for_samples = 1000
i = 1
while os.path.exists(f"{data_dir_base}{i}"):
    i += 1    
os.makedirs(f"{data_dir_base}{i}")

print("Download started...")
download_dataset(f"{data_dir_base}{i}", CHOSEN_CLASSES, limit=number_for_samples)

Download started...


2024-03-05  14:34:04 INFO Downloading 379 train images for class 'jellyfish'
100%|██████████| 379/379 [00:15<00:00, 24.62it/s]
2024-03-05  14:34:20 INFO Downloading 414 train images for class 'snail'
100%|██████████| 414/414 [00:14<00:00, 29.18it/s]
2024-03-05  14:34:34 INFO Downloading 72 train images for class 'isopod'
100%|██████████| 72/72 [00:03<00:00, 21.26it/s]
2024-03-05  14:34:39 INFO Downloading 23 validation images for class 'jellyfish'
100%|██████████| 23/23 [00:01<00:00, 11.54it/s]
2024-03-05  14:34:41 INFO Downloading 31 validation images for class 'snail'
100%|██████████| 31/31 [00:02<00:00, 15.05it/s]
2024-03-05  14:34:43 INFO Downloading 5 validation images for class 'isopod'
100%|██████████| 5/5 [00:01<00:00,  3.96it/s]
2024-03-05  14:34:48 INFO Downloading 54 test images for class 'jellyfish'
100%|██████████| 54/54 [00:03<00:00, 16.59it/s]
2024-03-05  14:34:51 INFO Downloading 100 test images for class 'snail'
100%|██████████| 100/100 [00:04<00:00, 20.03it/s]
2024-03

{'jellyfish': {'images_dir': 'data3\\jellyfish\\images'},
 'snail': {'images_dir': 'data3\\snail\\images'},
 'isopod': {'images_dir': 'data3\\isopod\\images'}}

### Model setup

#### Model itself

In [54]:
device = use_device()
model = torchvision.models.resnet50(weights=ResNet50_Weights.DEFAULT).to(device)

Using device: cuda:0


#### Image transformer for model

In [55]:
all_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

#### Run model
for one image

In [62]:
# Vaizdo nuskaitymas iš disko
image = Image.open('data1\spider\images\\0a03dbf1a69f2bec.jpg')

# Modelio paleidimas
output = model(all_transforms(image).unsqueeze(0).to(device))
output = output.squeeze().cpu()
# idx = torch.argmax(output)
# print(torch.argmax(output))


classes = open('general_classification.txt', 'r').readlines()
#chatgpt
probabilities = torch.nn.functional.softmax(output, dim=0)
for class_name in CHOSEN_CLASSES:
    class_idx = classes.index((class_name + '\n').lower())
    class_prob = probabilities[class_idx]
    print(f"Class: {class_name}, Probability: {class_prob.item() * 100:.2f}%")


Class: Isopod, Probability: 0.95%
Class: Jellyfish, Probability: 0.06%
Class: Snail, Probability: 0.03%


### Metrics

#### Data loader
makes a dataset of all images

In [63]:
dataset = torchvision.datasets.ImageFolder(root = DATA_DIR, transform = all_transforms)
dataset_loader = torch.utils.data.DataLoader(dataset, batch_size = BATCH_SIZE, shuffle = False)

#### Calculates metrics
based on predictions and actual truths

In [87]:
def calculate_metrics(ground_truth, predictions, threshold = THRESHOLD):
  # for i in range(len(predictions)):
  #   print(f"Ground truth: \n{ground_truth[i]}, Predictions: \n{predictions[i]}")
  predictions = (np.array(predictions) >= threshold).astype(np.float64)
  for i in range(len(predictions)):
    print(f"Ground truth: \n{ground_truth[i]}, Predictions: \n{predictions[i]}")
  metrics_per_class = {}
  Tp = [0] * len(CHOSEN_CLASSES)
  Tn = [0] * len(CHOSEN_CLASSES)
  Fp = [0] * len(CHOSEN_CLASSES)
  Fn = [0] * len(CHOSEN_CLASSES)
  
  for batch_truths, batch_predictions in zip(ground_truth, predictions):#how many batches
    for i in range(len(batch_truths)): #batch size
      for j in range(len(batch_truths[i])):#class count
        Tp[j] += np.bitwise_and(batch_truths[i][j] == 1, batch_predictions[i][j] == 1)
        Tn[j] += np.bitwise_and(batch_truths[i][j] == 0, batch_predictions[i][j] == 0)
        Fp[j] += np.bitwise_and(batch_truths[i][j] == 0, batch_predictions[i][j] == 1)
        Fn[j] += np.bitwise_and(batch_truths[i][j] == 1, batch_predictions[i][j] == 0)


  for i in range(len(CHOSEN_CLASSES)):
    tp = Tp[i]
    tn = Tn[i]
    fp = Fp[i]
    fn = Fn[i]
    metrics_per_class[CHOSEN_CLASSES[i]] = {
      'accuracy': (tp + tn) / (tp + tn + fp + fn),
      'recall': (tp) / (tp + fn),
      'precision': (tp) / (tp + fp)
    }
    m = metrics_per_class[CHOSEN_CLASSES[i]]
    m['f1'] = 2 * (m['precision'] * m['recall']) / (m['precision'] + m['recall'])
  return metrics_per_class

#### Run model on all images

In [90]:
# Initialize lists to store predictions and ground truth labels
all_predictions = []
all_ground_truth = []

# Set the model to evaluation mode
model.eval()

# Iterate through the DataLoader
for inputs, labels in dataset_loader:
    # Move inputs to the device
    inputs = inputs.to(device)
    # Perform inference
    with torch.no_grad():
        outputs = model(inputs)

    # Apply softmax to the model's output along the specified dimension
    probabilities = torch.nn.functional.softmax(outputs, dim=1)
    
    # Convert model outputs to numpy array
    predictions = probabilities.squeeze().cpu().numpy()

    # Convert labels tensor to numpy array
    ground_truth_indices = labels.numpy()

    # Convert ground truth indices to one-hot encoded vectors
    ground_truth_one_hot = torch.nn.functional.one_hot(torch.tensor(ground_truth_indices), num_classes = len(CHOSEN_CLASSES)).numpy()
    

    # Filter predictions for chosen classes
    chosen_predictions = predictions[:, [classes.index(f"{class_name.lower()}" + "\n") for class_name in CHOSEN_CLASSES]]
    #print(chosen_predictions)
    # # Assuming 'CHOSEN_CLASSES' is a list of chosen class names
    # for class_name, class_prob in zip(CHOSEN_CLASSES, chosen_predictions[0]):
    #     print(f"Class: {class_name}, Probability: {class_prob * 100:.2f}%")

    # print(ground_truth_one_hot)
    # Append predictions and ground truth to the lists
    # print(chosen_predictions)
    # print(ground_truth_one_hot)
    all_predictions.append(chosen_predictions)
    all_ground_truth.append(ground_truth_one_hot)

# # Concatenate the lists to obtain a single array for predictions and ground truth
# all_predictions = np.concatenate(all_predictions)
# all_ground_truth = np.concatenate(all_ground_truth)

# Pad all arrays to have 10 rows
all_predictions_padded = [np.pad(x, ((0, BATCH_SIZE - len(x)), (0, 0))) for x in all_predictions]


# print (all_predictions_padded)
# for instance_predictions in all_predictions[0]:
#     for class_name, class_prob in zip(CHOSEN_CLASSES, instance_predictions):
#         print(f"Class: {class_name}, Probability: {class_prob * 100:.2f}%")
# print (all_ground_truth)
# Use calculate_metrics function
metrics = calculate_metrics(all_ground_truth, all_predictions_padded, threshold=THRESHOLD)

print(f"Threshold: {THRESHOLD}\n")
for i in metrics:
  print(f"Class: {i}")
  pprint.pprint(metrics[i])
  print('\n')

Ground truth: 
[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]], Predictions: 
[[0. 0. 0.]
 [0. 0. 0.]
 [1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Ground truth: 
[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]], Predictions: 
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Ground truth: 
[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]], Predictions: 
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Ground truth: 
[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]], Predictions: 
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Ground truth: 
[[1 0 0]
 [1 0 0]