In [1]:
import torch
import torch as F
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
import numpy as np
import cv2

import os
# Switch the working directory to ../models before the local imports below
# (presumably so that common_utils and model are importable — confirm).
# Relative paths used later in the notebook are resolved from this directory.
os.chdir("../models")
# NOTE(review): wildcard imports hide where names come from. set_seed is not
# defined in this notebook, so it presumably comes from one of these modules.
from common_utils import *
from model import *

# Fix the seed through set_seed (defined outside this notebook) —
# presumably this seeds the RNGs for reproducibility; verify.
set_seed(42)

Load test dataset

In [2]:
# Preprocessing applied to every evaluation image: convert to a tensor,
# then resize to 100x100 with antialiasing enabled.
transform2 = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((100, 100), antialias=True),
    ]
)

# Download (if needed) and open the Flowers102 data under ../data.
# NOTE(review): the variable is named `test_dataset` but the split requested
# is 'train' — confirm this is the intended evaluation split.
test_dataset = datasets.Flowers102(
    root='../data',
    split='train',
    download=True,
    transform=transform2,
)

# Serve the images one at a time, in dataset order (no shuffling).
dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

Load final model

In [3]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the architecture, then restore the saved weights into it.
model = DepthPointWiseCNN()
model_path = 'saved_models/FinalModel/cuda/best_model.pt'
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)

# Move the model to the chosen device. As the cell's last expression, the
# returned module is what the notebook displays.
model.to(device)

DepthPointWiseCNN(
  (conv_stack): Sequential(
    (0): BatchNorm2d(3, eps=1e-05, momentum=None, affine=True, track_running_stats=True)
    (1): Conv2d(3, 3, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=3)
    (2): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU()
    (4): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (7): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=64)
    (8): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
    (9): ReLU()
    (10): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (11): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (13): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1,

Evaluate final model

In [36]:
# test_model is defined outside this notebook; the code below indexes its
# result per sample (presumably one logit vector per image — confirm).
logits = test_model(model, dataloader, None, device)

# Predicted class per sample: index of the largest logit, as a plain int.
predicted_labels = [int(np.argmax(logits[idx])) for idx in range(len(logits))]

# Ground-truth class per sample: second element of each dataset item.
true_labels = [test_dataset[idx][1] for idx in range(len(test_dataset))]

# Accuracy: number of exact matches divided by the number of predictions.
acc = sum(
    1
    for idx in range(len(predicted_labels))
    if predicted_labels[idx] == true_labels[idx]
)
acc /= len(predicted_labels)
print("Accuracy: ", acc)

# Macro-averaged F1 score over the classes.
from sklearn.metrics import f1_score
f1 = f1_score(true_labels, predicted_labels, average='macro')
print("F1 score: ", f1)

# Top-5 accuracy: a sample counts as a hit when its true label is among the
# five highest-scoring classes (the last five indices of the ascending argsort).
top5 = sum(
    1
    for idx in range(len(logits))
    if true_labels[idx] in np.argsort(logits[idx])[-5:]
)
top5 /= len(logits)
print("Top-5 accuracy: ", top5)


Accuracy:  0.7637254901960784
F1 score:  0.7543372395523346
Top-5 accuracy:  0.9205882352941176


Load baseline model

In [38]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the baseline architecture, then restore its saved weights.
model = BaselineCNN()
model_path = 'saved_models/BaselineCNN/cuda/best_model.pt'
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)

# Move the model to the chosen device. As the cell's last expression, the
# returned module is what the notebook displays.
model.to(device)

BaselineCNN(
  (conv_stack): Sequential(
    (0): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU()
    (6): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fcn_stack): Sequential(
    (

Evaluate baseline model

In [39]:
# test_model is defined outside this notebook; the code below indexes its
# result per sample (presumably one logit vector per image — confirm).
logits = test_model(model, dataloader, None, device)

# Predicted class per sample: index of the largest logit, as a plain int.
predicted_labels = [int(np.argmax(logits[idx])) for idx in range(len(logits))]

# Ground-truth class per sample: second element of each dataset item.
true_labels = [test_dataset[idx][1] for idx in range(len(test_dataset))]

# Accuracy: number of exact matches divided by the number of predictions.
acc = sum(
    1
    for idx in range(len(predicted_labels))
    if predicted_labels[idx] == true_labels[idx]
)
acc /= len(predicted_labels)
print("Accuracy: ", acc)

# Macro-averaged F1 score over the classes.
from sklearn.metrics import f1_score
f1 = f1_score(true_labels, predicted_labels, average='macro')
print("F1 score: ", f1)

# Top-5 accuracy: a sample counts as a hit when its true label is among the
# five highest-scoring classes (the last five indices of the ascending argsort).
top5 = sum(
    1
    for idx in range(len(logits))
    if true_labels[idx] in np.argsort(logits[idx])[-5:]
)
top5 /= len(logits)
print("Top-5 accuracy: ", top5)


Accuracy:  0.1892156862745098
F1 score:  0.1725621325260018
Top-5 accuracy:  0.4588235294117647
