In [1]:
from torchvision.models import  ResNet101_Weights
from torchvision.datasets import ImageFolder
import torch
from torchmetrics import Accuracy
from sklearn.metrics import confusion_matrix
import sys
sys.path.append('../')
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
from models import two_conv_network
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
import numpy as np
import torch.nn as nn

In [2]:
# Load the image folder using the preprocessing transforms that ship with the
# ResNet-101 IMAGENET1K_V2 weights.
data = ImageFolder(
    '../data_retrieval/data',
    ResNet101_Weights.IMAGENET1K_V2.transforms(),
)

# First stratified split: 65 % of all samples for training, the remainder held out.
# The fixed random_state makes the partition repeatable.
train_indices, test_val_indices = train_test_split(
    np.arange(len(data.targets)),
    stratify=data.targets,
    train_size=0.65,
    random_state=21,
)

# Labels as an array so they can be fancy-indexed by the held-out indices.
targets = np.array(data.targets)
test_val_targets = targets[test_val_indices]

# Second stratified split of the held-out part: 57 % becomes the test set,
# the rest becomes the validation set.
test_indices, val_indices = train_test_split(
    test_val_indices,
    stratify=test_val_targets,
    train_size=0.57,
    random_state=21,
)

# Views onto the single underlying dataset, one per partition.
train_data = Subset(data, indices=train_indices)
val_test_data = Subset(data, indices=test_val_indices)
val_data = Subset(data, indices=val_indices)
test_data = Subset(data, indices=test_indices)

In [3]:
# Build the network, passing the number of classes known to the underlying dataset.
model = two_conv_network.TwoLayerConvNet(
    len(test_data.dataset.classes)
)

In [5]:
# Deserialize the checkpoint onto the CPU. The model is not moved to another device
# in the cells shown here, and map_location keeps the load working even when the
# weights were saved from a GPU session.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
state_dict = torch.load(
    '../models/models_weights/two_conv_network_diagnostic.pth',
    map_location='cpu',
)
# Kept as the last expression so the notebook displays the key-matching report.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [6]:
# Put the network in evaluation mode; eval() returns the module, so its layer
# summary is shown as the cell output.
model.eval()

TwoLayerConvNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=100352, out_features=16, bias=True)
  (relu3): ReLU()
  (fc2): Linear(in_features=16, out_features=22, bias=True)
)

In [7]:
# Evaluation loader over the test subset. Shuffling is disabled: sample order does
# not affect the metrics, and a fixed order keeps batches reproducible between runs.
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=16,
    shuffle=False,
    num_workers=4,
)

In [8]:
# Report how many samples ended up in the test subset.
print(f"Size of test dataset: {len(test_data)}")

Size of test dataset: 1656


In [9]:
# Invert the dataset's name -> index mapping so predictions can be reported by name.
idx_to_class = {
    index: name
    for name, index in test_data.dataset.class_to_idx.items()
}

In [10]:
# Display the index -> class-name mapping for reference.
idx_to_class

{0: 'E_Neo-Assyrian',
 1: 'E_Neo-Babylonian',
 2: 'GAR_Neo-Assyrian',
 3: 'GAR_Neo-Babylonian',
 4: 'KA_Neo-Assyrian',
 5: 'KA_Neo-Babylonian',
 6: 'KI_Neo-Assyrian',
 7: 'KI_Neo-Babylonian',
 8: 'MEŠ_Neo-Assyrian',
 9: 'MEŠ_Neo-Babylonian',
 10: 'NI_Neo-Assyrian',
 11: 'NI_Neo-Babylonian',
 12: 'RU_Neo-Assyrian',
 13: 'RU_Neo-Babylonian',
 14: 'TA_Neo-Assyrian',
 15: 'TA_Neo-Babylonian',
 16: 'TI_Neo-Assyrian',
 17: 'TI_Neo-Babylonian',
 18: 'U₂_Neo-Assyrian',
 19: 'U₂_Neo-Babylonian',
 20: 'ŠU_Neo-Assyrian',
 21: 'ŠU_Neo-Babylonian'}

In [11]:
# Per-class tallies keyed by integer class index (idx_to_class maps them to names).
correct_pred = {class_idx: 0 for class_idx in idx_to_class}
total_pred = {class_idx: 0 for class_idx in idx_to_class}

# Gradients are not needed while evaluating.
with torch.no_grad():
    # Unpack each batch directly so the loop does not rebind the notebook-level
    # `data` dataset to a batch.
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the highest score along dim 1 is the predicted class.
        _, predictions = torch.max(outputs, 1)
        # Class indices map one-to-one to names, so comparing the integers is
        # equivalent to comparing the looked-up class names.
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            if label == prediction:
                correct_pred[label] += 1
            total_pred[label] += 1

In [12]:
# Print the percentage of correctly classified test samples for every class.
# The dictionaries are keyed by class index; idx_to_class supplies the display name.
for class_idx, correct_count in correct_pred.items():
    class_total = total_pred[class_idx]
    if class_total == 0:
        # No test sample carries this label, so a percentage is undefined.
        print(f'Accuracy for class: {idx_to_class[class_idx]:5s} is n/a (no test samples)')
        continue
    accuracy = 100 * float(correct_count) / class_total
    print(f'Accuracy for class: {idx_to_class[class_idx]:5s} is {accuracy:.1f} %')

Accuracy for class: E_Neo-Assyrian is 0.0 %
Accuracy for class: E_Neo-Babylonian is 0.0 %
Accuracy for class: GAR_Neo-Assyrian is 0.0 %
Accuracy for class: GAR_Neo-Babylonian is 81.8 %
Accuracy for class: KA_Neo-Assyrian is 0.0 %
Accuracy for class: KA_Neo-Babylonian is 0.0 %
Accuracy for class: KI_Neo-Assyrian is 0.0 %
Accuracy for class: KI_Neo-Babylonian is 17.4 %
Accuracy for class: MEŠ_Neo-Assyrian is 0.0 %
Accuracy for class: MEŠ_Neo-Babylonian is 0.0 %
Accuracy for class: NI_Neo-Assyrian is 0.0 %
Accuracy for class: NI_Neo-Babylonian is 0.0 %
Accuracy for class: RU_Neo-Assyrian is 0.0 %
Accuracy for class: RU_Neo-Babylonian is 0.0 %
Accuracy for class: TA_Neo-Assyrian is 0.0 %
Accuracy for class: TA_Neo-Babylonian is 0.0 %
Accuracy for class: TI_Neo-Assyrian is 0.0 %
Accuracy for class: TI_Neo-Babylonian is 0.0 %
Accuracy for class: U₂_Neo-Assyrian is 0.0 %
Accuracy for class: U₂_Neo-Babylonian is 86.5 %
Accuracy for class: ŠU_Neo-Assyrian is 0.0 %
Accuracy for class: ŠU_Neo-Bab

In [14]:
# Overall accuracy as a fraction: all correct predictions over all evaluated samples.
n_correct = sum(correct_pred.values())
n_evaluated = sum(total_pred.values())
print("Overall accuracy: ", n_correct / n_evaluated)

Overall accuracy:  0.20652173913043478
