In [1]:
import os
import torch
import numpy as np
import warnings
from settings import OUT_DIR, IMAGE_HEIGHT, IMAGE_WIDTH
from core.models.nts_net import NTSModel
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Resize, ToTensor
from torchvision.datasets import FGVCAircraft
from PIL import Image

warnings.filterwarnings("ignore")

### Define the test function

In [2]:
def test(model_file, test_data, batch_size):

    # Identify device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the model
    ckpt = torch.load(model_file)
    model = NTSModel(top_n=ckpt["proposal_num"], n_classes=ckpt["n_classes"], image_height=IMAGE_HEIGHT, image_width=IMAGE_WIDTH).to(device)
    model.load_state_dict(ckpt["state_dict"])
    model = nn.DataParallel(model)

    # Setup dataloader
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=2)

    y_pred = []
    y_true = []
    with tqdm(total=(len(test_loader))) as pbar:
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(test_loader):
                inputs, labels = inputs.to(device), labels.to(device)
                batch_size = inputs.size(0)
                y_true += labels.cpu().detach().numpy().astype(int).tolist()
                _, concat_logits, _, _, _ = model(inputs)
                y_pred += concat_logits.argmax(dim=1).cpu().detach().numpy().astype(int).tolist()

                pbar.update(1)

    y_pred = np.array(y_pred)
    y_true = np.array(y_true)

    accuracy = np.mean(y_pred == y_true)
    print("Accuracy:", accuracy)

    return y_true, y_pred


### Load the test data

In [3]:
transform = Compose([
  Resize((IMAGE_HEIGHT, IMAGE_WIDTH), Image.BILINEAR),
  ToTensor(),
])

test_data = FGVCAircraft(root="data", split="test", transform=transform, download=True)

print("Test data size:", len(test_data))

Test data size: 3333


### Test the model

In [4]:
BATCH_SIZE = 8

# Load model weights
model_file = os.path.join(OUT_DIR, "latest_model.ckpt")

y_true, y_pred = test(model_file, test_data, batch_size=BATCH_SIZE)

100%|██████████| 417/417 [00:50<00:00,  8.19it/s]

Accuracy: 0.7962796279627963





### Create classification report

In [5]:
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder

classes = test_data.classes
ohe = OneHotEncoder()
y_true_enc = ohe.fit_transform(y_true.reshape(-1, 1)).toarray()
y_pred_enc = ohe.transform(y_pred.reshape(-1, 1)).toarray()

report = classification_report(y_true_enc, y_pred_enc, target_names=classes, output_dict=True)

df_report = pd.DataFrame(report).transpose()[:-4]

#### Top 10 performers

In [6]:
df_report.sort_values("recall", ascending=False).head(10)

Unnamed: 0,precision,recall,f1-score,support
Tornado,0.891892,1.0,0.942857,33.0
SR-20,1.0,1.0,1.0,34.0
C-130,0.868421,1.0,0.929577,33.0
Cessna 172,1.0,0.970588,0.985075,34.0
DHC-6,0.942857,0.970588,0.956522,34.0
Eurofighter Typhoon,0.942857,0.970588,0.956522,34.0
An-12,0.969697,0.969697,0.969697,33.0
Saab 2000,0.864865,0.969697,0.914286,33.0
Model B200,0.761905,0.969697,0.853333,33.0
Cessna 525,0.969697,0.969697,0.969697,33.0


#### Top 10 worst performers

In [7]:
df_report.sort_values("recall", ascending=True).head(10)

Unnamed: 0,precision,recall,f1-score,support
C-47,0.518519,0.424242,0.466667,33.0
747-200,0.5,0.441176,0.46875,34.0
MD-87,0.64,0.484848,0.551724,33.0
MD-90,0.516129,0.484848,0.5,33.0
DC-3,0.485714,0.5,0.492754,34.0
MD-80,0.485714,0.5,0.492754,34.0
A330-300,0.6,0.545455,0.571429,33.0
737-300,0.5,0.545455,0.521739,33.0
757-200,0.76,0.558824,0.644068,34.0
737-500,0.633333,0.558824,0.59375,34.0
