In [1]:
import torch
import torch.nn as nn

from ignite.engine import *
from ignite.metrics import *

from torchvision.datasets import Country211
from torchvision import transforms

from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.models import resnet50, ResNet50_Weights

from torchvision.ops import FeaturePyramidNetwork

from tqdm import tqdm

import math

import numpy as np

In [2]:
transform = transforms.Compose([
            # transforms.RandomHorizontalFlip(),
            # transforms.ColorJitter(0.4, 0.4, 0.4),
            transforms.Resize((448,448)), # resize the images to 224x224 pixels
            # transforms.RandomResizedCrop((224, 224), scale=(0.5, 1.0)),
            # transforms.GaussianBlur((5, 5), sigma=(0.1, 0.3)),
            transforms.ToTensor(), # convert the images to a PyTorch tensor
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # normalize the images color channels
        ])

test_set = Country211("../", "test", transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32)

In [3]:
# pretrained backbone

# loading pretrained model
device = torch.device("cuda")

model = resnet50(weights=ResNet50_Weights.DEFAULT)

layer_names = []

for name, layer in model.named_modules():
    if isinstance(layer, nn.Conv2d):
        layer_names += [name]

print(layer_names)

feature_extractor = create_feature_extractor(model, 
        return_nodes=["avgpool"]).to(device)

# feature_extractor = create_feature_extractor(model, 
#         return_nodes=["layer1.0.conv3", "layer2.0.conv3", "layer3.0.conv3", "layer4.0.conv3"]).to(device)

['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.0.conv3', 'layer1.0.downsample.0', 'layer1.1.conv1', 'layer1.1.conv2', 'layer1.1.conv3', 'layer1.2.conv1', 'layer1.2.conv2', 'layer1.2.conv3', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.0.conv3', 'layer2.0.downsample.0', 'layer2.1.conv1', 'layer2.1.conv2', 'layer2.1.conv3', 'layer2.2.conv1', 'layer2.2.conv2', 'layer2.2.conv3', 'layer2.3.conv1', 'layer2.3.conv2', 'layer2.3.conv3', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.0.conv3', 'layer3.0.downsample.0', 'layer3.1.conv1', 'layer3.1.conv2', 'layer3.1.conv3', 'layer3.2.conv1', 'layer3.2.conv2', 'layer3.2.conv3', 'layer3.3.conv1', 'layer3.3.conv2', 'layer3.3.conv3', 'layer3.4.conv1', 'layer3.4.conv2', 'layer3.4.conv3', 'layer3.5.conv1', 'layer3.5.conv2', 'layer3.5.conv3', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.0.conv3', 'layer4.0.downsample.0', 'layer4.1.conv1', 'layer4.1.conv2', 'layer4.1.conv3', 'layer4.2.conv1', 'layer4.2.conv2', 'layer4.2.conv3']


In [4]:
class ClassificationHead(torch.nn.Module):
    def __init__(self, input_size, output_size):
        super(ClassificationHead, self).__init__()

        self.flatten = nn.Flatten()

        self.linear = nn.Linear(input_size, output_size)

        self.softmax = nn.Softmax()

    def forward(self, x):
        x = self.flatten(x)

        x = self.linear(x)

        return self.softmax(x)

# pulled from Dr. Karpathy's minGPT implementation
# class GELU(nn.Module):
#     """
#     Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT).
#     Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
#     """
#     def forward(self, x):
#         return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))

# class ClassificationHead(nn.Module):
#     def __init__(self, output_size):
#         super(ClassificationHead, self).__init__()
#         # pooling to make sure dimensionality is the same for features across multiple layers
#         self.pool = nn.AdaptiveAvgPool2d(14)
#         self.upsamplex2 = nn.Upsample(scale_factor=2)

#         self.bnorm1 = nn.BatchNorm2d(256)
#         self.bnorm2 = nn.BatchNorm2d(256)
#         self.bnorm3 = nn.BatchNorm2d(256)
#         self.bnorm4 = nn.BatchNorm2d(256)

#         # final layers
#         self.final_conv1 = nn.Conv2d(1024, 1024, (3, 3), bias=False, groups=4)
#         self.bnorm5 = nn.BatchNorm2d(1024)

#         self.final_conv2 = nn.Conv2d(1024, 1024, (3, 3), bias=False, groups=4)
#         self.bnorm6 = nn.BatchNorm2d(1024)

#         self.final_conv3 = nn.Conv2d(1024, 1024, (3, 3), bias=False, groups=4)
#         self.bnorm7 = nn.BatchNorm2d(1024)

#         # flatten layers creates large number of features
#         # self.flatten = nn.Flatten()

#         self.aggregate = nn.AdaptiveAvgPool2d((1, 1)) # global feature aggregation for regression output connection

#         self.dense1 = nn.Linear(1024, output_size)

#         self.gelu = GELU()
#         self.softmax = nn.Softmax()

#     def forward(self, x):
#         out1 = self.pool(self.bnorm1(x["layer1.0.conv3"]))
#         out2 = self.pool(self.bnorm2(x["layer2.0.conv3"]))
#         out3 = self.bnorm3(x["layer3.0.conv3"])
#         out4 = self.upsamplex2(self.bnorm4(x["layer4.0.conv3"]))

#         concat_output = torch.cat((out1, out2, out3, out4), 1)

#         final = self.gelu(self.bnorm5(self.final_conv1(concat_output)))
#         final = self.gelu(self.bnorm6(self.final_conv2(final)))
#         final = self.gelu(self.bnorm7(self.final_conv3(final)))

#         final = self.aggregate(final).squeeze(2).squeeze(2)

#         # final linear output
#         final = self.softmax(self.dense1(final))

#         return final

In [5]:
device = torch.device("cuda")

#feature_extractor = torch.load("../models/feature_extractor_run_3_resnet_full_l2_0_epochs_100_lr_1e-4").to(device)
#fpn = torch.load("../models/fpn_resnet_fpn_l2_0_epochs_100_lr_1e-4_conv_0_group_4").to(device)
classification_head = torch.load("../models/resnet_simple_l2_0_epochs_100_lr_1e-3").to(device)

In [6]:
# pulled from https://pytorch.org/ignite/generated/ignite.metrics.TopKCategoricalAccuracy.html
def process_function(engine, batch):
    y_pred, y = batch
    return y_pred, y

def one_hot_to_binary_output_transform(output):
    y_pred, y = output
    y = torch.argmax(y, dim=1)  # one-hot vector to label index vector
    return y_pred, y

engine = Engine(process_function)
accuracy = TopKCategoricalAccuracy(k=1, output_transform=one_hot_to_binary_output_transform, device=device)
accuracy.attach(engine, 'top_1_accuracy')

In [7]:
test_accuracies = []

for (sample, target) in tqdm(test_loader):
    one_hot_target = nn.functional.one_hot(target, num_classes = 211).type(torch.FloatTensor).to(device)

    with torch.no_grad():
        output = feature_extractor(sample.to(device))
        output = classification_head(output["avgpool"])

    state = engine.run([[output, one_hot_target]])
    accuracy = state.metrics['top_1_accuracy']*100
    test_accuracies += [accuracy]

print("Test accuracy: " + str(np.mean(np.array(test_accuracies))))

  return self.softmax(x)
100%|██████████| 660/660 [07:47<00:00,  1.41it/s]

Test accuracy: 3.731060606060606





In [8]:
print("Test accuracy: " + str(np.mean(test_accuracies)))

Test accuracy: 3.731060606060606
