In [1]:
import torchvision
from torchvision import models
from torchvision.transforms import transforms
import torch
import CitiesData
import numpy as np
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import time

In [2]:
# The inference transforms are available at ViT_B_16_Weights.IMAGENET1K_V1.transforms and perform the following preprocessing operations: Accepts PIL.Image, batched (B, C, H, W) and single (C, H, W) image torch.Tensor objects. 
# The images are resized to resize_size=[256] using interpolation=InterpolationMode.BILINEAR, followed by a central crop of crop_size=[224]. 
# Finally the values are first rescaled to [0.0, 1.0] and then normalized using mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225].

#models.ViT_B_16_Weights.IMAGENET1K_V1
model_name = "Inception"
model_image_size = 224

In [3]:
class CityInception(torch.nn.Module):
    def __init__(self, numClasses: int, softmax:bool = True):
        super(CityInception, self).__init__()

        self.inceptionBase = torchvision.models.inception_v3(weights='DEFAULT')
        self.inceptionBase.fc = torch.nn.Linear(in_features=2048, out_features=numClasses)
        for param in list(self.inceptionBase.parameters())[:-1]:
            param.requires_grad = False
        # for param in self.inceptionBase.parameters():
        #     print(param.requires_grad)

        self.softmax = torch.nn.Softmax(dim=-1)

    def forward(self, x):
        # print(x.shape)
        logits = self.inceptionBase(x)
        # print(type(logits))
        # print(logits)
        # print(logits.shape)
        probs = self.softmax(logits.logits)

        return probs






In [4]:
model = CityInception(10).to(device)
print(*list(model.children())[:-1])

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [5]:
# Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [6]:
batch_size = 32
transform = transforms.Compose([
    transforms.Resize(342),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225))
])
trainDataLoader, testDataLoader = CitiesData.getCitiesDataLoader("./Data/", transforms = transform, batchSize=batch_size)

In [7]:
print(len(trainDataLoader))
print(len(testDataLoader))
for i in trainDataLoader:
    image, cities, _, _ = i
    print(image.shape)
    break

6258
696
torch.Size([32, 3, 224, 224])


In [8]:
from fvcore.nn import FlopCountAnalysis
test_image = 0
for i in testDataLoader:
    test_image, cities, _, _ = i
    test_image = test_image.to(device)
    break
flops = FlopCountAnalysis(model, test_image)
print(str(flops.total()) + " flops")
# Ignore Reds

Unsupported operator aten::mul encountered 49 time(s)
Unsupported operator aten::add encountered 25 time(s)
Unsupported operator aten::div encountered 12 time(s)
Unsupported operator aten::unflatten encountered 12 time(s)
Unsupported operator aten::scaled_dot_product_attention encountered 12 time(s)
Unsupported operator aten::gelu encountered 12 time(s)
Unsupported operator aten::softmax encountered 1 time(s)
The following submodules of the model were never called during the trace of the graph. They may be unused, or they were accessed by direct calls to .forward() or via other python methods. In the latter case they will have zeros for statistics, though their statistics will still contribute to their parent calling module.
reference_vit.encoder.layers.encoder_layer_0.self_attention.out_proj, reference_vit.encoder.layers.encoder_layer_1.self_attention.out_proj, reference_vit.encoder.layers.encoder_layer_10.self_attention.out_proj, reference_vit.encoder.layers.encoder_layer_11.self_att

539732877312 flops


In [9]:
def evaluate_on_data(model, dataloader, criterion):
    with torch.no_grad():
        total_loss = 0
        
        num_correct = 0.0
        num_samples = 0.0
        for data in dataloader:
            image, city, _, _ = data
            city = city.to(device)
            image = image.to(device)
            outputs = model(image)
            loss = criterion(outputs, city)
            total_loss += loss.item()
            for i in range(len(city)):

                model_vote = 0
                answer = 0
                for j in range(len(outputs[i])):
                    if outputs[i][j] > outputs[i][model_vote]:
                        model_vote = j
                    if city[i][j] == 1:
                        answer = j
                
                if answer == model_vote:
                    num_correct += 1
                num_samples += 1
                
    return total_loss / len(dataloader), num_correct / num_samples

In [10]:
num_epochs = 10
count = 0
test_loss_array = np.zeros(num_epochs)
test_acc_array = np.zeros(num_epochs)
train_loss_array = np.zeros(num_epochs)
for epoch in range(num_epochs):
    start = time.time()
    temp = 0
    for data in trainDataLoader:
        
        image, city, _, _ = data
        
        city = city.to(device)
        image = image.to(device)

        optimizer.zero_grad()
        outputs = model(image)
        loss = criterion(outputs, city)
        loss.backward()
        optimizer.step()
        
        end = time.time()
        count += 1
        print(str(int(end-start)) + " sec " + str(count * batch_size) + " images " + str(loss.item()) + " loss", end='\x1b\r')

    test_loss, test_acc = evaluate_on_data(model, testDataLoader, criterion)
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.4f}, Test Loss: {test_loss}, Test ACC: {test_acc}')
    test_loss_array[epoch] = test_loss
    train_loss_array[epoch] = loss.item()
    test_acc_array[epoch] = test_acc
    

Epoch [1/10], Training Loss: 2.3985, Test Loss: 2.229107338448066, Test ACC: 0.2322905429701546
Epoch [2/10], Training Loss: 2.3673, Test Loss: 2.2291027779701627, Test ACC: 0.2322905429701546
3 sec 960 images 2.336105387657881 loss

KeyboardInterrupt: 

In [None]:
with open(model_name + '_test.npy', 'wb') as f:
    np.save(f, test_loss_array)
    
with open(model_name + '_test_acc.npy', 'wb') as f:
    np.save(f, test_acc_array)
    
with open(model_name + '_train.npy', 'wb') as f:
    np.save(f, train_loss_array)