In [1]:
from __future__ import print_function
import argparse
from tqdm import tqdm
import os
import PIL.Image as Image

import torch
from torch.autograd import Variable
from torch import  nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import numpy as np
import torchvision
from torchvision import transforms

In [2]:
class FMnet(nn.Module):
    """Convolutional encoder followed by a two-layer fully connected classifier.

    The encoder is a stack of ``convblock`` stages (one per entry in
    ``channels``) with a stride-2 max-pool between consecutive stages. The
    final feature map is flattened per sample and passed through
    ``fc1 -> ReLU -> dropout -> fc2``; ``forward`` returns log-probabilities.

    Args:
        n_classes: Number of output classes (size of the ``fc2`` output).
        img_ch: Number of channels of the input image.
        channels: Output channel count of each encoder stage.
        device: Stored on the instance as ``self.device``; this class does
            not move any parameters to it.
        kernel: Kernel size used by every encoder convolution.
        shape: Stored as ``self.image_shape``; not used to size any layer.
        n_upsample: Stored as ``self.n_upsample``; not used by this class.

    Note:
        ``fc1`` is sized for a final feature map of 14 x 16. With the default
        five stages (four stride-2 pools) that is what a 212 x 256 input
        produces. Other input sizes or stage counts will not match ``fc1``.
    """

    def __init__(
        self,
        n_classes,
        img_ch=1,
        channels=(16, 32, 64, 128, 200),
        device='cuda' if torch.cuda.is_available() else 'cpu',
        kernel=3,
        shape=(256, 256),
        n_upsample=4,
    ):
        super().__init__()
        self.n_upsample = n_upsample
        self.image_shape = shape
        self.device = device
        # Copy so the instance never aliases the caller's sequence or the
        # shared default value.
        self.channels = list(channels)

        # Encoder: the submodule names ("conv0", "conv1", ...) are part of the
        # state_dict keys and must not change.
        self.Conv = nn.Sequential()
        self.Conv.add_module(
            "conv0",
            convblock(ch_in=img_ch, ch_out=self.channels[0], kernel_sz=kernel, block=0),
        )
        for k in range(1, len(self.channels)):
            self.Conv.add_module(
                f"conv{k}",
                convblock(
                    ch_in=self.channels[k - 1],
                    ch_out=self.channels[k],
                    kernel_sz=kernel,
                    block=k,
                ),
            )

        # Classifier head; 14 * 16 is the expected spatial size of the last
        # encoder feature map (see the class docstring).
        self.conv_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(self.channels[-1] * 14 * 16, 350)
        self.fc2 = nn.Linear(350, n_classes)

        # Spatial-transformer style localization network. forward() does not
        # call it; it is retained so that the set of parameter names in
        # state_dict() stays unchanged.
        self.localization = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
        )

        # Regressor for the 3 * 2 affine matrix (also unused by forward()).
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 4 * 4, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2),
        )

        # Initialize the weights/bias with the identity transformation.
        with torch.no_grad():
            self.fc_loc[2].weight.zero_()
            self.fc_loc[2].bias.copy_(
                torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)
            )

    def forward(self, x, normalize=False, verbose=False):
        """Classify a batch of images.

        Args:
            x: Input batch; it is fed directly to the first encoder stage.
            normalize: Accepted for interface compatibility; unused.
            verbose: Accepted for interface compatibility; unused.

        Returns:
            Log-softmax over dim 1 of the ``fc2`` output, one row per sample.
        """
        # Encoding path: first stage at full resolution, then a stride-2
        # max-pool in front of every following stage.
        x = self.Conv[0](x)
        for k in range(1, len(self.Conv)):
            x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
            x = self.Conv[k](x)

        x = self.conv_drop(x)
        # Flatten per sample so the batch dimension is always preserved. With
        # a wrongly sized input fc1 now raises a shape error, instead of
        # view(-1, ...) silently regrouping features across samples.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


class convblock(nn.Module):
    """Two stacked convolution units that make up one encoder stage.

    The first unit maps ``ch_in`` to ``ch_out`` channels and the second maps
    ``ch_out`` to ``ch_out``. When ``block == 0`` the first unit is built with
    ``batchconv0``; for every other value of ``block`` it is built with
    ``batchconv``. The second unit is always built with ``batchconv``.
    """

    def __init__(self, ch_in, ch_out, kernel_sz, block=-1):
        super().__init__()
        self.block = block

        # Pick the factory for the first unit based on the stage index.
        make_first = batchconv0 if block == 0 else batchconv

        # Child names "conv_0" / "conv_1" are part of the state_dict keys.
        self.conv = nn.Sequential()
        self.conv.add_module("conv_0", make_first(ch_in, ch_out, kernel_sz))
        self.conv.add_module("conv_1", batchconv(ch_out, ch_out, kernel_sz))

    def forward(self, x):
        """Apply the first unit, then the second, and return the result."""
        hidden = self.conv[0](x)
        return self.conv[1](hidden)


def batchconv0(ch_in, ch_out, kernel_sz):
    """Build a BatchNorm2d -> Conv2d unit (no activation in between).

    Args:
        ch_in: Number of input channels (also the BatchNorm feature count).
        ch_out: Number of output channels of the convolution.
        kernel_sz: Convolution kernel size; padding is ``kernel_sz // 2``.

    Returns:
        An ``nn.Sequential`` holding the batch norm at index 0 and the
        bias-free convolution at index 1.
    """
    norm = nn.BatchNorm2d(ch_in, eps=1e-5, momentum=0.1)
    conv = nn.Conv2d(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=kernel_sz,
        padding=kernel_sz // 2,
        bias=False,
    )
    return nn.Sequential(norm, conv)


def batchconv(ch_in, ch_out, sz):
    """Build a BatchNorm2d -> ReLU -> Conv2d unit.

    Args:
        ch_in: Number of input channels (also the BatchNorm feature count).
        ch_out: Number of output channels of the convolution.
        sz: Convolution kernel size; padding is ``sz // 2``.

    Returns:
        An ``nn.Sequential`` holding the batch norm (index 0), an in-place
        ReLU (index 1) and the bias-free convolution (index 2).
    """
    norm = nn.BatchNorm2d(ch_in, eps=1e-5, momentum=0.1)
    activation = nn.ReLU(inplace=True)
    conv = nn.Conv2d(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=sz,
        padding=sz // 2,
        bias=False,
    )
    return nn.Sequential(norm, activation, conv)

In [3]:
# Restore the trained weights into a freshly constructed 43-class network.
model_file = '/home/stringlab/Desktop/DLCV_midterm_project/trained_models/model_23.pth'
# map_location='cpu' lets a checkpoint that was saved from a CUDA device load
# on a machine without a GPU. The model is never moved off the CPU in this
# notebook, so CPU tensors are what load_state_dict needs anyway.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load(model_file, map_location='cpu')
model = FMnet(n_classes=43)
model.load_state_dict(state_dict)
# Switch dropout / batch-norm layers to inference behaviour; the trailing ';'
# suppresses the module repr in the notebook output.
model.eval();


In [4]:
# Folder containing the test images, and the CSV file that will receive the
# predictions (opened here, written and closed further down in this cell).
test_dir = '/home/stringlab/Desktop/DLCV_midterm_project/GTSRB_Final_Test_Images/GTSRB/Final_Test/Images'
output_file = open("pred.csv", mode="w")

# Preprocessing applied to every test image before it is fed to the model:
# convert to single-channel grayscale, resize to [212, 256], turn into a tensor.
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize([212, 256]),
        transforms.ToTensor(),
    ]
)

def pil_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and handed to PIL as a file object so that
    the handle is closed deterministically, which avoids a ResourceWarning
    (https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')

# Write one "<file id>,<predicted class>" row per test image.
# try/finally guarantees that pred.csv is flushed and closed even when an
# image fails to load or the model raises part-way through the run.
try:
    output_file.write("Filename,ClassId\n")

    for f in tqdm(sorted(os.listdir(test_dir))):
        # Match on the file extension; a plain substring test would also
        # accept any unrelated file whose name merely contains "ppm".
        if not f.endswith('.ppm'):
            continue

        with torch.no_grad():
            data = transform(pil_loader(os.path.join(test_dir, f)))
            # Add a leading batch dimension of size 1 in front of the
            # image tensor before calling the model.
            output = model(data.unsqueeze(0))

        # Index of the highest log-probability, as a plain Python int so it
        # formats cleanly with %d.
        pred = int(output.argmax(dim=1).item())
        # The first five characters of the file name are written as the id.
        file_id = f[0:5]
        output_file.write("%s,%d\n" % (file_id, pred))
finally:
    output_file.close()

100%|██████████| 12631/12631 [04:48<00:00, 43.77it/s]


In [8]:
# Calculate test accuracy
import pandas as pd

# Ground-truth labels are stored as a semicolon-separated CSV.
gt_file = '/home/stringlab/Desktop/DLCV_midterm_project/GTSRB_Final_Test_GT/GT-final_test.csv'
gt = pd.read_csv(gt_file, delimiter=';')

# Predictions file, comma-separated (the pandas default separator).
# NOTE(review): presumably the pred.csv written by the inference cell —
# confirm the notebook's working directory is this project folder.
pred_file = '/home/stringlab/Desktop/DLCV_midterm_project/pred.csv'
pred = pd.read_csv(pred_file)

In [12]:
# Percentage of rows whose predicted ClassId equals the ground-truth ClassId.
# The two tables are compared row by row, so both files must list the test
# images in the same order.
print(
    "Accuracy: ",
    (gt['ClassId'] == pred['ClassId']).sum() / len(gt) * 100,
    "%",
)

Accuracy:  97.9889152810768 %
