In [1]:
import os
import tqdm
import cv2
import random
import copy
import numpy as np
import matplotlib.image as mpimage
import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, ToPILImage
import torchvision.transforms as transforms
import torch.optim as optim

In [2]:
def get_accuracy(outputs, labels):
    outputs_cpy = copy.copy(outputs)
    labels_cpy  = copy.copy(labels)
    outputs_cpy = torch.max(outputs_cpy, 1)[1].cpu().detach().numpy()
    #print(outputs_cpy)
    labels_cpy  = labels_cpy.cpu().detach().numpy()
    acc     = (outputs_cpy == labels_cpy).sum()
    return acc/outputs_cpy.shape[0]

In [3]:
class ClassifierDataset(Dataset):
    def __init__(self, root, image_names, Dict, classes_unique, transform = None):
        self.root           = root
        self.names          = image_names
        self.classes_unique = classes_unique
        self.Dict           = Dict
        self.transform      = transform        
            
            
    def __getitem__(self, idx):
        image = cv2.imread(self.root +self.names[idx])
        if len(image.shape)!=3:
            image = cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)
        label_name = Dict[self.names[idx]]
        label      = self.classes_unique.index(label_name)
        if self.transform is not None:
            image = self.transform(image)
        return image, label
    
    def __len__(self):
        return len(self.names)

In [4]:
image_names = os.listdir('./tiny-imagenet-200//val/images/')
valid_names = random.sample(image_names, 3000)
train_names = [i for i in image_names if i not in valid_names]

labels_file = open('./tiny-imagenet-200/val/val_annotations.txt', 'r')
text = labels_file.read()
labels_file.close()
text = text.split()
i = 1
n = len(text)
classes = []
Dict = {}
while(1):
    classes.append(text[i])
    Dict[text[i-1]] = text[i]
    i += 6
    if(i > n):
        break
        
classes_unique = list(set(classes))
classes_unique.sort()

In [5]:
transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        #transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                     std=[0.229, 0.224, 0.225])
    ])
trainDataset = ClassifierDataset('./tiny-imagenet-200/val/images/', train_names, Dict, classes_unique, transform = transform)
validDataset = ClassifierDataset('./tiny-imagenet-200/val/images/', valid_names, Dict, classes_unique, transform = transform)
trainLoader  = DataLoader(trainDataset, batch_size = 16, shuffle = True)
validLoader  = DataLoader(validDataset, batch_size = 16, shuffle = False)

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*13 * 13, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 200)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        #print(x.size())
        x = x.view(-1, 16 * 13 * 13)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [7]:
class Classifier(nn.Module):
    def __init__(self, n_classes):
        super(Classifier, self).__init__()
        self.conv1a = nn.Conv2d(3,  64, 3, padding = 1)
        self.conv1b = nn.Conv2d(64, 64, 3, padding = 1)
        self.pool1  = nn.MaxPool2d(2, 2, return_indices = True)
        
        self.conv2a = nn.Conv2d(64,  128, 3, padding = 1)
        self.conv2b = nn.Conv2d(128, 128, 3, padding = 1)
        self.pool2  = nn.MaxPool2d(2, 2, return_indices = True)
        
        self.conv3a = nn.Conv2d(128, 256, 3, padding = 1)
        self.conv3b = nn.Conv2d(256, 256, 3, padding = 1)
        self.conv3c = nn.Conv2d(256, 256, 3, padding = 1)
        self.conv3d = nn.Conv2d(256, 256, 3, padding = 1)
        self.pool3  = nn.MaxPool2d(2, 2, return_indices = True)
        
        self.conv4a = nn.Conv2d(256, 512, 3, padding = 1)
        self.conv4b = nn.Conv2d(512, 512, 3, padding = 1)
        self.conv4c = nn.Conv2d(512, 512, 3, padding = 1)
        self.conv4d = nn.Conv2d(512, 512, 3, padding = 1)
        self.pool4  = nn.MaxPool2d(2, 2, return_indices = True)
        self.fc1  = nn.Linear(512*4*4, 1024)
        self.fc2  = nn.Linear(1024, n_classes)
                                
        
    def forward(self, x):
        conv1a = F.relu(self.conv1a(x), inplace = True)
        conv1b = F.relu(self.conv1b(conv1a), inplace = True)
        pool1, idxs1 = self.pool1(conv1b)
        
        conv2a = F.relu(self.conv2a(pool1), inplace = True)
        conv2b = F.relu(self.conv2b(conv2a), inplace = True)
        pool2, idxs2 = self.pool2(conv2b)
        
        conv3a = F.relu(self.conv3a(pool2), inplace = True)
        conv3b = F.relu(self.conv3b(conv3a), inplace = True)
        conv3c = F.relu(self.conv3c(conv3b), inplace = True)
        conv3d = F.relu(self.conv3d(conv3c), inplace = True)
        pool3, idxs3 = self.pool3(conv3d)
        
        conv4a = F.relu(self.conv4a(pool3), inplace = True)
        conv4b = F.relu(self.conv4b(conv4a), inplace = True)
        conv4c = F.relu(self.conv4c(conv4b), inplace = True)
        conv4d = F.relu(self.conv4d(conv4c), inplace = True)
        pool4, idxs4 = self.pool4(conv4d)
        flatten = pool4.view(-1, 512*4*4)
        fc1  = F.relu(self.fc1(flatten), inplace = True)
        fc2 = self.fc2(fc1)
        return fc2

In [12]:
class Classifier(nn.Module):
    def __init__(self, n_classes):
        super(Classifier, self).__init__()
        self.conv1a = nn.Conv2d(3, 64, 3,  padding = 1)
        self.conv1b = nn.Conv2d(64, 64, 3, padding = 1)
        self.pool1  = nn.MaxPool2d(2, 2)
        
        self.conv2a = nn.Conv2d(64, 128, 3,  padding = 1)
        self.conv2b = nn.Conv2d(128, 128, 3, padding = 1)
        self.pool2  = nn.MaxPool2d(2, 2)
        
        self.fc1 = nn.Linear(128*16*16, 1024)
        self.fc2 = nn.Linear(1024, n_classes)
        
    def forward(self, x):
        conv1a = F.relu(self.conv1a(x), inplace = True)
        conv1b = F.relu(self.conv1b(conv1a), inplace = True)
        pool1  = self.pool1(conv1b)
        
        conv2a = F.relu(self.conv2a(pool1), inplace = True)
        conv2b = F.relu(self.conv2b(conv2a), inplace = True)
        pool2  = self.pool2(conv2b)
        
        flatten = pool2.view(-1, 128*16*16)
        fc1 = F.relu(self.fc1(flatten), inplace = True)
        fc2 = self.fc2(fc1)
        return fc2

In [13]:
class Net(nn.Module):
    ### TODO: choose an architecture, and complete the class
    def __init__(self):
        super(Net, self).__init__()
        
        ## Define layers of a CNN
        # convolutional layer (sees 224x224x3 image tensor)
        self.conv1 = nn.Conv2d(3, 16, 3, padding = 1 )        
        # convolutional layer (sees 112x112x16 image tensor)
        self.conv2 = nn.Conv2d(16, 32, 3, padding = 1)
        # convolutional layer (sees 56x56x32 image tensor)
        self.conv3 = nn.Conv2d(32, 64, 3, padding = 1)
        # convolutional layer (sees 28x28x64 image tensor)
        self.conv4 = nn.Conv2d(64, 128, 3, padding = 1)        
        # convolutional layer (sees 14x14x128 image tensor)
        self.conv5 = nn.Conv2d(128, 256, 3, padding = 1 )
        
        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # dropout layer (p=0.2)
        self.dropout = nn.Dropout(0.2)
        
        self.conv_bn1 = nn.BatchNorm2d(224,3)
        self.conv_bn2 = nn.BatchNorm2d(16)
        self.conv_bn3 = nn.BatchNorm2d(32)
        self.conv_bn4 = nn.BatchNorm2d(64)
        self.conv_bn5 = nn.BatchNorm2d(128)
        self.conv_bn6 = nn.BatchNorm2d(256)
        
        # linear layer (256 * 7 * 7 -> 512)
        self.fc1 = nn.Linear(256*2*2, 512)
        # linear layer (256 * 7 * 7 -> n_classes (133))
        self.fc2 = nn.Linear(512, 200)
    
    def forward(self, x):
        ## Define forward behavior
        # add sequence of convolutional and max pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.conv_bn2(x)
        x = self.pool(F.relu(self.conv2(x)))
        x = self.conv_bn3(x)
        x = self.pool(F.relu(self.conv3(x)))
        x = self.conv_bn4(x)
        x = self.pool(F.relu(self.conv4(x)))
        x = self.conv_bn5(x)
        x = self.pool(F.relu(self.conv5(x)))
        x = self.conv_bn6(x)
        
        # flatten image input
        x = x.view(-1, 256 * 2 * 2)  
        #print(x.size())
        # add dropout layer
        x = self.dropout(x)
        # add second hidden layer
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [14]:
netC = Classifier(200)
use_gpu = True
if use_gpu: 
    netC.cuda()

criterion = nn.CrossEntropyLoss(reduction = 'mean')
optimizer = optim.Adam(netC.parameters(), lr = 1e-2)
#optimizer = optim.SGD(netC.parameters(), lr=0.001, momentum=0.9)

In [15]:
for data in trainLoader:
    images, labels  = data
    break

In [16]:
images.max()

tensor(1.)

In [17]:
epochs = 100
trainLoss = []
validLoss = []
trainAcc  = []
validAcc  = []
for epoch in range(epochs):
    epochTrainLoss = 0
    epochValidLoss = 0
    epochTrainAcc  = 0
    epochValidAcc  = 0

    netC.train(True)
    for data in trainLoader:
        images, labels = data
        if use_gpu:
            images = images.cuda()
            labels = labels.long().cuda()
        outputs = netC(images)
        #print(outputs)
        #print(outputs[0].sum())
        optimizer.zero_grad()
        loss    = criterion(outputs, labels)
        predicted = torch.max(outputs, 1)[1]
        loss.backward()
        optimizer.step()
        epochTrainLoss += loss.item()
        epochTrainAcc  += (predicted == labels).sum().item()


    netC.train(False)
    for data in validLoader:
        images, labels = data
        if use_gpu:
            images = images.cuda()
            labels = labels.long().cuda()
        outputs = netC(images)
        loss    = criterion(outputs, labels)
        predicted = torch.max(outputs, 1)[1]
        epochValidLoss += loss.item()
        epochValidAcc  += (predicted == labels).sum().item()
    epochTrainAcc  = epochTrainAcc#/len(trainLoader)
    epochValidAcc  = epochValidAcc#/len(validLoader)
    epochTrainLoss = epochTrainLoss/len(trainLoader)
    epochValidLoss = epochValidLoss/len(validLoader)
    trainLoss.append(epochTrainLoss)
    validLoss.append(epochValidLoss)
    trainAcc.append(epochTrainAcc  )
    validAcc.append(epochTrainAcc  )

    if epoch!=0:
        if(epochValidAcc > bestValidAcc):
            bestValidAcc = epochValidAcc
            torch.save(netC.state_dict(), 'classifier.pth')
    else: 
        bestValidAcc = epochValidAcc     
    print('[Epoch: {:.0f}/{:.0f}| Train Loss: {:.5f}| Valid Loss: {:.5f}| Train Acc: {:.5f}| Valid Acc: {:.5f}]'.format(epoch+1, epochs, epochTrainLoss, epochValidLoss, epochTrainAcc, epochValidAcc))

[Epoch: 1/100| Train Loss: 7.86763| Valid Loss: 5.31500| Train Acc: 24.00000| Valid Acc: 9.00000]
[Epoch: 2/100| Train Loss: 5.30876| Valid Loss: 5.32050| Train Acc: 24.00000| Valid Acc: 10.00000]
[Epoch: 3/100| Train Loss: 5.30834| Valid Loss: 5.32377| Train Acc: 37.00000| Valid Acc: 10.00000]
[Epoch: 4/100| Train Loss: 5.30832| Valid Loss: 5.32454| Train Acc: 37.00000| Valid Acc: 8.00000]
[Epoch: 5/100| Train Loss: 5.30853| Valid Loss: 5.32417| Train Acc: 29.00000| Valid Acc: 7.00000]
[Epoch: 6/100| Train Loss: 5.30869| Valid Loss: 5.32425| Train Acc: 35.00000| Valid Acc: 10.00000]
[Epoch: 7/100| Train Loss: 5.30830| Valid Loss: 5.32417| Train Acc: 32.00000| Valid Acc: 7.00000]
[Epoch: 8/100| Train Loss: 5.30881| Valid Loss: 5.32358| Train Acc: 24.00000| Valid Acc: 7.00000]
[Epoch: 9/100| Train Loss: 5.30859| Valid Loss: 5.32463| Train Acc: 23.00000| Valid Acc: 7.00000]
[Epoch: 10/100| Train Loss: 5.30829| Valid Loss: 5.32415| Train Acc: 31.00000| Valid Acc: 11.00000]
[Epoch: 11/100|

KeyboardInterrupt: 

In [None]:
x = torch.tensor([[1, 1, 1], [1, 2, 3]])
a = nn.Softmax(1)
a(x.float())

In [None]:
images.shape

In [None]:
plt.imshow(images[7].cpu().detach().numpy().transpose(1, 2, 0))

In [10]:
len(trainLoader)

438

In [11]:
classes_unique