In [28]:
import torch

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

import torchvision
from torchvision import datasets, transforms

In [None]:
# Prefer the GPU when PyTorch can see one, but fall back to the CPU so the
# notebook still runs end-to-end on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
# Mount Google Drive at /content/drive so files stored there (the training
# archive used by the next cell) are reachable through the local filesystem.
# Requires the `google.colab` package, i.e. a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip drive/MyDrive/ann_imgs/train.zip

In [None]:
!pip install split-folders
import splitfolders
splitfolders.ratio('./train', output="output", seed=1337, ratio=(0.8, 0.0,0.2)) 

In [33]:
# Training-time preprocessing: random horizontal flip and rotation (up to 24
# degrees) as augmentation, then tensor conversion and per-channel
# normalization with mean 0.5 / std 0.5.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=24),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Evaluation-time preprocessing: no augmentation, only tensor conversion and the
# same mean 0.5 / std 0.5 normalization that is applied to the training images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [34]:
# Training hyperparameters shared by the cells below.
DROPOUT = 0.3      # drop probability of the dropout layers in Net
BATCH_SIZE = 256   # samples per mini-batch for the train and test loaders
LR = 0.001         # learning rate handed to the Adam optimizer
ITERATIONS = 30    # number of full passes over the training set

In [35]:
# Training split: ImageFolder derives the label from each image's sub-folder;
# samples are augmented by train_transform and reshuffled for every epoch.
train_dataset = datasets.ImageFolder(root='./output/train', transform=train_transform)
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

In [36]:
# Held-out split, loaded without augmentation. The loader shuffles as well, so
# the batch previewed further down is a random sample of the test images.
test_dataset = datasets.ImageFolder(root='./output/test', transform=test_transform)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

In [None]:
# Invert ImageFolder's name -> index mapping so that label / prediction indices
# can be translated back into class names.
classes = {index: name for name, index in train_dataset.class_to_idx.items()}
classes

In [None]:
# Sanity check: split sizes, the tensor shape produced by the transform
# pipeline for the first training sample, and the number of classes found.
print(f"Train dataset length: {len(train_dataset)}")
print(f"Test dataset length: {len(test_dataset)}")
print(f"Image shape: {train_dataset[0][0].shape}")
print(f"Number of classes: {len(classes)}")

In [None]:
# Pull one mini-batch out of the training loader for inspection and show its labels.
sample_batch = next(iter(train_dataloader))
images, labels = sample_batch
labels

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Open a 20 x 10 inch figure; the plt.imshow call made by imshow() further down
# in this cell draws into the current figure.
plt.figure(figsize = (20,10))

def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Tensor in channel-first (C, H, W) layout whose values were
            normalized with mean 0.5 / std 0.5, e.g. the result of
            ``torchvision.utils.make_grid`` on a batch from the data loaders.
            The tensor may live on any device and may require grad.
    """
    img = img / 2 + 0.5  # undo the mean 0.5 / std 0.5 normalization
    # detach + cpu make GPU tensors and tensors attached to the autograd graph
    # convertible too; both calls are no-ops for a plain CPU tensor.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channel-last (H, W, C) image arrays.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Draw exactly the images whose labels are printed. The sampled batch holds up
# to BATCH_SIZE images, far more than the 32 labels listed, and capping at the
# batch length avoids an IndexError when fewer than 32 samples are available.
n_preview = min(32, len(labels))
imshow(torchvision.utils.make_grid(images[:n_preview]))
print(' '.join('%5s' % classes[int(labels[j])] for j in range(n_preview)))

In [41]:
class Net(nn.Module):
    """CNN classifier: five 4x4 convolutions, one max-pool, five dense layers.

    The first dense layer expects 9792 flattened features (17 channels x 24 x 24),
    which is what e.g. 3 x 64 x 64 inputs yield after the five unpadded 4x4
    convolutions and the 2x2 pooling. The network returns 50 raw class scores
    (logits) per sample. The dropout probability is read from the module-level
    ``DROPOUT`` constant when the network is constructed.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: every conv is 4x4 with stride 1 and no padding
        # (the Conv2d defaults) and has its own BatchNorm2d.
        self.conv1 = nn.Conv2d(3, 17, kernel_size=4)
        self.cbn1 = nn.BatchNorm2d(17)
        self.conv2 = nn.Conv2d(17, 17, kernel_size=4)
        self.cbn2 = nn.BatchNorm2d(17)
        self.conv3 = nn.Conv2d(17, 17, kernel_size=4)
        self.cbn3 = nn.BatchNorm2d(17)
        self.conv4 = nn.Conv2d(17, 17, kernel_size=4)
        self.cbn4 = nn.BatchNorm2d(17)
        self.conv5 = nn.Conv2d(17, 17, kernel_size=4)
        self.cbn5 = nn.BatchNorm2d(17)

        # Classifier head: Linear -> BatchNorm1d -> Dropout per hidden stage.
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.fc1 = nn.Linear(9792, 5000)
        self.bn1 = nn.BatchNorm1d(5000)
        self.d1 = nn.Dropout(p=DROPOUT)
        self.fc2 = nn.Linear(5000, 2500)
        self.bn2 = nn.BatchNorm1d(2500)
        self.d2 = nn.Dropout(p=DROPOUT)
        self.fc3 = nn.Linear(2500, 1000)
        self.bn3 = nn.BatchNorm1d(1000)
        self.d3 = nn.Dropout(p=DROPOUT)
        self.fc4 = nn.Linear(1000, 500)
        self.bn4 = nn.BatchNorm1d(500)
        self.d4 = nn.Dropout(p=DROPOUT)
        self.fc5 = nn.Linear(500, 50)

        # Re-initialize the weights of all dense layers with Xavier-normal values;
        # biases and convolution weights keep their default initialization.
        dense_layers = (m for m in self.modules() if isinstance(m, nn.Linear))
        for layer in dense_layers:
            nn.init.xavier_normal_(layer.weight.data)

    def forward(self, x):
        """Return the logits (one row of 50 scores per sample) for image batch ``x``."""
        # Stages 1-4: conv -> ReLU -> batch norm.
        early_stages = (
            (self.conv1, self.cbn1),
            (self.conv2, self.cbn2),
            (self.conv3, self.cbn3),
            (self.conv4, self.cbn4),
        )
        for conv, norm in early_stages:
            x = norm(F.relu(conv(x)))

        # Stage 5 normalizes the raw conv output and pools before the ReLU.
        x = self.pool1(self.cbn5(self.conv5(x)))
        x = F.relu(x)

        # Keep the batch dimension, flatten everything else for the dense head.
        x = torch.flatten(x, start_dim=1)
        dense_stages = (
            (self.fc1, self.bn1, self.d1),
            (self.fc2, self.bn2, self.d2),
            (self.fc3, self.bn3, self.d3),
            (self.fc4, self.bn4, self.d4),
        )
        for fc, norm, drop in dense_stages:
            x = F.relu(drop(norm(fc(x))))

        return self.fc5(x)

In [None]:
# Build the network and move its parameters and buffers to the selected device.
net = Net()
net = net.to(device)
net

In [43]:
# Adam over all network parameters; cross-entropy works directly on the raw
# logits returned by Net and the integer class labels from the loaders.
optimizer = optim.Adam(params=net.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

In [None]:
# Put dropout / batch norm into training mode explicitly, so the loop also
# behaves correctly when it is re-run after a cell that called net.eval().
net.train()

# Guard against an empty loader so the average below cannot divide by zero.
num_batches = max(len(train_dataloader), 1)

for epoch in range(ITERATIONS):

    running_loss = 0.0  # sum of the per-batch losses of this epoch
    for inputs, labels in train_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean loss per batch. The previous divisor (a fixed 2000) was
    # unrelated to the number of batches, so the printed value had no meaning.
    print('[%d/%d] loss: %.3f' %
          (epoch + 1, ITERATIONS, running_loss / num_batches))


In [None]:
numb_examples = 8

# Take the first batch delivered by the test loader and keep only the leading
# images for the preview; the labels are indexed up to numb_examples below.
examples = enumerate(test_dataloader)
idx, first_batch = next(examples)
images, labels = first_batch
images = images[:numb_examples]

preview_grid = torchvision.utils.make_grid(images)
imshow(preview_grid)
truth_names = ['%5s' % classes[int(labels[k])] for k in range(numb_examples)]
print('GroundTruth: ', ' '.join(truth_names))

In [46]:
# Predict in inference mode. This cell runs before the evaluation cell that
# calls net.eval(), so without it dropout would still be active and batch norm
# would normalize with (and update its running statistics from) these few
# preview images. no_grad() additionally skips building the autograd graph.
net.eval()
with torch.no_grad():
    outputs = net(images.to(device))

In [47]:
# The index of the largest logit in each row is the predicted class.
predicted = torch.max(outputs, dim=1).indices

predicted_names = ['%5s' % classes[int(predicted[k])] for k in range(numb_examples)]
print('Predicted: ', ' '.join(predicted_names))

Predicted:  bridge   egg   tea swine kangaroo  bean  fish camera


In [None]:
# Overall accuracy on the test split.
net.eval()  # inference behaviour for dropout and batch norm

corrects = 0
total = 0

# no_grad: evaluation needs no gradients, so skip recording the autograd graph
# for every batch (saves memory and time).
with torch.no_grad():
    for inputs, labels in test_dataloader:

        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)

        # Index of the largest logit per row = predicted class.
        _, predicted = torch.max(outputs, 1)

        total += len(labels)
        corrects += (labels == predicted).sum().item()

print(f"Accuracy: {corrects / total}")

In [None]:
# Per-class accuracy on the test split.
correct_pred = {classname: 0 for classname in train_dataset.classes}
total_pred = {classname: 0 for classname in train_dataset.classes}

# Set inference mode here as well, so the cell does not depend on the previous
# cell having been executed.
net.eval()
with torch.no_grad():
    for images, labels in test_dataloader:
        labels = labels.to(device)
        images = images.to(device)
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # Convert to plain Python ints once per batch instead of comparing
        # device tensors element by element.
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            classname = classes[label]
            if label == prediction:
                correct_pred[classname] += 1
            total_pred[classname] += 1


for classname, correct_count in correct_pred.items():
    class_total = total_pred[classname]
    if class_total == 0:
        # A class without any test sample has no defined accuracy.
        print("Accuracy for class {:5s} is: n/a (no test samples)".format(classname))
        continue
    accuracy = 100 * float(correct_count) / class_total
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                   accuracy))