<a href="https://colab.research.google.com/github/MikolajKita/Neural-Networks/blob/mc/Image_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# !unzip test_all.zip

In [2]:
!unzip train.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: train/monkey/n02488702_1549.JPEG  
  inflating: train/monkey/n02484473_925.JPEG  
  inflating: train/monkey/n02484473_1980.JPEG  
  inflating: train/monkey/n02488894_658.JPEG  
  inflating: train/monkey/n02484473_2389.JPEG  
  inflating: train/monkey/n02484473_1320.JPEG  
  inflating: train/monkey/n02484473_1446.JPEG  
  inflating: train/monkey/n02484473_1426.JPEG  
  inflating: train/monkey/n02488702_72.JPEG  
  inflating: train/monkey/n02488702_2650.JPEG  
  inflating: train/monkey/n02488702_4894.JPEG  
  inflating: train/monkey/n02488702_1992.JPEG  
  inflating: train/monkey/n02488702_4695.JPEG  
  inflating: train/monkey/n02484473_1574.JPEG  
  inflating: train/monkey/n02488702_3344.JPEG  
  inflating: train/monkey/n02488702_6259.JPEG  
  inflating: train/monkey/n02488702_5925.JPEG  
  inflating: train/monkey/n02484473_649.JPEG  
  inflating: train/monkey/n02488702_5615.JPEG  
  inflating: train/monkey/n0

In [20]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import torch.optim as optim

In [21]:
torch.cuda.set_device(0)
device = torch.device("cuda")

In [22]:
torch.manual_seed(42)
if torch.cuda.is_available(): 
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)
    
# Additionally, some operations on a GPU are implemented stochastic for efficiency
# We want to ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.determinstic = True
torch.backends.cudnn.benchmark = False

In [23]:
transform = transforms.Compose([
     transforms.RandomHorizontalFlip(0.5),
     transforms.RandomVerticalFlip(0.5),
     transforms.RandomRotation(24),
     transforms.ToTensor(),
     transforms.RandomErasing(0.2),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

batch_size = 128

In [24]:
full_dataset = torchvision.datasets.ImageFolder("train", transform=transform)


In [25]:
classes = full_dataset.classes

In [26]:
train_part = int(0.8 * len(full_dataset))
train_size = int(0.8 * train_part)
validation_size = train_part - train_size
test_size = len(full_dataset) - train_part
train_set, validation_set, test_set = torch.utils.data.random_split(full_dataset, [train_size, validation_size, test_size])

In [27]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

In [28]:
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

In [29]:
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

In [30]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()

        self.start_conv = nn.Conv2d(in_channels=3, out_channels=100, kernel_size=9, stride=2, padding=4, bias=False)
        self.start_bn = nn.BatchNorm2d(100)
        self.star_relu = nn.ReLU(inplace=True)
        self.start_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.conv1_1 = nn.Conv2d(in_channels=100, out_channels=100, kernel_size=3, padding=1, bias=False)
        self.bn1_1 = nn.BatchNorm2d(100)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(in_channels=100, out_channels=100, kernel_size=3, padding=1, bias=False)
        self.bn1_2 = nn.BatchNorm2d(100)

        self.conv2_1 = nn.Conv2d(in_channels=100, out_channels=200, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2_1 = nn.BatchNorm2d(200)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(in_channels=200, out_channels=200, kernel_size=3, padding=1, bias=False)
        self.bn2_2 = nn.BatchNorm2d(200)
        self.res_conv1 = nn.Conv2d(in_channels=100, out_channels=200, kernel_size=1, stride=2, bias=False)
        self.res_bn1 = nn.BatchNorm2d(200)

        self.conv3_1 = nn.Conv2d(in_channels=200, out_channels=400, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn3_1 = nn.BatchNorm2d(400)
        self.relu3 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(in_channels=400, out_channels=400, kernel_size=3, padding=1, bias=False)
        self.bn3_2 = nn.BatchNorm2d(400)
        self.res_conv2 = nn.Conv2d(in_channels=200, out_channels=400, kernel_size=1, stride=2, bias=False)
        self.res_bn2 = nn.BatchNorm2d(400)

        self.end_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(400, 50)

    def forward(self, x):
        x = self.start_conv(x)
        x = self.start_bn(x)
        x = self.star_relu(x)
        x = self.start_pool(x)


        res_val = x
        x = self.conv1_1(x)
        x = self.bn1_1(x)
        x = self.relu1(x)
        x = self.conv1_2(x)
        x = self.bn1_2(x)
        x += res_val
        x = self.relu1(x)


        res_val = x
        x = self.conv2_1(x)
        x = self.bn2_1(x)
        x = self.relu2(x)
        x = self.conv2_2(x)
        x = self.bn2_2(x)

        res_val = self.res_conv1(res_val)
        res_val = self.res_bn1(res_val)
        x += res_val
        x = self.relu2(x)


        res_val = x
        x = self.conv3_1(x)
        x = self.bn3_1(x)
        x = self.relu3(x)
        x = self.conv3_2(x)
        x = self.bn3_2(x)

        res_val = self.res_conv2(res_val)
        res_val = self.res_bn2(res_val)
        x += res_val
        x = self.relu3(x)


        x = self.end_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x


In [31]:
net = Net().to(device)
net

Net(
  (start_conv): Conv2d(3, 100, kernel_size=(9, 9), stride=(2, 2), padding=(4, 4), bias=False)
  (start_bn): BatchNorm2d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (star_relu): ReLU(inplace=True)
  (start_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv1_1): Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1_1): BatchNorm2d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU(inplace=True)
  (conv1_2): Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1_2): BatchNorm2d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_1): Conv2d(100, 200, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn2_1): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU(inplace=True)
  (conv2_2): Conv2d(200, 200, kernel_size=(3, 3), stride=(1, 1), p

In [32]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [33]:
training_loss = 0
validation_loss = 0
num_epochs = 100

# Training loop
for epoch in range(num_epochs):
  net.train()
  for i, data in enumerate(train_loader, 0):
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    # zero the parameter gradients
    optimizer.zero_grad()
    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    training_loss += loss.item()
    
  net.eval()
  with torch.no_grad():
      for i, data in enumerate(validation_loader, 0):
          inputs, labels = data
          inputs, labels = inputs.to(device), labels.to(device)
          outputs = net(inputs)
          loss = criterion(outputs, labels) 
          validation_loss += loss.item()

  if epoch and (epoch % 1 == 0 or epoch == num_epochs - 1):    
      print(f"Epoch: {epoch}, training loss: {training_loss/1:.3}, validation loss: {validation_loss/1:.3}")
      training_loss = 0
      validation_loss = 0

Epoch: 1, training loss: 2.51e+03, validation loss: 6.21e+02
Epoch: 2, training loss: 1.05e+03, validation loss: 2.67e+02
Epoch: 3, training loss: 9.87e+02, validation loss: 2.53e+02
Epoch: 4, training loss: 9.31e+02, validation loss: 2.35e+02
Epoch: 5, training loss: 8.85e+02, validation loss: 2.35e+02
Epoch: 6, training loss: 8.48e+02, validation loss: 2.29e+02
Epoch: 7, training loss: 8.1e+02, validation loss: 2.41e+02
Epoch: 8, training loss: 7.85e+02, validation loss: 2.17e+02
Epoch: 9, training loss: 7.53e+02, validation loss: 2.19e+02
Epoch: 10, training loss: 7.3e+02, validation loss: 2.15e+02
Epoch: 11, training loss: 7.04e+02, validation loss: 2.12e+02
Epoch: 12, training loss: 6.83e+02, validation loss: 1.99e+02
Epoch: 13, training loss: 6.63e+02, validation loss: 2e+02
Epoch: 14, training loss: 6.44e+02, validation loss: 1.98e+02
Epoch: 15, training loss: 6.28e+02, validation loss: 2.05e+02
Epoch: 16, training loss: 6.05e+02, validation loss: 1.88e+02
Epoch: 17, training lo

In [34]:
torch.save(net.state_dict(), 'model.tar.gz')

In [35]:
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}
acc_sum = 0
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, labels) 
        validation_loss += loss.item()
        _, predictions = torch.max(outputs, 1)
        for label, prediction in zip(labels, predictions):
          if label == prediction:
              correct_pred[classes[label]] += 1
          total_pred[classes[label]] += 1

for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    acc_sum += accuracy

    print("Accuracy for class {:5s} is: {:.1f} %".format(classname, 
                                                   accuracy))

Accuracy for class acoustic is: 39.2 %
Accuracy for class antenna is: 52.3 %
Accuracy for class bacteria is: 67.6 %
Accuracy for class battery is: 53.3 %
Accuracy for class bean  is: 59.4 %
Accuracy for class beetle is: 72.4 %
Accuracy for class bicycle is: 77.0 %
Accuracy for class birch is: 51.6 %
Accuracy for class bird  is: 40.2 %
Accuracy for class bomb  is: 58.8 %
Accuracy for class bread is: 41.3 %
Accuracy for class bridge is: 55.0 %
Accuracy for class camera is: 64.6 %
Accuracy for class carbon is: 47.1 %
Accuracy for class cat   is: 51.4 %
Accuracy for class corn  is: 48.6 %
Accuracy for class crab  is: 37.6 %
Accuracy for class crocodilian is: 52.2 %
Accuracy for class echinoderm is: 51.8 %
Accuracy for class egg   is: 45.2 %
Accuracy for class elephant is: 60.1 %
Accuracy for class fish  is: 62.1 %
Accuracy for class flower is: 90.9 %
Accuracy for class frog  is: 37.4 %
Accuracy for class fungus is: 67.0 %
Accuracy for class gauge is: 55.5 %
Accuracy for class hammer is: 73

In [36]:
acc_average_all_classes = acc_sum / len(classes)
acc_average_all_classes

56.7309749538385