In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# !pip3 install torch
# !pip3 install torchvision

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch import optim
from torch.autograd import Variable

In [0]:
base_url = '/content/drive/My Drive/CVIT_WORKSHOP_2020/day10'

# **CNN & Pytorch**

In [0]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                              ])

In [0]:
trainset = datasets.CIFAR10('./data', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=50, shuffle=True)
# trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

Files already downloaded and verified


In [0]:
testset = datasets.CIFAR10('./data', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=50, shuffle=True)

# testset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)
# testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

Files already downloaded and verified


In [0]:
print(testset[1][0].size())
for data, labels in testloader:
  print(data.shape)
  print(labels.shape)
  break

torch.Size([3, 32, 32])
torch.Size([50, 3, 32, 32])
torch.Size([50])


In [0]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
     
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        
        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        
        # Fully connected 1 (readout)
        self.fc1 = nn.Linear(32 * 8 * 8, 10) 

        self.softmax = torch.nn.Softmax(dim = 1)
    
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x)
        out = self.relu1(out)
        
        # Max pool 1
        out = self.maxpool1(out)
        
        # Convolution 2 
        out = self.cnn2(out)
        out = self.relu2(out)
        
        # Max pool 2 
        out = self.maxpool2(out)
        
        # Resize
        # Original size: (100, 32, 8, 8)
        # out.size(0): 100
        # New out size: (100, 32*8*8)
        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)
        out = self.softmax(out)
        return out

In [0]:
# Create an object of class MLP
model = CNNModel()

# Define the loss
criterion = torch.nn.CrossEntropyLoss()

# Optimizers require the parameters to optimize and a learning rate
optimizer = optim.SGD(model.parameters(), lr = 0.05)

In [0]:
epochs = 100
for e in range(epochs):
    print("Epoch ", (e + 1))
    running_loss = 0
    for images, labels in trainloader:
        optimizer.zero_grad()
        logits = model.forward(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    else:
        accuracy = 0
        with torch.no_grad():
            for images, labels in testloader:
                logit = model(images)
                ps = logit#torch.exp(logit)
                top_p, top_class = ps.topk(1, dim = 1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
        print("Loss on training set: ", running_loss)
        print("Accuracy on test set: ", end = " ")
        print((accuracy / len(testloader)).data.numpy())

## **Task**

1. Choose any dataset available online for multi class classification(minimum 3 classes) and try and train a CNN on that dataset.
2. You should try and write your own dataloader function and don't use pytorch's inbuilt one.
3. Experiment with the number of layers, kernel size, strides, padding.
4. Get the best accuracy you can get.
5. Don't use the datasets which are inbuilt in pytorch.

In [0]:
import os
import random
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
from matplotlib import pyplot as plt
import plotly
import plotly.express as px
import plotly.graph_objects as go
from torch.utils.data import TensorDataset, DataLoader

In [0]:
url = base_url + '/8fruit_dataset'
transform = transforms.Compose([transforms.ToTensor()])
batch_size = 50
train_ratio, test_ratio = 0.8, 0.1
lr = 0.05
epochs = 100

In [0]:
def load_data():
  train_x, train_y = [], []
  valid_x, valid_y = [], []
  test_x, test_y = [], []
  labels = []

  def shuffle_list(xx, yy):
    data = list(zip(xx, yy))
    random.shuffle(data)
    xx, yy = zip(*data)
    return xx, yy

  for label, folder in enumerate(os.listdir(url)):
      labels.append(folder)
      folder_path = os.path.join(url, folder)
      X, Y = [], []
      for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        img = cv2.imread(file_path, cv2.COLOR_BGR2RGB)
        X.append(img)
        Y.append(label)
      X, Y = shuffle_list(X, Y)
      train_split = int(train_ratio * len(X))
      test_split = int(test_ratio * len(X)) + train_split
      train_x.extend(X[:train_split]) 
      train_y.extend(Y[:train_split])
      test_x.extend(X[train_split:test_split]) 
      test_y.extend(Y[train_split:test_split]) 
      valid_x.extend(X[test_split:]) 
      valid_y.extend(Y[test_split:]) 

  labels = np.array(labels)

  train_x = torch.Tensor(train_x)
  train_y = torch.Tensor(train_y)
  trainset = TensorDataset(train_x, train_y)
  trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

  valid_x = torch.Tensor(valid_x)
  valid_y = torch.Tensor(valid_y)
  validset = TensorDataset(valid_x, valid_y)
  validloader = DataLoader(validset, batch_size=batch_size, shuffle=True)

  test_x = torch.Tensor(test_x)
  test_y = torch.Tensor(test_y)
  testset = TensorDataset(test_x, test_y)
  testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)

  return labels, trainloader, validloader, testloader

labels, trainloader, validloader, testloader = load_data()

In [0]:
class CNNModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.cnn1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2)
    self.maxpool1 = nn.MaxPool2d(kernel_size=2)

    self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
    self.maxpool2 = nn.MaxPool2d(kernel_size=2)

    self.cnn3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=0)
    self.maxpool3 = nn.MaxPool2d(kernel_size=3)

    self.fc1 = nn.Linear(32 * 7 * 7, 8)
    self.softmax = nn.Softmax(dim=1)
    self.relu = nn.ReLU()

  def forward(self, x):
    x = self.cnn1(x)
    x = self.relu(x)
    x = self.maxpool1(x)
    x = self.cnn2(x)
    x = self.relu(x)
    x = self.maxpool2(x)
    x = self.cnn3(x)
    x = self.relu(x)
    x = self.maxpool3(x)
    x = x.view(x.shape[0], -1)
    x = self.fc1(x)
    x = self.softmax(x)
    return x

In [0]:
model = CNNModel()

criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=lr)

In [79]:
for i,l in trainloader:
  i = i.permute(0, 3, 1, 2)
  print(i.shape)
  print(l.shape)
  break

torch.Size([50, 3, 100, 100])
torch.Size([50])


In [80]:
for e in range(epochs):
  print('Epoch: ' + str(e+1))
  running_loss = 0
  for image, label in trainloader:
    optimizer.zero_grad()
    image = image.permute(0, 3, 1, 2)
    label = label.type(torch.LongTensor)
    logits = model(image)
    loss = criterion(logits, label)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
  else:
    accuracy = 0
    with torch.no_grad():
      for image, label in validloader:
        image = image.permute(0, 3, 1, 2)
        label = label.type(torch.LongTensor)
        logit = model(image)
        ps = logit
        top_p, top_class = ps.topk(1, dim=1)
        equals = top_class == label.view(*top_class.shape)
        accuracy += torch.mean(equals.type(torch.FloatTensor))
    print("Loss on training set: ", running_loss)
    print("Accuracy on validation set: ", end = " ")
    print((accuracy / len(validloader)).data.numpy())

Epoch: 1
Loss on training set:  69.82560873031616
Accuracy on validation set:  0.14533332
Epoch: 2
Loss on training set:  69.73945927619934
Accuracy on validation set:  0.164
Epoch: 3
Loss on training set:  69.73945927619934
Accuracy on validation set:  0.14533333
Epoch: 4
Loss on training set:  69.78660130500793
Accuracy on validation set:  0.164
Epoch: 5
Loss on training set:  69.73945903778076
Accuracy on validation set:  0.14533333
Epoch: 6
Loss on training set:  69.72374439239502
Accuracy on validation set:  0.164
Epoch: 7
Loss on training set:  69.77088785171509
Accuracy on validation set:  0.15466666
Epoch: 8
Loss on training set:  69.69231581687927
Accuracy on validation set:  0.136
Epoch: 9
Loss on training set:  69.75517296791077
Accuracy on validation set:  0.164
Epoch: 10
Loss on training set:  69.73945879936218
Accuracy on validation set:  0.14533332
Epoch: 11
Loss on training set:  69.75517344474792
Accuracy on validation set:  0.14533333
Epoch: 12
Loss on training set:  

In [82]:
accuracy = 0
with torch.no_grad():
  for image, label in testloader:
    image = image.permute(0, 3, 1, 2)
    label = label.type(torch.LongTensor)
    logit = model(image)
    ps = logit
    top_p, top_class = ps.topk(1, dim=1)
    equals = top_class == label.view(*top_class.shape)
    accuracy += torch.mean(equals.type(torch.FloatTensor))
print("Accuracy on test set: ", end = " ")
print((accuracy / len(testloader)).data.numpy())

Accuracy on test set:  0.16061224
