<a href="https://colab.research.google.com/github/Suraj-Kaple/Simple-CNN-classifier-pytorch/blob/main/cnn_scratch_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# mount google drive to access folders and files present on drive
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
%cd /content/drive/MyDrive/Colab Notebooks/1_Intel Image Classification

/content/drive/MyDrive/Colab Notebooks/1_Intel Image Classification


In [3]:
# import required libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [4]:
# check for device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
# Transforms
transformer = transforms.Compose([
    transforms.ToTensor(),  # convert a numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
    transforms.Resize((150,150)), # resize the image to size (150,150) ~ (h,w) 
    transforms.RandomHorizontalFlip(),  # flip the image horizontally with probability 0.5
    transforms.Normalize([0.5,0.5,0.5],  # [0.0,1.0] to [-1.0,-1.0], formula (input-mean)/std 
                         [0.5,0.5,0.5])
])

In [6]:
# Dataloader (reading data and feeding data to the model for training in batches)
# Batches are required to avoid memory overload

train_path = '/content/drive/MyDrive/Colab Notebooks/1_Intel Image Classification/scene_detection/seg_train/seg_train'
test_path = '/content/drive/MyDrive/Colab Notebooks/1_Intel Image Classification/scene_detection/seg_test/seg_test'

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=256, shuffle=True
)
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=256, shuffle=True
)

In [7]:
# Categories (output classes)
root = pathlib.Path(train_path)
classes = sorted([j.name for j in root.iterdir()])  # iterate over the files in this directory and create a classes list 
print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [8]:
# CNN class
class ConvNet(nn.Module):
  def __init__(self,num_classes=6):
    super(ConvNet,self).__init__()    # initialize the parent class to use its properties

    # Output size afetr convolution filter
    # ((w-f+2P)/s) + 1

    # Input shape = (256,3,150,150)

    self.conv1 = nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
    # shape = (256,12,150,150)
    self.bn1 = nn.BatchNorm2d(num_features=12)
    # shape = (256,12,150,150)
    self.relu1 = nn.ReLU()
    # shape = (256,12,150,150)
    self.pool = nn.MaxPool2d(kernel_size=2)
    # shape = (256,12,75,75)

    self.conv2 = nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
    # shape = (256,20,75,75)
    self.relu2 = nn.ReLU()
    # shape = (256,20,75,75)

    self.conv3 = nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
    # shape = (256,32,75,75)
    self.bn3 = nn.BatchNorm2d(num_features=32)
    # shape = (256,32,75,75)
    self.relu3 = nn.ReLU()
    # shape = (256,32,75,75)

    self.fc = nn.Linear(in_features=32*75*75,out_features=num_classes)

  # Feed forward function
  def forward(self,input):
    output = self.conv1(input)
    output = self.bn1(output)
    output = self.relu1(output)

    output = self.pool(output)

    output = self.conv2(output)
    output = self.relu2(output)

    output = self.conv3(output)
    output = self.bn3(output)
    output = self.relu3(output)

    # Above output will be in matrix form with shape (256,32,75,75)
    output = output.view(-1,32*75*75)

    output = self.fc(output)

    return output

In [9]:
model = ConvNet(num_classes=len(classes)).to(device)  # creating an instance of out CNN
print(model)

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)


In [10]:
# Optimizer and loss function
optimizer = Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()

In [11]:
num_epochs = 10

In [12]:
# calculating the size of the training and testing images
train_count = len(glob.glob(train_path+'/**/*.jpg'))
test_count = len(glob.glob(test_path+'/**/*.jpg'))
print(train_count,test_count)

14034 3000


In [14]:
#Model training and saving best model

best_accuracy=0.0
for epoch in range(num_epochs):
    
  #Evaluation and training on training dataset
  model.train()   # set model in training mode
  train_accuracy=0.0
  train_loss=0.0
  
  for i, (images,labels) in enumerate(train_loader):
    if torch.cuda.is_available():
        images=Variable(images.cuda())
        labels=Variable(labels.cuda())
        
    optimizer.zero_grad()
    
    outputs=model(images)
    loss=loss_function(outputs,labels)
    loss.backward()
    optimizer.step()
    
    train_loss+= loss.cpu().data*images.size(0)
    _,prediction=torch.max(outputs.data,1)
    
    train_accuracy+=int(torch.sum(prediction==labels.data))
    
  train_accuracy=train_accuracy/train_count
  train_loss=train_loss/train_count
  
  # Evaluation on testing dataset
  model.eval()    # set model in testing mode
  test_accuracy=0.0
  for i, (images,labels) in enumerate(test_loader):
    if torch.cuda.is_available():
        images=Variable(images.cuda())
        labels=Variable(labels.cuda())
        
    outputs=model(images)
    _,prediction=torch.max(outputs.data,1)
    test_accuracy+=int(torch.sum(prediction==labels.data))
  
  test_accuracy=test_accuracy/test_count
  
  print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
  
  #Save the best model
  if test_accuracy>best_accuracy:
    torch.save(model.state_dict(),'best_checkpoint.model')
    best_accuracy=test_accuracy

Epoch: 0 Train Loss: tensor(9.7150) Train Accuracy: 0.5360552942853071 Test Accuracy: 0.5776666666666667
Epoch: 1 Train Loss: tensor(1.3424) Train Accuracy: 0.7092774690038478 Test Accuracy: 0.593
Epoch: 2 Train Loss: tensor(0.9612) Train Accuracy: 0.7731224169873165 Test Accuracy: 0.7196666666666667
Epoch: 3 Train Loss: tensor(0.6583) Train Accuracy: 0.8339033775117571 Test Accuracy: 0.7143333333333334
Epoch: 4 Train Loss: tensor(0.6189) Train Accuracy: 0.8452330055579308 Test Accuracy: 0.6423333333333333
Epoch: 5 Train Loss: tensor(0.4705) Train Accuracy: 0.8746615362690608 Test Accuracy: 0.6716666666666666
Epoch: 6 Train Loss: tensor(0.3632) Train Accuracy: 0.904232578024797 Test Accuracy: 0.7193333333333334
Epoch: 7 Train Loss: tensor(0.2728) Train Accuracy: 0.9249679350149637 Test Accuracy: 0.719
Epoch: 8 Train Loss: tensor(0.1739) Train Accuracy: 0.9494085791648853 Test Accuracy: 0.7403333333333333
Epoch: 9 Train Loss: tensor(0.1191) Train Accuracy: 0.9665811600399031 Test Accura