## CNN

#### Rule
Output image size is ..*.. with num_channels = num of filters

``
(W-F+2P)/S + 1
``

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

In [14]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper_parameters
num_epochs = 4
batch_size = 4
learning_rate = 0.001

# Dataset has PILImage images of range [0,1]
# We transform them to Tensors of normalized range [-1,1]
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

train_dataset=torchvision.datasets.CIFAR10(root='./data',train=True,transform=transform,download=True)
test_dataset=torchvision.datasets.CIFAR10(root='./data',train=False,transform=transform)


train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)

classes= ('plane','car','bird','cat','deer','dog','frog','horse','ship','truck')


Files already downloaded and verified


In [15]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet,self).__init__()
        # Conv Layers for features extractions
        # in_channel has 3 channels
        self.conv1=nn.Conv2d(in_channels=3,out_channels=6,kernel_size=5)
        self.pool=nn.MaxPool2d(kernel_size=2,stride=2)
        self.conv2=nn.Conv2d(in_channels=6,out_channels=16,kernel_size=5)

        # FC layers for Classification
        # Flatten the image to be the input for the fc layer
        self.fc1=nn.Linear(in_features=16*5*5,out_features=120) 
        self.fc2=nn.Linear(in_features=120,out_features=84) 
        self.fc3=nn.Linear(in_features=84,out_features=10) #out_features are 10 as we have 10 classes 

    def forward(self,x):
        # Note: x = F.relu(x) # Activation Function (It doesn't change the Size)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten
        x = x.view(-1,16*5*5)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

In [16]:

model= ConvNet().to(device)

criterion=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)

n_total_steps=len(train_loader)
for epoch in range(num_epochs):
    for i,(images,labels) in enumerate(train_loader):
        # origin shape: [4,3,32,32] = 4,3,1024
        images=images.to(device)
        labels=labels.to(device)

        # Forward Pass
        outputs = model(images)
        loss=criterion(outputs,labels)

        # BackWard and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1)%100==0:
            print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_total_steps} , loss = {loss.item()} ')
print('Finished Training')


epoch 1/4, step 100/12500 , loss = 2.3117666244506836 
epoch 1/4, step 200/12500 , loss = 2.298588275909424 
epoch 1/4, step 300/12500 , loss = 2.279989004135132 
epoch 1/4, step 400/12500 , loss = 2.3123879432678223 
epoch 1/4, step 500/12500 , loss = 2.3288931846618652 
epoch 1/4, step 600/12500 , loss = 2.2747626304626465 
epoch 1/4, step 700/12500 , loss = 2.3366780281066895 
epoch 1/4, step 800/12500 , loss = 2.299072027206421 
epoch 1/4, step 900/12500 , loss = 2.239468574523926 
epoch 1/4, step 1000/12500 , loss = 2.2557449340820312 
epoch 1/4, step 1100/12500 , loss = 2.3008711338043213 
epoch 1/4, step 1200/12500 , loss = 2.3287734985351562 
epoch 1/4, step 1300/12500 , loss = 2.2942214012145996 
epoch 1/4, step 1400/12500 , loss = 2.355987071990967 
epoch 1/4, step 1500/12500 , loss = 2.2758355140686035 
epoch 1/4, step 1600/12500 , loss = 2.3334975242614746 
epoch 1/4, step 1700/12500 , loss = 2.3390257358551025 
epoch 1/4, step 1800/12500 , loss = 2.3376643657684326 
epoch 

In [17]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct= [0 for i in range(10)]
    n_class_samples= [0 for i in range(10)]

    for images,labels in test_loader:
        images=images.to(device)
        labels=labels.to(device)

        # Forward Pass
        outputs = model(images)

        # Max returns val,index
        _,prediction=torch.max(outputs,dim=1)
        n_samples+=labels.size(0)
        n_correct+=(prediction==labels).sum().item()

        for i in range(batch_size):
            label=labels[i]
            pred=prediction[i]
            if(label==pred):
                n_class_correct[label]+=1
            n_class_samples[label]+=1

    acc = 100.0*n_correct/n_samples
    print(f"Accuracy of the network: {acc} %")

    for i in range(10):
        acc = 100.0*n_class_correct[i]/n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc} %')

Accuracy of the network: 44.37 %
Accuracy of plane: 50.1 %
Accuracy of car: 74.1 %
Accuracy of bird: 11.4 %
Accuracy of cat: 36.3 %
Accuracy of deer: 20.7 %
Accuracy of dog: 45.9 %
Accuracy of frog: 52.2 %
Accuracy of horse: 50.6 %
Accuracy of ship: 58.4 %
Accuracy of truck: 44.0 %


In [18]:
# Save on GPU
device=torch.device("cuda")
torch.save(model.state_dict(),'./Saved_models/OCR.pth')
print("Saved OCR Model :D")

Saved OCR Model :D
