<a href="https://colab.research.google.com/github/MisterEkole/ocr_recognition-model/blob/main/ocr_recognition_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch 
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import math

import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np

import matplotlib.pyplot as plt 
# Let printed tensors use up to 120 characters per line before wrapping.
torch.set_printoptions(linewidth=120)
# Explicitly switch autograd on; being the last expression of the cell, its repr is echoed as output.
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fb3891cdf50>

In [None]:
import tarfile

# Unpack the font-image archive stored on Drive into ./EnglishFnt.
# The context manager closes the archive even when extraction raises part-way through.
# NOTE(review): extractall() trusts the member paths stored in the archive; only run this on a
# trusted file.
with tarfile.open('/content/drive/MyDrive/Colab Notebooks/ocr_recognition_model/EnglishFnt.tgz') as tar:
    tar.extractall('./EnglishFnt')

In [None]:
#Applying Transforms

# Load the character images as an ImageFolder dataset (one sub-directory per class).
# No RandomHorizontalFlip here: mirroring a glyph changes which character it shows
# (e.g. "b" vs "d"), and because this single dataset object also feeds the validation and
# test loaders, a random transform would make evaluation non-deterministic as well.
dataset = torchvision.datasets.ImageFolder(
    root='./EnglishFnt/English/Fnt',
    transform=transforms.Compose([
        transforms.Resize((48, 48)),           # the CNN's linear layer is sized for 48x48 inputs
        transforms.ToTensor(),                 # image -> float tensor in [0, 1]
        transforms.Normalize((0.5,), (0.5,)),  # (x - 0.5) / 0.5, i.e. rescale to [-1, 1]
    ]),
)

In [None]:
# Create a function to split the dataset into train / test / validation loaders

def split_data(dts, batch_size, test_split=0.3):
    """Split ``dts`` into disjoint train, test and validation data loaders.

    The dataset indices are shuffled once with a fixed seed. The first
    ``floor(test_split * len(dts))`` shuffled indices are held out and the rest
    are used for training. The held-out indices are then cut in half: the first
    half becomes the test split and the second half the validation split.

    The held-out halves are taken from the held-out indices themselves, so no
    sample can appear in more than one loader.

    Args:
        dts: A map-style dataset (must support ``len`` and integer indexing).
        batch_size: Batch size used by all three loaders.
        test_split: Fraction of the dataset held out from training (shared
            between test and validation). Must lie in ``[0, 1]``.

    Returns:
        Tuple ``(train_loader, test_loader, val_loader)``.

    Raises:
        ValueError: If ``test_split`` is outside ``[0, 1]``.
    """
    if not 0.0 <= test_split <= 1.0:
        raise ValueError("test_split must be between 0 and 1")

    random_seed = 42
    dataset_size = len(dts)

    # A private generator keeps the split reproducible without reseeding the global NumPy RNG.
    rng = np.random.RandomState(random_seed)
    indices = list(range(dataset_size))
    rng.shuffle(indices)

    split = int(np.floor(test_split * dataset_size))
    train_indices, holdout_indices = indices[split:], indices[:split]

    # The held-out indices are already in random order, so slicing them in half is enough.
    half = int(np.floor(0.5 * len(holdout_indices)))
    test_indices, val_indices = holdout_indices[:half], holdout_indices[half:]

    # Data samplers and loaders
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    test_sampler = torch.utils.data.SubsetRandomSampler(test_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(dts, batch_size, sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(dts, batch_size, sampler=val_sampler)
    test_loader = torch.utils.data.DataLoader(dts, batch_size, sampler=test_sampler)

    return train_loader, test_loader, val_loader
    
    
    
    

In [None]:
# Mini-batch size shared by the three loaders.
batch_size = 36

# Hold out 30 % of the samples from training; split_data halves that part into test and validation.
train_loader, test_loader, val_loader = split_data(
    dts=dataset,
    batch_size=batch_size,
    test_split=0.3,
)

In [None]:
#Defining the neural network

class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 48x48 images.

    Layout: three 3x3 convolutions (16, 32, 64 channels), each followed by ReLU;
    the first two are followed by 2x2 max-pooling (ceil mode) and every stage by
    batch normalisation. The flattened 64x9x9 feature map goes through dropout
    and one linear layer.

    Args:
        num_classes: Number of output classes (size of the returned logit
            vector). Defaults to 62, the value previously hard-coded.
    """

    def __init__(self, num_classes=62):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)
        # Spatial size for a 48x48 input: 48 -> 46 (conv) -> 23 (pool) -> 21 (conv)
        # -> 11 (pool, ceil) -> 9 (conv), hence 64 * 9 * 9 input features.
        self.fc1 = nn.Linear(64 * 9 * 9, num_classes)

        self.max_pool = nn.MaxPool2d(2, 2, ceil_mode=True)
        self.dropout = nn.Dropout(0.2)

        # NOTE(review): conv_bn1 is never used in forward(), and its second positional
        # argument is BatchNorm2d's `eps` (3 here), which looks unintended. It is kept
        # only so that the module's state_dict keys stay the same as before.
        self.conv_bn1 = nn.BatchNorm2d(48, 3)
        self.conv_bn2 = nn.BatchNorm2d(16)
        self.conv_bn3 = nn.BatchNorm2d(32)
        self.conv_bn4 = nn.BatchNorm2d(64)

    def forward(self, x):
        """Return raw class scores (no softmax) of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, 48, 48)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                64x9x9 feature map that ``fc1`` expects.
        """
        x = F.relu(self.conv1(x))
        x = self.max_pool(x)
        x = self.conv_bn2(x)

        x = F.relu(self.conv2(x))
        x = self.max_pool(x)
        x = self.conv_bn3(x)

        x = F.relu(self.conv3(x))
        x = self.conv_bn4(x)

        # Flatten per sample. Unlike view(-1, 64*9*9), this can never silently fold a
        # wrong-sized feature map into a different batch size; fc1 raises instead.
        x = torch.flatten(x, 1)

        x = self.dropout(x)
        x = self.fc1(x)
        return x
    
        

In [None]:
#One Hot encoding

def one_hot_encode(lables, pred_size):
    """Return a one-hot matrix for a sequence of integer class labels.

    Args:
        lables: 1-D sequence or tensor of class indices (any device).
        pred_size: Number of classes, i.e. the width of the encoding.

    Returns:
        A float32 CPU tensor of shape ``(len(lables), pred_size)`` containing a 1
        in column ``lables[i]`` of row ``i`` and 0 elsewhere.
    """
    # Bring the labels to the CPU once, instead of indexing them one by one in a Python loop.
    index = torch.as_tensor(lables).to(device="cpu", dtype=torch.long)
    encoded = torch.zeros(len(lables), pred_size)
    # One vectorised assignment sets encoded[i, index[i]] = 1 for every row i.
    encoded[torch.arange(index.shape[0]), index] = 1
    return encoded

In [None]:
#Defining the loss as a custom autograd Function

class LossFxn(torch.autograd.Function):
    """Mean negative log-likelihood of the target class, with a hand-written backward.

    ``forward(pred, lables)`` expects ``pred`` to hold per-class probabilities of
    shape ``(batch, classes)`` and ``lables`` the integer class index of each row.
    It returns ``-sum_i log(pred[i, lables[i]]) / batch``, which equals the one-hot
    form ``-(y * log(pred)).sum() / batch`` but never evaluates ``0 * log(0)``.
    Everything is computed on ``pred``'s device, so it works on CPU and GPU.
    """

    @staticmethod
    def forward(ctx, pred, lables):
        # Column vector of target indices on the same device as the predictions.
        target = torch.as_tensor(lables, device=pred.device).long().view(-1, 1)
        ctx.save_for_backward(pred, target)

        picked = pred.gather(1, target)  # probability assigned to the true class
        return -torch.log(picked).sum() / target.shape[0]

    @staticmethod
    def backward(ctx, grad_output):
        pred, target = ctx.saved_tensors
        batch = target.shape[0]

        # d loss / d pred[i, c] is -1 / (batch * pred[i, c]) for the target class c
        # and 0 for every other class.
        picked = pred.gather(1, target)
        grad_pred = torch.zeros_like(pred)
        grad_pred.scatter_(1, target, -1.0 / (picked * batch))

        # Chain rule: scale by the incoming gradient. Labels are not differentiable.
        return grad_pred * grad_output, None

In [None]:
class loss_cell(torch.nn.Module):
    """Mean negative log-likelihood of the target class, given class probabilities.

    Equivalent to ``-(one_hot(lables) * log(pred)).sum() / len(lables)``, but it
    reads only the probability of the true class. This keeps the result finite
    when some other class has probability exactly 0 (the one-hot form evaluates
    ``0 * log(0)`` there, which is NaN) and it runs on whatever device ``pred`` is on.
    """

    def __init__(self):
        super(loss_cell, self).__init__()

    def forward(self, pred, lables):
        """Compute the loss.

        Args:
            pred: Tensor of shape ``(batch, classes)`` with per-class probabilities.
            lables: Integer class index for each row of ``pred``.

        Returns:
            Scalar tensor holding the batch-averaged loss.
        """
        # Column vector of target indices on the same device as the predictions.
        target = torch.as_tensor(lables, device=pred.device).long().view(-1, 1)
        picked = pred.gather(1, target)  # probability assigned to the true class
        return -torch.log(picked).sum() / target.shape[0]
        

In [None]:
# Build the network and train it with SGD, validating once after every epoch.
neural_net = CNN()

use_cuda = True

# As before, the optimiser is created and training runs only when CUDA is requested and available.
if use_cuda and torch.cuda.is_available():
    neural_net.cuda()
    optimiser = optim.SGD(neural_net.parameters(), lr=0.001, momentum=0.9)

    epoch = 0
    max_epoch = 10
    end = False  # set once the epoch's training accuracy reaches 98 %
    myloss = loss_cell()

    while epoch < max_epoch and not end:
        epoch += 1

        # ---- training pass -------------------------------------------------
        neural_net.train()  # enable dropout / batch-norm statistic updates
        total_loss = 0
        total_correct = 0
        total_train = 0

        # Unpack directly so the module-level `dataset` object is not overwritten.
        for images, lables in train_loader:
            images = images.cuda()
            lables = lables.cuda()

            # myloss expects probabilities; dim=1 is the class dimension.
            pred = F.softmax(neural_net(images), dim=1)
            loss = myloss(pred, lables)

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()

            total_loss += loss.item()
            total_train += len(pred)
            total_correct += pred.argmax(dim=1).eq(lables).sum().item()

        train_acc = total_correct / max(total_train, 1)
        print("Epoch: ", epoch, "Training accu:", train_acc, "Train Loss:", total_loss / max(len(train_loader), 1))

        if train_acc >= 0.98:
            end = True

        # ---- validation pass (once per epoch) ------------------------------
        neural_net.eval()  # disable dropout, use running batch-norm statistics
        val_loss = 0
        val_total_correct = 0
        total_val = 0  # reset every epoch so the accuracy denominator matches the numerator

        with torch.no_grad():
            for images, lables in val_loader:
                images = images.cuda()
                lables = lables.cuda()

                pred = neural_net(images)
                val_loss += F.cross_entropy(pred, lables).item()
                total_val += len(pred)
                val_total_correct += pred.argmax(dim=1).eq(lables).sum().item()

        val_acc = val_total_correct / max(total_val, 1)
        print("Epoch: ", epoch, "Val Acc: ", val_acc, "Val Loss:", val_loss / max(len(val_loader), 1))

        torch.cuda.empty_cache()




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch:  1 Val Acc:  tensor(0.0064, device='cuda:0') Val Loss: 1.4337273314878514
Epoch:  1 Val Acc:  tensor(0.0064, device='cuda:0') Val Loss: 1.4418248069603634
Epoch:  1 Val Acc:  tensor(0.0064, device='cuda:0') Val Loss: 1.4484431059188263
Epoch:  1 Val Acc:  tensor(0.0065, device='cuda:0') Val Loss: 1.4566385369790371
Epoch:  1 Val Acc:  tensor(0.0065, device='cuda:0') Val Loss: 1.4625175058161803
Epoch:  1 Val Acc:  tensor(0.0066, device='cuda:0') Val Loss: 1.46887199914954
Epoch:  1 Val Acc:  tensor(0.0066, device='cuda:0') Val Loss: 1.4773178757823466
Epoch:  1 Val Acc:  tensor(0.0066, device='cuda:0') Val Loss: 1.485204306392162
Epoch:  1 Val Acc:  tensor(0.0067, device='cuda:0') Val Loss: 1.4935375261669377
Epoch:  1 Val Acc:  tensor(0.0067, device='cuda:0') Val Loss: 1.5017099321568421
Epoch:  1 Val Acc:  tensor(0.0067, device='cuda:0') Val Loss: 1.5095223197465615
Epoch:  1 Val Acc:  tensor(0.0068, device='cuda

KeyboardInterrupt: ignored

In [None]:
# Measure accuracy on the held-out test loader.
test_total_correct = 0
total_test = 0

# Run the batches on whichever device the model's parameters currently live on.
eval_device = next(neural_net.parameters()).device

neural_net.eval()  # disable dropout and use running batch-norm statistics

with torch.no_grad():  # no gradients are needed for evaluation
    for images, lables in test_loader:
        images = images.to(eval_device)
        lables = lables.to(eval_device)

        pred = neural_net(images)
        total_test += len(pred)
        test_total_correct += pred.argmax(dim=1).eq(lables).sum().item()

# Report the accuracy once, over the whole test split (guarding against an empty loader).
print("Test Acc:", test_total_correct / max(total_test, 1))
    
  

Test Acc: tensor(0.6111, device='cuda:0')
Test Acc: tensor(0.5417, device='cuda:0')
Test Acc: tensor(0.5833, device='cuda:0')
Test Acc: tensor(0.6042, device='cuda:0')
Test Acc: tensor(0.6056, device='cuda:0')
Test Acc: tensor(0.5880, device='cuda:0')
Test Acc: tensor(0.5833, device='cuda:0')
Test Acc: tensor(0.5903, device='cuda:0')
Test Acc: tensor(0.6019, device='cuda:0')
Test Acc: tensor(0.6000, device='cuda:0')
Test Acc: tensor(0.5833, device='cuda:0')
Test Acc: tensor(0.5856, device='cuda:0')
Test Acc: tensor(0.5940, device='cuda:0')
Test Acc: tensor(0.5933, device='cuda:0')
Test Acc: tensor(0.5889, device='cuda:0')
Test Acc: tensor(0.5938, device='cuda:0')
Test Acc: tensor(0.5931, device='cuda:0')
Test Acc: tensor(0.5941, device='cuda:0')
Test Acc: tensor(0.5965, device='cuda:0')
Test Acc: tensor(0.5958, device='cuda:0')
Test Acc: tensor(0.5966, device='cuda:0')
Test Acc: tensor(0.5934, device='cuda:0')
Test Acc: tensor(0.5918, device='cuda:0')
Test Acc: tensor(0.5995, device='c

In [None]:
# Destination file for the trained network.
path = "model.pth"

# Serialise the whole module object (not only its state_dict) to `path`.
torch.save(obj=neural_net, f=path)