In [1]:
import torch 
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import math

import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np

import matplotlib.pyplot as plt 
torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x28495d8c7c0>

In [3]:
import tarfile

tar= tarfile.open('D:/Dev Projects/AI_Projects/ocr_recognition_model/EnglishFnt.tgz')
tar.extractall('./EnglishFnt')
tar.close()

In [2]:
#Applying Transforms

dataset= torchvision.datasets.ImageFolder(
    root= './EnglishFnt/English/Fnt',
    transform= transforms.Compose(
        [
        transforms.Resize((48,48)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))])
)

In [3]:
#create fxn to split dataset

def split_data(dts, batch_size, test_split=0.3):
    shuffle_dataset= True
    random_seed= 42
    dataset_size=len(dts)
    
    indices= list(range(dataset_size))
    split=int(np.floor(test_split*dataset_size))
    
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
        
    train_indices, test_indices=indices[split:], indices[:split]
    
    test_size=len(test_indices)
    indices= list(range(test_size))
    split=int(np.floor(0.5*test_size))
    
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    val_indices, test_indices= indices[split:], indices[:split]
    
    
    

    #data samplers and loaders
    train_sampler=torch.utils.data.SubsetRandomSampler(train_indices)
    test_sampler= torch.utils.data.SubsetRandomSampler(test_indices)
    val_sampler= torch.utils.data.SubsetRandomSampler(val_indices)
    
    train_loader=torch.utils.data.DataLoader(dts,batch_size, sampler=train_sampler)
    
    val_loader=torch.utils.data.DataLoader(dts, batch_size, sampler=val_sampler)
    
    test_loader= torch.utils.data.DataLoader(dts,batch_size, sampler=test_sampler)
    
    return train_loader, test_loader, val_loader
    
    
    
    

In [4]:
batch_size=36
train_loader,test_loader,val_loader= split_data(dataset, batch_size,test_split=0.3)

In [5]:
#Definint the neural network

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1=nn.Conv2d(3,16,3)
        self.conv2=nn.Conv2d(16,32,3)
        self.conv3=nn.Conv2d(32,64,3)
        self.fc1=nn.Linear(64*9*9,62)
        
        self.max_pool=nn.MaxPool2d(2,2, ceil_mode=True)
        self.dropout= nn.Dropout(0.2)
        
        self.conv_bn1=nn.BatchNorm2d(48,3)
        self.conv_bn2= nn.BatchNorm2d(16)
        self.conv_bn3= nn.BatchNorm2d(32)
        self.conv_bn4= nn.BatchNorm2d(64)
    
    def forward(self, x):
        x=F.relu(self.conv1(x))
        x=self.max_pool(x)
        x=self.conv_bn2(x)
        
        x=F.relu(self.conv2(x))
        
        x=self.max_pool(x)
        x=self.conv_bn3(x)
        
        
        x=F.relu(self.conv3(x))
        x=self.conv_bn4(x)
        
        x=x.view(-1,64*9*9)
        
        x=self.dropout(x)
        x=self.fc1(x)
        return x
    
        

In [6]:
#One Hot encoding

def one_hot_encode(lables, pred_size):
    encoded=torch.zeros(len(lables), pred_size)
    y=0
    for x in lables:
        encoded[y][x]=1
        y+=1
    return encoded

In [7]:
#Defining the loss and optimiser

class LossFxn(torch.autograd.Function):
    @staticmethod
    def forward(ctx, pred, lables):
        y=one_hot_encode(lables, len(pred[0]))
        y=y.cpu()
        
        ctx.save_for_backward(y, pred)
        loss=-y*torch.log(pred)
        
        loss=loss.sum()/len(lables)
        
        return loss
    
    @staticmethod
    
    def backward(ctx,grad_output):
        y, pred=ctx.saved_tensors
        grad_input=(-y/pred)-y
        grad_input= grad_input/len(pred)
        
        return grad_input, grad_output

In [8]:
class loss_cell(torch.nn.Module):
    def __init__(self):
        super(loss_cell, self).__init__()
        
    def forward(self, pred, lables):
        y=one_hot_encode(lables, len(pred[0]))
        y=y.cpu()
        
        loss=-y*torch.log(pred)
        loss=loss.sum()/len(lables)
        
        return loss
        

In [9]:
neural_net=CNN()

use_cuda=True

if use_cuda and torch.cuda.is_available():
    neural_net.cuda()
    optimiser= optim.SGD(neural_net.parameters(), lr=0.001, momentum=0.9)
    
    epoch=0
    max_epoch=3
    end=False
    myloss=loss_cell()
    
    while epoch< max_epoch and not end:
        epoch+=1
        total_loss=0
        total_correct=0
        total_val=0
        total_train=0
        
        for dataset in (train_loader):
            images, lables=dataset
            if use_cuda and torch.cuda.is_available():
                images=images.cuda()
                lables=lables.cuda()
            pred=neural_net(images)
            pred=F.softmax(pred)
            loss=myloss(pred, lables)
            
            total_loss+=loss.item()
            total_train+=len(pred)
            
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            
            total_correct +=pred.argmax(dim=1).eq(lables).sum()
            train_acc= (total_correct*1.0)/total_train
            
            print("Epoch: ", epoch, "Training accu:", train_acc, "Train Loss:", total_loss*1.0/len(train_loader))
            
            if total_correct*1.0/total_train>=0.98:
                end=True
            total_loss=0
            val_total_correct=0
            
            for batch in (val_loader):
                images, lables=batch
                
                if use_cuda  and torch.cuda.is_available():
                    images=images.cuda()
                    lables=lables.cuda()
                pred=neural_net(images)
                
                loss=F.cross_entropy(pred, lables)
                total_loss+=loss.item()
                total_val+=len(pred)
                
                val_total_correct+=pred.argmax(dim=1).eq(lables).sum()
                val_acc= (val_total_correct*1.0)/total_val
                
                print("Epoch: ", epoch,"Val Acc: ", val_acc,"Val Loss:", total_loss*1.0/len(val_loader))
                
            torch.cuda.empty_cache()
else:
    neural_net.cpu()
    optimiser= optim.SGD(neural_net.parameters(), lr=0.001, momentum=0.9)
    
    epoch=0
    max_epoch=10
    end=False
    myloss=loss_cell()
    
    while epoch< max_epoch and not end:
        epoch+=1
        total_loss=0
        total_correct=0
        total_val=0
        total_train=0
        
        for dataset in (train_loader):
            images, lables=dataset
         
            images=images.cpu()
            lables=lables.cpu()
            pred=neural_net(images)
            pred=F.softmax(pred)
            loss=myloss(pred, lables)
            
            total_loss+=loss.item()
            total_train+=len(pred)
            
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            
            total_correct +=pred.argmax(dim=1).eq(lables).sum()
            
            train_acc= (total_correct*1.0)/total_train
            
            print("Epoch: ", epoch, "Training accu:", train_acc, "Train Loss:", total_loss*1.0/len(train_loader))
            
            if total_correct*1.0/total_train>=0.98:
                end=True
            total_loss=0
            val_total_correct=0
            
            for batch in (val_loader):
                images, lables=batch
               
                images=images.cpu()
                lables=lables.cpu()
                pred=neural_net(images)
                
                loss=F.cross_entropy(pred, lables)
                total_loss+=loss.item()
                total_val+=len(pred)
                
                val_total_correct+=pred.argmax(dim=1).eq(lables).sum()
                val_acc= (val_total_correct*1.0)/total_val
                
                print("Epoch: ", epoch,"Val Acc: ", val_acc,"Val Loss:", total_loss*1.0/len(val_loader))
                
        
    
            

  pred=F.softmax(pred)


Epoch:  1 Training accu: tensor(0.) Train Loss: 0.003452882572096221
Epoch:  1 Val Acc:  tensor(0.) Val Loss: 0.0165542794724381
Epoch:  1 Val Acc:  tensor(0.0139) Val Loss: 0.03333368772789553
Epoch:  1 Val Acc:  tensor(0.0093) Val Loss: 0.04961327998810395
Epoch:  1 Val Acc:  tensor(0.0139) Val Loss: 0.06586002487646762
Epoch:  1 Val Acc:  tensor(0.0111) Val Loss: 0.0821609007541671
Epoch:  1 Val Acc:  tensor(0.0139) Val Loss: 0.09756580867694811
Epoch:  1 Val Acc:  tensor(0.0159) Val Loss: 0.11359157852346906
Epoch:  1 Val Acc:  tensor(0.0174) Val Loss: 0.12939241721149633
Epoch:  1 Val Acc:  tensor(0.0185) Val Loss: 0.1458010329039831
Epoch:  1 Val Acc:  tensor(0.0194) Val Loss: 0.16197615793902612
Epoch:  1 Val Acc:  tensor(0.0227) Val Loss: 0.17826242954558746
Epoch:  1 Val Acc:  tensor(0.0255) Val Loss: 0.1944825531411987
Epoch:  1 Val Acc:  tensor(0.0256) Val Loss: 0.21078388953843497
Epoch:  1 Val Acc:  tensor(0.0238) Val Loss: 0.22700063538641985
Epoch:  1 Val Acc:  tensor(0.

KeyboardInterrupt: 

In [None]:
test_total_correct=0
total_test=0

x=0

for batch  in (test_loader):
    images, lables=batch
    
    if use_cuda and torch.cuda.is_available():
        images=images.cuda()
        lables=lables.cuda()
    else:
        images=images.cpu()
        lables=lables.cpu()
    pred= neural_net(images)
    total_test+=len(pred)
    
    x+=1
    
    test_total_correct+=pred.argmax(dim=1).eq(lables).sum()
    
    print("Test Acc:", test_total_correct*1.0/total_test)
    
  

Test Acc: tensor(0.0278)
Test Acc: tensor(0.0139)
Test Acc: tensor(0.0278)
Test Acc: tensor(0.0347)
Test Acc: tensor(0.0333)
Test Acc: tensor(0.0324)
Test Acc: tensor(0.0278)
Test Acc: tensor(0.0278)
Test Acc: tensor(0.0278)
Test Acc: tensor(0.0250)
Test Acc: tensor(0.0253)
Test Acc: tensor(0.0255)
Test Acc: tensor(0.0235)
Test Acc: tensor(0.0218)
Test Acc: tensor(0.0222)
Test Acc: tensor(0.0208)
Test Acc: tensor(0.0229)
Test Acc: tensor(0.0247)
Test Acc: tensor(0.0234)
Test Acc: tensor(0.0236)
Test Acc: tensor(0.0238)
Test Acc: tensor(0.0253)
Test Acc: tensor(0.0266)
Test Acc: tensor(0.0266)
Test Acc: tensor(0.0256)
Test Acc: tensor(0.0246)
Test Acc: tensor(0.0257)
Test Acc: tensor(0.0248)
Test Acc: tensor(0.0249)
Test Acc: tensor(0.0259)
Test Acc: tensor(0.0260)
Test Acc: tensor(0.0252)
Test Acc: tensor(0.0253)
Test Acc: tensor(0.0245)
Test Acc: tensor(0.0246)
Test Acc: tensor(0.0247)
Test Acc: tensor(0.0248)
Test Acc: tensor(0.0256)
Test Acc: tensor(0.0278)
Test Acc: tensor(0.0278)


KeyboardInterrupt: 

In [10]:


path="model.pth"

torch.save(neural_net.state_dict(), path)

In [11]:
local_machine=True

def load_model(path):
    if local_machine:
        checkpoint= torch.load(path, map_location='cpu')
    else:
        checkpoint= torch.load(path)
    model=neural_net
    
    for params in model.parameters():
        params.requires_grad= False
    
    return model

In [12]:
model= load_model("model.pth")

In [13]:
from PIL import Image

def process_image(image):
    img_transforms=transforms.Compose(
        [
        transforms.Resize((48,48)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))])
    image=img_transforms(Image.open(image))
    
    return image


In [14]:
def display_img(image, ax=None, title=None):
    if ax is None:
        fig,ax=plt.subplots()
        
    image=image.numpy().transpose((1,2,0))
        
    mean= np.array([0.5])
    std=np.array([0.5])
        
    image= std*image+mean
        
    ax.display_img(image)
    
    return ax

In [15]:
img= process_image('./EnglishFnt/English/Fnt/Sample001/img001-00007.png')
display_img(img)

In [None]:
def transcribe(image_path, neural_net):
    image_data= process_image(image_path)
    model=load_model(path)
    model_p= model.eval()
    inputs= Variable(image_data.unsqueeze(0))
    output=model_p(inputs)
    
    return output
     
    