In [7]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np 
import pandas as pd 
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader
from sklearn import preprocessing 
from torchvision import transforms
import imageio as iio
from PIL import Image


In [8]:
# Writers whose scans are in the dataset; each writer has one sub-directory
# of word images directly under `scan_root`.
names = ['Amin','Farzad','Maziar','Mehrdad','Sina','Soheil','Vahid']
name_file = {name:[] for name in names}

scan_root = '/kaggle/input/arabicpersian-handwritten-cities-for-postal-apps/scan splited/scan splited'

for dirname, _, filenames in os.walk(scan_root):
    if not filenames:
        # Directories without files (e.g. the root itself) contribute nothing.
        continue
    # Derive the writer from the path relative to the root instead of a
    # hard-coded character offset, so the code survives a change of root path.
    name = os.path.relpath(dirname, scan_root)
    # Sorted so that the i-th file of every writer refers to the same word.
    # An unexpected directory still raises KeyError, as before.
    name_file[name].extend(os.path.join(dirname, filename) for filename in sorted(filenames))

# I just want to take a subset of the dataset

In [9]:
# Letters that may appear in a label, stored as a list with one entry per character.
alphabit = list("ابتثجحخدذرزسشصضطظعغفقكلمنهويئ")
# Number of distinct letters (the CTC blank is added separately by the model).
num_output = len(alphabit)


In [10]:
def pad_words(targets, max_len=6):
    """
    Left-pad every encoded word with zeros so all words have the same length.

    Parameters
    ----------
    targets : iterable of 1-D integer sequences
        Encoded words (one sequence of label ids per word).
    max_len : int, default 6
        Length every word is padded to. The default matches the longest word
        used in this notebook.

    Returns
    -------
    list of np.ndarray
        One array of dtype ``np.longlong`` and length ``max_len`` per word,
        with the padding zeros placed in front of the labels.

    Raises
    ------
    ValueError
        If a word is longer than ``max_len``.
    """
    targets_new = []
    for target in targets:
        pad = max_len - len(target)
        if pad < 0:
            # Fail with a readable message instead of NumPy's
            # "negative dimensions are not allowed".
            raise ValueError(
                f"word of length {len(target)} exceeds max_len={max_len}"
            )
        targets_new.append(np.concatenate((np.zeros(pad),target),axis=0).astype(np.longlong))

    return targets_new

In [11]:
# Only these twelve city names are considered.
words = " خيابان ميدان نمين ديلم مهران كاشان برحوار نائين البرز يزد تبريز سيريك"
# Four persons wrote these words, so the list is repeated four times.
# split() without a separator also drops the empty token that the leading
# space would otherwise produce, so no manual deletion is needed.
words = (words * 4).split()


# Writers left out of this subset.
skipped_writers = ('Maziar', 'Mehrdad', 'Sina')

# Image paths of the remaining writers, in `name_file` order.
imgs = []
for name,files in name_file.items():
    if name in skipped_writers:
        continue
    # Keep only the first 12 scans of each writer (one per considered word).
    imgs.extend(files[:12])
    

# Break every word into its characters.
targets = [list(word) for word in words]

# Map each character to an integer id; ids are shifted by one so that 0 stays
# free (pad_words uses 0 as the padding value).
lbl_enc = preprocessing.LabelEncoder()
lbl_enc.fit(alphabit)
targets_enc = pad_words([lbl_enc.transform(chars) + 1 for chars in targets])

# One row per sample: image path and its padded label sequence.
dataset = {'imgs':imgs, "labels":targets_enc}
df = pd.DataFrame(dataset)


# Dataset and DataLoader

In [12]:
class Mydataset(Dataset):
    """
    Dataset of (image, label sequence) pairs backed by a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide an ``'imgs'`` column with image file paths and a
        ``'labels'`` column with the encoded label sequence of each image.
    trans : callable, optional
        Transform applied to the loaded RGB PIL image. When omitted the PIL
        image is returned unchanged.
    """

    def __init__(self,df,trans=None):
        self.df = df
        # Keep the transform on the instance; previously the argument was
        # dropped and __getitem__ silently relied on a global named `trans`.
        self.trans = trans

    def __len__(self):
        return len(self.df)

    def __getitem__(self,idx):
        # Positional access: samplers hand out positions 0..len-1, which only
        # coincide with index labels for a default RangeIndex.
        x = Image.open(self.df['imgs'].iloc[idx]).convert('RGB')
        if self.trans is not None:
            x = self.trans(x)
        # NOTE(review): the labels built in this notebook are already shifted
        # by one before padding, so this second shift moves padding to 1 and
        # the largest label to num_output + 1 - confirm this is intended.
        y = self.df['labels'].iloc[idx] +1
        return (x,y)
    
    

In [13]:
# std = torch.tensor([0.0603, 0.0606, 0.0347])
# mean = torch.tensor([[0.8753, 0.8924, 0.9244]])


In [14]:
# Preprocessing applied to every image: convert to a tensor, resize to
# size (20, 80), then normalise each channel with fixed mean/std values
# (presumably statistics of this dataset - verify).
trans = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(20,80)),
        transforms.Normalize(
            mean=(0.8753,0.8924,0.9244),
            std=(0.0603,0.0606,0.0347),
        ),
    ]
)
all_imgs = []
dataset = Mydataset(df,trans)
dataloader = DataLoader(dataset,batch_size=48)

# Smoke test: show the shape of the label tensor of the first batch only.
for batch in dataloader:
    print(batch[1].shape)
    break

    



torch.Size([48, 6])


# Model

In [15]:
class MyCNN(nn.Module):
    """
    Three stride-2 3x3 convolutions that turn an RGB image into an
    84-channel feature map (3 -> 12 -> 36 -> 84 channels).
    """

    def __init__(self):
        super(MyCNN,self).__init__()
        self.conv1 = nn.Conv2d(3,12,3,2)
        self.conv2 = nn.Conv2d(12,36,3,2)
        self.conv3 = nn.Conv2d(36,84,3,2)

    def forward(self,x):
        """
        Parameters
        ----------
        x : torch.Tensor of shape (batch, 3, height, width)

        Returns
        -------
        torch.Tensor of shape (batch, 84, height', width'), the raw
        (non-activated) output of the last convolution.
        """
        # The activation follows each convolution. Applying ReLU to the input
        # image first, as before, zeroed every negative normalised pixel.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # As before, no activation is applied after the last convolution.
        x = self.conv3(x)
        return x
    
class MyRNN(nn.Module):
    """
    GRU sequence model followed by a two-layer classification head that
    returns per-time-step log-probabilities (the input nn.CTCLoss expects).

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        GRU hidden size, also the width of the first linear layer.
    num_layers : int
        Number of stacked GRU layers.
    bi : bool
        Whether the GRU is bidirectional.
    num_classes : int, optional
        Number of output classes including the CTC blank. Defaults to the
        notebook-level ``num_output + 1``.
    """

    def __init__(self,input_size,hidden_size,num_layers,bi,num_classes=None):
        super(MyRNN,self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_directions = 2 if bi else 1
        if num_classes is None:
            num_classes = num_output+1 # +1 for the blank ctc
        self.gru = nn.GRU(input_size,hidden_size,num_layers,bidirectional=bi,batch_first=True)
        # A bidirectional GRU concatenates both directions, so its output is
        # num_directions * hidden_size wide.
        self.linear1 = nn.Linear(self.num_directions*hidden_size,hidden_size)
        self.linear2 = nn.Linear(hidden_size,num_classes)

    def forward(self,x):
        """
        Parameters
        ----------
        x : torch.Tensor of shape (batch, time, input_size)
            Batch-first, because the GRU is built with ``batch_first=True``.

        Returns
        -------
        torch.Tensor of shape (batch, time, num_classes) holding
        log-probabilities over the classes for every time step.
        """
        # Without an explicit h0 the GRU starts from zeros on x's own
        # device/dtype, with the right shape for uni- and bidirectional use.
        out,_ = self.gru(x)
        out = F.relu(self.linear1(out))
        out = self.linear2(out)
        # Normalise over the class dimension, after the last linear layer.
        return F.log_softmax(out,dim=-1)
        
        
# class denseLayer(nn.Module):
#     def __init__
    
    
class MyModel(nn.Module):
    """
    Feeds the feature map of a CNN into a sequence model.

    Parameters
    ----------
    cnn : nn.Module
        Feature extractor; expected to return a 4-D tensor
        (batch, channels, height, width), as Conv2d layers do.
    rnn : nn.Module
        Sequence model taking a batch-first tensor (batch, steps, features).
    """

    def __init__(self,cnn,rnn):
        # nn.Module must be initialised before sub-modules can be assigned.
        super(MyModel,self).__init__()
        self.cnn = cnn
        self.rnn = rnn

    def forward(self,x):
        """Return the output of `rnn` applied to the flattened CNN features."""
        x = self.cnn(x) # --> (bs,c,h,w)
        # The channels are treated as the time steps and the flattened
        # spatial grid as the features of each step.
        x = x.flatten(start_dim=2) # --> (bs,c,h*w)
        # The tensor stays batch-first: the GRU in this notebook is created
        # with batch_first=True, so no permute to (steps,bs,features) here.
        x = self.rnn(x)
        return x


torch.Size([5, 3, 2])

In [17]:
# !git remote add origin https://github.com/Mohamad-Atif1/CTC_loss_PyTorch.git


fatal: not a git repository (or any parent up to mount point /kaggle)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
