In [15]:
import os
import glob 
import pandas as pd
import string
import collections
from tqdm import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader
import torch.optim as optim


# Path of dataset
path = './data/'
# The function for creating the dataframe from dataset
def Load_dataset():
    # Open the dataset
    data = glob.glob(os.path.join('./data/', '*.png'))
    # try to encoding labels with ascii letters and save them in a Dataframe
    all_letters = string.ascii_uppercase + string.digits+string.ascii_lowercase
    mapping={}
    mapping_inv = {}
    i = 1
    for x in all_letters:
        mapping[x] = i
        mapping_inv[i] = x
        i += 1
    # The number of class
    num_class = len(mapping)
    print(num_class)
    # make a dataset
    images = [] # list for saving images
    labels = [] # list for saving labels
    # create a dictionary
    datas = collections.defaultdict(list)
    for d in data:
        x = d.split('/')[-1]
        datas['image'].append(x)
        datas['label'].append([mapping[i] for i in x.split('.')[0]])
    # Save dictionary to DataFrame
    df = pd.DataFrame(datas)
    
    return df



# create a captchadatset 
class CaptchaDataset:
    def __init__(self, df, transform=None):
        self.df = df
        # torchvision.transforms to Composes several transforms together
        self.transform = transform
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        data = self.df.iloc[idx]
        # Use PIL Library: ‘L’ convert function converts the image from it’s regular RGB colors to simple black and white (gray-scale).
        image = Image.open(os.path.join(path, data['image'])).convert('L')
        # Convert labels to torch
        label = torch.tensor(data['label'], dtype=torch.int32)
        # print(image)
        # print(label)
        
        if self.transform is not None:
            image = self.transform(image)
            
        return image, label

 
# Create Bidirectional 
class Bidirectional(nn.Module):
    def __init__(self, inp, hidden, out, lstm=True):
        super(Bidirectional, self).__init__()
        if lstm:
            self.rnn = nn.LSTM(inp, hidden, bidirectional=True)
        else:
            self.rnn = nn.GRU(inp, hidden, bidirectional=True)
        self.embedding = nn.Linear(hidden*2, out)
    def forward(self, X):
        recurrent, _ = self.rnn(X)
        out = self.embedding(recurrent)     
        return out



# CRNN model for Captcha
class CRNN(nn.Module):
    def __init__(self, in_channels, output):
        super(CRNN, self).__init__()

        self.cnn = nn.Sequential(
                nn.Conv2d(in_channels, 256, 9, stride=1, padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(256),
                nn.MaxPool2d(3, 3),
                nn.Conv2d(256, 256, (4, 3), stride=1, padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(256))
        
        self.linear = nn.Linear(5888, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.rnn = Bidirectional(256, 1024, output+1)

    def forward(self, X, y=None, criterion = None):
        out = self.cnn(X)
        N, C, w, h = out.size()
        out = out.view(N, -1, h)
        # It returns a view of the input tensor with its dimension permuted. 
        out = out.permute(0, 2, 1)
        out = self.linear(out)

        out = out.permute(1, 0, 2)
        out = self.rnn(out)
            
        if y is not None:
            T = out.size(0)
            N = out.size(1)

            #Creates a tensor of size,size filled with fill_value,and can select dtype
            input_lengths = torch.full(size=(N,), fill_value=T, dtype=torch.int32)
            target_lengths = torch.full(size=(N,), fill_value=5, dtype=torch.int32)
        
            loss = criterion(out, y, input_lengths, target_lengths)
            
            return out, loss
        
        return out, None
    
    def _ConvLayer(self, inp, out, kernel, stride, padding, bn=False):
        if bn:
            conv = [
                nn.Conv2d(inp, out, kernel, stride=stride, padding=padding),
                nn.ReLU(),
                nn.BatchNorm2d(out)
            ]
        else:
            conv = [
                nn.Conv2d(inp, out, kernel, stride=stride, padding=padding),
                nn.ReLU()
            ]
        return nn.Sequential(*conv)


   

# Train the model
def final():
    # Load dataset
    df=Load_dataset()
    df_train, df_test = train_test_split(df, test_size=0.2, shuffle=True)
    # Composes several transforms together
    transform = T.Compose([
    T.ToTensor()
    ])

    # Load data as CaptchaDataset
    train_data = CaptchaDataset(df_train, transform)
    test_data = CaptchaDataset(df_test, transform)

    # Pytorch’s DataLoader is responsible for managing batches
    # And use train_loader in each epochs
    train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=8)

    # Create Device to check CPU or GPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    # The output of model is number of classes that is equal to 62
    model = CRNN(in_channels=1, output=62).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # Add CTCLoss at each steps
    # CTCLoss:Calculates loss between a continuous (unsegmented) time series and a target sequence
    criterion = nn.CTCLoss()
    

    # Within each epoch run the subsets of data = batch sizes
    hist_loss = []
    for epoch in range(100):
            model.train()
            # tqdm is used to create a smart progress bar for the loops
            tk = tqdm(train_loader, total=len(train_loader))
            for data, target in tk:
                data = data.to(device=device)
                target = target.to(device=device)

                optimizer.zero_grad() # Clearing all previous gradients, setting to zero 
                out, loss = model(data, target, criterion=criterion) # Loss Computation
                loss.backward() # Back Propagation
                optimizer.step() # Updating the parameters
                tk.set_postfix({'Epoch':epoch+1, 'Loss' : loss.item()})
    print('Last iteration loss value: '+str(loss.item()))
final()   

62
CRNN(
  (cnn): Sequential(
    (0): Conv2d(1, 256, kernel_size=(9, 9), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(256, 256, kernel_size=(4, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (linear): Linear(in_features=5888, out_features=256, bias=True)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (rnn): Bidirectional(
    (rnn): LSTM(256, 1024, bidirectional=True)
    (embedding): Linear(in_features=2048, out_features=63, bias=True)
  )
)


100%|██████████| 54/54 [00:07<00:00,  7.57it/s, Epoch=1, Loss=3.82]
100%|██████████| 54/54 [00:07<00:00,  7.43it/s, Epoch=2, Loss=4.47]
100%|██████████| 54/54 [00:07<00:00,  7.40it/s, Epoch=3, Loss=4.48]
100%|██████████| 54/54 [00:07<00:00,  7.48it/s, Epoch=4, Loss=4.46]
100%|██████████| 54/54 [00:07<00:00,  7.66it/s, Epoch=5, Loss=4.49]
100%|██████████| 54/54 [00:06<00:00,  7.77it/s, Epoch=6, Loss=4.5]
100%|██████████| 54/54 [00:06<00:00,  7.81it/s, Epoch=7, Loss=4.51]
100%|██████████| 54/54 [00:06<00:00,  7.97it/s, Epoch=8, Loss=4.44]
100%|██████████| 54/54 [00:06<00:00,  7.91it/s, Epoch=9, Loss=4.49]
100%|██████████| 54/54 [00:06<00:00,  7.98it/s, Epoch=10, Loss=4.45]
100%|██████████| 54/54 [00:06<00:00,  7.85it/s, Epoch=11, Loss=4.46]
100%|██████████| 54/54 [00:06<00:00,  7.86it/s, Epoch=12, Loss=4.49]
100%|██████████| 54/54 [00:06<00:00,  7.74it/s, Epoch=13, Loss=4.43]
100%|██████████| 54/54 [00:07<00:00,  7.71it/s, Epoch=14, Loss=4.42]
100%|██████████| 54/54 [00:07<00:00,  7.70it

Last iteration loss value: 0.004853360820561647



