In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from keras import layers,callbacks
from keras.models import Model,load_model
import string
# Show what is inside the dataset root directory (the images are expected
# under its 'samples' sub-directory, which later cells read from).
print(os.listdir('Dataset/captcha-version-2-images'))

['samples']


Symbols contains every character that can possibly appear in a CAPTCHA image.
Each image has shape 50 x 200 x 1 (height x width x channels).

In [18]:
# Alphabet of characters a CAPTCHA label may contain: a-z, then A-Z, then 0-9.
# A character's position in this string is used as its one-hot class index.
Symbols = string.ascii_letters + string.digits
num_Symbols = len(Symbols)
print(num_Symbols)

# Shape of a single input image: 50 rows, 200 columns, 1 (grayscale) channel.
img_shape = (50, 200, 1)


62


In [20]:
def data_pre_processing(samples_dir='Dataset/captcha-version-2-images/samples'):
    """Load the CAPTCHA images and one-hot encode their labels.

    The label of an image is its file name without the extension. A file is
    only used when its label has at most 5 characters, every character is in
    ``Symbols`` and OpenCV can decode the image; any other file is skipped.

    Parameters
    ----------
    samples_dir : str, optional
        Directory that holds the image files.

    Returns
    -------
    X : numpy.ndarray, shape (num_files, 50, 200)
        Grayscale images with pixel values divided by 255 and resized to
        50 rows x 200 columns.
    Y : numpy.ndarray, shape (num_files, 5, num_Symbols)
        One one-hot row per character position of the label.

    Notes
    -----
    ``num_files`` counts every directory entry, so X[i] / Y[i] always belong
    to the i-th entry returned by ``os.listdir``; rows of skipped files stay
    all zeros.

    NOTE(review): ``img_shape`` in this notebook is (50, 200, 1) while X has
    no trailing channel axis -- it may need ``X[..., np.newaxis]`` before
    being fed to the model; confirm against the training cell.
    """
    # List the directory once so the array sizes and the loop below are
    # guaranteed to see the same entries in the same order.
    file_names = os.listdir(samples_dir)
    num_samples = len(file_names)
    print('Number of images ', num_samples)

    X = np.zeros((num_samples, 50, 200))
    Y = np.zeros((num_samples, 5, num_Symbols))

    for i, pic in enumerate(file_names):
        # pic is the file name, e.g. "<label>.png"; strip the extension
        # whatever its length instead of assuming exactly four characters.
        name_of_image = os.path.splitext(pic)[0]

        # Labels longer than 5 characters are not valid CAPTCHAs.
        if len(name_of_image) >= 6:
            continue

        # str.find returns -1 for characters outside Symbols; using -1 as an
        # index would silently mark the LAST symbol, so skip such files.
        symbol_ids = [Symbols.find(ch) for ch in name_of_image]
        if -1 in symbol_ids:
            continue

        img = cv2.imread(os.path.join(samples_dir, pic), cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None (it does not raise) for unreadable files.
        if img is None:
            continue

        # Scale the pixel values, then resize; cv2.resize takes (width, height).
        img = img / 255
        img = cv2.resize(img, (200, 50))

        # Mark the character present at each label position with a 1.
        chars = np.zeros((5, num_Symbols))
        for position, symbol_id in enumerate(symbol_ids):
            chars[position, symbol_id] = 1

        X[i] = img
        Y[i] = chars

    return X, Y

# X contains the images and Y their labels 
# i.e. X[i] has the CAPTCHA image and Y[i] has its CAPTCHA label


X, Y = data_pre_processing()

# Divide the dataset into a training set and a test set.
# The first TRAIN_SIZE samples are used for training and the remainder for
# testing (with 1070 images and TRAIN_SIZE = 900 that leaves 170 test images).

# TODO
#     add validation set
#     take the sizes of test and training set as hyperparameters

TRAIN_SIZE = 900

# Fail loudly instead of silently producing an empty training or test set.
assert 0 < TRAIN_SIZE < len(X), "TRAIN_SIZE must leave at least one test sample"

X_train, Y_train = X[:TRAIN_SIZE], Y[:TRAIN_SIZE]
X_test, Y_test = X[TRAIN_SIZE:], Y[TRAIN_SIZE:]
print("Size of training dataset(images and labels) ", X_train.shape, Y_train.shape)
print("Size of testing dataset(images and labels) ", X_test.shape, Y_test.shape)

Number of images  1070
Size of training dataset(images and labels) 

 (900, 50, 200) (900, 5, 62)
Size of testing dataset(images and labels)  (170, 50, 200) (170, 5, 62)



Now we define the model.
It starts as a Sequential stack of convolutional and dense layers and is then split into five output branches, one per CAPTCHA character.

((CONV(RELU)->DROP)*2->POOL)*2->(FC(RELU)->DROP)*2




In [54]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten


def create_model():
    """Build, summarise and compile the CAPTCHA recognition network.

    Layout:
    ((CONV(RELU) -> DROP) * 2 -> POOL) * 2 -> DENSE(RELU) -> DROP -> FLATTEN,
    followed by five parallel branches (DENSE(RELU) -> DROP -> DENSE(softmax)),
    one branch per character position of the CAPTCHA.

    Returns
    -------
    keras.models.Model
        Compiled model taking images of shape ``img_shape`` and producing a
        list of five outputs, each a distribution over ``num_Symbols`` classes.
    """
    # Shared trunk, built as a Sequential model.
    model = Sequential()

    # ((CONV(RELU) -> DROP) * 2 -> POOL) * 2

    # 32 filters of size (3, 3), stride 1, no padding, ReLU activation.
    # The input shape is only given for the first layer; later layers infer it.
    model.add(Conv2D(32, (3, 3), padding='valid', activation='relu', input_shape=img_shape))
    model.add(Dropout(0.2))
    model.add(Conv2D(32, (3, 3), padding='valid', activation='relu'))
    model.add(Dropout(0.2))
    # Max pooling with pool size 2 and stride 2 discards 75% of the activations.
    model.add(MaxPool2D(pool_size=(2, 2), strides=2))
    model.add(Conv2D(32, (3, 3), padding='valid', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Conv2D(32, (3, 3), padding='valid', activation='relu'))
    model.add(Dropout(0.2))
    model.add(MaxPool2D(pool_size=(2, 2), strides=2))

    # DENSE(RELU) -> DROP
    # NOTE(review): this Dense layer sits before Flatten, so it is applied
    # along the channel axis at every spatial position rather than as a fully
    # connected layer over the whole feature map -- confirm this is intended.
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))

    # Flatten the trunk and split into 5 branches;
    # each branch predicts one character of the CAPTCHA.
    model.add(Flatten())
    shared_features = model.layers[-1].output

    outs = []
    for _ in range(5):
        branch = Dense(64, activation='relu')(shared_features)
        branch = Dropout(0.5)(branch)
        # Exactly one symbol is correct per position, so use softmax to get a
        # proper distribution for the categorical cross-entropy loss.
        outs.append(Dense(num_Symbols, activation='softmax')(branch))

    # New functional model: same input as the trunk, the five branches as outputs.
    model = Model(inputs=model.input, outputs=outs)

    # Visualize the architecture of the model.
    model.summary()

    # The original cell left the optimizer blank (a syntax error); Adam is
    # used here as a general-purpose default. metrics must be a list.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model


model = create_model()















__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
conv2d_133_input (InputLayer)   (None, 50, 200, 1)   0                                            
__________________________________________________________________________________________________
conv2d_133 (Conv2D)             (None, 48, 198, 32)  320         conv2d_133_input[0][0]           
__________________________________________________________________________________________________
dropout_277 (Dropout)           (None, 48, 198, 32)  0           conv2d_133[0][0]                 
__________________________________________________________________________________________________
conv2d_134 (Conv2D)             (None, 46, 196, 32)  9248        dropout_277[0][0]                
__________________________________________________________________________________________________
dropout_27