In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from keras import layers
from keras.models import Model,load_model
import string
import tensorflow as tf
print(os.listdir('../Dataset/captcha-version-2-images'))

['samples', 'dmw8n.png']


Symbols contains the letters posssible in images.
Images are of shape 50*200*1

In [24]:
Symbols = string.ascii_lowercase + string.ascii_uppercase + "0123456789"
num_Symbols = len(Symbols)
print(num_Symbols)
img_shape = (50, 200, 1)


62


In [61]:
def data_pre_processing():
    num_samples = len(os.listdir('../Dataset/captcha-version-2-images/samples'))
    print('Number of images ', num_samples)
    X = np.zeros((num_samples, 50, 200,1))                    # 1070*50*200*1
    Y = np.zeros((num_samples, 5, num_Symbols))               # 1070*5*62
    # print(X.shape)
    # print(Y.shape)
    
    for i, pic in enumerate(os.listdir('../Dataset/captcha-version-2-images/samples')):
        
        # i is used as enumerating index
        # pic contains the name of image e.g. "name".png
        img = cv2.imread(os.path.join('../Dataset/captcha-version-2-images/samples', pic), cv2.IMREAD_GRAYSCALE)
        
        name_of_image = pic[:-4]
        
        # if valid CAPTCHA resize the image
        if len(name_of_image) < 6:
            # scaling and resizing the image
            img = img/255
            img = cv2.resize(img,(50,200))
            img = np.reshape(img,(50,200,1))
            
            # define a matrix to ark all the charaters present in image as 1 and rest all as 0
            chars = np.zeros((5,num_Symbols))
            
            for index in range(0, len(name_of_image)):
                l = Symbols.find(name_of_image[index])
                chars[index, l] = 1
            
            X[i] = img
            Y[i] = chars
            
    return X, Y

# X contains the images and Y their labels 
# i.e. X[i] has the CAPTCHA image and Y[i] has its CAPTCHA label


X, Y = data_pre_processing()

# divide the datasest into test and train dataset
# out of 1070 images we will take 900 as training images and 170 as test images

# TODO
#     add validation set 
#     take the sizes of test and training set as hyperparameters
    
    
X_train, Y_train = X[:900], Y[:900]
X_test, Y_test = X[900:], Y[900:]
print("Size of training dataset(images and labels) ", X_train.shape, Y_train.shape)
print("Size of testing dataset(images and labels) ", X_test.shape, Y_test.shape)

Number of images  1070
Size of training dataset(images and labels)  (900, 50, 200, 1) (900, 5, 62)
Size of testing dataset(images and labels)  (170, 50, 200, 1) (170, 5, 62)



Now we define and create the model:
The model will be sequential then Conv and dense layers.

((CONV(RELU)->DROP)*2->POOL)*2->(FC(RELU)->DROP)*3  (32 filters used in Conv2D)--> getting approx 21% loss 
((CONV(RELU)->DROP)->POOL)*2->(FC(RELU)->DROP)*3    (32 filters used in Conv2D)--> getting approx 12% loss
((CONV(RELU)->DROP)->POOL)->(FC(RELU)->DROP)*3      (32 filters used in Conv2D)--> getting approx 11% loss 
((CONV(RELU)->DROP)*3->POOL)->(FC(RELU)->DROP)*2    (16 filters used in Conv2D)--> getting approx 9% loss
((CONV(RELU))*3->DROP->POOL)->(FC(RELU)->DROP)->FC(RELU)    --> 7% loss


In [69]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten, InputLayer, BatchNormalization


def create_model():
    # define the input
    
    # define sequential model
    model = Sequential()
    
    # (CONV(RELU)->DROP)*2->POOL)*2
    
    # 32 filter each with (3,3) size, stride =1 and no padding, relu activation 
    # specify input shape which is done only in first layer rest all layers take care of it automatically 
    model.add(InputLayer(input_shape=img_shape))
    model.add(Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=img_shape))
    model.add(MaxPool2D(padding='same'))
    # model.add(Dropout(0.2))
    model.add(Conv2D(16, (3, 3), padding='same', activation='relu'))
    # model.add(Dropout(0.2))
    # max pool with pool size = 2 and strides = 2 will discard 75% activations
    model.add(MaxPool2D())
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
    model.add(MaxPool2D())
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
    # model.add(Dropout(0.2))
    # model.add(Conv2D(32, (3, 3), padding='valid', activation='relu'))
    # model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(MaxPool2D(padding='same'))

    # (FC(RELU)->DROP)
    
    # model.add(Dense(64, activation='relu'))
    # model.add(Dropout(0.5))
    
    # now flatten the model and split into 5 branches 
    # each branch will predict one letter in CAPTCHA
    model.add(Flatten())
    outs = []
    for i in range(0, 5):
        temp = model.layers[len(model.layers)-1].output
        temp1 = layers.Dense(64, activation='relu')(temp)
        # temp2 = layers.Dropout(0.5)(temp1)
        temp3 = layers.Dense(num_Symbols, activation='sigmoid')(temp1)
        outs.append(temp3)
        
    # making a new model with input as the one in previous and output as outs layer
    model = Model(inputs=model.input, outputs=outs)
    
    # visualize the architecture of model
    model.summary()
    
    # compile the model using 
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])
    return model



In [70]:
model = create_model()
hist = model.fit(X_train,[Y_train[:,0,:],Y_train[:,1,:],Y_train[:,2,:],Y_train[:,3,:],Y_train[:,4,:]],batch_size=32, epochs=30, verbose=1, validation_split=0.2)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 50, 200, 1)   0                                            
__________________________________________________________________________________________________
conv2d_39 (Conv2D)              (None, 50, 200, 16)  160         input_12[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_37 (MaxPooling2D) (None, 25, 100, 16)  0           conv2d_39[0][0]                  
__________________________________________________________________________________________________
conv2d_40 (Conv2D)              (None, 25, 100, 16)  2320        max_pooling2d_37[0][0]           
__________________________________________________________________________________________________
max_poolin

In [71]:
score = model.evaluate(X_test,[Y_test[:,0,:],Y_test[:,1,:],Y_test[:,2,:],Y_test[:,3,:],Y_test[:,4,:]],batch_size=50,verbose=0)
for i in range(0,len(model.metrics_names)):
    print(model.metrics_names[i],"  ",score[i])

loss    10.098685096291934
dense_115_loss    0.968825885478188
dense_117_loss    1.324532053049873
dense_119_loss    1.9093132019042969
dense_121_loss    3.322683208128985
dense_123_loss    2.5733306407928467
dense_115_acc    0.7352941106347477
dense_117_acc    0.6941176512662102
dense_119_acc    0.5470588242306429
dense_121_acc    0.047058822696699816
dense_123_acc    0.07058823481202126


In [73]:

def predict(file_path):
    img = cv2.imread(file_path,cv2.IMREAD_GRAYSCALE)
    
    if img is not None:
        img = img/255
        img = cv2.resize(img, (50,200))
        img = np.reshape(img,(50,200,1))
        img = img[np.newaxis,:,:,:]
    else :
        print("Not an image!")
    
    res = np.array(model.predict(img))
    # print(res)
    # res = cv2.resize(res,(50,200))
    res = res.reshape(5,num_Symbols)
    # max_prob_indices = np.zeros(5)
    max_prob_indices = np.argmax(res,1)
    # print(max_prob_indices.shape)
    ans =""
    for i in max_prob_indices:
        ans+=Symbols[i]
    print(ans)
    

In [74]:

predict('../Dataset/captcha-version-2-images/samples/yyn57.png')
predict('../Dataset/captcha-version-2-images/samples/yf424.png')
predict('../Dataset/captcha-version-2-images/samples/xxw44.png')
predict('../Dataset/captcha-version-2-images/samples/pym7p.png')
predict('../Dataset/captcha-version-2-images/abcde.jpg')
predict('../Dataset/captcha-version-2-images/Ab1eZ.jpg')


yynnn
yf4nn
xxwnn
pymen
abceZ
AbceZ
AbceZ
