<a href="https://colab.research.google.com/github/HimanshuMK/Captcha-Recognition-Model/blob/main/Captcha_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing Libraries

In [2]:
import numpy as np
import os
import cv2 #OpenCV(Open Source computer vision lib), containg CV algos
import string
import matplotlib.pyplot as plt #for graphs

### Importing data

In [3]:
data_dir = '/content/drive/MyDrive/OCR_model/samples'
labels = []
images = []

### Extracting Images and labels From data

In [4]:
for filename in os.listdir(data_dir):
    # read image
    img = cv2.imread(os.path.join(data_dir, filename), cv2.IMREAD_GRAYSCALE)
    images.append(img)

    # extract labels from filename
    label = filename.split('.')[0]
    labels.append(label)

In [5]:
print(np.array(images).shape)
print(np.array(labels).shape)

(1070, 50, 200)
(1070,)


#### For Encoding Purpose

In [6]:
characters= string.ascii_lowercase + string.digits # All symbols captcha can contain
nchar = len(characters) #total number of char possible

### Encoding Functions

In [7]:
char_to_num = {}
for idx, char in enumerate(characters):
    char_to_num[char] = idx

# number to character conversion
num_to_char = {}
for char, idx in char_to_num.items():
    num_to_char[idx] = char

Since there are 1070 images. <br>
We will define first shapes of X and y <br>



In [8]:
n = len(images)
print(n)

1070


In [9]:
#defining size of image
imgshape=(50,200,1) #50-height, 200-width, 1-no of channels

### Creating X and y for Model

In [10]:
X = np.zeros((n, 50, 200, 1))  # n * 50 * 200 array with all entries 0
y = np.zeros((5, n, nchar))    # 5 * n * 36 (5 letters in captcha) with all entries 0

In [11]:
for i, (img, label) in enumerate(zip(images, labels)):
    if len(label) < 6:  # captcha is not more than 5 letters
        img = img / 255.0  # scales the image between 0 and 1
        img = np.reshape(img, (50, 200, 1))  # reshapes image to width 200, height 50, channel 1

        target = np.zeros((5, nchar))  # creates an array of size 5*36 with all entries 0

        for j, k in enumerate(label):
            # j iterates from 0 to 4 (5 letters in captcha)
            # k denotes the letter in captcha which is to be scanned
            index = char_to_num[k]  # index stores the position of letter k of captcha in the char_to_num dictionary
            target[j, index] = 1  # replaces 0 with 1 in the target array at the position of the letter in captcha

        X[i] = img  # stores all the images
        y[:, i] = target  # stores all the info about the letters in captcha of all images

In [12]:
print(X.shape)
print(y.shape)

(1070, 50, 200, 1)
(5, 1070, 36)


Since now our X and y are ready, <br>
we will Divide it in training and testing set

Train Test Split

In [13]:
import numpy as np

# Set a random seed for reproducibility
np.random.seed(42)

# Shuffle the indices
indices = np.arange(X.shape[0])
np.random.shuffle(indices)

# Shuffle the data accordingly
X_shuffled = X[indices]
y_shuffled = y[:, indices]

# Split the data into training and test sets
train_size = 970
X_train, y_train = X_shuffled[:train_size], y_shuffled[:, :train_size]
X_test, y_test = X_shuffled[train_size:], y_shuffled[:, train_size:]

# Verify the shapes
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# (970, 50, 200, 1)
# (5, 970, 36)
# (100, 50, 200, 1)
# (5, 100, 36)


(970, 50, 200, 1)
(5, 970, 36)
(100, 50, 200, 1)
(5, 100, 36)


# Creating a Model

In [14]:
from keras import layers #for building layers of neural net
from keras.models import Model
from keras.models import load_model
from keras import callbacks #for training logs, saving to disk periodically

In [15]:
#create model
def createmodel():
    img = layers.Input(shape=imgshape) # Get image as an input of size 50,200,1
    conv1 = layers.Conv2D(16, (3, 3), padding='same', activation='relu')(img) #50*200
    mp1 = layers.MaxPooling2D(padding='same')(conv1)  # 25*100
    conv2 = layers.Conv2D(32, (3, 3), padding='same', activation='relu')(mp1)
    mp2 = layers.MaxPooling2D(padding='same')(conv2)  # 13*50
    conv3 = layers.Conv2D(32, (3, 3), padding='same', activation='relu')(mp2)
    bn = layers.BatchNormalization()(conv3) #to improve the stability of model
    mp3 = layers.MaxPooling2D(padding='same')(bn)  # 7*25

    flat = layers.Flatten()(mp3) #convert the layer into 1-D

    outs = []
    for _ in range(5): #for 5 letters of captcha
        dens1 = layers.Dense(64, activation='relu')(flat)
        drop = layers.Dropout(0.5)(dens1) #drops 0.5 fraction of nodes
        res = layers.Dense(nchar, activation='sigmoid')(drop)

        outs.append(res) #result of layers

    # Compile model and return it
    model = Model(img, outs) #create model
    model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=["accuracy"])
    return model

In [16]:
#Create model
model=createmodel();
model.summary();

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 50, 200, 1)]         0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 50, 200, 16)          160       ['input_1[0][0]']             
                                                                                                  
 max_pooling2d (MaxPooling2  (None, 25, 100, 16)          0         ['conv2d[0][0]']              
 D)                                                                                               
                                                                                                  
 conv2d_1 (Conv2D)           (None, 25, 100, 32)          4640      ['max_pooling2d[0][0]']   

Trying to fit in that model

In [17]:
#Applying the model
hist = model.fit(X_train, [y_train[0], y_train[1], y_train[2], y_train[3], y_train[4]], batch_size=32, epochs=60, validation_split=0.2)
#batch size- 32 defines no. of samples per gradient update
#Validation split=0.2 splits the training set in 80-20% for training nd testing

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


### Saving the Model for furthur use

In [20]:
# Define the save path
save_path = '/content/drive/MyDrive/OCR_model/captcha_model_v2.keras'
# Save the model
model.save(save_path)


## Evaluating Model on Training data

In [18]:
#Loss on training set
#Finding Loss on training set
preds = model.evaluate(X_train, [y_train[0], y_train[1], y_train[2], y_train[3], y_train[4]])
print ("Loss on training set= " + str(preds[0]))

Loss on training set= 0.44039222598075867


## Evaluating Model on Testing data

In [19]:
#Finding loss on test set
preds = model.evaluate(X_test, [y_test[0], y_test[1], y_test[2], y_test[3], y_test[4]])
print ("Loss on testing set= " + str(preds[0]))

Loss on testing set= 1.7589656114578247


## Our Model works well on Training and Testing data

# The END