<a href="https://colab.research.google.com/github/HimanshuMK/Captcha-Recognition-Model/blob/main/My_Captcha_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing Libraries

In [None]:
import numpy as np
import os
import cv2 #OpenCV(Open Source computer vision lib), containg CV algos
import string
import matplotlib.pyplot as plt #for graphs
from sklearn.model_selection import train_test_split

### Importing data

In [None]:
# Folder with the captcha images; each file name (before the extension) is the captcha text
data_dir = '/content/drive/MyDrive/OCR_model/samples'
# Parallel accumulators: images[i] is the picture whose text is labels[i]
images, labels = [], []

In [None]:
# Empty the accumulators first so that re-running this cell does not append
# every sample a second time.
images.clear()
labels.clear()

# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and therefore the seeded train/validation split) identical between runs.
for filename in sorted(os.listdir(data_dir)):
    # read image as a single-channel (grayscale) array
    img = cv2.imread(os.path.join(data_dir, filename), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it so it cannot end up inside the image array.
        print(f'Skipping unreadable file: {filename}')
        continue
    images.append(img)

    # extract labels from filename (the text before the first dot)
    label = filename.split('.')[0]
    labels.append(label)

In [None]:
# Sanity check: array shape of the loaded images, then of the labels
for loaded in (images, labels):
    print(np.array(loaded).shape)

(1070, 50, 200)
(1070,)


### Encoding

In [None]:
# Alphabet a captcha can contain: lowercase ASCII letters followed by the digits
characters = ''.join([string.ascii_lowercase, string.digits])
# Size of that alphabet
nchar = len(characters)

Encoding and Decoding Functions

In [None]:
# character to number conversion: each symbol maps to its position in `characters`
char_to_num = {symbol: position for position, symbol in enumerate(characters)}

# number to character conversion: the inverse lookup, used to decode predictions
num_to_char = {position: symbol for symbol, position in char_to_num.items()}

In [None]:
# Number of loaded captcha images
n = len(images)
print(n)

1070


Preprocessing


In [None]:
# Bring every image to 50 rows x 200 cols.
# cv2.resize takes the target size as (width, height), hence (200, 50).
resized_images = []
for img in images:
    resized_images.append(cv2.resize(img, (200, 50)))

In [None]:
# Scale pixel values by 1/255 (maps 8-bit intensities into the range 0..1)
normalized_images = [np.divide(img, 255.0) for img in resized_images]

In [None]:
# Length of the longest label = number of character slots to encode
max_length = len(max(labels, key=len))
# One class per symbol of the alphabet
num_classes = len(characters)
print(max_length, num_classes, sep='\n')

5
36


Label Encoding

In [None]:
def encode_label(label):
    """One-hot encode a captcha string.

    Returns a float32 array of shape (max_length, num_classes) where row i
    has a single 1.0 in the column given by char_to_num for the i-th
    character. Rows beyond len(label) stay all-zero. A character missing
    from char_to_num raises KeyError.
    """
    one_hot = np.zeros((max_length, num_classes), dtype=np.float32)
    for position in range(len(label)):
        class_index = char_to_num[label[position]]
        one_hot[position, class_index] = 1.0
    return one_hot

In [None]:
# Stack the per-label one-hot matrices into one (samples, max_length, num_classes) array
encoded_labels = np.array(list(map(encode_label, labels)))

In [None]:
# Add a trailing channel dimension for grayscale:
# (samples, rows, cols) -> (samples, rows, cols, 1), the 4-D layout that the
# model's (50, 200, 1) input declares. The original line only promised this
# in its comment and left X three-dimensional.
X = np.array(normalized_images)[..., np.newaxis]
y = np.array(encoded_labels)

In [None]:
# Sample count, then the shapes of the feature and label tensors
print(len(normalized_images), X.shape, y.shape, sep='\n')

1070
(1070, 50, 200)
(1070, 5, 36)


## Train Test Split


In [None]:
# Hold out 20% of the samples; the fixed random_state keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Creating Model

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np

# Define the improved model creation function
def create_improved_model(imgshape, nchar, captcha_length):
    """Build and compile a CNN that reads a fixed-length captcha.

    Args:
        imgshape: Shape of one input image as (rows, cols, channels),
            e.g. (50, 200, 1) for a grayscale captcha.
        nchar: Number of symbols in the alphabet (classes per position).
        captcha_length: Number of characters in every captcha.

    Returns:
        A compiled Keras Model whose output has shape
        (batch, captcha_length, nchar). Each row along the last axis is a
        probability distribution over the alphabet for one character
        position.
    """
    img = layers.Input(shape=imgshape)

    # Shape comments below are for a 50x200 input. MaxPooling2D uses 'valid'
    # padding by default, so odd sizes are floored.

    # First convolutional block
    conv1 = layers.Conv2D(32, (3, 3), padding='same', activation='relu')(img)
    conv1 = layers.Conv2D(32, (3, 3), padding='same', activation='relu')(conv1)
    mp1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)  # 25 x 100

    # Second convolutional block
    conv2 = layers.Conv2D(64, (3, 3), padding='same', activation='relu')(mp1)
    conv2 = layers.Conv2D(64, (3, 3), padding='same', activation='relu')(conv2)
    mp2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)  # 12 x 50

    # Third convolutional block
    conv3 = layers.Conv2D(128, (3, 3), padding='same', activation='relu')(mp2)
    conv3 = layers.Conv2D(128, (3, 3), padding='same', activation='relu')(conv3)
    bn = layers.BatchNormalization()(conv3)
    mp3 = layers.MaxPooling2D(pool_size=(2, 2))(bn)  # 6 x 25

    # Flatten the output
    flat = layers.Flatten()(mp3)

    # Fully connected layers
    dens1 = layers.Dense(256, activation='relu')(flat)
    drop1 = layers.Dropout(0.5)(dens1)
    dens2 = layers.Dense(256, activation='relu')(drop1)
    drop2 = layers.Dropout(0.5)(dens2)

    # Output layer: one raw score per (position, symbol) pair. No activation
    # here, because a softmax over all captcha_length * nchar units would
    # make the characters compete with each other for a single unit of
    # probability mass.
    logits = layers.Dense(captcha_length * nchar)(drop2)

    # Reshape to (captcha_length, nchar) first, then normalise along the last
    # axis so every character position gets its own distribution.
    reshaped = layers.Reshape((captcha_length, nchar))(logits)
    probabilities = layers.Softmax(axis=-1)(reshaped)

    # Compile the model
    model = Model(img, probabilities)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout
# and batch shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(42)

# Model dimensions. The label tensor y has shape
# (samples, captcha_length, nchar), so the sizes are read from it instead of
# being hard-coded; the model output then always matches the labels.
imgshape = (50, 200, 1)
captcha_length, nchar = y.shape[1:]

# Create the improved model
model = create_improved_model(imgshape, nchar, captcha_length)

# Print the model summary
model.summary()

# X_train / y_train come from the "Train Test Split" cell; X_val / y_val are
# not used during fitting, so they remain a held-out set for the evaluation
# cells. validation_split makes Keras set aside the last 20% of X_train for
# per-epoch validation.
history = model.fit(X_train, y_train, epochs=80, batch_size=32, validation_split=0.2)


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 200, 1)]      0         
                                                                 
 conv2d (Conv2D)             (None, 50, 200, 32)       320       
                                                                 
 conv2d_1 (Conv2D)           (None, 50, 200, 32)       9248      
                                                                 
 max_pooling2d (MaxPooling2  (None, 25, 100, 32)       0         
 D)                                                              
                                                                 
 conv2d_2 (Conv2D)           (None, 25, 100, 64)       18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 25, 100, 64)       36928     
                                                             

<keras.src.callbacks.History at 0x785ab97babc0>

In [None]:
# Score the model on the training split. X_train also contains the 20% that
# fit() set aside via validation_split, so this is not purely "seen" data.
train_scores = model.evaluate(x=X_train, y=y_train)
train_loss, train_accuracy = train_scores
print(f'Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}')

# Score the model on the held-out split
test_scores = model.evaluate(x=X_val, y=y_val)
test_loss, test_accuracy = test_scores
print(f'Testing Loss: {test_loss:.4f}, Testing Accuracy: {test_accuracy:.4f}')


Training Loss: 0.2019, Training Accuracy: 0.9456
Testing Loss: 0.8178, Testing Accuracy: 0.7374


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Generate predictions
y_pred = model.predict(X_val)

# Convert one-hot labels and predicted scores to class indices,
# one column per captcha position: shape (samples, captcha_length)
y_test_int = np.argmax(y_val, axis=-1).reshape(-1, captcha_length)
y_pred_int = np.argmax(y_pred, axis=-1).reshape(-1, captcha_length)

# Accuracy of each character position on its own
position_accuracies = [accuracy_score(y_test_int[:, i], y_pred_int[:, i]) for i in range(captcha_length)]
for i, position_accuracy in enumerate(position_accuracies):
    print(f'Accuracy at position {i}: {position_accuracy:.4f}')

# Mean over positions. Every position has the same number of samples, so
# this equals the overall character accuracy printed below.
accuracy_per_position = np.mean(position_accuracies)
print(f'Accuracy per position: {accuracy_per_position:.4f}')

# Whole-captcha accuracy: a captcha counts as correct only when all of its
# characters are predicted correctly.
captcha_accuracy = np.mean(np.all(y_test_int == y_pred_int, axis=1))
print(f'Whole-captcha accuracy: {captcha_accuracy:.4f}')

# Character-level accuracy, precision, recall, and F1 score
# (flatten once and reuse instead of flattening for every metric)
y_true_flat = y_test_int.flatten()
y_pred_flat = y_pred_int.flatten()
overall_accuracy = accuracy_score(y_true_flat, y_pred_flat)
precision = precision_score(y_true_flat, y_pred_flat, average='macro')
recall = recall_score(y_true_flat, y_pred_flat, average='macro')
f1 = f1_score(y_true_flat, y_pred_flat, average='macro')

print(f'Overall Accuracy: {overall_accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

Accuracy per position: 0.7374
Overall Accuracy: 0.7374
Precision: 0.7475
Recall: 0.7382
F1 Score: 0.7392


In [None]:
# Define the save path (checkpoint written after the initial 80-epoch run)
save_path = '/content/drive/MyDrive/OCR_model/my_captcha_model_v1.keras'
# Save the model
model.save(save_path)


To increase the accuracy, we continue training the saved model for more epochs.

In [None]:
# Input image shape (rows, cols, channels), alphabet size, characters per captcha
imgshape, nchar, captcha_length = (50, 200, 1), 36, 5

In [None]:
from tensorflow.keras.models import load_model

# Load the trained model
# (same file the 80-epoch model was saved to, so training resumes from it)
model = load_model('/content/drive/MyDrive/OCR_model/my_captcha_model_v1.keras')


In [None]:
# Resume training of the reloaded model: 30 further epochs with the same
# batch size and validation fraction as the first run
model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.2,
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7968576a11e0>

In [None]:
# Score the further-trained model on the training split. X_train also
# contains the 20% that fit() set aside via validation_split.
train_scores = model.evaluate(x=X_train, y=y_train)
train_loss, train_accuracy = train_scores
print(f'Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}')

# Score the further-trained model on the held-out split
test_scores = model.evaluate(x=X_val, y=y_val)
test_loss, test_accuracy = test_scores
print(f'Testing Loss: {test_loss:.4f}, Testing Accuracy: {test_accuracy:.4f}')


Training Loss: 0.1778, Training Accuracy: 0.9477
Testing Loss: 0.8046, Testing Accuracy: 0.7692


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Generate predictions
y_pred = model.predict(X_val)

# Convert one-hot labels and predicted scores to class indices,
# one column per captcha position: shape (samples, captcha_length)
y_test_int = np.argmax(y_val, axis=-1).reshape(-1, captcha_length)
y_pred_int = np.argmax(y_pred, axis=-1).reshape(-1, captcha_length)

# Accuracy of each character position on its own
position_accuracies = [accuracy_score(y_test_int[:, i], y_pred_int[:, i]) for i in range(captcha_length)]
for i, position_accuracy in enumerate(position_accuracies):
    print(f'Accuracy at position {i}: {position_accuracy:.4f}')

# Mean over positions. Every position has the same number of samples, so
# this equals the overall character accuracy printed below.
accuracy_per_position = np.mean(position_accuracies)
print(f'Accuracy per position: {accuracy_per_position:.4f}')

# Whole-captcha accuracy: a captcha counts as correct only when all of its
# characters are predicted correctly.
captcha_accuracy = np.mean(np.all(y_test_int == y_pred_int, axis=1))
print(f'Whole-captcha accuracy: {captcha_accuracy:.4f}')

# Character-level accuracy, precision, recall, and F1 score
# (flatten once and reuse instead of flattening for every metric)
y_true_flat = y_test_int.flatten()
y_pred_flat = y_pred_int.flatten()
overall_accuracy = accuracy_score(y_true_flat, y_pred_flat)
precision = precision_score(y_true_flat, y_pred_flat, average='macro')
recall = recall_score(y_true_flat, y_pred_flat, average='macro')
f1 = f1_score(y_true_flat, y_pred_flat, average='macro')

print(f'Overall Accuracy: {overall_accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

Accuracy per position: 0.7692
Overall Accuracy: 0.7692
Precision: 0.7737
Recall: 0.7677
F1 Score: 0.7674


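Accuracy is now greater than 75%, so it's quite acceptable. Note that this figure is character-level accuracy (the share of individual characters predicted correctly), not the share of captchas solved in full.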

In [None]:
# Define the save path (a separate file name, so the first checkpoint is not overwritten)
save_path = '/content/drive/MyDrive/OCR_model/my_captcha_model_v1_1.keras'
# Save the model
model.save(save_path)


Predicting the text of a single captcha image

In [None]:
sample_path = '/content/drive/MyDrive/OCR_model/samples/x37bf.png'
img1 = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)
if img1 is None:
    # cv2.imread signals a missing or undecodable file by returning None
    raise FileNotFoundError(f'Could not read captcha image: {sample_path}')

# Apply the same preprocessing as the training images: resize to
# 50 rows x 200 cols (cv2 takes (width, height)) and scale pixels by 1/255.
img1 = cv2.resize(img1, (200, 50)) / 255.0

# Add batch and channel axes: (50, 200) -> (1, 50, 200, 1)
res1 = np.array(model.predict(img1[np.newaxis, :, :, np.newaxis]))
result1 = np.reshape(res1, (captcha_length, nchar))  # one row of scores per character

# Index of the highest-scoring symbol at every position
k_ind = [int(np.argmax(row)) for row in result1]

# Map the indices back to characters to get the predicted captcha text
capt = ''.join(num_to_char[k] for k in k_ind)
print(capt)

x37bf


# The End