# Captcha Solver

## 1. Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import cv2
import glob
import imutils
from imutils import paths
import os
import os.path
import shutil
import pickle

from keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

## 2. Reading and Moving Images

In [2]:
images_dir = r"C:\Users\ragha\Downloads\captcha_assignment_images\Gujarat_Rural_Captchas"
training_images_dir = r"C:\Users\ragha\Downloads\captcha_assignment_images\Training_Images"
testing_images_dir = r"C:\Users\ragha\Downloads\captcha_assignment_images\Testing_Images"
extracted_images = r"C:\Users\ragha\Downloads\captcha_assignment_images\Extracted_Traning_Images"

In [3]:
images = []
lables = []

for image in os.listdir(images_dir):
    if image.endswith('.png'):
        images.append(image)
        lable = os.path.basename(image)
        lable = lable.split('.')[0]
        lables.append(lable)
        
training_images, testing_images, training_lables, testing_lables = train_test_split(images, lables, test_size=0.2, random_state=10)

for training_image in training_images:
    shutil.copy(os.path.join(images_dir, training_image), os.path.join(training_images_dir, training_image))
    
for testing_image in testing_images:
    shutil.copy(os.path.join(images_dir, testing_image), os.path.join(testing_images_dir, testing_image))

## 3. Extracting each element from the training image

In [4]:
solved_captchas = glob.glob(os.path.join(training_images_dir, "*"))
counts = {}

# loop over the image paths
for (i, captcha) in enumerate(solved_captchas):
    print("processing image {}/{}".format(i + 1, len(solved_captchas)))

    # grab the base filename as the text
    filename = os.path.basename(captcha)
    captcha_text = os.path.splitext(filename)[0]

    # Load the image and convert it to grayscale
    image = cv2.imread(captcha)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Add some extra padding around the image
    gray = cv2.copyMakeBorder(gray, 4, 4, 4, 4, cv2.BORDER_REPLICATE)

    # applying threshold
    # blockSize = 7  # You can adjust this value
    # C = 3
    # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, blockSize, C)
    thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV, cv2.THRESH_OTSU)[1]

    # finding the contours
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # creating empty list for holding the coordinates of the letters
    letter_image_regions = []

    # Now we will loop through each of the contours and extract the letter
    for contour in contours:
        # Get the rectangle that contains the contour
        (x, y, w, h) = cv2.boundingRect(contour)

        # checking if any counter is too wide
        # if countour is too wide then there could be two letters joined together or are very close to each other
        if w / h > 1.25:
            # Split it in half into two letter regions
            half_width = int(w / 2)
            letter_image_regions.append((x, y, half_width, h))
            letter_image_regions.append((x + half_width, y, half_width, h))
        else:
            letter_image_regions.append((x, y, w, h))

    # Sort the detected letter images based on the x coordinate to make sure
    # Sort the six largest contours based on the product of their width and height (area)
    six_largest_contours = sorted(letter_image_regions, key=lambda x: x[2] * x[3], reverse=True)
    
    # Get the six largest contours from the sorted list
    six_largest_contours = six_largest_contours[:6]
    
    # we get them from left-to-right so that we match the right image with the right letter
    six_largest_contours = sorted(six_largest_contours, key=lambda x: x[0])

    # Save each letter as a single image
    for letter_bounding_box, letter_text in zip(six_largest_contours, captcha_text):
        # Grab the coordinates of the letter in the image
        x, y, w, h = letter_bounding_box

        # Extract the letter from the original image with a 2-pixel margin around the edge
        letter_image = gray[y - 2:y + h + 2, x - 2:x + w + 2]

        if letter_image.shape[0] == 0 or letter_image.shape[1] == 0:
            print("Error: Empty letter image")
            continue

        # Get the folder to save the image in
        save_path = os.path.join(extracted_images, letter_text)

        # creating different output folder for storing different letters
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        # write the letter image to a file
        count = counts.get(letter_text, 1)
        p = os.path.join(save_path, "{}.png".format(str(count)))
        cv2.imwrite(p, letter_image)

        # increment the count
        counts[letter_text] = count + 1

processing image 1/1419
processing image 2/1419
processing image 3/1419
processing image 4/1419
processing image 5/1419
processing image 6/1419
processing image 7/1419
processing image 8/1419
processing image 9/1419
processing image 10/1419
processing image 11/1419
processing image 12/1419
processing image 13/1419
processing image 14/1419
processing image 15/1419
processing image 16/1419
processing image 17/1419
processing image 18/1419
processing image 19/1419
processing image 20/1419
processing image 21/1419
processing image 22/1419
processing image 23/1419
processing image 24/1419
processing image 25/1419
processing image 26/1419
processing image 27/1419
processing image 28/1419
processing image 29/1419
processing image 30/1419
processing image 31/1419
processing image 32/1419
processing image 33/1419
processing image 34/1419
processing image 35/1419
processing image 36/1419
processing image 37/1419
processing image 38/1419
processing image 39/1419
processing image 40/1419
processin

processing image 394/1419
processing image 395/1419
processing image 396/1419
processing image 397/1419
processing image 398/1419
processing image 399/1419
processing image 400/1419
processing image 401/1419
processing image 402/1419
processing image 403/1419
processing image 404/1419
processing image 405/1419
processing image 406/1419
processing image 407/1419
processing image 408/1419
processing image 409/1419
processing image 410/1419
processing image 411/1419
processing image 412/1419
processing image 413/1419
processing image 414/1419
processing image 415/1419
processing image 416/1419
processing image 417/1419
processing image 418/1419
processing image 419/1419
processing image 420/1419
processing image 421/1419
processing image 422/1419
processing image 423/1419
processing image 424/1419
processing image 425/1419
processing image 426/1419
processing image 427/1419
processing image 428/1419
processing image 429/1419
processing image 430/1419
processing image 431/1419
processing i

processing image 733/1419
processing image 734/1419
processing image 735/1419
processing image 736/1419
processing image 737/1419
processing image 738/1419
processing image 739/1419
processing image 740/1419
processing image 741/1419
processing image 742/1419
processing image 743/1419
processing image 744/1419
processing image 745/1419
processing image 746/1419
processing image 747/1419
processing image 748/1419
processing image 749/1419
processing image 750/1419
processing image 751/1419
processing image 752/1419
processing image 753/1419
processing image 754/1419
processing image 755/1419
processing image 756/1419
processing image 757/1419
processing image 758/1419
processing image 759/1419
processing image 760/1419
processing image 761/1419
processing image 762/1419
processing image 763/1419
processing image 764/1419
processing image 765/1419
processing image 766/1419
processing image 767/1419
processing image 768/1419
processing image 769/1419
Error: Empty letter image
processing i

processing image 1073/1419
processing image 1074/1419
processing image 1075/1419
processing image 1076/1419
processing image 1077/1419
processing image 1078/1419
processing image 1079/1419
processing image 1080/1419
processing image 1081/1419
processing image 1082/1419
processing image 1083/1419
processing image 1084/1419
processing image 1085/1419
processing image 1086/1419
processing image 1087/1419
processing image 1088/1419
processing image 1089/1419
processing image 1090/1419
processing image 1091/1419
processing image 1092/1419
processing image 1093/1419
processing image 1094/1419
processing image 1095/1419
processing image 1096/1419
processing image 1097/1419
processing image 1098/1419
processing image 1099/1419
processing image 1100/1419
processing image 1101/1419
processing image 1102/1419
processing image 1103/1419
processing image 1104/1419
processing image 1105/1419
processing image 1106/1419
processing image 1107/1419
processing image 1108/1419
processing image 1109/1419
p

## 4. Trainging model with extracted images

In [5]:
# creating empty lists for storing image data and labels
data = []
labels = []
for image in paths.list_images(extracted_images):
    img = cv2.imread(image)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (30, 30))

    # adding a 3rd dimension to the image
    img = np.expand_dims(img, axis=2)

    # grabing the name of the letter based on the folder it is present in
    label = image.split(os.path.sep)[-2]

    # appending to the empty lists
    data.append(img)
    labels.append(label)

# converting data and labels to np array
data = np.array(data, dtype="float")
labels = np.array(labels)

#building model
model = Sequential()
model.add(Conv2D(20, (5, 5), padding="same", input_shape=(30, 30, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(50, (5, 5), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(10, activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])


#scaling the values of  data between 0 and 1
data = data/255.0

# Split the training data into separate train and test sets

(train_x, val_x, train_y, val_y) = train_test_split(data, labels, test_size=0.2, random_state=0)

#one hot encoding
lb = LabelBinarizer().fit(train_y)
train_y = lb.transform(train_y)
val_y = lb.transform(val_y)

# using early stoping for avoiding overfitting
estop = EarlyStopping(patience=10, mode='min', min_delta=0.001, monitor='val_loss')

model.fit(train_x, train_y, validation_data=(val_x, val_y), batch_size=32, epochs=50, verbose=1, callbacks = [estop])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50


<keras.src.callbacks.History at 0x1c0929bc2b0>

## 5. Testing the trained model

In [6]:
def solve_captcha(image_path):
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Add some extra padding around the image
    gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)

    # threshold the image
    thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV, cv2.THRESH_OTSU)[1]

    # find the contours
    contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    letter_image_regions = []

    # Now we can loop through each of the contours and extract the letter

    for contour in contours:
        # Get the rectangle that contains the contour
        (x, y, w, h) = cv2.boundingRect(contour)

        # checking if any counter is too wide
        # if countour is too wide then there could be two letters joined together or are very close to each other
        if w / h > 1.25:
            # Split it in half into two letter regions
            half_width = int(w / 2)
            letter_image_regions.append((x, y, half_width, h))
            letter_image_regions.append((x + half_width, y, half_width, h))
        else:
            letter_image_regions.append((x, y, w, h))

    # Sort the detected letter images based on the x coordinate to make sure
    # Sort the six largest contours based on the product of their width and height (area)
    six_largest_contours = sorted(letter_image_regions, key=lambda x: x[2] * x[3], reverse=True)
    
    # Get the six largest contours from the sorted list
    six_largest_contours = six_largest_contours[:6]
    
    # we get them from left-to-right so that we match the right image with the right letter
    six_largest_contours = sorted(six_largest_contours, key=lambda x: x[0])

    # Create an output image and a list to hold our predicted letters
    output = cv2.merge([gray] * 3)
    predictions = []

    # Creating an empty list for storing predicted letters
    predictions = []

    # Save out each letter as a single image
    for letter_bounding_box in six_largest_contours:
        # Grab the coordinates of the letter in the image
        x, y, w, h = letter_bounding_box
        
        # Extract the letter from the original image with a 2-pixel margin around the edge
        letter_image = gray[y - 2:y + h + 2, x - 2:x + w + 2]
        
        if letter_image.shape[0] == 0 or letter_image.shape[1] == 0:
            print("Error: Empty letter image")
            continue

        letter_image = cv2.resize(letter_image, (30, 30))

        # Turn the single image into a 4d list of images
        letter_image = np.expand_dims(letter_image, axis=2)
        letter_image = np.expand_dims(letter_image, axis=0)

        # making prediction
        pred = model.predict(letter_image)

        # Convert the one-hot-encoded prediction back to a normal letter
        letter = lb.inverse_transform(pred)[0]
        predictions.append(letter)

    # Print the captcha's text
    captcha_text = "".join(predictions)
    return captcha_text

In [7]:
predictions = []
for testing_image in testing_images:
    predictions.append(solve_captcha(os.path.join(testing_images_dir, testing_image)))













Error: Empty letter image
Error: Empty letter image






Error: Empty letter image






Error: Empty letter image




## 6. Calculating the Accuracy 

In [8]:
result = pd.DataFrame({"Actual": testing_lables, "Predicted":predictions})
result['Is_Predicted_correctly'] = result.Actual == result.Predicted
result

Unnamed: 0,Actual,Predicted,Is_Predicted_correctly
0,104190,004190,False
1,771904,771904,True
2,103302,103302,True
3,109106,109106,True
4,501678,501678,True
...,...,...,...
350,109393,109393,True
351,106990,106990,True
352,117617,117617,True
353,114820,114820,True


In [9]:
accuracy  = len(result[result["Is_Predicted_correctly"] == True])/len(result)
print('Accuracy of the model is', accuracy * 100, '%')

Accuracy of the model is 89.85915492957747 %


In [10]:
pickle.dump(model,open('captcha_solver_model.pkl', 'wb'))
pickle.dump(lb, open('label_binarizer.pkl', 'wb'))