# Setup

In [35]:
!pip install imutils -q

## Import Libraries

In [36]:
import cv2
import imutils
import glob
import matplotlib.pyplot as plt
import numpy as np
import zipfile
import os
import pickle
import tensorflow as tf
import tensorflow.keras.layers as layers

from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

# Data Loading

[dataset_link](https://drive.google.com/file/d/1VyaFnIwxNPGkJirxbVhLsTLMVV-33Kb_/view?usp=sharing)

In [37]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
ZIP_FILE = "/content/drive/MyDrive/Dataset/captcha_images.zip"
zip_read = zipfile.ZipFile(ZIP_FILE, 'r')
zip_read.extractall('/content')
zip_read.close()

In [39]:
IMAGE_FOLDER = '/content/generated_captcha_images'
OUTPUT_FOLDER = "extracted_letter_images"

# Split the captcha into individual letters

## Get all the captcha images

In [40]:
captcha_image_files = glob.glob(os.path.join(IMAGE_FOLDER, "*"))
counts = {}

## Loop over the images

In [41]:
for (i, captcha_image_file) in enumerate(captcha_image_files):
  print("[INFO] processing image {}/{}".format(i + 1, len(captcha_image_files)))

  # Grab the label from the file name
  filename = os.path.basename(captcha_image_file)
  captcha_correct_label = os.path.splitext(filename)[0]

  # Convert the image to grayscale
  image = cv2.imread(captcha_image_file)
  gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

  # Add padding to image
  gray_image = cv2.copyMakeBorder(gray_image, 8, 8, 8, 8, cv2.BORDER_REPLICATE)

  # Threshold the image to convert it to pure black and white
  thresh = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

  # Find the contours of the image
  contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

  contours = contours[1] if imutils.is_cv3() else contours[0]

  letter_image_regions = []

  # Loop through each of the six contours and extract letter
  for contour in contours:
    # Get the rectangle that contains the contour
    (x, y, w, h) = cv2.boundingRect(contour)

    # Compare the width and height of the contour to detect if a letter is conjoined
    if w / h > 1.25:
      # Conjoined letter
      # Split into half
      half_width = int(w / 2)
      letter_image_regions.append((x, y, half_width, h))
      letter_image_regions.append((x + half_width, y, half_width, h))
    else:
      # Normal letter
      letter_image_regions.append((x, y, w, h))

  # If no 6 letters are found then skip the image
  if len(letter_image_regions) != 4:
    continue

  # Sort the image
  letter_image_regions = sorted(letter_image_regions, key=lambda x: x[0])

  # Save each letter as single image
  for letter_bounding_box, letter_text in zip(letter_image_regions, captcha_correct_label):
    x, y, w, h = letter_bounding_box

    # Extract the letter from the original image with a 2 pixel margin
    letter_image = gray_image[y-2:y + h + 2, x-2: x + w + 2]
    save_path = os.path.join(OUTPUT_FOLDER, letter_text)

    # If the output directory does not exist then create it
    if not os.path.exists(save_path):
      os.makedirs(save_path)

    # Write the letter image to a file
    count = counts.get(letter_text, 1)
    p = os.path.join(save_path, "{}.png".format(str(count).zfill(6)))
    cv2.imwrite(p, letter_image)

    # Increment the count for the current key
    counts[letter_text] = count + 1

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[INFO] processing image 4956/9955
[INFO] processing image 4957/9955
[INFO] processing image 4958/9955
[INFO] processing image 4959/9955
[INFO] processing image 4960/9955
[INFO] processing image 4961/9955
[INFO] processing image 4962/9955
[INFO] processing image 4963/9955
[INFO] processing image 4964/9955
[INFO] processing image 4965/9955
[INFO] processing image 4966/9955
[INFO] processing image 4967/9955
[INFO] processing image 4968/9955
[INFO] processing image 4969/9955
[INFO] processing image 4970/9955
[INFO] processing image 4971/9955
[INFO] processing image 4972/9955
[INFO] processing image 4973/9955
[INFO] processing image 4974/9955
[INFO] processing image 4975/9955
[INFO] processing image 4976/9955
[INFO] processing image 4977/9955
[INFO] processing image 4978/9955
[INFO] processing image 4979/9955
[INFO] processing image 4980/9955
[INFO] processing image 4981/9955
[INFO] processing image 4982/9955
[INFO] processing

# Data Preprocessing

In [42]:
LETTER_IMAGES_FOLDER = '/content/extracted_letter_images'
MODEL_FILENAME = 'captcha_model.keras'
MODEL_LABELS_FILENAME = 'model_labels.dat'

## Initialize the data and labels

In [43]:
data = []
labels = []

## Loop over the input images

In [44]:
def resize_to_fit(image, width, height):
  # Grab the dimension of the image, then initialize the padding values
  (h,w) = image.shape[:2]

  # If width is greater than the heigh resize along the width
  if w > h:
    image = imutils.resize(image, width=width)

  # If height is greater that width resize along height
  else:
    image = imutils.resize(image, height=height)

  # Determine padding values for the width and height to obtain the target dimensions
  padW = int((width - image.shape[1]) / 2.0)
  padH = int((height - image.shape[0]) / 2.0)

  # Pad the image then apply one more resizing to handle any rounding issues
  image = cv2.copyMakeBorder(image, padH, padH, padW, padW, cv2.BORDER_REPLICATE)
  image = cv2.resize(image, (width, height))

  return image

## Create the dataset

In [45]:
for image_file in paths.list_images(LETTER_IMAGES_FOLDER):
  # Load image and convert to grayscale
  image = cv2.imread(image_file)
  gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

  # Resize the letter to 20x20 pixel
  gray_image = resize_to_fit(gray_image, 20, 20)

  gray_image = np.expand_dims(gray_image, axis=2)

  # Grab the letter name from the folder
  label = image_file.split(os.path.sep)[-2]

  # Append the data
  data.append(gray_image)
  labels.append(label)

In [46]:
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

## Split into train and test

In [47]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=69)

## Convert the labels into one-hot-encoding

In [48]:
lb = LabelBinarizer().fit(y_train)
y_train = lb.transform(y_train)
y_test = lb.transform(y_test)

## Save the mapping from labels to one-hot-encodings

In [49]:
with open(MODEL_LABELS_FILENAME, 'wb') as f:
  pickle.dump(lb, f)

# Model Building

In [50]:
model = tf.keras.Sequential([
    layers.Conv2D(32, (5,5), padding='same', activation='relu', input_shape=(20,20,1)),
    layers.MaxPooling2D(pool_size=(2,2), strides=(2,2)),
    layers.Conv2D(64, (5,5), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2,2), strides=(2,2)),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(32, activation='softmax')
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 20, 20, 32)        832       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 10, 10, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 10, 10, 64)        51264     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 1600)              0         
                                                                 
 dense_2 (Dense)             (None, 256)              

In [51]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [52]:
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
    verbose=2
)

Epoch 1/10
969/969 - 6s - loss: 0.2532 - accuracy: 0.9360 - val_loss: 0.0207 - val_accuracy: 0.9951 - 6s/epoch - 6ms/step
Epoch 2/10
969/969 - 4s - loss: 0.0127 - accuracy: 0.9967 - val_loss: 0.0136 - val_accuracy: 0.9959 - 4s/epoch - 4ms/step
Epoch 3/10
969/969 - 4s - loss: 0.0068 - accuracy: 0.9982 - val_loss: 0.0091 - val_accuracy: 0.9975 - 4s/epoch - 4ms/step
Epoch 4/10
969/969 - 4s - loss: 0.0022 - accuracy: 0.9996 - val_loss: 0.0085 - val_accuracy: 0.9981 - 4s/epoch - 4ms/step
Epoch 5/10
969/969 - 5s - loss: 0.0052 - accuracy: 0.9983 - val_loss: 0.0118 - val_accuracy: 0.9970 - 5s/epoch - 5ms/step
Epoch 6/10
969/969 - 4s - loss: 0.0057 - accuracy: 0.9983 - val_loss: 0.0158 - val_accuracy: 0.9972 - 4s/epoch - 4ms/step
Epoch 7/10
969/969 - 4s - loss: 0.0028 - accuracy: 0.9990 - val_loss: 0.0075 - val_accuracy: 0.9978 - 4s/epoch - 4ms/step
Epoch 8/10
969/969 - 4s - loss: 0.0014 - accuracy: 0.9996 - val_loss: 0.0065 - val_accuracy: 0.9982 - 4s/epoch - 4ms/step
Epoch 9/10
969/969 - 5s 

<keras.src.callbacks.History at 0x7a2b481c9ab0>

In [64]:
model.save(MODEL_FILENAME)