# Setup

In [2]:
!pip install -q opendatasets

## Import libraries

In [3]:
import pandas as pd
import opendatasets as od
import numpy as np
import os
import tensorflow as tf
from PIL import Image
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Reshape, Input, concatenate
from tensorflow.keras.models import Model

# Data Loading

In [4]:
od.download("https://www.kaggle.com/datasets/johnbergmann/captcha-image-dataset")

Dataset URL: https://www.kaggle.com/datasets/johnbergmann/captcha-image-dataset
Downloading captcha-image-dataset.zip to ./captcha-image-dataset


100%|██████████| 39.5M/39.5M [00:01<00:00, 37.6MB/s]





In [5]:
DATA_URL = "/content/captcha-image-dataset/captchas"
train_dir = os.path.join(DATA_URL, "train")
test_dir = os.path.join(DATA_URL, "test")

# Data Preparation

In [6]:
BATCH_SIZE = 64
IMG_SIZE = (250,50)
N_LABELS = 36
D = 6 # number per image

In [7]:
files_train = []
labels_train = []

for filename in os.listdir(train_dir):
  files_train.append(os.path.join(train_dir, filename))
  labels_train.append(os.path.splitext(filename.split('_')[1])[0].lower())

files_val = []
labels_val = []

for filename in os.listdir(test_dir):
  files_val.append(os.path.join(test_dir, filename))
  labels_val.append(os.path.splitext(filename.split('_')[1])[0].lower())

In [8]:
df_train = pd.DataFrame({'file': files_train, 'label': labels_train})
df_val = pd.DataFrame({'file': files_val, 'label': labels_val})

In [9]:
df_train.head()

Unnamed: 0,file,label
0,/content/captcha-image-dataset/captchas/train/...,khhhpm
1,/content/captcha-image-dataset/captchas/train/...,armacx
2,/content/captcha-image-dataset/captchas/train/...,ghbzsc
3,/content/captcha-image-dataset/captchas/train/...,x26htf
4,/content/captcha-image-dataset/captchas/train/...,pzp2hz


## Label Encoding

In [10]:
charset = "0123456789abcdefghijklmnopqrstuvwxyz"
chars_to_index = {char: i for i, char in enumerate(charset)}

In [11]:
def labels_to_indices(label):
  result = [chars_to_index[char] for char in label]
  return result

In [12]:
label_1 = []
label_2 = []
label_3 = []
label_4 = []
label_5 = []
label_6 = []

for label in df_train['label']:
  l1, l2, l3, l4, l5, l6 = labels_to_indices(label)
  label_1.append(l1)
  label_2.append(l2)
  label_3.append(l3)
  label_4.append(l4)
  label_5.append(l5)
  label_6.append(l6)

df_train['label_1'] = label_1
df_train['label_2'] = label_2
df_train['label_3'] = label_3
df_train['label_4'] = label_4
df_train['label_5'] = label_5
df_train['label_6'] = label_6

In [13]:
label_1 = []
label_2 = []
label_3 = []
label_4 = []
label_5 = []
label_6 = []

for label in df_val['label']:
  l1, l2, l3, l4, l5, l6 = labels_to_indices(label)
  label_1.append(l1)
  label_2.append(l2)
  label_3.append(l3)
  label_4.append(l4)
  label_5.append(l5)
  label_6.append(l6)

df_val['label_1'] = label_1
df_val['label_2'] = label_2
df_val['label_3'] = label_3
df_val['label_4'] = label_4
df_val['label_5'] = label_5
df_val['label_6'] = label_6

In [14]:
df_train.drop(columns='label', inplace=True)
df_val.drop(columns='label', inplace=True)

In [15]:
df_train.head()

Unnamed: 0,file,label_1,label_2,label_3,label_4,label_5,label_6
0,/content/captcha-image-dataset/captchas/train/...,20,17,17,17,25,22
1,/content/captcha-image-dataset/captchas/train/...,10,27,22,10,12,33
2,/content/captcha-image-dataset/captchas/train/...,16,17,11,35,28,12
3,/content/captcha-image-dataset/captchas/train/...,33,2,6,17,29,15
4,/content/captcha-image-dataset/captchas/train/...,25,35,25,2,17,35


## Image processing

In [16]:
X_train = []
X_val = []

for filepath in df_train['file']:
  img = Image.open(filepath)
  img = np.array(img)
  X_train.append(img)

X_train = np.array(X_train)

for filepath in df_val['file']:
  img = Image.open(filepath)
  img = np.array(img)
  X_val.append(img)

X_val = np.array(X_val)

## Create tf dataset

In [40]:
train_labels = tf.stack([
    df_train['label_1'], df_train['label_2'], df_train['label_3'],
    df_train['label_4'], df_train['label_5'], df_train['label_6']
], axis=1)

val_labels = tf.stack([
    df_val['label_1'], df_val['label_2'], df_val['label_3'],
    df_val['label_4'], df_val['label_5'], df_val['label_6']
], axis=1)

train_dataset = tf.data.Dataset.from_tensor_slices((
  X_train, train_labels
)).shuffle(buffer_size=1000).batch(BATCH_SIZE)

val_dataset = tf.data.Dataset.from_tensor_slices((
  X_val, val_labels
)).shuffle(buffer_size=1000).batch(BATCH_SIZE)

In [41]:
for features, labels in train_dataset.take(1):
    print("Batch shape:", features.shape)
    print("First image shape:", features[0].shape)
    print("First labels:", labels[0].numpy())

Batch shape: (64, 50, 250, 3)
First image shape: (50, 250, 3)
First labels: [16 29 25 17 20 15]


# Modelling

In [28]:
input_layer = Input(shape=(50,250,3))
x = Conv2D(64, (3,3), activation="relu")(input_layer)
x = MaxPooling2D(2,2)(x)
x = Conv2D(32, (3,3), activation="relu")(x)
x = MaxPooling2D(2,2)(x)
x = Flatten(name="flatten")(x)
x = Dense(32, activation='relu')(x)

output_layers = []
for _ in range(6):
  output_layers.append(Dense(N_LABELS, activation="softmax")(x))

model = Model(inputs=input_layer, outputs=output_layers)

In [None]:
model.summary()

In [44]:
model = tf.keras.models.Sequential([
    # input layer
    Conv2D(32, (3,3), activation="relu", input_shape=(50,250,3)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Conv2D(32, (3,3), activation="relu"),
    MaxPooling2D(2,2),
    Flatten(),
    # output layer
    Dense(64, activation='relu'),
    Dense(D * N_LABELS, activation="softmax"),
    Reshape((D, N_LABELS))
])

In [45]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 48, 248, 32)       896       
                                                                 
 max_pooling2d_12 (MaxPooli  (None, 24, 124, 32)       0         
 ng2D)                                                           
                                                                 
 conv2d_13 (Conv2D)          (None, 22, 122, 64)       18496     
                                                                 
 max_pooling2d_13 (MaxPooli  (None, 11, 61, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_14 (Conv2D)          (None, 9, 59, 32)         18464     
                                                                 
 max_pooling2d_14 (MaxPooli  (None, 4, 29, 32)        

In [46]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

In [49]:
history = model.fit(
    train_dataset,
    epochs=3,
    validation_data=(val_dataset),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [50]:
from google.colab import files
uploaded = files.upload()

Saving 8514_bszavb.jpeg to 8514_bszavb.jpeg
Saving 8522_nprxdm.jpeg to 8522_nprxdm.jpeg
Saving 8549_cusan2.jpeg to 8549_cusan2.jpeg


In [53]:
X_predict = []

for i, fn in enumerate(uploaded.keys()):
  img = Image.open(fn)
  img = np.array(img) /255.0
  X_predict.append(img)

X_predict = np.array(X_predict)

predictions = model.predict(X_predict)

predicted_indices = np.argmax(predictions, axis=2)
predicted_indices



array([[14, 23,  8, 15,  7, 16],
       [14, 23,  8, 15,  7, 16],
       [14, 23,  8, 15,  7, 16]])