# Setup

In [1]:
!pip install -q opendatasets

## Import libraries

In [2]:
import pandas as pd
import opendatasets as od
import numpy as np
import os
import tensorflow as tf
from PIL import Image
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Reshape, Input, BatchNormalization
from tensorflow.keras.models import Model

# Data Loading

In [3]:
od.download("https://www.kaggle.com/datasets/johnbergmann/captcha-image-dataset")

Skipping, found downloaded files in "./captcha-image-dataset" (use force=True to force download)


In [4]:
DATA_URL = "/content/captcha-image-dataset/captchas"
train_dir = os.path.join(DATA_URL, "train")
test_dir = os.path.join(DATA_URL, "test")

# Data Preparation

In [5]:
BATCH_SIZE = 32
IMG_SIZE = (250,50)
N_LABELS = 36
D = 6 # number per image

In [6]:
files_train = []
labels_train = []

for filename in os.listdir(train_dir):
  files_train.append(os.path.join(train_dir, filename))
  labels_train.append(os.path.splitext(filename.split('_')[1])[0].lower())

files_val = []
labels_val = []

for filename in os.listdir(test_dir):
  files_val.append(os.path.join(test_dir, filename))
  labels_val.append(os.path.splitext(filename.split('_')[1])[0].lower())

In [7]:
df_train = pd.DataFrame({'file': files_train, 'label': labels_train})
df_val = pd.DataFrame({'file': files_val, 'label': labels_val})

In [8]:
df_train.head()

Unnamed: 0,file,label
0,/content/captcha-image-dataset/captchas/train/...,d6pnmz
1,/content/captcha-image-dataset/captchas/train/...,nehsvv
2,/content/captcha-image-dataset/captchas/train/...,fz6bn6
3,/content/captcha-image-dataset/captchas/train/...,6nfgbn
4,/content/captcha-image-dataset/captchas/train/...,nu2s2c


## Label Encoding

In [9]:
charset = "0123456789abcdefghijklmnopqrstuvwxyz"
chars_to_index = {char: i for i, char in enumerate(charset)}

In [10]:
def labels_to_indices(label):
  result = [chars_to_index[char] for char in label]
  return result

In [11]:
label_1 = []
label_2 = []
label_3 = []
label_4 = []
label_5 = []
label_6 = []

for label in df_train['label']:
  l1, l2, l3, l4, l5, l6 = labels_to_indices(label)
  label_1.append(l1)
  label_2.append(l2)
  label_3.append(l3)
  label_4.append(l4)
  label_5.append(l5)
  label_6.append(l6)

df_train['label_1'] = label_1
df_train['label_2'] = label_2
df_train['label_3'] = label_3
df_train['label_4'] = label_4
df_train['label_5'] = label_5
df_train['label_6'] = label_6

In [12]:
label_1 = []
label_2 = []
label_3 = []
label_4 = []
label_5 = []
label_6 = []

for label in df_val['label']:
  l1, l2, l3, l4, l5, l6 = labels_to_indices(label)
  label_1.append(l1)
  label_2.append(l2)
  label_3.append(l3)
  label_4.append(l4)
  label_5.append(l5)
  label_6.append(l6)

df_val['label_1'] = label_1
df_val['label_2'] = label_2
df_val['label_3'] = label_3
df_val['label_4'] = label_4
df_val['label_5'] = label_5
df_val['label_6'] = label_6

In [13]:
df_train.drop(columns='label', inplace=True)
df_val.drop(columns='label', inplace=True)

In [14]:
df_train.head()

Unnamed: 0,file,label_1,label_2,label_3,label_4,label_5,label_6
0,/content/captcha-image-dataset/captchas/train/...,13,6,25,23,22,35
1,/content/captcha-image-dataset/captchas/train/...,23,14,17,28,31,31
2,/content/captcha-image-dataset/captchas/train/...,15,35,6,11,23,6
3,/content/captcha-image-dataset/captchas/train/...,6,23,15,16,11,23
4,/content/captcha-image-dataset/captchas/train/...,23,30,2,28,2,12


## Image processing

In [15]:
X_train = []
X_val = []

for filepath in df_train['file']:
  img = Image.open(filepath)
  img = np.array(img)
  X_train.append(img)

X_train = np.array(X_train)/255

for filepath in df_val['file']:
  img = Image.open(filepath)
  img = np.array(img)
  X_val.append(img)

X_val = np.array(X_val)/255

## Create tf dataset

In [16]:
train_labels = tf.stack([
    df_train['label_1'], df_train['label_2'], df_train['label_3'],
    df_train['label_4'], df_train['label_5'], df_train['label_6']
], axis=1)

val_labels = tf.stack([
    df_val['label_1'], df_val['label_2'], df_val['label_3'],
    df_val['label_4'], df_val['label_5'], df_val['label_6']
], axis=1)

train_dataset = tf.data.Dataset.from_tensor_slices((
  X_train, train_labels
)).shuffle(buffer_size=1000).batch(BATCH_SIZE)

val_dataset = tf.data.Dataset.from_tensor_slices((
  X_val, val_labels
)).shuffle(buffer_size=1000).batch(BATCH_SIZE)

In [None]:
# Prepare the labels as a tuple of tensors
train_labels = (
    df_train['label_1'].values,
    df_train['label_2'].values,
    df_train['label_3'].values,
    df_train['label_4'].values,
    df_train['label_5'].values,
    df_train['label_6'].values
)

val_labels = (
    df_val['label_1'].values,
    df_val['label_2'].values,
    df_val['label_3'].values,
    df_val['label_4'].values,
    df_val['label_5'].values,
    df_val['label_6'].values
)

# Convert labels to tuple of tensors
train_labels = tuple([tf.convert_to_tensor(label) for label in train_labels])
val_labels = tuple([tf.convert_to_tensor(label) for label in val_labels])

# Create the tf.data.Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, train_labels)).shuffle(buffer_size=1000).batch(BATCH_SIZE)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, val_labels)).shuffle(buffer_size=1000).batch(BATCH_SIZE)


In [17]:
for features, labels in train_dataset.take(1):
    print("Batch shape:", features.shape)
    print("First image shape:", features[0].shape)
    print("First labels:", labels[0].numpy())

Batch shape: (32, 50, 250, 3)
First image shape: (50, 250, 3)
First labels: [14 23 15 27 35 11]


# Modelling

In [18]:
input_layer = Input(shape=(50,250,3))
x = Conv2D(64, (3,3), activation="relu")(input_layer)
x = MaxPooling2D(2,2)(x)
x = Conv2D(32, (3,3), activation="relu")(x)
x = MaxPooling2D(2,2)(x)
x = Flatten(name="flatten")(x)
x = Dense(32, activation='relu')(x)

output_layers = []
for _ in range(6):
  output_layers.append(Dense(N_LABELS, activation="softmax")(x))

model = Model(inputs=input_layer, outputs=output_layers)

In [19]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 50, 250, 3)]         0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 48, 248, 64)          1792      ['input_1[0][0]']             
                                                                                                  
 max_pooling2d (MaxPooling2  (None, 24, 124, 64)          0         ['conv2d[0][0]']              
 D)                                                                                               
                                                                                                  
 conv2d_1 (Conv2D)           (None, 22, 122, 32)          18464     ['max_pooling2d[0][0]']   

In [44]:
model = tf.keras.models.Sequential([
    # input layer
    Conv2D(32, (3,3), activation="relu", input_shape=(50,250,3)),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),
    Conv2D(64, (3,3), activation="relu"),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),
    Conv2D(128, (3,3), activation="relu"),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),
    Flatten(),
    # output layer
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(D * N_LABELS, activation="softmax"),
    Reshape((D, N_LABELS))
])

In [None]:
model.summary()

In [20]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

In [21]:
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=(val_dataset),
)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-21-0a2a70650efa>", line 1, in <cell line: 1>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1807, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1151, in train_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1209, in compute_loss

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/losses.py", line 143, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/losses.py", line 270, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/losses.py", line 2454, in sparse_categorical_crossentropy

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend.py", line 5775, in sparse_categorical_crossentropy

logits and labels must have the same first dimension, got logits shape [32,36] and labels shape [192]
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_3047]

In [None]:
from google.colab import files
uploaded = files.upload()

Saving 8514_bszavb.jpeg to 8514_bszavb.jpeg
Saving 8522_nprxdm.jpeg to 8522_nprxdm.jpeg
Saving 8549_cusan2.jpeg to 8549_cusan2.jpeg


In [None]:
X_predict = []

for i, fn in enumerate(uploaded.keys()):
  img = Image.open(fn)
  img = np.array(img) /255.0
  X_predict.append(img)

X_predict = np.array(X_predict)

predictions = model.predict(X_predict)

predicted_indices = np.argmax(predictions, axis=2)
predicted_indices



array([[14, 23,  8, 15,  7, 16],
       [14, 23,  8, 15,  7, 16],
       [14, 23,  8, 15,  7, 16]])