# Setup

In [1]:
!pip install -q opendatasets

## Import libraries

In [2]:
import pandas as pd
import opendatasets as od
import numpy as np
import os
import tensorflow as tf
from PIL import Image
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Input

# Data Loading

In [3]:
# Fetch the CAPTCHA image dataset from Kaggle. Per the log below, the archive
# is placed under ./captcha-image-dataset (relative to the working directory).
KAGGLE_DATASET_URL = "https://www.kaggle.com/datasets/johnbergmann/captcha-image-dataset"
od.download(KAGGLE_DATASET_URL)

Dataset URL: https://www.kaggle.com/datasets/johnbergmann/captcha-image-dataset
Downloading captcha-image-dataset.zip to ./captcha-image-dataset


100%|██████████| 39.5M/39.5M [00:00<00:00, 62.9MB/s]





In [4]:
# Root folder of the extracted dataset (a filesystem path, despite the name).
# The download cell writes to ./captcha-image-dataset, so anchor the path to the
# current working directory instead of hard-coding Colab's /content; when the
# notebook runs from /content the resulting path is the same as before.
DATA_URL = os.path.join(os.getcwd(), "captcha-image-dataset", "captchas")
train_dir = os.path.join(DATA_URL, "train")  # images used for training
test_dir = os.path.join(DATA_URL, "test")  # images held out for evaluation

# Data Preparation

In [8]:
# Constants shared by the data-preparation cells below.
BATCH_SIZE = 64       # examples per tf.data batch
IMG_SIZE = (250, 50)  # captcha image size; presumably (width, height) - TODO confirm
N_LABELS = 10 + 26    # distinct characters: digits 0-9 plus lowercase a-z
D = 6                 # number of characters in each captcha

## Extract image data

In [6]:
def load_captcha_split(directory):
  """Load every image in `directory` together with the label in its file name.

  The label is the second underscore-separated field of the file name with the
  extension removed and lower-cased (illustrative: "0001_AbC4eF.png" ->
  "abc4ef").

  Args:
    directory: folder whose entries are all readable image files.

  Returns:
    (pixels, labels): `pixels` is the stacked image array divided by 255 and
    stored as float32; `labels` is a list of label strings in the same order.

  Raises:
    IndexError: if a file name contains no underscore.
  """
  images, labels = [], []
  for filename in os.listdir(directory):
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the NumPy array.
    with Image.open(os.path.join(directory, filename)) as image:
      images.append(np.array(image))
    labels.append(os.path.splitext(filename.split('_')[1])[0].lower())

  # float32 needs half the memory of the float64 array that a plain `/ 255`
  # on integer pixels would create.
  pixels = np.asarray(images, dtype=np.float32) / 255
  return pixels, labels


# Same loader for both splits, so train and test are read identically.
X_train, y_train = load_captcha_split(train_dir)
X_test, y_test = load_captcha_split(test_dir)

In [None]:
# Move the last axis of every image to the front of its other two axes, i.e.
# per-image axes (0, 1, 2) -> (2, 0, 1), done for the whole batch in one call.
# ascontiguousarray materialises the result as a fresh C-ordered array rather
# than a strided view of the input.
X_train_reshaped = np.ascontiguousarray(np.transpose(X_train, (0, 3, 1, 2)))
X_test_reshaped = np.ascontiguousarray(np.transpose(X_test, (0, 3, 1, 2)))

In [None]:
# Sanity check of the transposed training array; the recorded run shows
# (8501, 3, 50, 250), i.e. the 3-element axis now comes right after the batch axis.
print(X_train_reshaped.shape)

(8501, 3, 50, 250)


## Label encoding

In [None]:
# Peek at the first raw (lower-cased) label string of each split.
print(y_train[0], y_test[0])

tnkxfn hgxc6s


In [None]:
# Alphabet of the captcha labels: the ten digits followed by lowercase a-z.
charset = "0123456789abcdefghijklmnopqrstuvwxyz"
# Position of each character inside `charset`, used as its integer class id.
chars_to_index = dict(zip(charset, range(len(charset))))
num_classes = len(charset)

def labels_to_indices(label):
  """Return the list of class ids for the characters of `label`, in order.

  Raises KeyError if `label` contains a character that is not in `charset`.
  """
  indices = []
  for char in label:
    indices.append(chars_to_index[char])
  return indices

In [None]:
# Quick check of the encoder on a sample label.
print(labels_to_indices("dxftup"))

[13, 33, 15, 29, 30, 25]


In [None]:
# Encode every label string; with equal-length labels each result is a 2D
# integer array with one row per image and one column per character.
y_train_encoded = np.array(list(map(labels_to_indices, y_train)))
y_test_encoded = np.array(list(map(labels_to_indices, y_test)))

In [None]:
# First encoded training label: one class id per character.
print(y_train_encoded[0])

[29 23 20 33 15 23]


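`np.transpose` swaps the axes of a 2D array. The encoded labels have shape `(n_samples, 6)` (one row per image), so their transpose has shape `(6, n_samples)` (one row per character position). For example
```
[[0, 1, 2],
 [3, 4, 5]]
```
would become
```
[[0, 3],
 [1, 4],
 [2, 5]]
```
Note that a 1D array is returned unchanged by `np.transpose`.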

In [None]:
# Transposed label matrix: one row per character position (6 rows in the
# recorded run), one column per training image.
print(np.transpose(y_train_encoded))

[[29 25 33 ... 13 27 31]
 [23 16 23 ...  6 14 31]
 [20 10 23 ... 17 10 20]
 [33 31 15 ... 17 13 30]
 [15 11 35 ...  2 13 23]
 [23 14 16 ...  6 22 25]]


In [None]:
# Split the label matrix by character position: y_*_k holds the class id of
# the k-th character for every image in the split.
(y_train_1, y_train_2, y_train_3,
 y_train_4, y_train_5, y_train_6) = y_train_encoded.T
(y_test_1, y_test_2, y_test_3,
 y_test_4, y_test_5, y_test_6) = y_test_encoded.T

In [None]:
# One target array per character position, in position order.
train_targets = (y_train_1, y_train_2, y_train_3, y_train_4, y_train_5, y_train_6)
test_targets = (y_test_1, y_test_2, y_test_3, y_test_4, y_test_5, y_test_6)

# Shuffle individual examples BEFORE batching. Shuffling after .batch() only
# reorders whole batches, so every epoch would see the same examples grouped
# together. A buffer covering the whole split gives a uniform shuffle; the old
# 1000-batch buffer already held the entire dataset, so memory use is comparable.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_reshaped, train_targets))
train_dataset = train_dataset.shuffle(buffer_size=len(X_train_reshaped)).batch(BATCH_SIZE)

# The test split is only evaluated, so it is batched in a fixed order; this
# keeps predictions aligned with y_test across iterations.
test_dataset = tf.data.Dataset.from_tensor_slices((X_test_reshaped, test_targets))
test_dataset = test_dataset.batch(BATCH_SIZE)

In [None]:
for data, labels in train_dataset:
    # Access the data tensor
    print("Data tensor shape:", data.shape)

    # Access the label tensors for each position
    for i, label_tensor in enumerate(labels):
        print(f"Label tensor {i+1} shape:", label_tensor.shape)