In [1]:
import os
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image
import numpy as np

In [3]:
def load_data(folder_path, image_size=(100, 50)):
    data = []
    labels = []
    image_files = os.listdir(folder_path)

    for img_file in image_files:
        img_path = os.path.join(folder_path, img_file)
        label = os.path.basename(img_file).split('.')[0]
        img = Image.open(img_path) 
        img = img.resize(image_size)
        img = img_to_array(img) / 255.0  
        data.append(img)
        labels.append(label)

    return np.array(data), labels

In [2]:
train_dir = './captcha dataset/train'  
test_dir = './captcha dataset/test'

In [4]:
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

In [5]:
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")

Training data shape: (8501, 50, 100, 3)
Test data shape: (1500, 50, 100, 3)


In [9]:
from sklearn.preprocessing import LabelBinarizer
import numpy as np

In [77]:
def preprocess_labels(labels, max_len=6):
    lb = LabelBinarizer()
    lb.fit(list('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ')) 

    label_data = []
    for label in labels:
        if len(label) > max_len:
            padded_label = label[:max_len] 
        else:
            padded_label = label.ljust(max_len) 
        
        encoded_label = np.array([lb.transform([char])[0] for char in padded_label])
        label_data.append(encoded_label)

    return np.stack(label_data)


In [78]:
y_train_encoded = preprocess_labels(y_train)
y_test_encoded = preprocess_labels(y_test)

In [81]:
print(f"One-hot encoded training label shape: {y_train_encoded.shape}")
print(f"One-hot encoded test label shape: {y_test_encoded.shape}")

One-hot encoded training label shape: (8501, 6, 63)
One-hot encoded test label shape: (1500, 6, 63)


In [82]:
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [83]:
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")

Training data shape: (8501, 50, 100, 3)
Test data shape: (1500, 50, 100, 3)


In [84]:
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Reshape, TimeDistributed
from keras.models import Model

In [85]:
characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
num_classes = len(characters)  

lb = LabelBinarizer()
lb.fit(list(characters)) 
num_classes

63

In [86]:
def build_captcha_model(num_classes, max_length):
    input_layer = Input(shape=(50, 100, 3))
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(input_layer)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    
    output_layer = Dense(num_classes * max_length, activation='softmax')(x)

    output_layer = Reshape((max_length, num_classes))(output_layer)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

In [89]:
# Parameters
num_classes = 63  
max_length = 6    
batch_size = 32
epochs = 20

model = build_captcha_model(num_classes, max_length)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [90]:
history = model.fit(
    X_train,            
     y_train_encoded,    
     validation_data=(X_test, y_test_encoded),  
    batch_size=batch_size,
    epochs=epochs,
    verbose=1
)


Epoch 1/20
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 116ms/step - accuracy: 0.0737 - loss: 2.5150 - val_accuracy: 0.0519 - val_loss: 2.3140
Epoch 2/20
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 160ms/step - accuracy: 0.0803 - loss: 2.1040 - val_accuracy: 0.0520 - val_loss: 2.3200
Epoch 3/20
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 169ms/step - accuracy: 0.0785 - loss: 2.0745 - val_accuracy: 0.0520 - val_loss: 2.3036
Epoch 4/20
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 148ms/step - accuracy: 0.0781 - loss: 2.0655 - val_accuracy: 0.0519 - val_loss: 2.2808
Epoch 5/20
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 139ms/step - accuracy: 0.0811 - loss: 2.0634 - val_accuracy: 0.0521 - val_loss: 2.2890
Epoch 6/20
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 143ms/step - accuracy: 0.0787 - loss: 2.0598 - val_accuracy: 0.0521 - val_loss: 2.3328
Epoch 7/20

In [91]:
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
print(f'Test loss: {test_loss}, Test accuracy: {test_accuracy}')


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.0518 - loss: 2.2173
Test loss: 2.307643175125122, Test accuracy: 0.05233333259820938
