In [5]:
# Load the Drive helper and mount
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

data_dir = Path("/content/drive/MyDrive/CAPTCHA DATA SET/samples")
images = list(data_dir.glob("*.png"))
print("Number of images found:", len(images))

characters = set()
captcha_length = []

for img_path in images:
    label = img_path.stem
    captcha_length.append(len(label))
    characters.update(label)

characters = sorted(characters)
max_length = max(captcha_length)
char_to_num = {c: i for i, c in enumerate(characters)}
num_to_char = {i: c for i, c in enumerate(characters)}
num_chars = len(characters)

print("Unique characters:", characters)
print("Max captcha length:", max_length)

data = [(str(p), p.stem) for p in images]
dataset = pd.DataFrame(data, columns=["img_path", "label"])
train_df, valid_df = train_test_split(dataset, test_size=0.1, random_state=seed)

def preprocess_image(img_path, img_width=200, img_height=50):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (img_width, img_height))
    img = img.astype(np.float32) / 255.0
    return img

img_width = 200
img_height = 50
batch_size = 16
max_length = 5

def encode_label(label):
    return [char_to_num[c] for c in label]

def decode_label(nums):
    return ''.join([num_to_char[n] for n in nums if n != -1])

def prepare_data(df):
    images = np.array([preprocess_image(path) for path in df['img_path']])
    images = np.expand_dims(images, axis=-1)
    labels = [encode_label(label) for label in df['label']]
    labels = keras.preprocessing.sequence.pad_sequences(labels, maxlen=max_length, padding='post', value=-1)
    return images, labels

train_images, train_labels = prepare_data(train_df)
valid_images, valid_labels = prepare_data(valid_df)

def conv_block(x, filters):
    x = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    return x

def build_cnn_model():
    input_img = layers.Input(shape=(img_height, img_width, 1), name='image')
    x = conv_block(input_img, 64)
    x = conv_block(x, 128)
    x = conv_block(x, 256)
    x = layers.Conv2D(512, 3, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)

    shape = x.shape
    x = layers.Reshape((shape[2], shape[1] * shape[3]))(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.25)(x)
    x = layers.Dense(num_chars + 1, activation='softmax')(x)

    return keras.Model(inputs=input_img, outputs=x)


def ctc_loss(y_true, y_pred):
    batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

    input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
    label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

    return tf.keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)

model = build_cnn_model()
model.compile(optimizer='adam', loss=ctc_loss)
model.summary()

class DecodeCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        preds = model.predict(valid_images[:5])
        pred_texts = []

        for pred in preds:
            decoded = tf.keras.backend.ctc_decode(
                pred[np.newaxis, ...],
                input_length=np.ones(1) * pred.shape[0],
                greedy=True
            )[0][0].numpy()

            pred_text = decode_label(decoded[0])
            pred_texts.append(pred_text)

        print(f"\nPredictions: {pred_texts}")
        print(f"Actuals: {valid_df['label'].values[:5].tolist()}")

model.fit(
    train_images, train_labels,
    validation_data=(valid_images, valid_labels),
    epochs=40,
    batch_size=batch_size,
    callbacks=[
        DecodeCallback(),
        tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
    ]
)

def predict_captcha(image_path):
    img = preprocess_image(image_path)
    img = np.expand_dims(img, axis=0)
    img = np.expand_dims(img, axis=-1)
    pred = model.predict(img)

    decoded = tf.keras.backend.ctc_decode(
        pred,
        input_length=np.ones(1) * pred.shape[1],
        greedy=True
    )[0][0].numpy()

    return decode_label(decoded[0])

# Test one sample
sample_image = images[66]
print("Prediction:", predict_captcha(str(sample_image)))
print("Actual:", sample_image.stem)


Number of images found: 1040
Unique characters: ['2', '3', '4', '5', '6', '7', '8', 'b', 'c', 'd', 'e', 'f', 'g', 'm', 'n', 'p', 'w', 'x', 'y']
Max captcha length: 5


Epoch 1/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 926ms/step

Predictions: ['', '', '', '', '']
Actuals: ['6b46g', 'mxyxw', '6bxwg', '3n3cf', 'nbwnn']
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m293s[0m 5s/step - loss: 27.2608 - val_loss: 68.3520
Epoch 2/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 339ms/step

Predictions: ['', '', '', '', '']
Actuals: ['6b46g', 'mxyxw', '6bxwg', '3n3cf', 'nbwnn']
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 5s/step - loss: 15.6980 - val_loss: 69.7219
Epoch 3/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 601ms/step

Predictions: ['', '', '', '', '']
Actuals: ['6b46g', 'mxyxw', '6bxwg', '3n3cf', 'nbwnn']
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 5s/step - loss: 15.4342 - val_loss: 69.7594
Epoch 4/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step

Predictions: ['', '', '', '', '']
Actuals: ['6b46g', 'mxyxw', '

In [7]:
import joblib
import json

# 1. Save the model in the new Keras format
model.save("captcha_cnn_model.keras")

# 2. Save label encoder mappings
encoder_data = {
    "char_to_num": char_to_num,
    "num_to_char": num_to_char
}
joblib.dump(encoder_data, "captcha_label_encoder.pkl")

# 3. Save config info as JSON
config = {
    "img_width": img_width,
    "img_height": img_height,
    "max_length": max_length,
    "num_chars": num_chars,
    "characters": characters
}

with open("captcha_model_config.json", "w") as f:
    json.dump(config, f, indent=4)

print("Model, encoder, and config files saved successfully.")


Model, encoder, and config files saved successfully.


In [8]:
# # Save model in .keras format (recommended for new versions)
# model.save("captcha_model.keras", save_format="keras")


In [9]:
# import pickle

# with open("char_mappings.pkl", "wb") as f:
#     pickle.dump({
#         "char_to_num": char_to_num,
#         "num_to_char": num_to_char,
#         "max_length": max_length,
#         "img_width": img_width,
#         "img_height": img_height
#     }, f)


In [10]:
# def prediction_vs_actual(model, df, num_samples=15):
#     samples = df.sample(num_samples)
#     preds = []

#     for row in samples.itertuples():
#         img = preprocess_image(row.img_path)
#         img_input = np.expand_dims(img, axis=(0, -1))
#         pred = model.predict(img_input, verbose=0)
#         decoded = tf.keras.backend.ctc_decode(pred, input_length=[pred.shape[1]], greedy=True)[0][0].numpy()[0]
#         pred_text = decode_label(decoded)
#         preds.append((pred_text, row.label))

#     results = pd.DataFrame(preds, columns=["Predicted", "Actual"])
#     print(results)


In [11]:
# from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# def show_string_confusion_matrix(model, images, labels):
#     y_true = []
#     y_pred = []

#     preds = model.predict(images, verbose=0)
#     decoded_preds = tf.keras.backend.ctc_decode(
#         preds,
#         input_length=np.ones(preds.shape[0]) * preds.shape[1],
#         greedy=True
#     )[0][0].numpy()

#     for pred_seq, true_seq in zip(decoded_preds, labels):
#         y_pred.append(decode_label(pred_seq))
#         y_true.append(decode_label(true_seq))

#     # Confusion matrix for full predicted vs actual CAPTCHA strings
#     cm_labels = sorted(set(y_true + y_pred))
#     cm = confusion_matrix(y_true, y_pred, labels=cm_labels)
#     disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=cm_labels)

#     fig, ax = plt.subplots(figsize=(10, 10))
#     disp.plot(ax=ax, cmap="Blues", xticks_rotation=45)
#     plt.title("Confusion Matrix: Full CAPTCHA Strings")
#     plt.show()


In [12]:
prediction_vs_actual(model, valid_df)
show_string_confusion_matrix(model, valid_images, valid_labels)


NameError: name 'prediction_vs_actual' is not defined

PREDICTOR :


In [None]:
# import numpy as np
# import tensorflow as tf
# import pickle
# import cv2
# from tensorflow.keras.models import load_model
# from pathlib import Path

# # Load the model
# model = load_model("captcha_model.keras")

# # Load mappings
# with open("char_mappings.pkl", "rb") as f:
#     mappings = pickle.load(f)

# char_to_num = mappings["char_to_num"]
# num_to_char = mappings["num_to_char"]
# max_length = mappings["max_length"]
# img_width = mappings["img_width"]
# img_height = mappings["img_height"]

# # Decoding function
# def decode_label(encoded_seq):
#     return ''.join([num_to_char[i] for i in encoded_seq if i != -1])

# # Preprocess input image
# def preprocess_image(img_path):
#     img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
#     img = cv2.resize(img, (img_width, img_height))
#     img = img.astype(np.float32) / 255.0
#     img = np.expand_dims(img, axis=(0, -1))  # (1, H, W, 1)
#     return img

# # Prediction function
# def predict_captcha(img_path):
#     img = preprocess_image(img_path)
#     pred = model.predict(img, verbose=0)
#     decoded = tf.keras.backend.ctc_decode(pred, input_length=[pred.shape[1]], greedy=True)[0][0].numpy()[0]
#     return decode_label(decoded)

# # Example usage
# if __name__ == "__main__":
#     image_path = "sample_captcha.png"  # Replace with your image path
#     result = predict_captcha(image_path)
#     print("Predicted CAPTCHA:", result)