<a href="https://colab.research.google.com/github/Ajayrawati/Captcha_Detection/blob/main/captcha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

import os
from datetime import datetime

from mltu.configs import BaseModelConfigs


class ModelConfigs(BaseModelConfigs):
    """Hyperparameters and output location for the captcha-to-text model.

    NOTE(review): the next cell of this notebook defines ``ModelConfigs`` again
    with ``train_epochs = 1000``; when the cells are run in order, that later
    definition is the one in effect.
    """

    def __init__(self):
        super().__init__()

        # Every instantiation gets its own run directory, named after the
        # current time at minute resolution.
        run_stamp = datetime.now().strftime("%Y%m%d%H%M")
        self.model_path = os.path.join("Models/02_captcha_to_text", run_stamp)

        # Placeholder; the training cell fills this in from the dataset labels.
        self.vocab = ""

        # Image size handed to the resizer and used as the model input shape.
        self.height = 50
        self.width = 200

        # Placeholder; the training cell sets this to the longest label length.
        self.max_text_length = 0

        # Optimisation settings.
        self.batch_size = 64
        self.learning_rate = 1e-3
        self.train_epochs = 20
        self.train_workers = 20

In [None]:
import os
from datetime import datetime

from mltu.configs import BaseModelConfigs


class ModelConfigs(BaseModelConfigs):
    """Training configuration for the captcha-to-text model.

    This definition replaces the ``ModelConfigs`` declared in the previous
    cell; the only value that differs is ``train_epochs`` (1000 here).
    """

    def __init__(self):
        super().__init__()

        # Run directory: Models/02_captcha_to_text/<YYYYmmddHHMM of creation>.
        created_at = datetime.now()
        self.model_path = os.path.join(
            "Models/02_captcha_to_text",
            created_at.strftime("%Y%m%d%H%M"),
        )

        # Overwritten by the training cell once the dataset has been scanned.
        self.vocab = ""

        # Height and width every captcha image is resized to.
        self.height = 50
        self.width = 200

        # Overwritten by the training cell with the longest label length.
        self.max_text_length = 0

        self.batch_size = 64
        self.learning_rate = 1e-3
        # Upper bound only: the training cell also installs an EarlyStopping callback.
        self.train_epochs = 1000
        self.train_workers = 20

In [None]:
from keras import layers
from keras.models import Model

from mltu.tensorflow.model_utils import residual_block
from keras import layers

class NormalizeLayer(layers.Layer):
    """Layer that divides its input by 255.0.

    ``train_model`` uses it as the first layer in place of a ``Lambda`` layer.
    """

    def call(self, inputs):
        divisor = 255.0
        return inputs / divisor

def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):
    """Build the residual-CNN + bidirectional-LSTM captcha recognition model.

    Args:
        input_dim: Shape passed to ``layers.Input`` (the training cell passes
            ``(height, width, 3)``).
        output_dim: Number of vocabulary characters. The output layer has
            ``output_dim + 1`` units (presumably the extra unit is the CTC
            blank -- confirm against ``CTCloss``).
        activation: Activation name forwarded to every residual block.
        dropout: Dropout rate forwarded to every residual block and applied
            once more after the recurrent layer.

    Returns:
        An uncompiled ``Model`` mapping the layer named "input" to the softmax
        layer named "output".
    """
    inputs = layers.Input(shape=input_dim, name="input")

    # Custom layer used instead of a Lambda layer to scale the input by 1/255.
    features = NormalizeLayer()(inputs)

    # (filters, skip_conv, strides) for each residual block, in application order.
    block_specs = (
        (16, True, 1),
        (16, True, 2),
        (16, False, 1),
        (32, True, 2),
        (32, False, 1),
        (64, True, 2),
        (32, True, 1),
        (64, True, 2),
        (64, False, 1),
    )
    for filters, skip_conv, strides in block_specs:
        features = residual_block(
            features,
            filters,
            activation=activation,
            skip_conv=skip_conv,
            strides=strides,
            dropout=dropout,
        )

    # Merge the two axes before the channel axis into a single sequence axis so
    # the recurrent layer sees a (steps, channels) tensor per sample.
    rows, cols, channels = features.shape[-3], features.shape[-2], features.shape[-1]
    sequence = layers.Reshape((rows * cols, channels))(features)

    recurrent = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(sequence)
    recurrent = layers.Dropout(dropout)(recurrent)

    output = layers.Dense(output_dim + 1, activation="softmax", name="output")(recurrent)

    return Model(inputs=inputs, outputs=output)


In [None]:
import tensorflow as tf
# Best effort: ask TensorFlow to enable memory growth on every visible GPU.
# A failure here must not stop the notebook, but it is reported instead of
# being silently discarded (and Ctrl-C is no longer swallowed by a bare except).
try:
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as exc:
    print(f"Could not enable GPU memory growth: {exc}")

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard

from mltu.tensorflow.dataProvider import DataProvider
from mltu.tensorflow.losses import CTCloss
from mltu.tensorflow.callbacks import Model2onnx, TrainLogger
from mltu.tensorflow.metrics import CWERMetric

from mltu.preprocessors import ImageReader
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate
from mltu.annotations.images import CVImage



import os
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile


def download_and_unzip(url, extract_to="Datasets"):
    """Download a zip archive and extract all of its members.

    The whole archive is read into memory before extraction.

    Args:
        url: Location of the zip archive; anything ``urlopen`` accepts.
        extract_to: Directory the archive members are extracted into.

    Raises:
        ValueError / urllib.error.URLError: if ``url`` cannot be opened.
        zipfile.BadZipFile: if the downloaded payload is not a zip archive.
    """
    # Close the HTTP response as soon as the payload has been read.
    with urlopen(url) as http_response:
        payload = BytesIO(http_response.read())

    # Close the archive once extraction is finished.
    with ZipFile(payload) as archive:
        archive.extractall(path=extract_to)


# URL of the zip archive that unpacks to Datasets/captcha_images_v2.
# NOTE(review): the URL is blank in this notebook, so the download cannot
# succeed as written; fill it in before running without a local dataset copy.
CAPTCHA_DATASET_URL = ""

if not os.path.exists(os.path.join("Datasets", "captcha_images_v2")):
    if not CAPTCHA_DATASET_URL:
        # Same exception type urlopen("") would raise, but with an actionable message.
        raise ValueError(
            "Datasets/captcha_images_v2 is missing and no dataset URL is configured; "
            "set CAPTCHA_DATASET_URL or place the images in that directory manually."
        )
    download_and_unzip(CAPTCHA_DATASET_URL, extract_to="Datasets")

# Build the list of [image path, label] pairs. The label of each image is its
# file name without the extension.
dataset, vocab, max_len = [], set(), 0
captcha_path = os.path.join("Datasets", "captcha_images_v2")
# os.listdir returns entries in arbitrary order; sort so the dataset list is
# the same on every run.
for file in sorted(os.listdir(captcha_path)):
    file_path = os.path.join(captcha_path, file)
    label = os.path.splitext(file)[0]
    dataset.append([file_path, label])
    vocab.update(label)  # a str is already an iterable of its characters
    max_len = max(max_len, len(label))

configs = ModelConfigs()

# Save vocab and maximum text length to configs.
# `vocab` is a set, whose iteration order is not stable between interpreter
# runs; sorting makes the character -> index mapping reproducible.
configs.vocab = "".join(sorted(vocab))
configs.max_text_length = max_len
configs.save()

# Reader that loads each dataset entry as a CVImage.
image_readers = [ImageReader(CVImage)]

# Per-sample transforms: resize the image to the configured size, map label
# characters to vocabulary indices, then pad labels to the longest label
# length using len(vocab) as the padding value.
sample_transformers = [
    ImageResizer(configs.width, configs.height),
    LabelIndexer(configs.vocab),
    LabelPadding(
        max_word_length=configs.max_text_length,
        padding_value=len(configs.vocab),
    ),
]

# Data provider over the full dataset.
data_provider = DataProvider(
    dataset=dataset,
    skip_validation=True,
    batch_size=configs.batch_size,
    data_preprocessors=image_readers,
    transformers=sample_transformers,
)

# Split into training and validation providers (split=0.9).
train_data_provider, val_data_provider = data_provider.split(split=0.9)

# Only the training provider gets augmentation: random brightness, rotation
# and erode/dilate.
train_data_provider.augmentors = [
    RandomBrightness(),
    RandomRotate(),
    RandomErodeDilate(),
]

# Build the network for 3-channel images of the configured size; the number of
# output classes is derived from the vocabulary.
vocab_size = len(configs.vocab)
model = train_model(
    input_dim=(configs.height, configs.width, 3),
    output_dim=vocab_size,
)

# Compile with Adam, the CTC loss and the CER/WER metric. The metric is told
# that index len(vocab) is the label padding token. (No summary is printed here.)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=configs.learning_rate),
    loss=CTCloss(),
    metrics=[CWERMetric(padding_token=vocab_size)],
    run_eagerly=False,
)

# Make sure the run directory exists before any callback writes into it.
os.makedirs(configs.model_path, exist_ok=True)

# Every metric-driven callback watches the validation character error rate,
# where lower is better.
monitored_metric = "val_CER"

# Stop when the monitored metric has not improved for 50 epochs.
earlystopper = EarlyStopping(
    monitor=monitored_metric,
    patience=50,
    verbose=1,
    mode="min",
)

# Keep only the best model seen so far inside the run directory.
checkpoint = ModelCheckpoint(
    f"{configs.model_path}/model.keras",
    monitor=monitored_metric,
    verbose=1,
    save_best_only=True,
    mode="min",
)

# Training log and TensorBoard events, both under the run directory.
trainLogger = TrainLogger(configs.model_path)
tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)

# Multiply the learning rate by 0.9 after 20 epochs without improvement.
reduceLROnPlat = ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.9,
    min_delta=1e-10,
    patience=20,
    verbose=1,
    mode="min",
)

# Train with the callbacks defined above, validating after every epoch.
training_callbacks = [earlystopper, checkpoint, trainLogger, reduceLROnPlat, tb_callback]
model.fit(
    train_data_provider,
    validation_data=val_data_provider,
    epochs=configs.train_epochs,
    callbacks=training_callbacks,
)

# Save the model as it is after the last epoch into the working directory.
# (The best checkpoint is written separately by the ModelCheckpoint callback.)
model.save("model.keras")

# Record which samples ended up in each split, next to the run's other artifacts.
for provider, csv_name in (
    (train_data_provider, "train.csv"),
    (val_data_provider, "val.csv"),
):
    provider.to_csv(os.path.join(configs.model_path, csv_name))



Epoch 1/1000


  self._warn_if_super_not_called()


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step - CER: 3.8090 - WER: 1.0000 - loss: 82.7404
Epoch 1: val_CER improved from inf to 2.03038, saving model to Models/02_captcha_to_text/202409291200/model.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 374ms/step - CER: 3.7050 - WER: 1.0000 - loss: 80.6381 - val_CER: 2.0304 - val_WER: 1.0000 - val_loss: 19.9176 - learning_rate: 0.0010
Epoch 2/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step - CER: 1.7151 - WER: 1.0000 - loss: 19.2691
Epoch 2: val_CER improved from 2.03038 to 1.51519, saving model to Models/02_captcha_to_text/202409291200/model.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 218ms/step - CER: 1.7043 - WER: 1.0000 - loss: 19.1926 - val_CER: 1.5152 - val_WER: 1.0000 - val_loss: 19.0532 - learning_rate: 0.0010
Epoch 3/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step - CER: 1.4215 - WER: 1.0000 - 

In [None]:
import cv2
import typing
import numpy as np
from tensorflow.keras.models import load_model
from mltu.utils.text_utils import ctc_decoder, get_cer

class ImageToWordModel:
    """Wraps a saved Keras captcha model and decodes its predictions to text."""

    def __init__(self, model_path: str, char_list: typing.Union[str, list]):
        """Load the model for inference.

        Args:
            model_path: Path to the saved Keras model file.
            char_list: Vocabulary used to decode predictions; must be the one
                the model was trained with.
        """
        # compile=False: only the forward pass is needed for inference, and
        # restoring the compiled state makes Keras rebuild the mltu CTCloss,
        # which fails with "CTCloss.__init__() got an unexpected keyword
        # argument 'reduction'" (the TypeError recorded in this notebook).
        self.model = load_model(model_path, compile=False)
        self.char_list = char_list
        # (height, width): the model input is (batch, height, width, channels).
        self.input_shape = self.model.input_shape[1:3]

    def predict(self, image: np.ndarray):
        """Return the decoded text for a single image.

        Args:
            image: Image array as returned by ``cv2.imread``.

        Returns:
            The decoded text for ``image``.

        Raises:
            ValueError: if ``image`` is None, which is what ``cv2.imread``
                returns when a file cannot be read.
        """
        if image is None:
            raise ValueError("image is None; cv2.imread returns None when a file cannot be read")

        # cv2.resize expects (width, height), the reverse of the stored shape.
        image = cv2.resize(image, self.input_shape[::-1])

        # Add the batch dimension and convert to float32.
        image_pred = np.expand_dims(image, axis=0).astype(np.float32)

        # verbose=0: no Keras progress bar for every single-image call.
        preds = self.model.predict(image_pred, verbose=0)

        # Decode the batch of predictions and return the only entry.
        text = ctc_decoder(preds, self.char_list)[0]

        return text

if __name__ == "__main__":
    import pandas as pd
    from tqdm import tqdm
    from mltu.configs import BaseModelConfigs

    # Directory of the training run being evaluated. It is declared once and
    # kept relative to the working directory, like the paths the training cell
    # writes, instead of mixing relative and "/content/..." absolute paths.
    run_dir = "Models/02_captcha_to_text/202409291006"

    # Load the configuration (including the vocabulary) saved by that run.
    configs = BaseModelConfigs.load(f"{run_dir}/configs.yaml")

    # Load the model the training cell saved with model.save('model.keras').
    model = ImageToWordModel(model_path="model.keras", char_list=configs.vocab)

    # Validation split written by the training cell: rows of [image path, label].
    val_rows = pd.read_csv(f"{run_dir}/val.csv").values.tolist()

    # Character error rate of every validation sample.
    accum_cer = []

    for image_path, label in tqdm(val_rows):
        # The CSV may contain Windows-style separators; normalise before reading.
        image = cv2.imread(image_path.replace("\\", "/"))
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        prediction_text = model.predict(image)

        cer = get_cer(prediction_text, label)
        print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")

        accum_cer.append(cer)

    # Print average CER over the validation split.
    print(f"Average CER: {np.average(accum_cer)}")


ValueError: The `{arg_name}` of this `Lambda` layer is a Python lambda. Deserializing it is unsafe. If you trust the source of the config artifact, you can override this error by passing `safe_mode=False` to `from_config()`, or calling `keras.config.enable_unsafe_deserialization().

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# The pinned versions are the ones this cell's recorded output shows being
# installed; keras is left unpinned because the output does not show its version.
%pip install tf2onnx==1.16.1
%pip install mltu==1.2.5
%pip install keras


Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl.metadata (1.3 kB)
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading tf2onnx-1.16.1-py3-none-any.whl (455 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.8/455.8 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx, tf2onnx
Successfully installed onnx-1.16.2 tf2onnx-1.16.1
Collecting mltu
  Downloading mltu-1.2.5-py3-none-any.whl.metadata (3.4 kB)
Collecting qqdm==0.0.7 (from mltu)
  Downloading qqdm-0.0.7.tar.gz (5.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting onnxruntime>=1.15.0 (from mltu)
  Downloading onnxruntime-1.19.2-cp3

In [None]:
import tensorflow as tf
from keras.models import load_model

# Optional: clears the current Keras session.
tf.keras.backend.clear_session()

# NOTE(review): this switches off Keras' safe mode globally. Per the ValueError
# recorded in this notebook it is only needed for model files that contain a
# `Lambda` layer wrapping a Python lambda; only load files from a trusted source.
tf.keras.config.enable_unsafe_deserialization()

# Load the model without its compiled state: restoring the compile config makes
# Keras rebuild the mltu CTCloss, which fails with "CTCloss.__init__() got an
# unexpected keyword argument 'reduction'". Call model.compile(...) again if the
# loaded model is to be trained further.
model = load_model("model.keras", compile=False)


TypeError: <class 'mltu.tensorflow.losses.CTCloss'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'mltu.tensorflow.losses', 'class_name': 'CTCloss', 'config': {'name': 'CTCloss', 'reduction': 'sum_over_batch_size'}, 'registered_name': 'CTCloss'}.

Exception encountered: CTCloss.__init__() got an unexpected keyword argument 'reduction'

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install keras


