In [None]:
!curl -LO https://github.com/AakashKumarNain/CaptchaCracker/raw/master/captcha_images_v2.zip
!unzip -qq captcha_images_v2.zip
!pip install comet_ml

In [None]:
from comet_ml import Experiment
# Comet.ml connection settings. All three values are blank in this notebook;
# presumably they are meant to be filled in before running -- TODO confirm.
comet_settings = {
    "api_key": "",
    "project_name": "",
    "workspace": "",
}
experiment = Experiment(**comet_settings)

In [3]:
import os
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import yaml

# Directory the captcha PNG files are read from.
data_dir = Path("./captcha_images_v2/")

# Load the run configuration; the context manager closes the file handle
# as soon as the YAML has been parsed.
with open('parameters.yaml', 'r') as f:
    parameters = yaml.safe_load(f)

# Individual settings pulled out of the configuration mapping.
# `.get` yields None for any key missing from parameters.yaml.
batch_size = parameters.get('batch_size')
img_width = parameters.get('img_width')
img_height = parameters.get('img_height')
downsample_factor = parameters.get('downsample_factor')
length_characters = parameters.get('length_characters')
epochs = parameters.get('epochs')
early_stopping_patience = parameters.get('early_stopping_patience')
characters = parameters.get('vocabulary')


# Record the configuration on the Comet.ml experiment.
# The vocabulary is held in `characters`; no variable named `vocabulary`
# exists in this notebook, so referencing it raised a NameError.
experiment.log_parameters({
    "img_width": img_width,
    "img_height": img_height,
    "batch_size": batch_size,
    "epochs": epochs,
    "early_stopping_patience": early_stopping_patience,
    "length_characters": length_characters,
    "vocabulary": characters
})

# Preprocesamiento ...

In [8]:
# Forward lookup: maps each vocabulary character to its own integer id.
vocabulary_chars = list(characters)
char_to_num = layers.StringLookup(vocabulary=vocabulary_chars, mask_token=None)

# Inverse lookup built from the forward layer's vocabulary, so ids can be
# decoded back to characters and the two mappings stay in correspondence.
num_to_char = layers.StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=char_to_num.get_vocabulary(),
)

In [9]:
from modules.utils import split_data

# Collect the path of every PNG in the data directory, sorted as strings.
images = sorted(str(png_path) for png_path in data_dir.glob("*.png"))

# Each label is the file name with everything from ".png" onwards removed.
labels = []
for image_path in images:
    file_name = image_path.rsplit(os.path.sep, 1)[-1]
    labels.append(file_name.split(".png")[0])

# Split paths and labels into training and validation arrays.
x_train, x_valid, y_train, y_valid = split_data(np.array(images), np.array(labels))

# Guardamos en un dataset los datos transformados ...

In [11]:
from modules.utils import encode_single_sample, decode_single_sample

# Training pipeline: (path, label) pairs -> encoded samples -> batches.
train_pairs = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_encoded = train_pairs.map(
    encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
)
train_dataset = train_encoded.batch(batch_size).prefetch(
    buffer_size=tf.data.AUTOTUNE
)

# Validation pipeline: same encoding, but served one sample per batch.
validation_pairs = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
validation_encoded = validation_pairs.map(
    encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
)
validation_dataset = validation_encoded.batch(1).prefetch(
    buffer_size=tf.data.AUTOTUNE
)

# Creamos el modelo o arquitectura ...

In [12]:
# The cell calls `build_model()`, so that is the name that must be imported;
# importing `model` left `build_model` undefined on a fresh kernel.
# NOTE(review): assumes modules.model exposes `build_model` -- confirm.
from modules.model import build_model

# Build the network and print its layer summary.
model = build_model()
model.summary()

# Entrenamiento ...

In [20]:
# Early stopping: halt training once `val_loss` has gone
# `early_stopping_patience` epochs without improving, and restore the
# weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=early_stopping_patience,
    monitor="val_loss",
)

# Custom callback that records training metrics on the Comet.ml experiment.
class CometMetricsCallback(keras.callbacks.Callback):
    """Log the per-epoch losses to the module-level Comet.ml `experiment`.

    At the end of every epoch, each metric named in `_TRACKED_METRICS` that
    is present in `logs` is sent with the epoch index as its step.
    """

    # Names of the entries in `logs` that are forwarded to Comet.
    _TRACKED_METRICS = ("loss", "val_loss")

    def on_epoch_end(self, epoch, logs=None):
        """Forward the tracked metrics for `epoch`.

        Args:
            epoch: Index of the epoch that just finished; used as the step.
            logs: Mapping of metric name to value, or None.
        """
        # `logs` defaults to None, and "val_loss" is absent when no
        # validation data is used; skip what is missing instead of raising.
        logs = logs or {}
        for metric_name in self._TRACKED_METRICS:
            if metric_name in logs:
                experiment.log_metric(metric_name, logs[metric_name], step=epoch)

In [None]:
# Train the model, validating each epoch, with early stopping and Comet
# metric logging attached as callbacks.
training_callbacks = [early_stopping, CometMetricsCallback()]
history = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=validation_dataset,
    callbacks=training_callbacks,
)

# Close the Comet.ml experiment.
experiment.end()

# Inferimos ...

In [None]:
# Inference model: runs from the input of the "image" layer to the output of
# the "dense2" layer (the original note says this drops the last layer).
image_input = model.get_layer(name="image").input
dense2_output = model.get_layer(name="dense2").output
prediction_model = keras.models.Model(image_input, dense2_output)

In [None]:
import random
from modules.utils import decode_batch_predictions

# Pick one validation sample at random.
num = random.randrange(len(y_valid))

# Encode it and add a leading batch dimension of size 1 to image and label.
mm = encode_single_sample(x_valid[num], y_valid[num])
# NOTE(review): 200 and 50 are hard-coded here; presumably they should match
# img_width / img_height from parameters.yaml -- confirm.
imagen = tf.reshape(mm['image'], (1, 200, 50, 1))
etiqueta = tf.expand_dims(mm['label'], axis=0)

preds = prediction_model.predict(imagen)
label = tf.strings.reduce_join(num_to_char(etiqueta)).numpy().decode("utf-8")
# `imagen` holds exactly one image, so it is read at batch position 0.
# Indexing with the random dataset index `num` was out of bounds for num > 0.
img = (imagen[0, :, :, 0] * 255).numpy().astype(np.uint8).T
plt.imshow(img, cmap="gray")
print('texto de etiqueta: ', label)
pred_texts = decode_batch_predictions(preds)
print('texto predecido: ', pred_texts)