# Captchas

**see:** https://keras.io/examples/vision/captcha_ocr/<br>
**original:** https://colab.research.google.com/drive/1Olw2KMHfPlnGaYuzffl2zb6D1etlBGZf?usp=sharing<br>
**View GitHub version in Colab:** <a href="https://colab.research.google.com/github/KnollFrank/2captcha-worker-assistant-server/blob/master/captcha_ocr_trainAndSaveModel_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a><br>
**paper:** Simple and Easy: Transfer Learning-Based Attacks to Text CAPTCHA<br>

## Setup

In [1]:
import os
import numpy as np

from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

2023-03-15 10:46:02.303787: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from GoogleDriveManager import GoogleDriveManager

In [3]:
from CaptchaGenerator import CaptchaGenerator

In [4]:
def getImagesAndLabels(dataDir):
    """Collect the ".jpeg" files of a directory together with their labels.

    The label of an image is derived from its file name: it is the part of
    the last path component that precedes the first occurrence of ".jpeg".

    Args:
        dataDir: Directory to scan; must provide ``glob`` (e.g. a
            ``pathlib.Path``).

    Returns:
        A tuple ``(images, labels)``. ``images`` is the sorted list of the
        matching paths as strings, ``labels`` holds the label of each path
        in the same order.
    """
    fileSuffix = ".jpeg"
    images = sorted(str(path) for path in dataDir.glob("*" + fileSuffix))
    labels = []
    for image in images:
        # Keep only the file name, then cut it at the suffix.
        fileName = image.rsplit(os.path.sep, 1)[-1]
        labels.append(fileName.partition(fileSuffix)[0])
    return images, labels


In [5]:
from CharNumConverter import CharNumConverter

In [6]:
class DataSplitter:
    """Randomly partition paired samples into train, validation and test parts.

    70% of the samples form the training part; the remaining samples are
    split in half into the validation part and the test part. Sample counts
    are rounded down for the first part of each split.
    """

    def __init__(self, x, y):
        """Split ``x`` and ``y`` (converted to numpy arrays) consistently.

        Args:
            x: Sequence of samples.
            y: Sequence of targets, aligned with ``x`` by position.
        """
        samples, targets = np.array(x), np.array(y)
        train, heldOut = DataSplitter._splitData(samples, targets, train_size=0.7)
        valid, test = DataSplitter._splitData(*heldOut, train_size=0.5)
        self.x_train, self.y_train = train
        self.x_valid, self.y_valid = valid
        self.x_test, self.y_test = test

    def getTrain(self):
        """Return the training part as ``(x, y)``."""
        return (self.x_train, self.y_train)

    def getValid(self):
        """Return the validation part as ``(x, y)``."""
        return (self.x_valid, self.y_valid)

    def getTest(self):
        """Return the test part as ``(x, y)``."""
        return (self.x_test, self.y_test)

    @staticmethod
    def _splitData(x, y, train_size=0.9, shuffle=True):
        """Split two aligned numpy arrays into a leading and a trailing part.

        Args:
            x: Array of samples.
            y: Array of targets with the same length as ``x``.
            train_size: Fraction of the samples that goes into the first part.
            shuffle: When true, the samples are assigned in a random order
                drawn from numpy's global random state.

        Returns:
            ``((x_first, y_first), (x_rest, y_rest))``.
        """
        order = np.arange(len(x))
        if shuffle:
            np.random.shuffle(order)
        cut = int(len(x) * train_size)
        # The same index selection is applied to x and y to keep them aligned.
        first, rest = order[:cut], order[cut:]
        return (x[first], y[first]), (x[rest], y[rest])


In [7]:
from DatasetFactory import DatasetFactory

In [8]:
def getTrainValidationTestDatasets(dataDir, datasetFactory):
    """Build the train, validation and test datasets for one image directory.

    The images and labels found in ``dataDir`` are split by ``DataSplitter``
    and every part is turned into a dataset by ``datasetFactory``. The number
    of images and the captcha character set are printed along the way.

    Args:
        dataDir: Directory handed to ``getImagesAndLabels``.
        datasetFactory: Object whose ``createDataset(x, y)`` builds a dataset
            from image paths and labels.

    Returns:
        Tuple ``(train_dataset, validation_dataset, test_dataset)``.
    """
    imagePaths, imageLabels = getImagesAndLabels(dataDir)
    print("Number of images found:", len(imagePaths))
    print("Characters:", CaptchaGenerator.characters)

    splitter = DataSplitter(imagePaths, imageLabels)
    trainDataset = datasetFactory.createDataset(*splitter.getTrain())
    validDataset = datasetFactory.createDataset(*splitter.getValid())
    testDataset = datasetFactory.createDataset(*splitter.getTest())
    return (trainDataset, validDataset, testDataset)

In [9]:
import matplotlib.pyplot as plt
import math

def displayImagesInGrid(numGridCols, images, titles, titleColors):
    """Show images in a grid with ``numGridCols`` columns, one title each.

    The grid gets as many rows as are needed to hold all images; cells that
    stay empty are drawn without axes. Nothing is drawn for empty input.

    Args:
        numGridCols: Number of columns of the grid.
        images: Sequence of objects providing ``.numpy()``; each result is
            cast to ``np.uint8`` before it is displayed.
        titles: Title text for each image.
        titleColors: Colour of each title.

    Raises:
        AssertionError: If the three sequences differ in length.
    """
    assert len(images) == len(titles) == len(titleColors)
    if len(images) == 0:
        # A grid with zero rows cannot be created, so there is nothing to show.
        return
    pixels = [image.numpy().astype(np.uint8) for image in images]
    numGridRows = math.ceil(len(pixels) / numGridCols)
    # squeeze=False keeps ``axs`` two-dimensional even when the grid has just
    # one row or one column; the default would collapse it and break indexing.
    _, axs = plt.subplots(numGridRows, numGridCols, figsize=(15, 5), squeeze=False)
    # ``axs.flat`` walks the grid row by row, matching the image order.
    for i, ax in enumerate(axs.flat):
        ax.axis("off")
        if i < len(pixels):
            ax.imshow(pixels[i])
            ax.set_title(titles[i], color=titleColors[i])
    plt.show()


In [10]:
def display16Predictions(model, dataset, predictionsDecoder):
    """Display up to 16 images of the first batch with predicted and true text.

    Each image is titled "Prediction/Truth: <predicted>/<true>"; the title is
    green when both texts are equal and red otherwise.

    Args:
        model: Model whose ``predict`` is called on the selected images.
        dataset: Dataset providing ``take``; its batches are mappings with
            the keys "image" and "label".
        predictionsDecoder: Provides ``decode_batch_predictions`` for the
            model output and ``asStrings`` for the labels.
    """
    numPredictions2Display = 16
    for batch in dataset.take(1):
        shownImages = batch["image"][:numPredictions2Display]
        shownLabels = batch["label"][:numPredictions2Display]

        rawPredictions = model.predict(shownImages)
        predictedTexts = predictionsDecoder.decode_batch_predictions(rawPredictions)
        trueTexts = predictionsDecoder.asStrings(shownLabels)
        textPairs = list(zip(predictedTexts, trueTexts))

        titles = [f"Prediction/Truth: {predicted}/{truth}" for predicted, truth in textPairs]
        colors = ['green' if predicted == truth else 'red' for predicted, truth in textPairs]
        displayImagesInGrid(4, shownImages, titles, colors)

In [11]:
from ModelFactory import ModelFactory

In [12]:
def printLayers(model):
    """Print every layer of ``model`` as "<index> <name>", one per line.

    Args:
        model: Object with a ``layers`` sequence whose items have a ``name``
            attribute.
    """
    layerNames = (layer.name for layer in model.layers)
    for index, layerName in enumerate(layerNames):
        print(index, layerName)


In [13]:
from PredictionsDecoder import PredictionsDecoder

In [14]:
from ModelDAO import ModelDAO

In [15]:
# FK-TODO: remove the getAccuracy() method. Implement a custom Keras metric instead, see
# https://stackoverflow.com/questions/37657260/how-to-implement-custom-metric-in-keras or https://keras.io/api/metrics/#custom-metrics
def getAccuracy(dataset, prediction_model, ctc_decode):
    """Accumulate ``tf.keras.metrics.Accuracy`` over all batches of a dataset.

    For every batch the labels under "label" are compared with the decoded
    predictions for the images under "image".

    Args:
        dataset: Iterable of batches; each batch is a mapping with the keys
            "image" and "label".
        prediction_model: Model whose ``predict`` is run on the images.
        ctc_decode: Callable that converts the raw model output into values
            comparable with the labels.

    Returns:
        The metric result converted with ``.numpy()``.
    """
    metric = tf.keras.metrics.Accuracy()

    for batch in dataset:
        expected = batch["label"]
        decoded = ctc_decode(prediction_model.predict(batch["image"], verbose=0))
        metric.update_state(expected, decoded)

    return metric.result().numpy()

## Preparation

In [16]:
# Flag for the rest of the notebook: True when the text form of the IPython
# shell object contains 'google.colab'. get_ipython() exists only when the
# code runs under IPython/Jupyter.
inColab = 'google.colab' in str(get_ipython())

In [17]:
# Only on Colab: presumably makes Google Drive available to the notebook
# (see GoogleDriveManager.mount for what exactly is mounted).
if inColab:
    GoogleDriveManager.mount()

In [18]:
# Only on Colab: IPython shell commands ("!") that copy captchas.zip from
# GoogleDriveManager._baseFolder into the working directory and unpack it.
if inColab:
  !cp {GoogleDriveManager._baseFolder}/captchas.zip .
  !unzip captchas.zip

In [19]:
# Object used further down to save and load models (saveModel/loadModel).
# How ModelDAO uses the Colab flag is not visible in this notebook.
modelDAO = ModelDAO(inColab)

In [20]:
# Converter built from the captcha character set; its char_to_num and
# num_to_char members are passed to the factories and the decoder below.
charNumConverter = CharNumConverter(CaptchaGenerator.characters)

2023-03-15 10:41:54.085280: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-15 10:41:54.089954: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [21]:
# Decoder for model outputs and labels, configured with the captcha length
# and the number-to-character mapping.
predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)

In [22]:
# Image width and height handed to DatasetFactory and ModelFactory
# (presumably in pixels — confirm against the captcha images).
(img_width, img_height) = (241, 62)

In [23]:
# Factory that turns image paths and labels into datasets; note that it takes
# the height before the width. All datasets below are created with batch size 64.
datasetFactory = DatasetFactory(img_height, img_width, charNumConverter.char_to_num, batch_size = 64)

## Create And Train Base Model

In [24]:
# Only on Colab: shell commands that install the Microsoft core fonts package,
# rebuild the font cache and show which font "Arial" resolves to.
# NOTE(review): presumably CaptchaGenerator renders with Arial — confirm.
if inColab:
    !sudo apt install ttf-mscorefonts-installer
    !sudo fc-cache -f
    !fc-match Arial

In [25]:
# "We generate 200,000 images for base model pre-training"
# NOTE(review): only 50 captchas are configured here; the value from the
# quote (200000) is kept in the trailing comment.
captchaGenerator = CaptchaGenerator(
    numCaptchas = 50, # 50, # 200000,
    dataDir = Path("captchas/generated/VAERS/"))

In [27]:
# Generate the captchas configured above; presumably they are written to
# captchaGenerator.dataDir, which the next cell reads from.
captchaGenerator.createAndSaveCaptchas()

In [None]:
# Train/validation/test datasets for base-model training, built from the
# generator's data directory.
train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(captchaGenerator.dataDir, datasetFactory)

In [None]:
# Visual sanity check: show up to 16 images of the first training batch in a
# 4-column grid, each titled in black with its label converted to a string.
for batch in train_dataset.take(1):
    numImages2Display = 16
    images = batch["image"][:numImages2Display]
    labels = batch["label"][:numImages2Display]
    displayImagesInGrid(4, images, predictionsDecoder.asStrings(labels), ['black'] * len(labels))

In [None]:
# Factory for the models, configured with the image size (height first) and
# the character-to-number mapping.
modelFactory = ModelFactory(img_height, img_width, charNumConverter.char_to_num)

In [None]:
# Create the MobileNetV3Small variant offered by the factory and print its
# layer summary.
model = modelFactory.createMobileNetV3Small()
model.summary()

In [None]:
# "the success rates became stable after the base-model training epochs exceeded 20"
# Train the base model for 20 epochs on the generated captchas, validating on
# the validation dataset; the value returned by fit() is kept in `history`.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=20)


In [None]:
# Persist the trained base model; the transfer-learning section loads a
# model by name via modelDAO.loadModel.
modelDAO.saveModel(model)

In [None]:
# Derive the model used for predictions from the trained model (see
# ModelFactory.createPredictionModel) and print its layer summary.
prediction_model = ModelFactory.createPredictionModel(model)
prediction_model.summary()


In [None]:
# Show predictions next to the true texts for up to 16 test images of the
# base model.
display16Predictions(prediction_model, test_dataset, predictionsDecoder)

In [None]:
# Accuracy of the base model on the test dataset, using the decoder's
# ctc_decode to convert the raw predictions.
getAccuracy(test_dataset, prediction_model, predictionsDecoder.ctc_decode)

## Transfer learning

In [None]:
# "we collected 1,500 real CAPTCHAs from the websites. Note that only 500 of them are used for fine-tuning, and another 1,000 are applied to calculate the test accuracy"
# FK-TODO: load the pre-trained model and train it with 500 real-world samples from the folder captchas/VAERS/; the remaining 540 (according to the quote above there should be 1,000) are then the test data.
# see https://keras.io/guides/transfer_learning/
# see https://www.tensorflow.org/tutorials/images/transfer_learning


In [None]:
# Name of the saved model to fine-tune, plus a layer count.
# NOTE(review): despite its name, numTrainableLayers is used below as the
# number of leading layers that get frozen (trainable = False).
modelName, numTrainableLayers = 'MobileNetV3Small', 104
# modelName, numTrainableLayers = 'ResNet101', 348

In [None]:
# Load the saved model with the chosen name and print its summary including
# the trainable flag of each layer.
model = modelDAO.loadModel(modelName)
model.summary(show_trainable=True)

In [None]:
# printLayers(model)

In [None]:
# Make the whole model trainable, then freeze its first numTrainableLayers
# layers again, so only the layers after them are updated during fine-tuning.
model.trainable = True
for layer in model.layers[:numTrainableLayers]:
    layer.trainable = False

In [None]:
# Print the summary again to check which layers are trainable after freezing.
model.summary(show_trainable=True)

In [None]:
# Replace the three datasets with ones built from the images in
# captchas/VAERS/, which are used for fine-tuning and evaluation below.
train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path("captchas/VAERS/"), datasetFactory)

In [None]:
# "The model is optimized by a stochastic gradient descent (SGD) strategy with an initial learning rate of 0.004, weight decay of 0.00004 and momentum of 0.9."
# NOTE(review): the SGD setup from the quote is only kept as a commented-out
# line (with a different learning rate); the model is compiled with 'adam'.
from tensorflow.keras.optimizers import SGD
# model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9))
# No loss is passed to compile(); presumably the model supplies its own
# loss — confirm in ModelFactory.
model.compile(optimizer='adam')

# "Therefore, in our experiments, we chose 1 epoch for the fine-tuning stage."
# NOTE(review): the quote says 1 epoch, but fine-tuning runs for 20 epochs here.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=20)


In [None]:
# Derive the prediction model from the fine-tuned model and print its summary.
prediction_model = ModelFactory.createPredictionModel(model)
prediction_model.summary()

In [None]:
# Accuracy of the fine-tuned model on the test dataset built from captchas/VAERS/.
getAccuracy(test_dataset, prediction_model, predictionsDecoder.ctc_decode)

In [None]:
# Show predictions next to the true texts for up to 16 test images of the
# fine-tuned model.
display16Predictions(prediction_model, test_dataset, predictionsDecoder)

In [None]:
# Persist the fine-tuned model.
# NOTE(review): it is not visible here whether this overwrites the base model
# saved earlier — check ModelDAO.saveModel.
modelDAO.saveModel(model)