<a href="https://colab.research.google.com/github/SupreethRao99/DeepCAPTCHA/blob/main/DeepCAPTCHA_TPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DeepCAPTCHA
DeepCAPTCHA is a convolutional neural network (CNN) based on the ResNet architecture and trained on the [Chars74K-Fonts](http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/#download) dataset. It was built as part of a larger project that attempts to defeat simple CAPTCHAs.

The [dataset](https://www.kaggle.com/supreethrao/chars74kdigitalenglishfont) used in this notebook can also be found on Kaggle.

In [1]:
# importing the required libraries
from google.colab import drive
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import zipfile
import os
import random
from shutil import copyfile
import datetime

# Setting up TPU and distribution strategy

In [2]:
# Initializing the TPU.
# Resolve the TPU cluster (empty `tpu` name) and connect this runtime to it.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
# Sanity check: list the logical TPU devices that are now visible.
print("All devices: ", tf.config.list_logical_devices('TPU'))

INFO:tensorflow:Initializing the TPU system: grpc://10.53.18.218:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.53.18.218:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


All devices:  [LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:7', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:6', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:5', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:4', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:3', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:0', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:1', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:2', device_type='TPU')]


In [3]:
# Distribution strategy built from the TPU resolver; the model is later
# created and compiled inside `strategy.scope()`.
strategy = tf.distribute.TPUStrategy(resolver)

INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


# Loading and Preprocessing
The data is stored on Google Drive as a zip file. It is imported into the Colab notebook and unzipped into the `/tmp` folder. Training and testing directories are then created for each class present in the dataset.

In [2]:
# Make the Google Drive contents available under /content/drive.
drive.mount('/content/drive')

# Unzipping the dataset into /tmp. The context manager closes the archive
# even if extraction raises part-way through.
with zipfile.ZipFile("/content/drive/MyDrive/English.zip", 'r') as zip_ref:
    zip_ref.extractall("/tmp")

Mounted at /content/drive


In [5]:
# Training and testing directories are created with directory name being the
# label of the class. eg:images of the letter 'A' will be in directory 'A'
# similarly images of letter 'a' will be in directory 'a'.

# Note that directory names are case-sensitive.

# The 62 class labels in dataset order: digits 0-9, then A-Z, then a-z.
class_labels = (
    [chr(code) for code in range(ord('0'), ord('9') + 1)]
    + [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    + [chr(code) for code in range(ord('a'), ord('z') + 1)]
)

for label in class_labels:
  for subset in ('training', 'testing'):
    class_dir = os.path.join('/tmp/CAPTCHA', subset, label)
    try:
      # makedirs also creates /tmp/CAPTCHA and the subset directory, and
      # exist_ok=True keeps the cell re-runnable on a warm runtime.
      os.makedirs(class_dir, exist_ok=True)
    except OSError as err:
      # Best-effort like the original loop, but report which path failed and why.
      print('directory creation failed: {} ({})'.format(class_dir, err))

The `split_data` function randomly splits the dataset into training and testing sets. The relative size of the two sets is determined by the `SPLIT_SIZE` parameter (the fraction of images assigned to the training set).

In [6]:
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the images of one class into training and testing folders.

    Args:
        SOURCE: directory containing the images of a single class.
        TRAINING: existing directory that receives the training share.
        TESTING: existing directory that receives the remaining images.
        SPLIT_SIZE: fraction (0..1) of the usable images copied to TRAINING.

    Zero-length files are reported and left out *before* the split, so the
    requested ratio applies to the images that are actually copied.
    """
    usable_images = []
    # Sorted listing: os.listdir order is arbitrary, and a seeded shuffle is
    # only repeatable when it starts from the same sequence.
    for img in sorted(os.listdir(SOURCE)):
        if os.path.getsize(os.path.join(SOURCE, img)) <= 0:
            print(img+" is zero length, so ignoring!!")
        else:
            usable_images.append(img)

    # Uses the `random` module and `copyfile` imported at the top of the
    # notebook, so the function does not rely on imports from a later cell.
    random.shuffle(usable_images)
    splitting_index = round(SPLIT_SIZE * len(usable_images))
    partitions = (
        (usable_images[:splitting_index], TRAINING),
        (usable_images[splitting_index:], TESTING),
    )

    for images, destination in partitions:
        for img in images:
            copyfile(os.path.join(SOURCE, img), os.path.join(destination, img))

In [8]:
from random import shuffle
import shutil

split_size = 0.90

# Seed the module-level generator that `shuffle` draws from, so the shuffle
# sequence (and therefore the train/test assignment) is repeatable.
random.seed(123)

# Class labels in dataset order: digits 0-9, then A-Z, then a-z.
labels = (
    [chr(code) for code in range(ord('0'), ord('9') + 1)]
    + [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    + [chr(code) for code in range(ord('a'), ord('z') + 1)]
)

for label in labels:
  # Upper-case source folders carry a "-1" suffix (e.g. "A-1"); the
  # destination folders use the bare label.
  source_name = label + "-1" if 'A' <= label <= 'Z' else label
  split_data('/tmp/English/Fnt/' + source_name,
             '/tmp/CAPTCHA/training/' + label,
             '/tmp/CAPTCHA/testing/' + label,
             split_size)

# Converting to TFRecord Format

In [3]:
data_dir = '/tmp/English/Fnt'

from functools import partial

IMG_HEIGHT = 32
IMG_WIDTH = 32

# Settings shared by the training and validation loaders, so both calls use
# identical split parameters.
shared_loader_options = {
    'validation_split': 0.2,
    'shuffle': True,
    'seed': 123,
    'image_size': (IMG_HEIGHT, IMG_WIDTH),
    'batch_size': 1,
}

# Loader pre-bound to the data directory; only `subset` is left to choose.
load_split = partial(
    tf.keras.preprocessing.image_dataset_from_directory,
    data_dir,
    **shared_loader_options,
)

# Evaluated left to right: training subset first, then validation.
ds_train, ds_valid = (
    load_split(subset=subset_name) for subset_name in ('training', 'validation')
)

class_names = ds_train.class_names
print("\nClass names: {}".format(class_names))

Found 62992 files belonging to 62 classes.
Using 50394 files for training.
Found 62992 files belonging to 62 classes.
Using 12598 files for validation.

Class names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A-1', 'B-1', 'C-1', 'D-1', 'E-1', 'F-1', 'G-1', 'H-1', 'I-1', 'J-1', 'K-1', 'L-1', 'M-1', 'N-1', 'O-1', 'P-1', 'Q-1', 'R-1', 'S-1', 'T-1', 'U-1', 'V-1', 'W-1', 'X-1', 'Y-1', 'Z-1', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [4]:
from tensorflow.train import BytesList, FloatList, Int64List
from tensorflow.train import Example, Features, Feature

def process_image(image, label):
    """JPEG-encode one image and pass its label through unchanged.

    Args:
        image: image tensor taken from the datasets built with
            `image_dataset_from_directory`.
        label: class index belonging to the image.

    Returns:
        A `(jpeg_bytes, label)` pair.
    """
    # NOTE(review): the datasets fed in here are expected to hold float pixels
    # already in the 0-255 range. `tf.image.convert_image_dtype` treats float
    # input as [0, 1) and would rescale it by ~255 again, so round and
    # saturate-cast to uint8 instead.
    image = tf.saturate_cast(tf.round(image), tf.uint8)
    image = tf.io.encode_jpeg(image)
    return image, label

# Flatten the batches back into single examples and JPEG-encode every image,
# first for the training subset and then for the validation subset.
ds_train_encoded, ds_valid_encoded = (
    subset.unbatch().map(process_image) for subset in (ds_train, ds_valid)
)

In [5]:
def make_example(encoded_image, label):
    """Serialize one (image, label) pair as a `tf.train.Example`.

    Args:
        encoded_image: encoded image bytes, stored under the 'image' key.
        label: integer class index, stored under the 'label' key.

    Returns:
        The serialized Example as a byte string.
    """
    feature_map = {
        'image': Feature(bytes_list=BytesList(value=[encoded_image])),
        'label': Feature(int64_list=Int64List(value=[label])),
    }
    return Example(features=Features(feature=feature_map)).SerializeToString()

In [6]:
# makedirs creates /tmp/working as well and does not fail when the cell is re-run.
os.makedirs('/tmp/working/training', exist_ok=True)
NUM_SHARDS = 32
PATH = '/tmp/working/training/shard_{:02d}.tfrecord'

# Write every shard in ONE pass over the dataset, dealing examples out
# round-robin (example i goes to shard i % NUM_SHARDS, the same assignment
# `.shard(NUM_SHARDS, shard)` makes). This avoids re-reading and re-encoding
# the whole dataset once per shard.
# NOTE(review): the source dataset was loaded with shuffle=True; if it yields a
# different order on each iteration, per-shard passes could duplicate or drop
# examples. A single pass does not depend on a stable order.
writers = [
    tf.io.TFRecordWriter(path=PATH.format(shard)) for shard in range(NUM_SHARDS)
]
try:
    for index, (encoded_image, label) in enumerate(
            ds_train_encoded.as_numpy_iterator()):
        writers[index % NUM_SHARDS].write(make_example(encoded_image, label))
finally:
    # Close every writer even if encoding or writing fails part-way through.
    for writer in writers:
        writer.close()

In [7]:
# Pure-Python replacement for `!mkdir`: also creates /tmp/working when needed
# and does not fail when the cell is re-run.
os.makedirs('/tmp/working/validation', exist_ok=True)

NUM_SHARDS = 8
PATH = '/tmp/working/validation/shard_{:02d}.tfrecord'

# Write every shard in ONE pass over the dataset, dealing examples out
# round-robin (example i goes to shard i % NUM_SHARDS, the same assignment
# `.shard(NUM_SHARDS, shard)` makes). This avoids re-reading and re-encoding
# the whole dataset once per shard.
# NOTE(review): the source dataset was loaded with shuffle=True; if it yields a
# different order on each iteration, per-shard passes could duplicate or drop
# examples. A single pass does not depend on a stable order.
writers = [
    tf.io.TFRecordWriter(path=PATH.format(shard)) for shard in range(NUM_SHARDS)
]
try:
    for index, (encoded_image, label) in enumerate(
            ds_valid_encoded.as_numpy_iterator()):
        writers[index % NUM_SHARDS].write(make_example(encoded_image, label))
finally:
    # Close every writer even if encoding or writing fails part-way through.
    for writer in writers:
        writer.close()

In [8]:
# The shards are written as /tmp/working/<subset>/shard_XX.tfrecord, so the
# pattern has to descend into the subset directory and use the `.tfrecord`
# extension; the previous `training*.tfrec` pattern matched no files.
train_filenames = sorted(tf.io.gfile.glob('/tmp/working/training/*.tfrecord'))
validation_filenames = sorted(tf.io.gfile.glob('/tmp/working/validation/*.tfrecord'))

# Fail loudly instead of silently building an empty dataset.
assert train_filenames, "no training shards found under /tmp/working/training"
assert validation_filenames, "no validation shards found under /tmp/working/validation"

dataset = tf.data.TFRecordDataset(train_filenames)

In [14]:
# Copy the generated /tmp/working tree (the TFRecord shards) to the mounted
# Google Drive folder; -a copies recursively preserving attributes, -v lists each file.
%cp -av "/tmp/working" "/content/drive/MyDrive/"

'/tmp/working' -> '/content/drive/MyDrive/working'
'/tmp/working/training' -> '/content/drive/MyDrive/working/training'
'/tmp/working/training/shard_00.tfrecord' -> '/content/drive/MyDrive/working/training/shard_00.tfrecord'
'/tmp/working/training/shard_01.tfrecord' -> '/content/drive/MyDrive/working/training/shard_01.tfrecord'
'/tmp/working/training/shard_02.tfrecord' -> '/content/drive/MyDrive/working/training/shard_02.tfrecord'
'/tmp/working/training/shard_03.tfrecord' -> '/content/drive/MyDrive/working/training/shard_03.tfrecord'
'/tmp/working/training/shard_04.tfrecord' -> '/content/drive/MyDrive/working/training/shard_04.tfrecord'
'/tmp/working/training/shard_05.tfrecord' -> '/content/drive/MyDrive/working/training/shard_05.tfrecord'
'/tmp/working/training/shard_06.tfrecord' -> '/content/drive/MyDrive/working/training/shard_06.tfrecord'
'/tmp/working/training/shard_07.tfrecord' -> '/content/drive/MyDrive/working/training/shard_07.tfrecord'
'/tmp/working/training/shard_08.tfrecord

# Creating the Model

Images are augmented by rescaling, rotating, shearing, zooming and flipping. This is a cheap and very effective way to give the model more data to learn from.

## Augmentation of the Dataset

In [9]:
TRAINING_DIR = '/tmp/CAPTCHA/training'
VALIDATION_DIR = '/tmp/CAPTCHA/testing'

# Options common to both directory iterators.
flow_options = dict(
    target_size=(32, 32),
    batch_size=1024,
    class_mode='categorical',
)

# Training images are rescaled to [0, 1] and randomly augmented.
# NOTE(review): horizontal_flip mirrors characters, which may turn one class
# into another (e.g. 'b' <-> 'd') - confirm this augmentation is intended.
augmentation_options = dict(
    rescale=1. / 255,
    rotation_range=30,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)
train_generator = train_datagen.flow_from_directory(TRAINING_DIR, **flow_options)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR, **flow_options)

Found 56668 images belonging to 62 classes.
Found 6324 images belonging to 62 classes.


## ResNet Model
The model described below is a custom network based on the [ResNet architecture](https://arxiv.org/pdf/1512.03385.pdf).

In [10]:
import keras
from functools import partial
# Conv2D preset used throughout the network: 3x3 kernel, stride 1, "SAME"
# padding and no bias term. Individual call sites override these as needed.
DefaultConv2D = partial(
    keras.layers.Conv2D,
    kernel_size=3,
    strides=1,
    padding="SAME",
    use_bias=False,
)

class ResidualUnit(keras.layers.Layer):
    """Residual block: two 3x3 convolutions plus a skip connection.

    The main path is conv -> batch norm -> activation -> conv -> batch norm.
    When `strides > 1` the skip path applies a 1x1 strided convolution and
    batch norm so that its output shape matches the main path; otherwise the
    input is added unchanged. The activation is applied to the sum.

    Args:
        filters: number of filters of every convolution in the unit.
        strides: stride of the first main-path convolution (and of the skip
            convolution when it exists).
        activation: activation identifier understood by
            `keras.activations.get`.
        **kwargs: forwarded to `keras.layers.Layer`.
    """

    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            keras.layers.BatchNormalization()]
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                keras.layers.BatchNormalization()]

    def get_config(self):
        """Return the layer config including its constructor arguments.

        Without `filters`, `strides` and `activation` the layer could not be
        re-created from its config, because `filters` is a required argument.
        """
        cfg = super().get_config()
        cfg.update({
            "filters": self.filters,
            "strides": self.strides,
            "activation": keras.activations.serialize(self.activation),
        })
        return cfg

    def call(self, inputs):
        """Run the main and skip paths and return `activation(main + skip)`."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

In [11]:
def create_model(input_shape=(32, 32, 3), num_classes=62):
  '''
  Implementation of custom sized resnet model

  Args:
    input_shape: (height, width, channels) of the input images. Defaults to
      the 32x32 RGB images used in this notebook.
    num_classes: size of the softmax output layer. Defaults to the 62
      character classes (0-9, A-Z, a-z).

  Returns:
    An uncompiled `keras.models.Sequential` model.
  '''
  model = keras.models.Sequential()
  # Stem: strided convolution followed by max pooling.
  model.add(DefaultConv2D(64, kernel_size=4, strides=2,
                          input_shape=list(input_shape)))
  model.add(keras.layers.BatchNormalization())
  model.add(keras.layers.Activation("relu"))
  model.add(keras.layers.MaxPool2D(pool_size=3, strides=2, padding="SAME"))

  # Residual stages: stride 2 whenever the filter count changes.
  prev_filters = 64
  for filters in [64] * 2 + [128] * 2 + [256] * 2:
    strides = 1 if filters == prev_filters else 2
    model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

  # Global average pooling already yields a flat (batch, channels) tensor, so
  # the Flatten layer that used to follow it was a no-op and has been removed.
  model.add(keras.layers.GlobalAvgPool2D())
  model.add(keras.layers.Dropout(0.5))
  model.add(keras.layers.Dense(num_classes, activation="softmax"))

  return model


In [13]:
# The model is created and compiled inside the distribution strategy's scope.
with strategy.scope():
  model = create_model()
  model.compile(
      loss="categorical_crossentropy",
      optimizer="nadam",
      metrics=["accuracy"],
  )

# NOTE(review): the recorded run of this cell stopped with UnavailableError
# during the first epoch. Presumably the Python-generator input pipeline is
# not usable with the remote TPU workers - confirm, and consider feeding the
# TFRecord shards through tf.data instead.
model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
)

Epoch 1/10


UnavailableError: ignored