# Automated CAPTCHA Solving With Deep Learning

This notebook demonstrates the use of convolutional neural networks and supervised training to automate CAPTCHA-solving. 

Original Author: Jackon Yang (2017)

Further Modified By: Turhan Kimbrough (2021)

---

In [1]:
import os

# Functions from other notebook file.
from ipynb.fs.full.shared_functions import *

In [2]:
# Move one directory back to the project root.
# The root is resolved only once per kernel session: a plain os.chdir("..")
# climbs one level further every time the cell is re-run, which silently
# breaks every relative path used later in the notebook.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
os.chdir(PROJECT_ROOT)

In [3]:
# Silence TensorFlow's native (C++) logging; level '3' is the most restrictive setting.
# NOTE(review): this variable is normally read when TensorFlow is first loaded.
# If shared_functions already imports TensorFlow, this cell likely needs to run
# before the import cell to have any effect -- confirm.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})

In [4]:
# GLOBALS

# Directory holding the single-digit CAPTCHA images, resolved against the
# current working directory. Path components are passed to os.path.join
# separately so the platform's separator is used instead of a hard-coded '/'.
DATA_DIRECTORY = os.path.join(os.getcwd(), 'datasets', 'single-digit')

# Image geometry handed to the model-building, training and generator helpers.
IMAGE_HEIGHT = 100
IMAGE_WIDTH = 100
IMAGE_CHANNELS = 3

# Number of output classes per character and number of characters per CAPTCHA;
# these match the (None, 1, 10) output shape shown in the model summary.
CATEGORIES = 10
DIMENSIONS = 1

# Number of epochs passed to the training helper.
TRAINING_EPOCHS = 20

# Batch sizes for the training, validation and testing phases.
TRAINING_BATCH_SIZE = 32
VALIDATION_BATCH_SIZE = 32
TESTING_BATCH_SIZE = 32

# Layout values passed to display_predictions_from_model
# (presumably the number of samples shown and the grid width -- confirm).
TOTAL_TO_DISPLAY = 30
COLUMNS = 5

---
## Prepare the Dataset



Store each CAPTCHA image's file path, together with its label,
in a pandas DataFrame.

In [5]:
# Collect the CAPTCHA images found under DATA_DIRECTORY into a table
# with one row per image (columns: label, file).
data_frame = create_captcha_dataframe(DATA_DIRECTORY)

# Preview the first five rows as a quick sanity check.
data_frame.head(5)

Unnamed: 0,label,file
0,3,/mnt/wd-blue/captcha-tensorflow/datasets/singl...
1,2,/mnt/wd-blue/captcha-tensorflow/datasets/singl...
2,2,/mnt/wd-blue/captcha-tensorflow/datasets/singl...
3,8,/mnt/wd-blue/captcha-tensorflow/datasets/singl...
4,1,/mnt/wd-blue/captcha-tensorflow/datasets/singl...


---
Shuffle the data and create a training set, validation set, and testing set.

In [6]:
# Partition the rows of data_frame into three index collections.
# NOTE(review): no random seed is set in this notebook; unless
# shuffle_and_split_data seeds internally, the split differs on every run -- confirm.
train_indices, validation_indices, test_indices = shuffle_and_split_data(data_frame)

# Report how many samples landed in each partition.
split_sizes = tuple(
    len(indices) for indices in (train_indices, validation_indices, test_indices)
)
print('train count: %s, validation count: %s, test count: %s' % split_sizes)

train count: 4998, validation count: 2142, test count: 3060


---
## Get a baseline sequential model

In [7]:
# Build the network via create_untrained_vgg16_model, sized for the
# CAPTCHA image geometry and the configured output dimensions/categories.
image_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
model = create_untrained_vgg16_model(*image_shape, DIMENSIONS, CATEGORIES)

# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 3, 3, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 4608)              0         
_________________________________________________________________
dense (Dense)                (None, 1024)              4719616   
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                10250     
_________________________________________________________________
reshape (Reshape)            (None, 1, 10)             0         
Total params: 19,444,554
Trainable params: 19,444,554
Non-trainable params: 0
____________________________________________

---
## Train the Model

In [10]:
# Fit the model on the training split while monitoring the validation split;
# the returned object is kept so it can be plotted in the next section.
# NOTE(review): the saved output of this cell is
# "TypeError: list indices must be integers or slices, not tuple" and its
# execution count is out of sequence with the cells above. The failure
# originates inside the helper (not visible here); the notebook should be
# re-verified with Restart Kernel -> Run All before it is shared.
history = train_vgg16_model(
    model,
    data_frame,
    train_indices, validation_indices,
    TRAINING_BATCH_SIZE, VALIDATION_BATCH_SIZE,
    TRAINING_EPOCHS,
    IMAGE_HEIGHT, IMAGE_WIDTH,
    CATEGORIES,
)

TypeError: list indices must be integers or slices, not tuple

---
## Analyze Model Performance

Plot the accuracy and loss metrics recorded during training.

In [None]:
# Visualize the metrics captured in `history` by the training cell above.
# Requires the training cell to have completed successfully, since `history`
# is only defined there.
plot_training_history(history)

Evaluate the trained model on the held-out ('unseen') test samples.

In [None]:
# The generator is built in non-training mode for evaluation.
for_training = False

# Batches drawn from the held-out test indices.
testing_set_generator = get_captcha_generator(
    data_frame,
    test_indices,
    for_training,
    TESTING_BATCH_SIZE,
    IMAGE_HEIGHT, IMAGE_WIDTH,
    CATEGORIES,
)

# Only whole batches are evaluated: the integer division drops any remainder.
testing_steps = len(test_indices) // TESTING_BATCH_SIZE

# Pair each metric name with the score reported by the evaluation run.
metric_names = model.metrics_names
evaluation_scores = model.evaluate(testing_set_generator, steps=testing_steps)
dict(zip(metric_names, evaluation_scores))

---
## Visualize Model Performance

In [None]:
# Run the model over the test split and collect, for later display,
# the images together with the predicted and the true values.
captcha_images, predictions, true_values = get_prediction_results(
    model,
    data_frame,
    test_indices,
    TESTING_BATCH_SIZE,
    IMAGE_HEIGHT, IMAGE_WIDTH,
    CATEGORIES,
)

In [None]:
# Show the collected test images alongside their predicted and true values,
# using the display settings from the globals cell.
display_predictions_from_model(
    captcha_images,
    predictions,
    true_values,
    TOTAL_TO_DISPLAY,
    COLUMNS,
)

## Save the Model

In [None]:
# Saving is disabled by default so that running the notebook does not write to disk.
# Uncomment the line below to persist the trained model under 'my_model',
# resolved relative to the current working directory.
#model.save('my_model')