In [1]:
import os
from glob import glob

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
from torch.utils.data import DataLoader

from Infrastructure import CaptchaDataset, CNN

In [2]:
# Record once whether a CUDA device is usable; later cells branch on this flag
# to decide whether the model and batches are moved to the GPU.
CUDA = torch.cuda.is_available()
print(f'CUDA available: {CUDA}')

CUDA available: False


In [3]:
def captcha_label(path: str) -> str:
    """Return the label encoded in a captcha file name.

    The label is the file's base name up to its first '.', e.g.
    './Captchas/aB3xY.png' -> 'aB3xY'.

    Args:
        path: Path to a captcha image, as returned by glob.

    Returns:
        The portion of the file name that precedes the first '.'.
    """
    # os.path.basename honours the platform's path separators; splitting on a
    # literal '/' would leave the directory attached when paths use '\\'.
    return os.path.basename(path).split('.')[0]


# Collect every file in the captcha directory and derive its label from the name.
file_locations = glob('./Captchas/*')
captcha_names = [captcha_label(location) for location in file_locations]
print(f'identified {len(file_locations)} images')

identified 113062 images


In [4]:
# Distinct characters gathered across all captcha names (pooled over every
# name and position); the author's expectation is that only 60 show up.
unique_characters = list({char for name in captcha_names for char in name})

print( f'{len(unique_characters)} unique characters: {unique_characters}' )

60 unique characters: ['M', 'U', 'r', 'x', 'e', '3', 'f', '8', '9', 'J', 'q', 'N', 't', 'Q', 'a', 'D', 'h', 'm', 'g', 'A', 'y', 'd', 'c', 'k', 'u', '7', 's', 'V', 'b', '6', 'Y', 'T', 'F', 'C', 'l', 'P', 'i', 'Z', 'O', '1', 'H', '5', '2', 'j', 'S', 'G', 'z', 'n', 'R', 'I', 'W', 'K', 'X', 'L', 'v', 'B', 'w', '4', 'E', 'p']


In [5]:
# Split the first 1,000 files into training and test sets (test_size=.2).
# A fixed random_state makes the shuffle repeatable, so re-running this cell
# yields the same partition for a given ordering of file_locations.
SPLIT_SEED = 42
train_files, test_files = train_test_split(
    file_locations[0:1_000], test_size=.2, random_state=SPLIT_SEED
)
print(f'Split dataset into 80:20 train/test of sizes {len(train_files)},{len(test_files)}.')

Split dataset into 80:20 train/test of sizes 800,200.


In [6]:
# Build the training dataset from the training file paths.
train = CaptchaDataset.from_dir(train_files)

print(f'{train.X.shape}\nSample of images in format 40px x 150px x 3 RGB channels, of type {type(train.X[0][0][0][0])}')

# Wrap the dataset in a DataLoader: the iterable that hands out shuffled
# batches of 64 samples for gradient descent, loaded by 2 worker processes.
dl = DataLoader(train, batch_size=64, shuffle=True, num_workers=2)

(800, 40, 150, 3)
Sample of images in format 40px x 150px x 3 RGB channels, of type <class 'numpy.float32'>


In [7]:
# Smoke-test the loader: pull a single batch and report the shapes it yields.
dataiter = iter(dl)
first_batch = next(dataiter)
# Each batch is a 3-tuple: image tensor, label tensor, and a third `labels` item.
images, label_array, labels = first_batch
print(f'Each batch has a dataset of shape {images.shape} and a corresponding set of {label_array.shape} labels.')

Each batch has a dataset of shape torch.Size([64, 3, 40, 150]) and a corresponding set of torch.Size([64, 5, 62]) labels.


In [8]:
# Build the model, the loss function, and the optimizer used for training.
net = CNN()
if CUDA:
    # Move the model's parameters to the GPU when one is available.
    net.cuda()
criterion = nn.MultiLabelSoftMarginLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [9]:
# Train for 20 epochs, reporting the mean batch loss about ten times per epoch.
#
# The reporting interval is derived from the loader's own batch count and is
# clamped to at least 1, so a small dataset cannot cause a modulo-by-zero.
interval = max(1, len(dl) // 10)

# Make sure layers that behave differently during training (if the model has
# any) are in training mode, even if the model was evaluated beforehand.
net.train()

for epoch in range(20):
    print(f'Starting epoch {epoch}')

    # Sum of batch losses since the last report; reset after every report so
    # the printed value is the mean over exactly `interval` batches.
    running_loss = 0.0
    for i, (images, label_array, labels) in enumerate(dl, 0):

        # Zero param grads
        optimizer.zero_grad()

        # Tensors can be moved to the GPU directly; the deprecated
        # torch.autograd.Variable wrapper is not needed.
        if CUDA:
            images = images.cuda()
            label_array = label_array.cuda()

        # Forward
        prediction = net(images)

        # Calculate loss: view the flat network output as (batch, 5, 62) so it
        # lines up with label_array before it is passed to the criterion.
        loss = criterion(prediction.reshape(prediction.shape[0], 5, 62), label_array)
        # Backpropagate
        loss.backward()
        # Step optimizer
        optimizer.step()

        # Print stats once every `interval` batches, then start a new window.
        running_loss += loss.item()
        if (i + 1) % interval == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / interval:.3e}')
            running_loss = 0.0

print('Finished Training')

Starting epoch 0
[1,     1] loss: 7.021e-01


KeyboardInterrupt: 

In [10]:
# Persist the network's parameters (its state_dict) to disk.
PATH = './trained_net.pth'
torch.save(net.state_dict(), PATH)

In [11]:
from Infrastructure import decode_prediction

In [12]:
# Evaluate the network on the held-out test files.
# Batches of 4; the order of evaluation does not affect the accuracy, so the
# test loader does not shuffle.
tl = DataLoader(CaptchaDataset.from_dir(test_files), batch_size=4, shuffle=False, num_workers=2)

# Switch to inference mode for the evaluation (affects layers such as dropout
# or batch-norm, if the model has any) and remember the previous mode so it
# can be restored afterwards.
was_training = net.training
net.eval()

total = correct = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for (images, label_array, labels) in tl:

        if CUDA:
            images = images.cuda()

        # calculate outputs by running images through the network
        prediction = net(images)

        # NOTE(review): c and t are presumably the number of correct predictions
        # and the number of predictions compared in this batch -- verify against
        # decode_prediction.
        pred_labels, c, t = decode_prediction(prediction, return_accuracy=True, print_comparison=True, labels=labels)
        correct += c
        total += t

# Leave the model in the mode it was in before evaluation.
net.train(was_training)

if total:
    # Percentage = 100 * correct / total; the image count comes from the test
    # set itself rather than a hard-coded number.
    print(f'Accuracy of the network on the {len(test_files)} test images: {100 * correct / total} %')
else:
    # Nothing was evaluated; avoid dividing by zero.
    print('No test samples were evaluated; accuracy is undefined.')

Predicted: OVCQn Ground Truth: XU9lv
Predicted: wKco0 Ground Truth: fcXLp
Predicted: w3Con Ground Truth: gjsGu
Predicted: wVco0 Ground Truth: y3rdt
Predicted: wKcon Ground Truth: yYX2L
Predicted: OVCop Ground Truth: kwelk
Predicted: OVCo0 Ground Truth: dQyRD
Predicted: wVcon Ground Truth: a8wbe
Predicted: O3Co0 Ground Truth: LYyAg
Predicted: wKcon Ground Truth: KjX7w
Predicted: wKc6n Ground Truth: qL5ku
Predicted: wKc6n Ground Truth: yhO16
Predicted: wVc6n Ground Truth: fjmzi
Predicted: wKcon Ground Truth: 5G2lZ
Predicted: wVcon Ground Truth: YvpBX
Predicted: wKCo0 Ground Truth: Shzn2
Predicted: wKcon Ground Truth: JNQb5
Predicted: wKc6n Ground Truth: xyDnK
Predicted: wKcon Ground Truth: BdBFE


KeyboardInterrupt: 