In [1]:
from glob import glob
import numpy as np

from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader

import torch

import torch.optim as optim

import torch.nn as nn

In [2]:
file_locations = glob('./Captchas/*')
captcha_names = [file.split('/')[-1].split('.')[0] for file in file_locations]
print( f'identified {len(file_locations)} images' )

identified 113062 images


In [3]:
# Unique characters is global -- expect only 60 out
unique_characters = [*set(char for name in captcha_names for char in name)]

print( f'{len(unique_characters)} unique characters: {unique_characters}' )

60 unique characters: ['v', 'P', 'u', '8', 'A', 'M', 'n', 'D', 's', '6', 'K', '1', '9', 'Z', '4', 'G', 'z', 'R', 'i', 'F', 'l', '2', 'k', 'c', 'N', 'J', 'E', 'w', 'C', 'Y', 'd', 'B', 't', 'r', '5', 'U', 'b', 'V', '3', 'j', '7', 'q', 'L', 'I', 'e', 'T', 'f', 'g', 'W', 'a', 'O', 'X', 'y', 'H', 'p', 'Q', 'h', 'S', 'm', 'x']


In [4]:
from Infrastructure import Captcha_Dataset

In [5]:
# Split training/test data
train_files, test_files = train_test_split(file_locations[0:1_000], test_size = .2)
print(f'Split dataset into 80:20 train/test of sizes {len(train_files)},{len(test_files)}.')

Split dataset into 80:20 train/test of sizes 800,200.


In [6]:
# Load in training files.
train = Captcha_Dataset.from_dir(train_files)#[0:128])

print(f'{train.X.shape}\nSample of images in format 40px x 150px x 3 RGB channels, of type {type(train.X[0][0][0][0])}')

# Instantiate dataloader (the iterable that provides batches for gradient descent.)
dl = DataLoader(train, \
    64, # Fetch 4 samples per batch
    shuffle=True, num_workers=2)

(800, 40, 150, 3)
Sample of images in format 40px x 150px x 3 RGB channels, of type <class 'numpy.float32'>


In [7]:
# Test out the iterable.
dataiter = iter(dl)
images, label_array, labels = next(dataiter)
print(f'Each batch has a dataset of shape {images.shape} and a corresponding set of {label_array.shape} labels.')

Each batch has a dataset of shape torch.Size([64, 3, 40, 150]) and a corresponding set of torch.Size([64, 5, 60]) labels.


In [8]:
from Infrastructure import Net

In [9]:
# Instantiate all our stuff
net = Net()
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

In [10]:
for epoch in range(20):
    print(f'Starting epoch {epoch}')

    running_loss = 0.0
    for i,(images, label_array, labels) in enumerate(dl,0):
        
        # Zero param grads
        optimizer.zero_grad()

        # Forward
        prediction = net(images)
        
        # Calculate loss
        loss = criterion(prediction.reshape(prediction.shape[0],5,60), label_array)
        # Backpropagate
        loss.backward()
        # Step optimizer
        optimizer.step()

        # print stats
        running_loss += loss.item()
        if i%( len(train_files) // 64 // 10 ) == 0: print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / ( len(train_files) / 64 / 10 ):.3e}')

print('Finished Training')

Starting epoch 0
[1,     1] loss: 1.086e+00
[1,     2] loss: 2.005e+00
[1,     3] loss: 2.516e+00
[1,     4] loss: 2.797e+00
[1,     5] loss: 3.036e+00
[1,     6] loss: 3.209e+00
[1,     7] loss: 3.337e+00
[1,     8] loss: 3.472e+00
[1,     9] loss: 3.606e+00
[1,    10] loss: 3.712e+00
[1,    11] loss: 3.810e+00
[1,    12] loss: 3.910e+00
[1,    13] loss: 4.009e+00
Starting epoch 1
[2,     1] loss: 1.035e-01
[2,     2] loss: 1.990e-01
[2,     3] loss: 2.876e-01
[2,     4] loss: 3.837e-01
[2,     5] loss: 4.850e-01
[2,     6] loss: 5.714e-01
[2,     7] loss: 6.538e-01
[2,     8] loss: 7.400e-01
[2,     9] loss: 8.296e-01
[2,    10] loss: 9.151e-01
[2,    11] loss: 9.969e-01
[2,    12] loss: 1.078e+00
[2,    13] loss: 1.157e+00
Starting epoch 2
[3,     1] loss: 8.010e-02
[3,     2] loss: 1.592e-01
[3,     3] loss: 2.369e-01
[3,     4] loss: 3.143e-01
[3,     5] loss: 3.910e-01
[3,     6] loss: 4.695e-01
[3,     7] loss: 5.460e-01
[3,     8] loss: 6.215e-01
[3,     9] loss: 6.974e-01
[3, 

In [11]:
PATH = './trained_net.pth'
if False:
    torch.save(net.state_dict(), PATH)

In [12]:
from Infrastructure import decode_prediction

In [16]:
tl = DataLoader(Captcha_Dataset.from_dir(test_files), \
    4, # Fetch 4 samples per batch
    shuffle=True, num_workers=2)

total = correct = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for (images, label_array, labels) in tl:

        # calculate outputs by running images through the network
        prediction = net(images)

        pred_labels, c, t = decode_prediction(prediction, return_accuracy=True, print_comparison=True, labels=labels)
        correct+=c
        total+=t


print(f'Accuracy of the network on the 1000 test images: {200 * correct / total} %')

Predicted: vxOLK Ground Truth: nvlxB
Predicted: AQvD3 Ground Truth: yI25B
Predicted: Azvgs Ground Truth: qdfl9
Predicted: q2tVX Ground Truth: exkxP
Predicted: WbV6s Ground Truth: T8cgq
Predicted: agxjs Ground Truth: Ws7lV
Predicted: GNjpM Ground Truth: cf3PG
Predicted: Sz9VL Ground Truth: gOUq3
Predicted: TzbhS Ground Truth: 6KyyT
Predicted: MhqmL Ground Truth: XrgEg
Predicted: Tz87r Ground Truth: 6hyEW
Predicted: AxUTs Ground Truth: Ycein
Predicted: JGTCq Ground Truth: sxR16
Predicted: PuZVq Ground Truth: PvT8L
Predicted: wYBdK Ground Truth: lI1ep
Predicted: nnAOL Ground Truth: bVreB
Predicted: SVM7e Ground Truth: X58ua
Predicted: cUmAu Ground Truth: YUnYl
Predicted: igq5V Ground Truth: DmKKj
Predicted: Ry6k2 Ground Truth: WXOkj
Predicted: YGtky Ground Truth: t8tp4
Predicted: sQzuS Ground Truth: GpK6g
Predicted: aEA22 Ground Truth: QxHgB
Predicted: 8Q7QY Ground Truth: cTf9I
Predicted: JJE7k Ground Truth: w94pY
Predicted: CnSmZ Ground Truth: jYRUs
Predicted: 2rAZV Ground Truth: kgxeP
P