# Required modules

In [None]:
# Needed modules
import numpy as np
import matplotlib.pyplot as plt
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
from PIL import *
from tqdm import tqdm


# Global GPU switch; get_accuracy1 reads it when deciding whether to move batches to CUDA.
use_cuda = True

In [None]:
# Unpack serverside.zip, which presumably provides the `serverside` package
# imported further down (TODO confirm its contents). The archive is not part
# of this notebook; get it from Trevor.
!unzip serverside.zip

In [None]:
# Install pillow-lut; presumably a dependency of the serverside captcha generator — TODO confirm.
pip install pillow-lut

In [None]:
from serverside.captcha.image import ImageCaptcha
from tqdm import tqdm
import os
import string
import random

# Dataset Generation


In [None]:
def get_random_string(length):
    """Return ``length`` characters drawn independently at random.

    The pool is: lowercase letters without "i" and "l", all uppercase
    letters, the digits 1-9, and five space characters (the space therefore
    appears five times in the pool and is drawn more often than any other
    single character).
    """
    lowercase = "abcdefghjkmnopqrstuvwxyz"
    uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    digits = "123456789"
    blanks = "     "
    pool = lowercase + uppercase + digits + blanks
    picked = [random.choice(pool) for _ in range(length)]
    return ''.join(picked)

In [None]:
from torchvision.transforms.transforms import Grayscale
# Label vocabulary.
# Labels are encoded per character position, either as a class index
# (strToInd) or as a one-hot row indexed as [place][tokenID] (strToOH).
# 26 lowercase letters + digits 1-9 + " " (the null character) = 36 classes.
# The digit 0 is left out; the letter "o" IS part of the vocabulary.
chars = "abcdefghijklmnopqrstuvwxyz123456789 "
charIndex = {char: i for i, char in enumerate(chars)}

# Identity matrix: row k is the one-hot vector for chars[k].
# It is placed on the GPU only when one is available, so this cell also runs
# on CPU-only machines instead of raising from an unconditional .cuda().
idn = torch.eye(len(chars))
if torch.cuda.is_available():
    idn = idn.cuda()

def strToOH(string):
    """Encode ``string`` as stacked one-hot rows, one row per character.

    Each character is looked up in ``charIndex`` and the matching row of the
    identity matrix ``idn`` is taken; the rows are stacked in character order.
    A character missing from ``charIndex`` raises ``KeyError``.
    """
    rows = [idn[charIndex[ch]] for ch in string]
    return torch.stack(rows)

def strToInd(string):
    """Encode ``string`` as a 1-D tensor of class indices, one per character.

    Each character is mapped through ``charIndex`` to a scalar tensor and the
    scalars are stacked in character order.  A character missing from
    ``charIndex`` raises ``KeyError``.
    """
    indices = [torch.tensor(charIndex[ch]) for ch in string]
    return torch.stack(indices)

In [None]:
class TCacheSet():
    """Dataset of single-character captcha images backed by a refreshable cache.

    ``period`` samples are generated when the object is built and stored in
    ``self.cached``.  ``__len__`` reports ``size`` while ``__getitem__`` wraps
    the requested index modulo ``period``, so cached samples are reused
    whenever ``size`` is larger than ``period``.  ``flush()`` replaces the
    cache with newly generated samples.
    """

    def __init__(self, size, period):
        self.size = size
        self.period = period
        self.cached = []
        self.IC = ImageCaptcha(width = 25)
        steps = [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]
        self.transform = transforms.Compose(steps)
        # Fill the cache immediately so the dataset is usable right away.
        self.flush()

    def _make_sample(self):
        """Generate one (image tensor, label index tensor) pair."""
        raw = get_random_string(1)
        image = self.transform(self.IC.generate_image(raw))
        # Lower-case the label and drop spaces, then pad back to one
        # character so a blank captcha maps to the " " class.
        text = raw.lower().replace(" ", "").ljust(1)
        return image, strToInd(text)

    def flush(self):
        """Empty the cache and refill it with ``self.period`` new samples."""
        self.cached.clear()
        for _ in range(self.period):
            self.cached.append(self._make_sample())

    def __getitem__(self, index):
        sample = self.cached[index % self.period]
        return sample[0], sample[1]

    def __len__(self):
        return self.size

class TFrozenSet():
    """Dataset of single-character captcha images generated once.

    ``size`` samples are generated when the object is built (``period`` is
    set equal to ``size``) and stored in ``self.cached``.  The generator
    method is named ``flush2`` rather than ``flush``; presumably this is so
    code that refreshes a dataset by calling ``flush()`` leaves this one
    untouched — confirm against the callers.
    """

    def __init__(self, size):
        self.size = size
        self.period = size
        self.cached = []
        # The captcha is rendered 25 px wide and then resized to 224x224.
        self.IC = ImageCaptcha(width = 25)
        steps = [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ]
        self.transform = transforms.Compose(steps)
        self.flush2()

    def _make_sample(self):
        """Generate one (image tensor, label index tensor) pair."""
        raw = get_random_string(1)
        image = self.transform(self.IC.generate_image(raw))
        # Lower-case the label and drop spaces, then pad back to one
        # character so a blank captcha maps to the " " class.
        text = raw.lower().replace(" ", "").ljust(1)
        return image, strToInd(text)

    def flush2(self):
        """Empty the cache and refill it with ``self.period`` new samples."""
        self.cached.clear()
        for _ in range(self.period):
            self.cached.append(self._make_sample())

    def __getitem__(self, index):
        sample = self.cached[index % self.period]
        return sample[0], sample[1]

    def __len__(self):
        return self.size

In [None]:
# Training set: reports 12800 samples but keeps only 1280 cached images at a time.
trainDataT = TCacheSet(size=12800, period=1280)
# Validation set: 3000 samples generated once at construction.
valDataT = TFrozenSet(size=3000)

# Model

In [None]:
import torchvision.models as models
import ssl
# SECURITY NOTE: this swaps ssl's default HTTPS context factory for the
# unverified one, so certificate verification is disabled for code in this
# process that relies on that default. Presumably a workaround for
# certificate errors while downloading the weights below — confirm and
# remove if no longer needed.
ssl._create_default_https_context = ssl._create_unverified_context
# ResNet-18 requested with pretrained weights; used as the backbone (`self.conv`) of resnet_captcha.
resnet = models.resnet18(pretrained=True)

In [None]:
class resnet_captcha(nn.Module):
    """Single-character captcha classifier built on the module-level ``resnet``.

    The backbone's 1000-dimensional output is passed through two fully
    connected layers to produce one score per entry of ``chars``.

    Note: ``self.conv`` is the shared module-level ``resnet`` object, not a
    copy, so every instance of this class trains the same backbone weights.
    """

    def __init__(self):
        super().__init__()
        self.name = "res_cap"
        self.conv = resnet
        # The backbone emits 1000 features per image.
        self.fc1 = nn.Linear(1000, 500)
        # One output per class in `chars` (26 letters, digits 1-9 and the
        # " " null class = 36), so the head always matches the vocabulary.
        self.fc2 = nn.Linear(500, len(chars))

    def forward(self, x):
        """Return class scores of shape (batch, num_classes, 1).

        The trailing dimension of size 1 is the single character position; it
        pairs with the (batch, 1) label tensors produced by the datasets.
        """
        x = self.conv(x)
        x = x.view(-1, 1000)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # Reshape using the head's own width so the view can never disagree
        # with the number of scores fc2 actually produced.
        return x.view(-1, self.fc2.out_features, 1)

# Training

In [None]:
# GPU switch read by train_res_new (get_accuracy1 reads the separate `use_cuda` flag instead).
do_cuda = True

def get_accuracy1(model, train=False, batch_size = 64):
    """Return the fraction of samples that ``model`` classifies correctly.

    Args:
        model: network whose output has the class scores along dimension 1.
        train: evaluate on ``trainDataT`` when true, otherwise on ``valDataT``.
        batch_size: number of samples per evaluation batch.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the dataset is empty.

    The model is switched to eval mode and gradients are disabled for the
    duration of the call; the previous train/eval mode is restored afterwards.
    """
    data = trainDataT if train else valDataT

    # is_available must be *called*; the bare function object is always truthy.
    on_gpu = use_cuda and torch.cuda.is_available()

    correct = 0
    total = 0
    was_training = model.training
    model.eval()  # use running BatchNorm statistics instead of updating them
    try:
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for imgs, labels in tqdm(torch.utils.data.DataLoader(data, batch_size=batch_size)):
                if on_gpu:
                    imgs = imgs.cuda()
                    labels = labels.cuda()

                output = model(imgs)
                # select index with maximum prediction score
                pred = output.max(1, keepdim=True)[1]
                correct += pred.eq(labels.view_as(pred)).sum().item()
                total += imgs.shape[0]
    finally:
        model.train(was_training)

    return correct / total if total else 0.0


def train_res_new(model, data, batch_size=64, num_epochs=30, learning_rate=0.0001):
    """Train ``model`` on ``data`` with Adam and cross-entropy loss.

    Args:
        model: network to train; must expose a ``name`` attribute, which is
            used in the checkpoint file names.
        data: training dataset.  If it has a callable ``flush`` attribute,
            that is called at the start of every epoch to regenerate samples.
        batch_size: number of samples per training batch.
        num_epochs: number of passes over ``data``.
        learning_rate: Adam learning rate.

    Side effects:
        Saves ``model.state_dict()`` after every epoch to
        ``model_<name>_bs<batch_size>_lr<learning_rate>_epoch<epoch>``,
        prints progress, and shows the loss and validation-accuracy plots.
    """
    torch.manual_seed(1000)
    train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # is_available must be *called*; the bare function object is always truthy.
    on_gpu = do_cuda and torch.cuda.is_available()

    # Only datasets that offer flush() are refreshed.  Looking the method up
    # explicitly (instead of try/except Exception: pass) lets real errors
    # raised inside flush() surface instead of being silently ignored.
    flush = getattr(data, "flush", None)

    iters, losses, val_acc = [], [], []

    startTime = time.time() # to see how long training goes
    print("starting training")
    # training
    n = 0 # the number of iterations
    for epoch in range(num_epochs):
        if callable(flush):
            flush()
            train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)
        print("epoch: ", epoch)
        # Passing the loader itself lets tqdm read its length for the progress bar.
        for imgs, labels in tqdm(train_loader):
            if on_gpu:
                imgs = imgs.cuda()
                labels = labels.cuda()

            out = model(imgs)             # forward pass
            loss = criterion(out, labels) # compute the total loss
            loss.backward()               # backward pass (compute parameter updates)
            optimizer.step()              # make the updates for each parameter
            optimizer.zero_grad()         # a clean up step for PyTorch

            # save the current training information
            iters.append(n)
            # CrossEntropyLoss already averages over the batch (default
            # reduction), so the value is recorded without dividing again.
            losses.append(loss.item())
            n += 1

        val_acc.append(get_accuracy1(model, train=False, batch_size=batch_size))  # compute validation accuracy
        print(("Epoch {}: |"+"Validation acc: {}").format(
                epoch,
                val_acc[-1]))

        # checkpoint
        path = "model_{0}_bs{1}_lr{2}_epoch{3}".format(model.name,
                                                   batch_size,
                                                   learning_rate,
                                                   epoch)
        torch.save(model.state_dict(), path)

    finishTime = time.time()

    delta = finishTime - startTime
    print("\nDONE TRAINING in %s seconds!\n" % delta)

    # plotting
    plt.title("Training Curve")
    plt.plot(iters, losses, label="Train")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.show()

    # This curve has one point per epoch and shows validation accuracy, so the
    # axes are labelled accordingly.
    plt.title("Training Curve")
    plt.plot(range(len(val_acc)), val_acc, label="Validation")
    plt.xlabel("Epochs")
    plt.ylabel("Validation Accuracy")
    plt.legend(loc='best')
    plt.show()

    # With num_epochs == 0 there is no accuracy to report.
    if val_acc:
        print("Final Validation Accuracy: {}".format(val_acc[-1]))

In [None]:
# Build the classifier, print whether CUDA is visible, then move the model to
# the GPU. model2.cuda() is unconditional, so this cell needs a CUDA device.
model2 = resnet_captcha()
print(torch.cuda.is_available())
model2.cuda()
# Overrides the function defaults: 50 epochs and lr=0.001 (defaults are 30 and 0.0001).
train_res_new(model2,  trainDataT, num_epochs = 50, learning_rate = 0.001)

# Testing

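Run every cell in this section. Note that the `cv2` import and the Google Drive mount cells appear after the cells that depend on them, so run those two first.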

In [None]:
# Load the test captcha from the mounted Drive folder and display it.
# The path is a placeholder; replace it with the real image location.
# NOTE(review): the drive.mount cell appears later in this section and must run first.
img = plt.imread('/content/gdrive/path/to/test/image')
plt.imshow(img)

In [None]:
# c_w is the size of axis 1 and c_h the size of axis 0 of `img_c`
# (presumably the width and height of an image array — confirm).
# NOTE(review): `img_c` is not assigned in any visible cell; the image loaded
# above is called `img`.
img_c_shape = torch.from_numpy(img_c).shape
c_w = img_c_shape[1]
c_h = img_c_shape[0]
print("shape is: {}x{}".format(c_w, c_h))

In [None]:
import cv2

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive, the prefix used by the plt.imread path in this section.
drive.mount('/content/gdrive')

In [None]:
# Slice the captcha `img2` into overlapping windows, `window_w` columns wide,
# sliding one column at a time.
# NOTE(review): `img2` and `window_w` are not assigned in any visible cell.
img_windows = []
captcha_w = img2.shape[1]
# NOTE(review): `transf` is not used anywhere in the visible code.
transf = transforms.Resize((224, 224))

# The range stops before start == captcha_w - window_w, so the window that
# ends exactly at the last column is not produced — confirm this is intended.
for i in range(0,captcha_w - window_w):
    # All rows, columns i .. i + window_w - 1, all of the third axis.
    new_img = img2[:,i:(window_w+i),:]
    if window_w != 224:
        # dsize=(0, 0) makes cv2.resize derive the output size from the fx/fy
        # scale factors. fy is based on c_h, which an earlier cell reads from
        # `img_c`, not `img2` — TODO confirm both images have the same height.
        new_img = cv2.resize(new_img, (0,0 ), fx=224/window_w, fy=224/c_h)
    img_windows.append(new_img)

In [None]:
# Replace every window in img_windows, in place, with the tensor produced by
# torchvision's ToTensor transform.
img_to_tensor = transforms.ToTensor()
for i, window in enumerate(img_windows):
    img_windows[i] = img_to_tensor(np.array(window))

print(img_windows[0].shape)

In [None]:
# Add a leading dimension of size 1 to every window tensor so each one can be
# passed to the model on its own.
for i, window in enumerate(img_windows):
    img_windows[i] = window.unsqueeze(0)

In [None]:
# Move the model's parameters to the GPU before running inference on the windows.
model2.cuda()

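The next cell runs the model on every sliding window and plots, for each character class, the softmax probability (in percent) as a function of the window position across the captcha image.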

In [None]:
# Softmax probability (in percent) of every class for every sliding window.
# Rows follow the order of chars_str; columns are window positions.
chars_str = "abcdefghijklmnopqrstuvwxyz123456789 "
num_classes = len(chars_str)  # 36: 26 letters, digits 1-9 and the " " null class

# A float array keeps the fractional part of each percentage; the previous
# integer array truncated every value on assignment.
distribution = np.zeros((num_classes, len(img_windows)), dtype=np.float64)

# Run on whatever device the model already lives on.
device = next(model2.parameters()).device

# Inference only: use eval mode (BatchNorm running statistics) and skip
# autograd. NOTE: model2 stays in eval mode after this cell; call
# model2.train() before training it again.
model2.eval()
prob_sum = 0
with torch.no_grad():
    for k in range(len(img_windows)):  # for each window
        img_windows[k] = img_windows[k].to(device)
        pred = model2(img_windows[k])       # scores for this window; classes on dim 1
        pred2 = torch.softmax(pred, dim=1)  # computed once per window

        if k == 0:
            # Sanity check on the first window: the probabilities should sum to 1.
            prob_sum = pred2[0].sum().item()
            print(("total prob is: {}").format(prob_sum))

        distribution[:, k] = (pred2[0] * 100).reshape(-1).cpu().numpy()

# torch.max raises on an empty tensor, so only report when there are windows.
if distribution.size:
    print(torch.max(torch.from_numpy(distribution)))

plt.title("Character Distribution")
for ch in range(num_classes):
    plt.plot(range(len(img_windows)), distribution[ch], label = chars_str[ch])
plt.xlabel("Captcha")
plt.ylabel("Prob")
plt.legend()
plt.show()