In [1]:
!pip install transformers



In [2]:
import os
from tqdm.notebook import tqdm
import glob
import sys
import time
import pickle
import cv2
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import random
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import get_scheduler

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [4]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
%cd /content/drive/MyDrive/DL_Project

/content/drive/.shortcut-targets-by-id/1mJgNU1aJtlW2TXPFO2e4TWL1mJ2AVebd/DL_Project


In [6]:
BASE_DIR = './newestDataset/'

In [13]:
class CaptchaDataset(Dataset):
    def __init__(self, root_dir, annotation_file, transform=None):
        self.root_dir = root_dir
        self.annotations = pd.read_csv(f"{root_dir}{annotation_file}", dtype=str)
        # self.annotations = self.annotations[self.annotations['length']=='5']
        self.annotations = self.annotations
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_id = self.annotations.iloc[index]
        img = Image.open(
            # os.path.join( self.root_dir, 'NewDataset', 'data', img_id['category'], f"Len{img_id['length']}Char", img_id['file'])
            os.path.join( self.root_dir, 'NewDataset', f"Len{img_id['length']}Char", 'data', img_id['file'])
          ).convert("L")
        # print(img_id['length'], len(str(img_id['text'])))
        y_label = (int(img_id['length']), img_id['text']) #int(img_id['text'][1]), int(img_id['text'][2]), int(img_id['text'][3]), int(img_id['text'][4]))

        if self.transform is not None:
            img = self.transform(img)

        return (img, y_label)
# class CaptchaDataset(Dataset):
#     def __init__(self, root_dir, annotation_file, transform=None):
#         self.root_dir = root_dir
#         self.annotations = pd.read_csv(f"./{annotation_file}", dtype=str)
#         self.transform = transform

#     def __len__(self):
#         return len(self.annotations)

#     def __getitem__(self, index):
#         img_id = self.annotations.iloc[index]
#         img = Image.open(
#             # os.path.join( self.root_dir, img_id['category'], f"Len{img_id['length']}Char", img_id['file'])
#             os.path.join( self.root_dir, img_id['file'])
#           ).convert("L")
#         # print('qwerty  ', img_id['text'])
#         y_label = (img_id['length'], str(img_id['text']))

#         if self.transform is not None:
#             img = self.transform(img)
  
#         return (img, y_label)

In [14]:
transform = transforms.Compose(
    [
        transforms.Resize((60, 200)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ]
)

In [15]:
BATCH = 16

In [16]:
df = pd.read_csv('./newestDataset/Num.csv')
print(len(df))

25000


In [17]:
dataset = CaptchaDataset(BASE_DIR, "Num.csv", transform = transform)
print(len(dataset))
train_set, validation_set, test_set = torch.utils.data.random_split(dataset, [21000, 2000, 2000])
# train_set, validation_set = torch.utils.data.random_split(dataset, [8000,2001])

train_loader = DataLoader(dataset = train_set, shuffle = True, batch_size = BATCH)
validation_loader = DataLoader(dataset = validation_set, shuffle = True, batch_size = BATCH)
test_loader = DataLoader(dataset = test_set, shuffle = True, batch_size = BATCH)

25000


In [18]:
dataiter = iter(train_loader)
images, labels = dataiter.next()
print(type(images))
print(images.shape)
print(type(labels[1]))

<class 'torch.Tensor'>
torch.Size([16, 1, 60, 200])
<class 'tuple'>


In [19]:
DROPOUT_PROB = 0.3

In [20]:
class Model(nn.Module):
    def __init__(self, dropout_flag=1):
        super().__init__()

        # self.dropout_flag = dropout_flag  # 1->Conv Layer, 2->FC Layer

        self.blockA = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
        )

        self.blockA_drop = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
            nn.Dropout(DROPOUT_PROB),
        )

        self.blockB = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=48, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
        )

        self.blockB_drop = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=48, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
            nn.Dropout(DROPOUT_PROB),
        )

        self.blockC = nn.Sequential(
            nn.Conv2d(in_channels=48, out_channels=64, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
        )

        self.blockC_drop = nn.Sequential(
            nn.Conv2d(in_channels=48, out_channels=64, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
            nn.Dropout(DROPOUT_PROB),
        )


        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(11200, 512)
        self.out1 = nn.Linear(512, 11)
        self.out2 = nn.Linear(512, 11)
        self.out3 = nn.Linear(512, 11)
        self.out4 = nn.Linear(512, 11)
        self.out5 = nn.Linear(512, 11)

        self.length_out = nn.Linear(512, 5)

        self.softmax = nn.Softmax()
        self.relu = nn.ReLU()

        # self.drop = nn.Dropout(DROPOUT_PROB)
        

    def forward(self, x):
        
        x = self.blockA(x)
        x = self.pool(x)
        
        x = self.blockB(x)
        x = self.pool(x)
        
        x = self.blockC_drop(x)
        x = self.pool(x)
        
        x = torch.flatten(x, start_dim=1)
        
        x = self.relu(self.fc1(x))

        label1 = self.out1(x)
        label2 = self.out2(x)
        label3 = self.out3(x)
        label4 = self.out4(x)
        label5 = self.out5(x)

        length = self.length_out(x)

        # return {'label1':label1, 'label2':label2, 'label3':label3, 'label4':label4, 'label5':label5}

        return {'label': torch.stack([label1, label2, label3, label4, label5], dim=1), 'length': length}

In [21]:
model = Model().to(device)

In [22]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5)


In [23]:
## create the default learning rate scheduler
num_epochs = 50
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler(
    name="linear", 
    optimizer=optimizer, 
    num_warmup_steps=0, 
    num_training_steps=num_training_steps
)

In [24]:
from torch.nn.utils.rnn import pad_sequence
# def train(model, title, regularization = None):
    
progress_bar = tqdm(range(num_training_steps))
trainlosslist = []
validationlosslist = []

for epoch in range(num_epochs):
    train_loss = 0.0
    train_correct1 = 0
    train_correct2 = 0
    train_correct3 = 0
    train_correct4 = 0
    train_correct5 = 0
    train_correct = 0
    train_total = 0

    validation_loss = 0.0
    validation_correct1 = 0
    validation_correct2 = 0
    validation_correct3 = 0
    validation_correct4 = 0
    validation_correct5 = 0
    validation_correct = 0
    validation_total = 0

    model.train()
    for i, data in enumerate(train_loader):
        inputs = data[0].to(device)
        labels = data[1]
        
        label = []
        
        for l in range(len(labels[0])):
            label.append([])
            length = labels[0][l]

            for j in range(int(length)):
                character = labels[1][l][j]
                output_array = [0]*11
                output_array[int(character)] = 1
                label[l].append(output_array)

            for j in range(int(length), 5):
                output_array = [0]*11
                output_array[10] = 1
                label[l].append(output_array)

        label = torch.Tensor(label).to(device)
        outputs = model(inputs.to(device))

        # calculate loss
        loss_label = criterion(outputs['label'], label)

        # loss = loss #+ loss2 + loss3 + loss4 + loss5
        loss = loss_label

        pred = torch.argmax(outputs['label'], 2)
        
        label = torch.argmax(label, axis=2)

        train_correct += (torch.sum(label==pred, axis=1)==5).nonzero().shape[0]
        
        train_total += BATCH
        
        correct = torch.sum(label==pred, axis=0)

        train_correct1 += (correct[0])
        train_correct2 += (correct[1])
        train_correct3 += (correct[2])
        train_correct4 += (correct[3])
        train_correct5 += (correct[4])

        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        progress_bar.update(1)
        # print statistics
        train_loss += loss.item()


    model.eval()
    for i, data in tqdm(enumerate(validation_loader)):
        inputs = data[0].to(device)
        labels = data[1]
        
        label = []
        
        for l in range(len(labels[0])):
            label.append([])
            length = labels[0][l]

            for j in range(int(length)):
                character = labels[1][l][j]
                output_array = [0]*11
                output_array[int(character)] = 1
                label[l].append(output_array)

            for j in range(int(length), 5):
                output_array = [0]*11
                output_array[10] = 1
                label[l].append(output_array)

        label = torch.Tensor(label).to(device)
        outputs = model(inputs.to(device))

        # # calculate loss
        loss_label = criterion(outputs['label'], label)

        # # loss = loss #+ loss2 + loss3 + loss4 + loss5
        loss = loss_label

        pred = torch.argmax(outputs['label'], 2)
        
        label = torch.argmax(label, axis=2)

        validation_correct += (torch.sum(label==pred, axis=1)==5).nonzero().shape[0]
        
        validation_total += BATCH
        
        correct = torch.sum(label==pred, axis=0)
        
        validation_correct1 += (correct[0])
        validation_correct2 += (correct[1])
        validation_correct3 += (correct[2])
        validation_correct4 += (correct[3])
        validation_correct5 += (correct[4])

        validation_loss += loss.item()



    epoch_train_loss = train_loss/len(train_loader)
    epoch_train_acc1 = (train_correct1/train_total)*100
    epoch_train_acc2 = (train_correct2/train_total)*100
    epoch_train_acc3 = (train_correct3/train_total)*100
    epoch_train_acc4 = (train_correct4/train_total)*100
    epoch_train_acc5 = (train_correct5/train_total)*100

    print("Epoch [%d/%d]  Training Loss: %.5f  Acc1: %.5f  Acc2: %.5f  Acc3: %.5f  Acc4: %.5f  Acc5: %.5f " %(epoch+1, num_epochs, epoch_train_loss, epoch_train_acc1, epoch_train_acc2, epoch_train_acc3, epoch_train_acc4, epoch_train_acc5))
    print(f"Epoch {epoch},  Loss : {epoch_train_loss},  Acc : {train_correct/train_total}")


    epoch_validation_loss = validation_loss/len(validation_loader)
    epoch_validation_acc1 = (validation_correct1/validation_total)*100
    epoch_validation_acc2 = (validation_correct2/validation_total)*100
    epoch_validation_acc3 = (validation_correct3/validation_total)*100
    epoch_validation_acc4 = (validation_correct4/validation_total)*100
    epoch_validation_acc5 = (validation_correct5/validation_total)*100

    print("Epoch [%d/%d]  Validation Loss: %.5f  Acc1: %.5f  Acc2: %.5f  Acc3: %.5f  Acc4: %.5f  Acc5: %.5f " %(epoch+1, num_epochs, epoch_validation_loss, epoch_validation_acc1, epoch_validation_acc2, epoch_validation_acc3, epoch_validation_acc4, epoch_validation_acc5))
    print(f"Epoch {epoch},  Loss : {epoch_validation_loss},  Acc : {validation_correct/train_total}")
    
    trainlosslist.append(epoch_train_loss)
    validationlosslist.append(epoch_validation_loss)
    

print('Finished Training')

# plot(trainlosslist, validationlosslist, title)

  0%|          | 0/65650 [00:00<?, ?it/s]

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-24-a2b2126c2b7a>", line 28, in <module>
    for i, data in enumerate(train_loader):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 530, in __next__
    data = self._next_data()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 570, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/dist-packages/torch/uti

KeyboardInterrupt: ignored

In [None]:
model.eval()
for i, data in tqdm(enumerate(validation_loader)):
    inputs = data[0].to(device)
    labels = data[1]
    
    label = []
    
    for l in range(len(labels[0])):
        label.append([])
        length = labels[0][l]

        for j in range(int(length)):
            character = labels[1][l][j]
            output_array = [0]*11
            output_array[int(character)] = 1
            label[l].append(output_array)

        for j in range(int(length), 5):
            output_array = [0]*11
            output_array[10] = 1
            label[l].append(output_array)

    label = torch.Tensor(label).to(device)
    outputs = model(inputs.to(device))

    # # calculate loss
    # loss_label = criterion(outputs['label'], label)

    # # loss = loss #+ loss2 + loss3 + loss4 + loss5
    # loss = loss_label

    pred = torch.argmax(outputs['label'], 2)
    
    label = torch.argmax(label, axis=2)

    train_correct += (torch.sum(label==pred, axis=1)==5).nonzero().shape[0]
    
    train_total += BATCH
    
    correct = torch.sum(label==pred, axis=0)
    
    train_correct1 += (correct[0])
    train_correct2 += (correct[1])
    train_correct3 += (correct[2])
    train_correct4 += (correct[3])
    train_correct5 += (correct[4])

epoch_train_loss = train_loss/len(train_loader)
# epoch_validation_loss = validation_loss/len(validation_loader) 
epoch_train_acc1 = (train_correct1/train_total)*100
epoch_train_acc2 = (train_correct2/train_total)*100
epoch_train_acc3 = (train_correct3/train_total)*100
epoch_train_acc4 = (train_correct4/train_total)*100
epoch_train_acc5 = (train_correct5/train_total)*100
# epoch_validation_acc = (validation_correct1/validation_total)*100

print("Epoch [%d/%d]  Validation Loss: %.5f  Acc1: %.5f  Acc2: %.5f  Acc3: %.5f  Acc4: %.5f  Acc5: %.5f " %(epoch+1, num_epochs, epoch_train_loss, epoch_train_acc1, epoch_train_acc2, epoch_train_acc3, epoch_train_acc4, epoch_train_acc5))
print(f"Epoch {epoch},  Loss : {epoch_train_loss},  Acc : {train_correct/train_total}")


In [None]:
torch.save(model.state_dict(), './variableLengthBestModel.pt')