# Captcha Hacker

### 1. Import Modules

In [1]:
from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch
import torch.nn.functional as F
import pandas as pd
from pathlib import Path
import os
import cv2
import numpy as np
import torchvision.transforms as tf
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau, ExponentialLR
import time

### 2. Load Data

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
# !unzip /content/drive/MyDrive/ML_HW5/captcha.zip -d /content
# !unzip /content/drive/MyDrive/ML_HW5/test.zip -d /content

In [4]:
# %cd "/content/drive/MyDrive/ML_HW5"

### 3. Set Random Seed


In [5]:
torch.manual_seed(120)
np.random.seed(120)

### 4. Custom Dataset

In [6]:
class CaptchaDataset(Dataset):
    def __init__(self, train_img, train_label) -> None:
        self.train_img = train_img
        self.train_label = train_label
        self.length = len(train_img)
        self.transform = tf.Compose(
            [
                tf.RandomRotation([0, 15]),
                tf.Lambda(lambda x: x.repeat(3, 1, 1)),
            ]
        )

    def __getitem__(self, index):

        image = self.transform(self.train_img[index])

        label = self.train_label[index]
        return image, label

    def __len__(self):
        return self.length

### 5. Load Data Function

In [7]:
def load_data(captcha_len=1, batch_size=128):
    alphabet = list("0123456789abcdefghijklmnopqrstuvwxyz")
    x = captcha_len if captcha_len != 4 else 3
    train_path = Path(".") / "captcha" / f"task{x}"

    train_img = []
    train_label = []

    for path in train_path.rglob("*.png"):
        img = cv2.imread(str(path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 255.0
        # cv2.imshow("", img)
        # cv2.waitKey(0)
        img = np.reshape(img, (img.shape[0], img.shape[1], 1))
        img = img.transpose((2, 0, 1))
        label = str(path).split("/")[-1].split("_")[0]

        code = np.zeros((captcha_len, len(alphabet)))
        for i in range(captcha_len):
            code[i, alphabet.index(label[i])] = 1

        train_img.append(img)
        train_label.append(code)

    train_len = int(len(train_img) * 0.8)
    print(f"train_len: {train_len}")

    shuffle = np.arange(len(train_img))
    np.random.shuffle(shuffle)

    train_img = np.array(train_img)[shuffle].astype(np.float32)
    train_label = np.array(train_label)[shuffle]

    train_data = torch.tensor(train_img[:train_len])
    train_labels = torch.tensor(train_label[:train_len])
    train_set = CaptchaDataset(train_data, train_labels)
    train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    valid_data = torch.tensor(train_img[train_len:])
    valid_labels = torch.tensor(train_label[train_len:])
    valid_set = CaptchaDataset(valid_data, valid_labels)
    valid_iter = DataLoader(valid_set, batch_size=batch_size)

    return train_iter, valid_iter


### 6. Model


In [8]:
import torch.nn as nn
from torchvision.models import resnet18


class ResCAPTCHA(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.model = resnet18(weights="DEFAULT")
        self.model.fc = nn.Linear(512, 128)
        self.drop1 = nn.Dropout(0.3)
        self.dense1 = nn.Linear(in_features=128, out_features=36)
        self.dense2 = nn.Linear(in_features=128, out_features=36)
        self.dense3 = nn.Linear(in_features=128, out_features=36)
        self.dense4 = nn.Linear(in_features=128, out_features=36)

    def forward(self, input):
        x = self.model(input)
        x = self.drop1(x)
        output = [self.dense1(x), self.dense2(x), self.dense3(x), self.dense4(x)]
        return output


### 7. Train

In [9]:
def train(captcha_len):
    model.train()
    min_loss = 100000.0
    for epoch in range(epochs):
        print(f"|epoch: {epoch+1}|")
        start_time = time.time()
        running_loss = [0.0 for _ in range(captcha_len)]
        batches = 0
        for data in train_iter:
            images, labels = data
            optimizer.zero_grad()
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)

            for i in range(captcha_len):
                loss = criterion(outputs[i], labels[:, i, :])
                if i == captcha_len - 1:
                    loss.backward()
                else:
                    loss.backward(retain_graph=True)
                running_loss[i] += loss.item()

            optimizer.step()
            batches += 1

        for j in range(captcha_len):
            print(f"loss{j+1}: {running_loss[j]/batches} ", end="")

        running_loss = [0.0 for _ in range(captcha_len)]
        with torch.no_grad():
            valid_loss = [0.0 for _ in range(captcha_len)]
            batches = 0
            for data in valid_iter:
                images, labels = data
                images = images.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                for i in range(captcha_len):
                    loss = criterion(outputs[i], labels[:, i, :])

                    valid_loss[i] += loss.item()
                batches += 1
            print("\n|Validation:|")
            for j in range(captcha_len):
                print(
                    f"loss{j+1}: {valid_loss[j]/batches} ",
                    end="",
                )
   
            if (sum(valid_loss) / (captcha_len * batches)) < min_loss:
                print("\nSaving...")
                min_loss = sum(valid_loss) / (captcha_len * batches)
                torch.save(model.state_dict(), f"/content/model{captcha_len}_res18.pth")
        print(f"time: {time.time() - start_time}")
        print("-" * 70)





### Task1

In [10]:
captcha_len = 1

train_iter, valid_iter = load_data(captcha_len, batch_size=128)

model = ResCAPTCHA()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = Adam(params=model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, patience=2)
scheduler = ExponentialLR(optimizer, gamma=0.7)
criterion = nn.CrossEntropyLoss()


epochs = 10
train(captcha_len=1)


train_len: 8000


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

|epoch: 1|
loss1: 0.34084943415410396 
|Validation:|
loss1: 0.0416596128867454 
Saving...
time: 16.85210657119751
----------------------------------------------------------------------
|epoch: 2|
loss1: 0.036711228171293304 
|Validation:|
loss1: 0.03636834870145821 
Saving...
time: 9.270767450332642
----------------------------------------------------------------------
|epoch: 3|
loss1: 0.036777953753731336 
|Validation:|
loss1: 0.04175509744234129 time: 9.201897859573364
----------------------------------------------------------------------
|epoch: 4|
loss1: 0.018417900901523933 
|Validation:|
loss1: 0.013839871226489282 
Saving...
time: 9.882970809936523
----------------------------------------------------------------------
|epoch: 5|
loss1: 0.028272782251487834 
|Validation:|
loss1: 0.02795611450674679 time: 9.113187074661255
----------------------------------------------------------------------
|epoch: 6|
loss1: 0.015984131676526084 
|Validation:|
loss1: 0.012192688084027181 
Savin

### Task2

In [11]:
captcha_len = 2

train_iter, valid_iter = load_data(captcha_len, batch_size=128)

model = ResCAPTCHA()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = Adam(params=model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, patience=2)
scheduler = ExponentialLR(optimizer, gamma=0.8)
criterion = nn.CrossEntropyLoss()


epochs = 15
train(captcha_len=2)

train_len: 15200
|epoch: 1|
loss1: 1.533292656290875 loss2: 1.419790459476521 
|Validation:|
loss1: 0.3397405111299966 loss2: 0.31336788980030045 
Saving...
time: 23.41341805458069
----------------------------------------------------------------------
|epoch: 2|
loss1: 0.2078884796383294 loss2: 0.21581673320059458 
|Validation:|
loss1: 0.19284643532663875 loss2: 0.2159274511495335 
Saving...
time: 23.05518937110901
----------------------------------------------------------------------
|epoch: 3|
loss1: 0.1181722984437389 loss2: 0.12293881210422054 
|Validation:|
loss1: 0.12819944162932193 loss2: 0.13619549097761613 
Saving...
time: 23.177544116973877
----------------------------------------------------------------------
|epoch: 4|
loss1: 0.09260035497548573 loss2: 0.0873226000278937 
|Validation:|
loss1: 0.10035302495444055 loss2: 0.12356131237333234 
Saving...
time: 23.16662311553955
----------------------------------------------------------------------
|epoch: 5|
loss1: 0.07951858317

### Task3

In [12]:
captcha_len = 4

train_iter, valid_iter = load_data(captcha_len=captcha_len, batch_size=128)

model = ResCAPTCHA()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = Adam(params=model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, patience=1)
scheduler = ExponentialLR(optimizer, gamma=0.9)
criterion = nn.CrossEntropyLoss()


epochs = 20
train(captcha_len=4)


train_len: 20000
|epoch: 1|
loss1: 2.310103358208493 loss2: 3.1213676154105197 loss3: 2.992599250501649 loss4: 2.2004210397232615 
|Validation:|
loss1: 1.1125210028828634 loss2: 2.240680414859162 loss3: 1.986625257936248 loss4: 0.9135818893557826 
Saving...
time: 49.301252365112305
----------------------------------------------------------------------
|epoch: 2|
loss1: 0.671762327821035 loss2: 1.3752573391480942 loss3: 1.279443282679344 loss4: 0.6069001875799288 
|Validation:|
loss1: 0.5108085235810101 loss2: 0.9035503333207316 loss3: 0.8870117688767778 loss4: 0.406278557185442 
Saving...
time: 49.235657691955566
----------------------------------------------------------------------
|epoch: 3|
loss1: 0.3361891395073979 loss2: 0.6255985552684044 loss3: 0.5918729453190225 loss4: 0.3022845403412428 
|Validation:|
loss1: 0.320337392648275 loss2: 0.5232196773051563 loss3: 0.5405023848345394 loss4: 0.26069581523093566 
Saving...
time: 49.311119556427
-----------------------------------------

In [13]:
%cd /content/drive/MyDrive/ML_HW5

/content/drive/MyDrive/ML_HW5


### 8. Predict

In [14]:
# alphabet = list("0123456789abcdefghijklmnopqrstuvwxyz")
# prediction = {"filename": [], "label": []}
# transform = tf.Compose(
#     [
#         tf.RandomRotation([0, 15]),
#         tf.Lambda(lambda x: x.repeat(1, 3, 1, 1)),
#     ]
# )


# def predict(cur_task, model_path, length, model_type=""):
   
#     model = ResCAPTCHA()
  

#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model.to(device)
#     model.load_state_dict(torch.load(model_path))
#     model.eval()
#     with torch.no_grad():
#         test_path = Path(".") / "test1"
#         for path in test_path.rglob("*.png"):
#             if str(path).split("/")[1] == cur_task:
#                 img = cv2.imread(str(path))
#                 img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                
#                 # img = cv2.equalizeHist(img)
#                 img = img / 255.0
#                 img = np.reshape(img, (img.shape[0], img.shape[1], 1))
#                 img = img.transpose((2, 0, 1))

#                 img = torch.tensor(img.astype(np.float32))
#                 img = torch.reshape(img, (1, img.shape[0], img.shape[1], img.shape[2]))
                
#                 candidate = [{} for _ in range(length)]
#                 for _ in range(5):
#                   cur_img = transform(img)
#                   cur_img = cur_img.to(device)
#                   output = model(cur_img)
                 
#                   for i in range(length):
#                       index = alphabet[torch.argmax(output[i])]
#                       if index in candidate[i].keys():
#                         candidate[i][index] += 1
#                       else:
#                         candidate[i][index] = 1
#                 result = ""
#                 for i in range(length):
#                   result += max(candidate[i], key=candidate[i].get)  
#                 prediction["filename"].append(str(path)[6:])
#                 prediction["label"].append(result)


# predict(cur_task="task1", model_path="/content/model1_res18.pth", length=1)
# predict(cur_task="task2", model_path="/content/model2_res18.pth", length=2)
# predict(cur_task="task3", model_path="/content/model4_res18.pth", length=4)


# df = pd.DataFrame(prediction)
# df.to_csv("submission.csv", index=False)
