In [1]:
# import libraries
import csv
import numpy as np
import random
import time
import os
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from torchvision.io import read_image
from torch.utils.data import DataLoader
import torch.optim as optim

In [3]:
# dataset directories (train / test splits)
TRAIN_PATH = "/content/drive/MyDrive/ML/captcha-hacker/train"
TEST_PATH = "/content/drive/MyDrive/ML/captcha-hacker/test"

# Device configuration: first CUDA device when available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [4]:
# character -> class index used for the one-hot label encoding:
# 'a'..'z' take indices 0..25 and '0'..'9' take indices 26..35
mapping = {
    char: index
    for index, char in enumerate("abcdefghijklmnopqrstuvwxyz0123456789")
}

# dataset classes
class Task1Dataset(Dataset):
    """Dataset over the samples whose filename starts with ``"task1"``.

    ``data`` is a sequence of ``(filename, label)`` rows and ``root`` is the
    directory the filenames are relative to. Every item is a pair whose first
    element is the image divided by 255 as a float tensor; the second element
    is the filename when ``return_filename`` is true, otherwise ``int(label)``.
    """

    def __init__(self, data, root, return_filename=False):
        self.root = root
        self.return_filename = return_filename
        # drop every row that does not belong to task 1
        self.data = list(filter(lambda row: row[0].startswith("task1"), data))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        name, target = self.data[index]
        # normalize the value of image to 0 ~ 1
        pixels = torch.FloatTensor(read_image(f"{self.root}/{name}") / 255)
        # the label is only parsed when the caller did not ask for filenames
        companion = name if self.return_filename else int(target)
        return pixels, companion
    
class Task2Dataset(Dataset):
    """Dataset over the samples whose filename starts with ``"task2"``.

    ``data`` is a sequence of ``(filename, label)`` rows and ``root`` is the
    directory the filenames are relative to. Every item pairs the image
    (divided by 255, float tensor) with either the filename
    (``return_filename=True``) or a 72-element one-hot style target built from
    the first two characters of the label.
    """

    def __init__(self, data, root, return_filename=False):
        self.root = root
        self.return_filename = return_filename
        # keep the task 2 rows only
        self.data = [entry for entry in data if entry[0].startswith("task2")]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        name, text = self.data[index]
        # normalize the value of image to 0 ~ 1
        pixels = torch.FloatTensor(read_image(f"{self.root}/{name}") / 255)
        if self.return_filename:
            return pixels, name

        # one-hot encoding: slots 0-35 describe label[0], slots 36-71 label[1]
        target = torch.zeros(72, dtype=torch.float32)
        for position in (1, 0):
            target[mapping[text[position]] + 36 * position] = 1
        return pixels, target

class Task3Dataset(Dataset):
    """Dataset over the samples whose filename starts with ``"task3"``.

    ``data`` is a sequence of ``(filename, label)`` rows and ``root`` is the
    directory the filenames are relative to. Every item pairs the image
    (divided by 255, float tensor) with either the filename
    (``return_filename=True``) or a 144-element one-hot style target built
    from the first four characters of the label.
    """

    def __init__(self, data, root, return_filename=False):
        self.return_filename = return_filename
        self.root = root
        # keep the task 3 rows only
        self.data = [record for record in data if record[0].startswith("task3")]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        name, text = self.data[index]
        # normalize the value of image to 0 ~ 1
        pixels = torch.FloatTensor(read_image(f"{self.root}/{name}") / 255)
        if self.return_filename:
            return pixels, name

        # one-hot encoding: character k of the label owns slots 36*k .. 36*k + 35
        hot_slots = [mapping[text[position]] + 36 * position for position in (3, 2, 1, 0)]
        target = torch.zeros(144, dtype=torch.float32)
        target[hot_slots] = 1
        return pixels, target

In [5]:
# class index -> character, used to turn predicted indices back into a string:
# 0..25 decode to 'a'..'z' and 26..35 decode to '0'..'9'
mapping2 = dict(enumerate("abcdefghijklmnopqrstuvwxyz0123456789"))

def test(TASK, PATH, BATCH_SIZE, out_features):
    """Predict the labels of one task's test images and append them to ``submission.csv``.

    Args:
        TASK: Filename prefix selecting which rows of ``sample_submission.csv``
            are predicted. "task1" and "task2" have dedicated settings; any
            other value is processed with the task3 settings.
        PATH: Path of the saved ``state_dict`` for the ResNet-18 model.
        BATCH_SIZE: Batch size of the test ``DataLoader``.
        out_features: Size of the replaced ``fc`` layer; it has to match the
            checkpoint stored at ``PATH``.

    Side effects:
        Appends one ``filename,label`` row per selected test image to
        ``submission.csv`` in the current working directory (a header row is
        written first when the file is missing or empty) and prints a
        completion message. Calling the function again for the same task
        appends the same rows again.
    """
    # select preprocess: task1/task2 are resized to 288x288, everything else to 384x288
    target_size = (288, 288) if TASK in ("task1", "task2") else (384, 288)
    preprocess = transforms.Compose([
        transforms.Resize(target_size),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # load test data (blank CSV rows are skipped instead of raising IndexError)
    test_data = []
    with open(f'{TEST_PATH}/../sample_submission.csv', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if row and row[0].startswith(TASK):
                test_data.append(row)

    if TASK == "task1":
        dataset_cls = Task1Dataset
    elif TASK == "task2":
        dataset_cls = Task2Dataset
    else:
        dataset_cls = Task3Dataset
    test_ds = dataset_cls(test_data, root=TEST_PATH, return_filename=True)
    test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, drop_last=False, shuffle=False)

    # define model and load weights *before* touching submission.csv, so a bad
    # checkpoint path cannot leave a header-only file behind.
    model = torchvision.models.resnet18()
    model.fc = nn.Linear(in_features=512, out_features=out_features, bias=True)
    # map_location lets a checkpoint saved on a GPU load when `device` is the CPU
    model.load_state_dict(torch.load(PATH, map_location=device))
    model = model.to(device)
    model.eval()

    # open submission.csv for writing predictions; the context manager closes
    # the file even if inference raises.
    out_path = 'submission.csv'
    needs_header = not os.path.exists(out_path) or os.path.getsize(out_path) == 0
    with open(out_path, 'a', newline='') as out_file, torch.no_grad():
        csv_writer = csv.writer(out_file)
        if needs_header:
            csv_writer.writerow(["filename", "label"])

        # testing loop
        for image, filenames in test_dl:
            logits = model(preprocess(image).to(device))

            if TASK == "task1":
                labels = [str(index) for index in logits.argmax(dim=1).tolist()]
            else:
                # The output is split into groups of 36 scores, one group per
                # character. argmax is taken on the raw scores: a softmax over
                # the whole row would not change which entry of a group wins.
                n_chars = 2 if TASK == "task2" else 4
                indices = logits.view(-1, n_chars, 36).argmax(dim=2).tolist()
                labels = ["".join(mapping2[i] for i in sample) for sample in indices]

            csv_writer.writerows(zip(filenames, labels))

    print("Finish", TASK, "!!")

In [6]:
# one entry per task: (task prefix, checkpoint path, batch size, output layer size)
for task_name, weight_path, batch_size, n_outputs in (
    ("task1", "/content/drive/MyDrive/ML/task1_weight.pth", 100, 10),
    ("task2", "/content/drive/MyDrive/ML/task2_weight.pth", 100, 36 + 36),
    ("task3", "/content/drive/MyDrive/ML/task3_weight.pth", 50, 36 + 36 + 36 + 36),
):
    test(task_name, weight_path, batch_size, n_outputs)

Finish task1 !!
Finish task2 !!
Finish task3 !!
