In [None]:
# Mount Google Drive into the Colab runtime so files stored there can be read
# and written through the local file system.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 14.9 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 55.6 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 64.6 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.2 transformers-4.24.0


In [None]:
import os

# Work from the project folder on the mounted Drive. An absolute path is used so
# that re-running this cell keeps working: a path relative to the current
# directory stops resolving once the working directory has already been changed.
PROJECT_DIR = "/content/drive/MyDrive/Colab Notebooks/AOI"
os.chdir(PROJECT_DIR)

In [None]:
import pandas as pd
import os
from PIL import Image
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm
from transformers import get_scheduler

# the class for training data
class AOI_Dataset(Dataset):
    """Training dataset that yields two views of every image plus its label.

    The annotation CSV is loaded with ``pd.read_csv``. Column 0 of each row is
    used as the image file name (resolved relative to ``root_dir``) and column 1
    as the label.

    Args:
        root_dir: Directory that contains the image files.
        annotation_file: Path of the CSV file listing image names and labels.
        transform1: Optional callable applied to the RGB image to produce the
            first view. When ``None`` the image is returned unchanged.
        transform2: Optional callable applied to the RGB image to produce the
            second view. When ``None`` the image is returned unchanged.
    """

    def __init__(self, root_dir, annotation_file, transform1=None, transform2=None):
        self.root_dir = root_dir
        self.annotations = pd.read_csv(annotation_file)

        # transformation 1
        self.transform1 = transform1

        # transformation 2
        self.transform2 = transform2

    def __len__(self):
        """Return the number of rows in the annotation file."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(view1, view2, label)`` for the row at position ``index``.

        The label is returned as a floating-point scalar tensor.
        """
        row = self.annotations.iloc[index]
        img_path = os.path.join(self.root_dir, row.iloc[0])

        # Open inside a context manager so the file handle is released as soon
        # as the pixel data has been converted to RGB.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        y_label = torch.tensor(float(row.iloc[1]))

        # Both transforms default to None; only call the ones that were given.
        img1 = img if self.transform1 is None else self.transform1(img)
        img2 = img if self.transform2 is None else self.transform2(img)

        return (img1, img2, y_label)

In [None]:
# the structure of the model
class AOI_network(nn.Module):
  """Two-branch image classifier built from two ResNet-50 backbones.

  Each branch processes one view of the same image. The outputs of the two
  backbones are concatenated and mapped to ``num_classes`` scores by a single
  linear layer.

  Args:
    num_classes: Number of output classes (size of the returned score vector).
  """

  def __init__(self, num_classes=6):
    super().__init__()
    # `pretrained=True` is deprecated in torchvision; request the ImageNet
    # checkpoint explicitly through the `weights` argument instead.
    weights = models.ResNet50_Weights.IMAGENET1K_V1
    self.resnet50_01 = models.resnet50(weights=weights)
    self.resnet50_02 = models.resnet50(weights=weights)

    # The head consumes the concatenation of both backbones' final fc outputs.
    self.out_fc = nn.Linear(self.resnet50_01.fc.out_features * 2, num_classes)
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(0.2)

  def forward(self, images1, images2):
    """Return raw class scores (logits) for a batch of image pairs.

    Args:
      images1: Batch fed to the first backbone.
      images2: Batch fed to the second backbone.

    Returns:
      Tensor with one row per sample and ``num_classes`` columns.
    """
    features_01 = self.resnet50_01(images1)
    features_02 = self.resnet50_02(images2)
    in_features = torch.cat([features_01, features_02], dim=1)

    # Apply the non-linearity and dropout to the features, not to the final
    # scores: a ReLU on the output clamps every logit to be non-negative and
    # dropout there randomly zeroes class scores, both of which distort the
    # logits a cross-entropy loss expects.
    hidden = self.dropout(self.relu(in_features))
    return self.out_fc(hidden)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


def _shared_steps():
    """Build the steps common to both pipelines: resize, rotate, tensor, normalise."""
    return [
        transforms.Resize((256, 256)),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]


# transformations can be specified here
# The first pipeline consists of the shared steps only.
transform_01 = transforms.Compose(_shared_steps())

# The second pipeline sharpens the image before the shared steps, so the model
# receives a sharpened copy as its second input.
transform_02 = transforms.Compose(
    [transforms.RandomAdjustSharpness(4, p=1)] + _shared_steps()
)

# Training hyper-parameters.
num_epochs, learning_rate = 20, 1e-5

# DataLoader settings.
batch_size, shuffle, num_workers = 16, True, 1

In [None]:
# Training data: every image must live in the directory "train_images" and be
# listed in "train.csv".
dataset = AOI_Dataset(
    "train_images",
    "train.csv",
    transform1=transform_01,
    transform2=transform_02,
)
train_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
)

# Model, loss and optimizer.
model = AOI_network()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Linear learning-rate schedule without warm-up, stepped once per batch over
# the whole run.
total_training_steps = num_epochs * len(train_loader)
learning_rate_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [None]:
# Train for `num_epochs` epochs and write a checkpoint after every epoch.
for epoch in range(num_epochs):
  model.train()
  loop = tqdm(train_loader, total = len(train_loader), leave = True)
  # The description only depends on the epoch, so set it once per epoch.
  loop.set_description(f"Epoch [{epoch+1}/{num_epochs}]")
  for imgs1, imgs2, labels in loop:
    imgs1 = imgs1.to(device)
    imgs2 = imgs2.to(device)
    # CrossEntropyLoss takes integer class indices as targets.
    labels = labels.to(device, dtype=torch.long)
    optimizer.zero_grad()
    outputs = model(imgs1, imgs2)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    # The schedule is defined per optimisation step, so advance it every batch.
    learning_rate_scheduler.step()
    loop.set_postfix(loss = loss.item())
  # Store the scheduler state next to the model and optimizer so that a resumed
  # run continues the learning-rate decay instead of restarting it.
  checkpoint = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': learning_rate_scheduler.state_dict()
  }
  # The same file is overwritten each epoch; only the latest state is kept.
  torch.save(checkpoint, "checkpoint")

Epoch [1/20]: 100%|██████████| 158/158 [19:36<00:00,  7.45s/it, loss=0.2]
Epoch [2/20]: 100%|██████████| 158/158 [01:22<00:00,  1.92it/s, loss=0.652]
Epoch [3/20]: 100%|██████████| 158/158 [01:21<00:00,  1.93it/s, loss=0.33]
Epoch [4/20]: 100%|██████████| 158/158 [01:22<00:00,  1.92it/s, loss=0.562]
Epoch [5/20]: 100%|██████████| 158/158 [01:22<00:00,  1.91it/s, loss=0.5]
Epoch [6/20]: 100%|██████████| 158/158 [01:22<00:00,  1.92it/s, loss=0.255]
Epoch [7/20]: 100%|██████████| 158/158 [01:23<00:00,  1.90it/s, loss=0.459]
Epoch [8/20]: 100%|██████████| 158/158 [01:25<00:00,  1.86it/s, loss=0.566]
Epoch [9/20]: 100%|██████████| 158/158 [01:24<00:00,  1.87it/s, loss=0.228]
Epoch [10/20]: 100%|██████████| 158/158 [01:23<00:00,  1.90it/s, loss=0.118]
Epoch [11/20]: 100%|██████████| 158/158 [01:21<00:00,  1.94it/s, loss=0.124]
Epoch [12/20]: 100%|██████████| 158/158 [01:21<00:00,  1.94it/s, loss=0.228]
Epoch [13/20]: 100%|██████████| 158/158 [01:22<00:00,  1.93it/s, loss=0.346]
Epoch [14/20]

In [None]:
# an extra class for testing data
class AOI_Dataset_TEST(Dataset):
    """Unlabelled dataset that yields two views of every listed image.

    The annotation CSV is loaded with ``pd.read_csv``. Column 0 of each row is
    used as the image file name (resolved relative to ``root_dir``); no label
    column is read.

    Args:
        root_dir: Directory that contains the image files.
        annotation_file: Path of the CSV file listing the image names.
        transform1: Optional callable applied to the RGB image to produce the
            first view. When ``None`` the image is returned unchanged.
        transform2: Optional callable applied to the RGB image to produce the
            second view. When ``None`` the image is returned unchanged.
    """

    def __init__(self, root_dir, annotation_file, transform1=None, transform2=None):
        self.root_dir = root_dir
        self.annotations = pd.read_csv(annotation_file)

        # transformation 1
        self.transform1 = transform1

        # transformation 2
        self.transform2 = transform2

    def __len__(self):
        """Return the number of rows in the annotation file."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(view1, view2)`` for the row at position ``index``."""
        img_id = self.annotations.iloc[index, 0]
        img_path = os.path.join(self.root_dir, img_id)

        # Open inside a context manager so the file handle is released as soon
        # as the pixel data has been converted to RGB.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        # Both transforms default to None; only call the ones that were given.
        img1 = img if self.transform1 is None else self.transform1(img)
        img2 = img if self.transform2 is None else self.transform2(img)

        return (img1, img2)

In [None]:
import numpy as np

# Deterministic preprocessing for inference: the same resize, sharpening and
# normalisation as the training pipelines, but without RandomRotation, so that
# a given test image always produces the same prediction.
eval_transform_01 = transforms.Compose(
        [
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
    )
eval_transform_02 = transforms.Compose(
        [
            transforms.RandomAdjustSharpness(4, p=1),
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
    )

# Switch dropout / batch-norm layers to inference behaviour.
model.eval()
# all the testing images must be in the directory "test_images"
dataset_TEST = AOI_Dataset_TEST("test_images", "test.csv", transform1=eval_transform_01, transform2=eval_transform_02)

# shuffle=False keeps the predictions in the same order as the rows of test.csv
test_loader = DataLoader(dataset=dataset_TEST, shuffle=False, batch_size=16,num_workers=1)
loop = tqdm(test_loader, total = len(test_loader), leave = True)
pred_labels = []

# No gradients are needed for prediction; disabling them avoids building the
# autograd graph for every batch.
with torch.no_grad():
  for imgs1, imgs2 in loop:
    imgs1 = imgs1.to(device)
    imgs2 = imgs2.to(device)

    outputs = model(imgs1, imgs2)
    # softmax is monotonic, so the argmax of the raw scores is the predicted class
    pred = outputs.argmax(dim=1)
    # Collect into one flat list: the last batch can be smaller than the others,
    # so a list of per-batch lists would be ragged.
    pred_labels.extend(pred.cpu().tolist())

res = np.array(pred_labels)

# the original csv file "test.csv" containing the names of all images
out_df = pd.read_csv("test.csv")
print(res)
print(len(out_df))
assert len(res) == len(out_df), "number of predictions does not match the rows of test.csv"
out_df["Label"] = res

# the new output file can be specified here
out_df.to_csv("test_output.csv", encoding='utf-8', index=False)

100%|██████████| 634/634 [1:33:53<00:00,  8.89s/it]

[1 2 5 ... 1 3 1]
10142



