In [1]:
import os
import random
import time

import cv2 as cv
import numpy as np
import pandas as pd
import torch
import torchvision.transforms.functional as TF
from PIL import Image
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.models import resnet18, ResNet18_Weights

In [9]:
class Dataset(torch.utils.data.Dataset):
    """Image-classification dataset backed by a CSV index.

    The CSV is read with pandas and must contain a ``path`` column (image
    location) and an ``id`` column (class identifier). Class identifiers are
    sorted and mapped to contiguous integer indices in ``label_to_idx``.

    The base class is referenced by its fully qualified name on purpose: this
    class reuses the name ``Dataset``, so ``class Dataset(Dataset)`` would
    inherit from the previous definition of itself whenever the cell is re-run.

    Args:
        csv_path: Path of the CSV index file.
        transform: Optional callable applied to each loaded RGB PIL image.
        augment_factor: Number of times the index is repeated (>= 1). With a
            random ``transform`` every repetition yields a different sample.
        root: String prepended verbatim to every ``path`` value before the
            image is opened (so it should end with a path separator).

    Raises:
        ValueError: If ``augment_factor`` is smaller than 1.
    """

    def __init__(self, csv_path, transform=None, augment_factor=1, root='../'):
        if augment_factor < 1:
            raise ValueError(f"augment_factor must be >= 1, got {augment_factor}")

        index = pd.read_csv(csv_path)
        self.transform = transform
        self.root = root

        # Build the label mapping from the un-repeated index.
        unique_labels = sorted(index['id'].unique())
        self.label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}

        # Repeat the rows so that one pass over the dataset visits every image
        # ``augment_factor`` times.
        self.data = pd.concat([index] * augment_factor, ignore_index=True)

    def __len__(self):
        """Return the number of rows after repetition."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for the row at position ``idx``."""
        row = self.data.iloc[idx]

        # ``convert`` returns a new, fully loaded image, so the underlying file
        # can be closed right away instead of waiting for garbage collection.
        with Image.open(self.root + row['path']) as raw:
            img = raw.convert("RGB")
        label = self.label_to_idx[row['id']]

        if self.transform:
            img = self.transform(img)

        return img, label

In [10]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu",
          checkpoint_dir="checkpoint", num_classes=528):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    After each epoch a checkpoint holding the model state dict, the optimizer
    state dict and ``num_classes`` is written to
    ``<checkpoint_dir>/checkpoint#<epoch>.pth`` and a one-line summary
    (losses, accuracy, elapsed seconds) is printed.

    Args:
        model: Network to optimise; expected to already live on ``device``.
        optimizer: Optimizer built over the model's parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar loss.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Number of epochs to run.
        device: Device the batches are moved to.
        checkpoint_dir: Directory receiving the checkpoints; created if missing.
        num_classes: Value stored under ``'num_classes'`` in every checkpoint.

    Returns:
        None.
    """
    print('using device:', device)
    # torch.save does not create missing parent directories.
    os.makedirs(checkpoint_dir, exist_ok=True)

    for epoch in range(1, epochs+1):
        start_time = time.time()
        training_loss = 0.0
        valid_loss = 0.0

        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            training_loss += loss.item() * inputs.size(0)
        # max(..., 1) keeps an empty dataset from raising ZeroDivisionError.
        training_loss /= max(len(train_loader.dataset), 1)

        model.eval()
        num_correct = 0
        num_examples = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)
                correct = (output.argmax(dim=1) == targets)
                num_correct += torch.sum(correct).item()
                num_examples += correct.shape[0]
        valid_loss /= max(len(val_loader.dataset), 1)
        # Report NaN rather than crashing when nothing was validated.
        accuracy = num_correct / num_examples if num_examples else float('nan')

        torch.save(
            {
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'num_classes': num_classes,
            },
            os.path.join(checkpoint_dir, f'checkpoint#{epoch}.pth'),
        )
        end_time = time.time()
        print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}, accuracy = {:.2f}, time elapsed: {:.2f}'.format(epoch, training_loss, valid_loss, accuracy, end_time - start_time))

In [11]:
def add_synthetic_noise(img_tensor, p=0.5, std=0.02):
    """Randomly add zero-mean Gaussian noise to a tensor and clamp it to [0, 1].

    The input tensor is never modified; a new tensor is always returned.

    Args:
        img_tensor: Floating-point tensor (``torch.randn_like`` requires a
            floating dtype).
        p: Probability of adding noise, decided by one ``random.random()`` draw.
        std: Factor the standard-normal noise is multiplied by.

    Returns:
        A tensor of the same shape with every value clamped to ``[0, 1]``.
    """
    if random.random() < p:
        # Out-of-place addition: ``+=`` would silently alter the caller's tensor.
        img_tensor = img_tensor + torch.randn_like(img_tensor) * std
    return torch.clamp(img_tensor, 0, 1)

In [12]:
# Seed the RNGs this pipeline draws from (``random`` in add_synthetic_noise,
# torch for noise generation and DataLoader shuffling) so that a fresh
# "Restart & Run All" reproduces the same augmentations and batch order.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Per-channel normalization statistics shared by both pipelines
# (presumably the ImageNet statistics the pretrained weights expect -- confirm).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    # Downscale to 112x112, then upscale back to 224x224
    # (presumably to imitate low-resolution input -- TODO confirm intent).
    transforms.Resize((112, 112)),
    transforms.Resize((224, 224)),
    transforms.RandomResizedCrop(224, scale=(0.6, 1.0), ratio=(0.75, 1.33)),
    # NOTE(review): RandomApply gates the whole list below with one p=0.8 draw;
    # confirm that all-or-nothing (rather than per-transform) is intended.
    transforms.RandomApply([
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.2),
        transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 2.0)),
        transforms.RandomRotation(15),
        transforms.RandomPerspective(distortion_scale=0.4, p=1.0),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10)
    ], p=0.8),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),
    # Noise is added before Normalize because add_synthetic_noise clamps to [0, 1].
    transforms.Lambda(add_synthetic_noise),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)
])

# Validation images are only resized and normalized (no random augmentation).
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)
])

csv_path = '../mtgdb.csv'

# Every image is visited 20 times per epoch, each time with fresh augmentation.
train_data = Dataset(csv_path, train_transform, augment_factor=20)
# NOTE(review): validation reads the same CSV as training, so the reported
# validation metrics are computed on un-augmented copies of the training
# images, not on held-out data.
val_data = Dataset(csv_path, val_transform, augment_factor=1)

train_data_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_data_loader = DataLoader(val_data, batch_size=32, shuffle=False)

In [13]:
# Start from the default pretrained ResNet-18 weights.
transfer_model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Freeze every pretrained parameter whose name does not contain "bn"; only
# parameters with "bn" in their name keep receiving gradients.
# NOTE(review): the match is purely name-based, so a normalization layer
# registered under a name without "bn" would be frozen as well -- confirm.
for name, param in transfer_model.named_parameters():
    if "bn" in name:
        continue
    param.requires_grad = False

# Swap the original classifier for a new two-layer head with 528 outputs.
# It is created after the freeze loop, so all of its parameters stay trainable.
head_in_features = transfer_model.fc.in_features
transfer_model.fc = nn.Sequential(
    nn.Linear(head_in_features, 512),
    nn.ReLU(inplace=True),
    nn.Dropout(0.5),
    nn.Linear(512, 528),
)

In [14]:
# Pick the GPU when one is available, otherwise fall back to the CPU, and move
# the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transfer_model.to(device)

# Cross-entropy objective, optimised with Adam at a learning rate of 1e-3.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(transfer_model.parameters(), lr=0.001)

# Run 30 epochs; train() prints one summary line per epoch.
train(
    model=transfer_model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_loader=train_data_loader,
    val_loader=val_data_loader,
    epochs=30,
    device=device,
)

using device: cpu
Epoch: 1, Training Loss: 6.03, Validation Loss: 5.57, accuracy = 0.05, time elapsed: 557.86
Epoch: 2, Training Loss: 4.74, Validation Loss: 3.34, accuracy = 0.36, time elapsed: 566.46
Epoch: 3, Training Loss: 3.03, Validation Loss: 1.81, accuracy = 0.61, time elapsed: 579.95
Epoch: 4, Training Loss: 2.08, Validation Loss: 1.06, accuracy = 0.74, time elapsed: 566.21
Epoch: 5, Training Loss: 1.54, Validation Loss: 0.70, accuracy = 0.83, time elapsed: 572.96
Epoch: 6, Training Loss: 1.23, Validation Loss: 0.49, accuracy = 0.88, time elapsed: 538.44
Epoch: 7, Training Loss: 1.01, Validation Loss: 0.42, accuracy = 0.88, time elapsed: 576.33
Epoch: 8, Training Loss: 0.87, Validation Loss: 0.28, accuracy = 0.93, time elapsed: 579.62
Epoch: 9, Training Loss: 0.79, Validation Loss: 0.35, accuracy = 0.89, time elapsed: 583.65
Epoch: 10, Training Loss: 0.73, Validation Loss: 0.22, accuracy = 0.92, time elapsed: 587.29
Epoch: 11, Training Loss: 0.67, Validation Loss: 0.21, accura

KeyboardInterrupt: 