## **Import frameworks and libraries**

In [2]:
import shutil
import os
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.transforms import transforms
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from collections import Counter
from tqdm.auto import tqdm

# One seed value shared by every torch generator seeded below.
SEED = 42

# Prefer the GPU when torch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

# Seed the default generator, the current CUDA device, and all CUDA devices.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [3]:
from google.colab import files
file = files.upload() # kaggel.json
os.makedirs("/root/.kaggle", exist_ok=True)
shutil.move("kaggle.json", "/root/.kaggle/")
!kaggle competitions download -c aio-hutech
os.mkdir("data/")
!unzip aio-hutech.zip -d data

Saving kaggle.json to kaggle.json
Archive:  aio-hutech.zip
  inflating: data/submission.csv     
  inflating: data/test/001.jpg       
  inflating: data/test/002.jpg       
  inflating: data/test/003.jpg       
  inflating: data/test/004.jpg       
  inflating: data/test/005.jpg       
  inflating: data/test/006.jpg       
  inflating: data/test/007.jpg       
  inflating: data/test/008.jpg       
  inflating: data/test/009.jpg       
  inflating: data/test/010.jpg       
  inflating: data/test/011.jpg       
  inflating: data/test/012.jpg       
  inflating: data/test/013.jpg       
  inflating: data/test/014.jpg       
  inflating: data/test/015.jpg       
  inflating: data/test/016.jpg       
  inflating: data/test/017.jpg       
  inflating: data/test/018.jpg       
  inflating: data/test/019.jpg       
  inflating: data/test/020.jpg       
  inflating: data/test/021.jpg       
  inflating: data/test/022.jpg       
  inflating: data/test/023.jpg       
  inflating: data/test/024.jp

## **Create Dataset**

In [4]:
# Define Dataset
class AugmentedDataset(Dataset):
  """Image-folder dataset that yields every image once per transform.

  The dataset behaves as ``len(transform_list)`` copies of the underlying
  ImageFolder laid end to end: index ``idx`` selects image
  ``idx % n_images`` and applies transform ``idx // n_images`` to it.

  Args:
    root: Directory passed to ``torchvision.datasets.ImageFolder`` (one
      sub-directory per class).
    transform_list: Sequence of callables; each receives the decoded image
      tensor and returns the transformed image.
  """

  def __init__(self, root, transform_list):
    super().__init__()
    # ImageFolder is used only for its (path, class_index) listing; decoding
    # and transforming happen in __getitem__.
    self.dataset = torchvision.datasets.ImageFolder(root=root,
                                                    transform=None,
                                                    target_transform=None)
    self.transform_list = transform_list

  def __len__(self):
    """Return the number of (image, transform) combinations."""
    return len(self.transform_list) * len(self.dataset.samples)

  def __getitem__(self, idx):
    """Return ``(transformed_image, class_index)`` for the virtual index ``idx``."""
    samples = self.dataset.samples

    transform_idx, sample_idx = divmod(idx, len(samples))
    # Look the transform up first so an out-of-range idx raises IndexError
    # before any file is read.
    transform = self.transform_list[transform_idx]
    path, label = samples[sample_idx]

    # Force 3-channel decoding: read_image otherwise keeps the file's native
    # channel count, and grayscale/RGBA files would not match a 3-channel
    # Normalize in the transforms.
    image = torchvision.io.read_image(path=path,
                                      mode=torchvision.io.ImageReadMode.RGB)

    return transform(image), label

class TestDataset(Dataset):
  """Unlabelled dataset over every file in a flat directory.

  Files are served in sorted file-name order and each item carries the
  placeholder label ``-1``.

  Args:
    data_dir: Directory whose entries are all treated as image files.
    transform: Optional callable applied to each decoded image tensor.

  Attributes:
    file_names: Sorted directory entries.
    names: ``file_names`` with the final extension removed (used as ids).
  """

  def __init__(self, data_dir, transform=None):
    super().__init__()

    self.data_dir = data_dir
    self.file_names = sorted(os.listdir(data_dir))
    # splitext drops only the last extension, so an id such as 'a.b.jpg'
    # stays 'a.b' instead of being truncated to 'a'.
    self.names = [os.path.splitext(name)[0] for name in self.file_names]

    self.transform = transform

  def __len__(self):
    """Return the number of files found in ``data_dir``."""
    return len(self.names)

  def __getitem__(self, idx):
    """Return ``(image, -1)`` for the ``idx``-th file in sorted order."""
    # Force 3-channel decoding so grayscale/RGBA files still match a
    # 3-channel Normalize in the transform.
    image = torchvision.io.read_image(path=os.path.join(self.data_dir, self.file_names[idx]),
                                      mode=torchvision.io.ImageReadMode.RGB)

    if self.transform is not None:
      image = self.transform(image)

    return image, -1

## **Necessary Methods**

In [31]:
def load_all_data(data_dir, batch_size=32, image_size=(128, 128)):
  """
  Only create train_loader

  Builds an AugmentedDataset over ``data_dir`` with three views of every
  image (plain resize, random flips + random resized crop, resize + colour
  jitter) and wraps it in a shuffled DataLoader.

  Args:
    data_dir: Root of the class-per-folder training images.
    batch_size: Number of samples per batch (default 32).
    image_size: ``(height, width)`` every view is resized/cropped to
      (default ``(128, 128)``).

  Returns:
    A DataLoader over the augmented dataset with ``shuffle=True``.
  """
  # Per-channel normalisation constants shared by all three views.
  # NOTE(review): presumably the training-set channel statistics - confirm.
  mean = [0.439673, 0.394798, 0.360261]
  std = [0.184119, 0.177669, 0.170195]

  def build_pipeline(*steps):
    # Every view converts to float32 first and normalises last; only the
    # middle steps differ.
    return transforms.Compose([
        transforms.ConvertImageDtype(torch.float32),
        *steps,
        transforms.Normalize(mean=mean, std=std),
    ])

  default_transforms = build_pipeline(
      transforms.Resize(image_size),
  )

  transform1 = build_pipeline(
      transforms.RandomHorizontalFlip(),
      transforms.RandomVerticalFlip(),
      transforms.RandomResizedCrop(size=image_size, scale=(0.9, 1)),
  )

  transform2 = build_pipeline(
      transforms.Resize(size=image_size),
      transforms.ColorJitter(brightness=0.2, contrast=0.2),
  )

  dataset = AugmentedDataset(root=data_dir,
                             transform_list=[default_transforms, transform1, transform2])

  train_loader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=True)

  return train_loader

def create_effnetb6(num_classes=4):
  """Create an ImageNet-pretrained EfficientNet-B6 with a new classifier head.

  The stock classifier is replaced by a three-layer MLP
  (features -> 1024 -> 512 -> ``num_classes``) with dropout, batch norm and
  ReLU after each of the first two linear layers.

  Args:
    num_classes: Number of output logits (default 4).

  Returns:
    The torchvision EfficientNet-B6 model with the replaced ``classifier``.
  """
  weights = torchvision.models.EfficientNet_B6_Weights.IMAGENET1K_V1

  model = torchvision.models.efficientnet_b6(weights=weights)

  # Take the feature width from the stock head's final Linear layer instead
  # of hard-coding it (2304 for B6), so the head always matches the backbone.
  in_features = model.classifier[-1].in_features

  model.classifier = nn.Sequential(
      nn.Linear(in_features=in_features, out_features=1024),
      nn.Dropout(p=0.5),
      nn.BatchNorm1d(num_features=1024),
      nn.ReLU(),
      nn.Linear(in_features=1024, out_features=512),
      nn.Dropout(p=0.5),
      nn.BatchNorm1d(num_features=512),
      nn.ReLU(),
      nn.Linear(in_features=512, out_features=num_classes)
  )

  return model

def train_all(model : nn.Module,
          train_loader : DataLoader,
          loss_fn : nn.Module,
          optimizer : torch.optim.Optimizer,
          num_epochs,
          device=device,
          test_root='data'):
  """Train ``model`` and evaluate it with ``test`` after every epoch.

  The learning rate is decayed by a factor of 0.95 after each epoch
  (ExponentialLR).

  Args:
    model: Network to train; moved to ``device`` in place.
    train_loader: Yields ``(inputs, targets)`` batches.
    loss_fn: Callable computing the loss from ``(logits, targets)``.
    optimizer: Optimizer already bound to ``model``'s parameters.
    num_epochs: Number of passes over ``train_loader``.
    device: Device used for the model and every batch.
    test_root: Directory handed to ``test`` for the per-epoch evaluation
      (default ``'data'``).

  Returns:
    List with the value returned by ``test`` after each epoch.
  """

  model.to(device)
  scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
  epoch_scores = []
  for _ in tqdm(range(num_epochs)):
    # test() leaves the model in eval mode, so training mode is restored at
    # the start of every epoch.
    model.train()
    for X, y in train_loader:
      X, y = X.to(device), y.to(device)

      y_logits = model(X)
      loss = loss_fn(y_logits, y)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    scheduler.step()
    epoch_scores.append(test(model, test_root))

  return epoch_scores

def test(model, root):
  """
    Predict and log

    Runs ``model`` over every image in ``<root>/test``, writes the mapped
    predictions to ``<root>/submission.csv`` (columns ``id``, ``type``) and
    returns the fraction of predictions matching a hard-coded label vector.

    Args:
      model: Classifier producing one logit per class; it is switched to
        eval mode and left that way.
      root: Directory containing the ``test`` folder; the submission file is
        written here too.

    Returns:
      Accuracy as a float in [0, 1] against the hard-coded labels.
  """

  manual_transforms = transforms.Compose([
                          transforms.ConvertImageDtype(torch.float32),
                          transforms.Resize((128, 128)),
                          transforms.Normalize(mean=[0.439673, 0.394798, 0.360261], std=[0.184119, 0.177669, 0.170195]),
                      ])
  test_ds = TestDataset(data_dir=os.path.join(root, 'test'),
                        transform=manual_transforms)

  test_loader = DataLoader(dataset=test_ds,
                           batch_size=32,
                           shuffle=False)

  # Feed batches to whatever device the model already lives on, so the
  # function also works when the model was placed on a device other than the
  # notebook-level default. Parameter-less models fall back to that default.
  first_param = next(model.parameters(), None)
  run_device = first_param.device if first_param is not None else device

  preds = []
  model.eval()

  with torch.inference_mode():
    for X, _ in test_loader:
      y_logits = model(X.to(run_device))
      # argmax over raw logits: softmax is monotonic, so it would not change
      # which class wins.
      preds.extend(torch.argmax(y_logits, dim=-1).cpu().tolist())

  # Translate the model's class indices into the label ids written to the
  # submission file.
  # NOTE(review): presumably ImageFolder index -> competition label; confirm
  # against the training folder names.
  class_mapping = {
    0 : 1,
    1 : 3,
    2 : 0,
    3 : 2
  }
  preds = [class_mapping[label] for label in preds]

  df = pd.DataFrame(data = {
      'id' : test_ds.names,
      'type' : preds
  })
  df.set_index('id').to_csv(os.path.join(root, "submission.csv"))

  # NOTE(review): the reference labels are hard-coded as 50 images per class
  # in the order 1, 2, 3, 0, so this assumes exactly 200 test images in that
  # sorted order - confirm. A different test-set size fails the comparison.
  test_labels = torch.tensor([[1]*50, [2]*50, [3]*50, [0]*50]).reshape(-1)
  return (test_labels == torch.tensor(preds)).sum().item() / len(test_labels)

## **Train and Test**

In [None]:
# Release cached CUDA memory before building a new model.
torch.cuda.empty_cache()

# Re-seed here so this cell gives the same initialisation and shuffling order
# every time it is run.
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)

train_loader = load_all_data('data/train')

model = create_effnetb6()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=5e-4,
)

# Train for 20 epochs; train_all returns one evaluation result per epoch.
preds = train_all(model, train_loader, loss_fn, optimizer, 20, device)

# Show the per-epoch results, then run one last evaluation with the final
# weights (its return value is displayed as the cell output).
print(preds)
test(model, 'data')

  0%|          | 0/20 [00:00<?, ?it/s]