In [1]:
!pip -q install vit_pytorch linformer

[0m

In [2]:
from __future__ import print_function

import glob
from itertools import chain
import os
import random
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from linformer import Linformer
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

from vit_pytorch.efficient import ViT

In [3]:
# Training settings
batch_size = 256
epochs = 20
lr = 0.0003
gamma = 0.7
seed = 42

In [4]:
device = 'cuda'

In [5]:
train_dir = '../input/iais22-birds/birds/birds'
test_dir = '../input/iais22-birds/submission_test'

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed)

In [7]:
def get_classes(data_dir):
    all_data = datasets.ImageFolder(data_dir)
    return all_data.classes

In [8]:
train_list = []
classes = get_classes(train_dir)
for c in classes:
    train_list = train_list + (glob.glob(os.path.join(train_dir+"/"+c,'*.jpg')))
    
print(f"Train Data: {len(train_list)}")

Train Data: 58388


In [9]:
labels = [path.split('/')[-2] for path in train_list]

In [10]:
train_list, valid_list = train_test_split(train_list, 
                                          test_size=0.2,
                                          stratify=labels,
                                          random_state=seed)

labels2 = [path.split('/')[-2] for path in valid_list]

test_list, valid_list = train_test_split(valid_list, 
                                          test_size=0.5,
                                          stratify=labels2,
                                          random_state=seed)

In [11]:
print(f"Train Data: {len(train_list)}")
print(f"Validation Data: {len(valid_list)}")
print(f"Test Data: {len(test_list)}")

Train Data: 46710
Validation Data: 5839
Test Data: 5839


In [12]:
train_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

val_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)


test_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)

In [13]:
class BirdsDataset(Dataset):
    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        img_transformed = self.transform(img)

        label = img_path.split("/")[-2]
        label = classes.index(label)

        return img_transformed, label

In [14]:
train_data = BirdsDataset(train_list, transform=train_transforms)
valid_data = BirdsDataset(valid_list, transform=val_transforms)
test_data = BirdsDataset(test_list, transform=test_transforms)

In [15]:
train_loader = DataLoader(dataset = train_data, batch_size=batch_size, shuffle=True, num_workers=2)
valid_loader = DataLoader(dataset = valid_data, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset = test_data, batch_size=batch_size, shuffle=True, num_workers=2)

In [16]:
print(len(train_data), len(train_loader))

46710 183


In [17]:
print(len(valid_data), len(valid_loader))

5839 23


In [18]:
#Esta funcion se ha importado
efficient_transformer = Linformer(
    dim=224,
    seq_len=49+1,  # 7x7 patches + 1 cls-token
    depth=12,
    heads=8,
    k=64
)

In [19]:
model = ViT(
    dim=224,
    image_size=224,
    patch_size=32,
    num_classes=400,
    transformer=efficient_transformer,
    channels=3,
).to(device)

In [20]:
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

In [21]:
for epoch in range(epochs):
    epoch_loss = 0
    epoch_accuracy = 0

    for data, label in tqdm(train_loader):
        data = data.to(device)
        label = label.to(device)

        output = model(data)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (output.argmax(dim=1) == label).float().mean()
        epoch_accuracy += acc / len(train_loader)
        epoch_loss += loss / len(train_loader)

    with torch.no_grad():
        epoch_val_accuracy = 0
        epoch_val_loss = 0
        for data, label in valid_loader:
            data = data.to(device)
            label = label.to(device)

            val_output = model(data)
            val_loss = criterion(val_output, label)

            acc = (val_output.argmax(dim=1) == label).float().mean()
            epoch_val_accuracy += acc / len(valid_loader)
            epoch_val_loss += val_loss / len(valid_loader)

    print(
        f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
    )

  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 1 - loss : 5.9438 - acc: 0.0064 - val_loss : 5.4919 - val_acc: 0.0214



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 2 - loss : 5.2126 - acc: 0.0389 - val_loss : 4.7243 - val_acc: 0.0785



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 3 - loss : 4.7513 - acc: 0.0777 - val_loss : 4.2786 - val_acc: 0.1386



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 4 - loss : 4.4386 - acc: 0.1137 - val_loss : 3.9987 - val_acc: 0.1782



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 5 - loss : 4.1833 - acc: 0.1478 - val_loss : 3.7227 - val_acc: 0.2189



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 6 - loss : 3.9599 - acc: 0.1793 - val_loss : 3.5451 - val_acc: 0.2439



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 7 - loss : 3.7621 - acc: 0.2100 - val_loss : 3.2839 - val_acc: 0.2910



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 8 - loss : 3.5792 - acc: 0.2426 - val_loss : 3.1150 - val_acc: 0.3166



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 9 - loss : 3.4094 - acc: 0.2707 - val_loss : 2.9634 - val_acc: 0.3469



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 10 - loss : 3.2613 - acc: 0.2967 - val_loss : 2.8248 - val_acc: 0.3709



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 11 - loss : 3.1135 - acc: 0.3234 - val_loss : 2.6775 - val_acc: 0.3948



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 12 - loss : 2.9910 - acc: 0.3449 - val_loss : 2.5659 - val_acc: 0.4106



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 13 - loss : 2.8688 - acc: 0.3675 - val_loss : 2.4629 - val_acc: 0.4397



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 14 - loss : 2.7557 - acc: 0.3892 - val_loss : 2.3659 - val_acc: 0.4626



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 15 - loss : 2.6293 - acc: 0.4134 - val_loss : 2.2841 - val_acc: 0.4750



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 16 - loss : 2.5127 - acc: 0.4378 - val_loss : 2.2009 - val_acc: 0.4920



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 17 - loss : 2.4148 - acc: 0.4575 - val_loss : 2.1353 - val_acc: 0.5034



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 18 - loss : 2.3163 - acc: 0.4737 - val_loss : 2.0662 - val_acc: 0.5149



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 19 - loss : 2.2178 - acc: 0.4953 - val_loss : 2.0109 - val_acc: 0.5287



  0%|          | 0/183 [00:00<?, ?it/s]

Epoch : 20 - loss : 2.1181 - acc: 0.5132 - val_loss : 1.9466 - val_acc: 0.5455



In [22]:
def test(model):
  test_loss = 0.0
  class_correct = list(0. for i in range(len(classes)))
  class_total = list(0. for i in range(len(classes)))

  model.eval()

  for data, target in tqdm(test_loader):
      if torch.cuda.is_available(): 
          data, target = data.cuda(), target.cuda()
      with torch.no_grad():
        output = model(data)
        loss = criterion(output, target)
      test_loss += loss.item()*data.size(0)
      _, pred = torch.max(output, 1)    
      correct_tensor = pred.eq(target.data.view_as(pred))
      correct = np.squeeze(correct_tensor.numpy()) if not torch.cuda.is_available() else np.squeeze(correct_tensor.cpu().numpy())
      if len(target) == 64:
        for i in range(64):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

  test_loss = test_loss/len(test_loader.dataset)
  print('Test Loss: {:.6f}\n'.format(test_loss))

  for i in range(len(classes)):
      if class_total[i] > 0:
          print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
              classes[i], 100 * class_correct[i] / class_total[i],
              np.sum(class_correct[i]), np.sum(class_total[i])))
      else:
          print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

  print('\nTest Accuracy (Overall): {:.4f} ({}/{})'.format(
      100. * np.sum(class_correct) / np.sum(class_total),
      np.sum(class_correct), np.sum(class_total)))

In [23]:
test(model)

  0%|          | 0/23 [00:00<?, ?it/s]

Test Loss: 1.933814

Test Accuracy of ABBOTTS BABBLER: N/A (no training examples)
Test Accuracy of ABBOTTS BOOBY: N/A (no training examples)
Test Accuracy of ABYSSINIAN GROUND HORNBILL: N/A (no training examples)
Test Accuracy of AFRICAN CROWNED CRANE: N/A (no training examples)
Test Accuracy of AFRICAN EMERALD CUCKOO: N/A (no training examples)
Test Accuracy of AFRICAN FIREFINCH: N/A (no training examples)
Test Accuracy of AFRICAN OYSTER CATCHER: N/A (no training examples)
Test Accuracy of ALBATROSS: N/A (no training examples)
Test Accuracy of ALBERTS TOWHEE: N/A (no training examples)
Test Accuracy of ALEXANDRINE PARAKEET: N/A (no training examples)
Test Accuracy of ALPINE CHOUGH: N/A (no training examples)
Test Accuracy of ALTAMIRA YELLOWTHROAT: N/A (no training examples)
Test Accuracy of AMERICAN AVOCET: N/A (no training examples)
Test Accuracy of AMERICAN BITTERN: N/A (no training examples)
Test Accuracy of AMERICAN COOT: N/A (no training examples)
Test Accuracy of AMERICAN GOLDFI

