# Visual Transformer with Linformer

Training Visual Transformer on *Dogs vs Cats Data*

* Dogs vs. Cats Redux: Kernels Edition - https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition
* Base Code - https://www.kaggle.com/reukki/pytorch-cnn-tutorial-with-cats-and-dogs/
* Effecient Attention Implementation - https://github.com/lucidrains/vit-pytorch#efficient-attention

In [1]:
%pip install --upgrade tqdm

Collecting tqdm
  Downloading tqdm-4.62.2-py2.py3-none-any.whl (76 kB)
[K     |████████████████████████████████| 76 kB 633 kB/s eta 0:00:01
[?25hInstalling collected packages: tqdm
  Attempting uninstall: tqdm
    Found existing installation: tqdm 4.61.1
    Uninstalling tqdm-4.61.1:
      Successfully uninstalled tqdm-4.61.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-cloud 0.1.13 requires tensorflow<3.0,>=1.15.0, which is not installed.[0m
Successfully installed tqdm-4.62.2
Note: you may need to restart the kernel to use updated packages.


## Import Libraries

In [2]:
from __future__ import print_function

import glob
from itertools import chain
import os
import random
import zipfile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

In [3]:
%pip install timm

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[K     |████████████████████████████████| 376 kB 626 kB/s eta 0:00:01
Installing collected packages: timm
Successfully installed timm-0.4.12
Note: you may need to restart the kernel to use updated packages.


In [4]:
%pip install einops

Collecting einops
  Downloading einops-0.3.2-py3-none-any.whl (25 kB)
Installing collected packages: einops
Successfully installed einops-0.3.2
Note: you may need to restart the kernel to use updated packages.


In [7]:
import sys
sys.path
sys.path.append('/kaggle/input/pvt-conv-code')
from pvt_conv import PvtConv

In [8]:
print(f"Torch: {torch.__version__}")

Torch: 1.7.0


In [9]:
# Training settings
batch_size = 64
epochs = 20
lr = 3e-5
gamma = 0.7
seed = 42

In [10]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed)

In [11]:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

## Load Data

In [15]:
os.makedirs('data', exist_ok=True)

In [12]:
train_dir = 'data/train'
test_dir = 'data/test'

In [16]:
with zipfile.ZipFile('/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip') as train_zip:
    train_zip.extractall('data')
    
with zipfile.ZipFile('/kaggle/input/dogs-vs-cats-redux-kernels-edition//test.zip') as test_zip:
    test_zip.extractall('data')

In [17]:
train_list = glob.glob(os.path.join(train_dir,'*.jpg'))
test_list = glob.glob(os.path.join(test_dir, '*.jpg'))

In [18]:
print(f"Train Data: {len(train_list)}")
print(f"Test Data: {len(test_list)}")

Train Data: 25000
Test Data: 12500


In [19]:
labels = [path.split('/')[-1].split('.')[0] for path in train_list]

## Random Plots

In [20]:
random_idx = np.random.randint(1, len(train_list), size=9)
fig, axes = plt.subplots(3, 3, figsize=(16, 12))

for idx, ax in enumerate(axes.ravel()):
    img = Image.open(train_list[idx])
    ax.set_title(labels[idx])
    ax.imshow(img)


SyntaxError: invalid syntax (<ipython-input-20-0c61af6564c9>, line 1)

## Split

In [21]:
train_list, valid_list = train_test_split(train_list, 
                                          test_size=0.2,
                                          stratify=labels,
                                          random_state=seed)

In [22]:
print(f"Train Data: {len(train_list)}")
print(f"Validation Data: {len(valid_list)}")
print(f"Test Data: {len(test_list)}")

Train Data: 20000
Validation Data: 5000
Test Data: 12500


## Image Augumentation

In [24]:
train_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

val_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)


test_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]
)


## Load Datasets

In [25]:
class CatsDogsDataset(Dataset):
    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path)
        img_transformed = self.transform(img)

        label = img_path.split("/")[-1].split(".")[0]
        label = 1 if label == "dog" else 0

        return img_transformed, label


In [26]:
train_data = CatsDogsDataset(train_list, transform=train_transforms)
valid_data = CatsDogsDataset(valid_list, transform=test_transforms)
test_data = CatsDogsDataset(test_list, transform=test_transforms)

In [27]:
train_loader = DataLoader(dataset = train_data, batch_size=batch_size, shuffle=True )
valid_loader = DataLoader(dataset = valid_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset = test_data, batch_size=batch_size, shuffle=True)

In [28]:
print(len(train_data), len(train_loader))

20000 313


In [29]:
print(len(valid_data), len(valid_loader))

5000 79


## Effecient Attention

In [30]:
model = PvtConv(num_classes=2).to(device)

### Training

In [34]:
import torch.optim as optim

In [35]:
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

In [36]:
for epoch in range(epochs):
    epoch_loss = 0
    epoch_accuracy = 0

    for data, label in tqdm(train_loader):
        data = data.to(device)
        label = label.to(device)

        output = model(data)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (output.argmax(dim=1) == label).float().mean()
        epoch_accuracy += acc / len(train_loader)
        epoch_loss += loss / len(train_loader)

    with torch.no_grad():
        epoch_val_accuracy = 0
        epoch_val_loss = 0
        for data, label in valid_loader:
            data = data.to(device)
            label = label.to(device)

            val_output = model(data)
            val_loss = criterion(val_output, label)

            acc = (val_output.argmax(dim=1) == label).float().mean()
            epoch_val_accuracy += acc / len(valid_loader)
            epoch_val_loss += val_loss / len(valid_loader)

    print(
        f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
    )


  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 1 - loss : 0.7089 - acc: 0.5011 - val_loss : 0.7007 - val_acc: 0.5196



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 2 - loss : 0.6918 - acc: 0.5221 - val_loss : 0.6822 - val_acc: 0.5348



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 3 - loss : 0.6849 - acc: 0.5495 - val_loss : 0.6731 - val_acc: 0.5791



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 4 - loss : 0.6832 - acc: 0.5526 - val_loss : 0.6737 - val_acc: 0.5763



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 5 - loss : 0.6738 - acc: 0.5758 - val_loss : 0.6641 - val_acc: 0.6003



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 6 - loss : 0.6596 - acc: 0.6048 - val_loss : 0.6558 - val_acc: 0.5979



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 7 - loss : 0.6421 - acc: 0.6283 - val_loss : 0.6207 - val_acc: 0.6752



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 8 - loss : 0.6191 - acc: 0.6532 - val_loss : 0.5685 - val_acc: 0.7011



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 9 - loss : 0.5959 - acc: 0.6740 - val_loss : 0.5650 - val_acc: 0.7055



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 10 - loss : 0.5805 - acc: 0.6883 - val_loss : 0.5425 - val_acc: 0.7166



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 11 - loss : 0.5659 - acc: 0.7036 - val_loss : 0.5168 - val_acc: 0.7482



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 12 - loss : 0.5543 - acc: 0.7086 - val_loss : 0.5050 - val_acc: 0.7599



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 13 - loss : 0.5463 - acc: 0.7166 - val_loss : 0.4920 - val_acc: 0.7644



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 14 - loss : 0.5365 - acc: 0.7280 - val_loss : 0.4920 - val_acc: 0.7609



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 15 - loss : 0.5270 - acc: 0.7334 - val_loss : 0.5030 - val_acc: 0.7563



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 16 - loss : 0.5223 - acc: 0.7368 - val_loss : 0.4919 - val_acc: 0.7607



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 17 - loss : 0.5106 - acc: 0.7455 - val_loss : 0.4604 - val_acc: 0.7811



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 18 - loss : 0.5064 - acc: 0.7470 - val_loss : 0.4696 - val_acc: 0.7733



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 19 - loss : 0.5018 - acc: 0.7528 - val_loss : 0.4638 - val_acc: 0.7769



  0%|          | 0/313 [00:00<?, ?it/s]

Epoch : 20 - loss : 0.4964 - acc: 0.7532 - val_loss : 0.4558 - val_acc: 0.7826

