[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/RustamyF/vision-transformer/blob/master/vit_example.ipynb)


In [1]:
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
    -O pets


--2023-02-14 21:00:57--  https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.68.128, 74.125.24.128, 142.250.4.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.68.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘pets’


2023-02-14 21:01:01 (16.0 MB/s) - ‘pets’ saved [68606236/68606236]



In [2]:
!git clone https://github.com/RustamyF/vision-transformer.git

Cloning into 'vision-transformer'...
remote: Enumerating objects: 30, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 30 (delta 7), reused 28 (delta 5), pack-reused 0[K
Unpacking objects: 100% (30/30), 118.22 KiB | 9.09 MiB/s, done.


In [12]:
# !unzip pets
# !rm pets
# !mv -v cats_and_dogs_filtered/validation/dogs/* cats_and_dogs_filtered/train/dogs/
# !mv -v cats_and_dogs_filtered/validation/cats/* cats_and_dogs_filtered/train/cats/

In [5]:
from vision_tr.simple_vit import Transformer

In [11]:
import torch.nn as nn
import torch
import torch.optim as optim

from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.model_selection import train_test_split

import os


class LoadData:
    def __init__(self):
        self.cat_path = 'cats_and_dogs_filtered/train/cats'
        self.dog_path = 'cats_and_dogs_filtered/train/dogs'

    def delete_non_jpeg_files(self, directory):
        for filename in os.listdir(directory):
            if not filename.endswith('.jpg') and not filename.endswith('.jpeg'):
                file_path = os.path.join(directory, filename)
                try:
                    if os.path.isfile(file_path) or os.path.islink(file_path):
                        os.unlink(file_path)
                    elif os.path.isdir(file_path):
                        shutil.rmtree(file_path)
                    print('deleted', file_path)
                except Exception as e:
                    print('Failed to delete %s. Reason: %s' % (file_path, e))

    def data(self):
        self.delete_non_jpeg_files(self.dog_path)
        self.delete_non_jpeg_files(self.cat_path)

        dog_list = os.listdir(self.dog_path)
        dog_list = [(os.path.join(self.dog_path, i), 1) for i in dog_list]

        cat_list = os.listdir(self.cat_path)
        cat_list = [(os.path.join(self.cat_path, i), 0) for i in cat_list]

        total_list = cat_list + dog_list

        train_list, test_list = train_test_split(total_list, test_size=0.2)
        train_list, val_list = train_test_split(train_list, test_size=0.2)
        print('train list', len(train_list))
        print('test list', len(test_list))
        print('val list', len(val_list))
        return train_list, test_list, val_list


# data Augumentation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])


class dataset(torch.utils.data.Dataset):

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    # dataset length
    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    # load an one of images
    def __getitem__(self, idx):
        img_path, label = self.file_list[idx]
        img = Image.open(img_path).convert('RGB')
        img_transformed = self.transform(img)
        return img_transformed, label


class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.fc1 = nn.Linear(3 * 3 * 64, 10)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(10, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.relu(self.fc1(out))
        out = self.fc2(out)
        return out


if __name__ == "__main__":
    lr = 0.001  # learning_rate
    batch_size = 100  # we will use mini-batch method
    epochs = 10  # How much to train a model

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    torch.manual_seed(1234)
    if device == 'cuda':
        torch.cuda.manual_seed_all(1234)

    print(device)

    load_data = LoadData()

    train_list, test_list, val_list = load_data.data()

    train_data = dataset(train_list, transform=transform)
    test_data = dataset(test_list, transform=transform)
    val_data = dataset(val_list, transform=transform)

    train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)

    model = Cnn().to(device)
    model.train()

    optimizer = optim.Adam(params=model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        epoch_loss = 0
        epoch_accuracy = 0

        for data, label in train_loader:
            data = data.to(device)
            label = label.to(device)

            output = model(data)
            loss = criterion(output, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = ((output.argmax(dim=1) == label).float().mean())
            epoch_accuracy += acc / len(train_loader)
            epoch_loss += loss / len(train_loader)

        print('Epoch : {}, train accuracy : {}, train loss : {}'.format(epoch + 1, epoch_accuracy, epoch_loss))

        with torch.no_grad():
            epoch_val_accuracy = 0
            epoch_val_loss = 0
            for data, label in val_loader:
                data = data.to(device)
                label = label.to(device)

                val_output = model(data)
                val_loss = criterion(val_output, label)

                acc = ((val_output.argmax(dim=1) == label).float().mean())
                epoch_val_accuracy += acc / len(val_loader)
                epoch_val_loss += val_loss / len(val_loader)

            print('Epoch : {}, val_accuracy : {}, val_loss : {}'.format(epoch + 1, epoch_val_accuracy, epoch_val_loss))


cuda
train list 1920
test list 600
val list 480
Epoch : 1, train accuracy : 0.565000057220459, train loss : 0.6857559680938721
Epoch : 1, val_accuracy : 0.6000000238418579, val_loss : 0.6539488434791565
Epoch : 2, train accuracy : 0.6114999651908875, train loss : 0.6514831781387329
Epoch : 2, val_accuracy : 0.6330000162124634, val_loss : 0.6340216398239136
Epoch : 3, train accuracy : 0.6214999556541443, train loss : 0.6426859498023987
Epoch : 3, val_accuracy : 0.6399999856948853, val_loss : 0.6335686445236206
Epoch : 4, train accuracy : 0.6580000519752502, train loss : 0.6204438209533691
Epoch : 4, val_accuracy : 0.6539999842643738, val_loss : 0.6156217455863953
Epoch : 5, train accuracy : 0.652999997138977, train loss : 0.6182745099067688
Epoch : 5, val_accuracy : 0.6654999852180481, val_loss : 0.6166585683822632
Epoch : 6, train accuracy : 0.6804999113082886, train loss : 0.5977426171302795
Epoch : 6, val_accuracy : 0.6725000143051147, val_loss : 0.611417293548584
Epoch : 7, train ac

In [10]:
import torch.nn as nn
import torch
import torch.optim as optim
from vision_tr.simple_vit import ViT
# from vit_pytorch.efficient import ViT
from linformer import Linformer
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.model_selection import train_test_split

import os

# from linformer import Linformer
from torch.optim.lr_scheduler import StepLR
# from vit_pytorch.efficient import ViT


class LoadData:

    def __init__(self):
        self.cat_path = 'cats_and_dogs_filtered/train/cats'
        self.dog_path = 'cats_and_dogs_filtered/train/dogs'

    def delete_non_jpeg_files(self, directory):
        for filename in os.listdir(directory):
            if not filename.endswith('.jpg') and not filename.endswith('.jpeg'):
                file_path = os.path.join(directory, filename)
                try:
                    if os.path.isfile(file_path) or os.path.islink(file_path):
                        os.unlink(file_path)
                    elif os.path.isdir(file_path):
                        shutil.rmtree(file_path)
                    print('deleted', file_path)
                except Exception as e:
                    print('Failed to delete %s. Reason: %s' % (file_path, e))

    def data(self):
        self.delete_non_jpeg_files(self.dog_path)
        self.delete_non_jpeg_files(self.cat_path)

        dog_list = os.listdir(self.dog_path)
        dog_list = [(os.path.join(self.dog_path, i), 1) for i in dog_list]

        cat_list = os.listdir(self.cat_path)
        cat_list = [(os.path.join(self.cat_path, i), 0) for i in cat_list]

        total_list = cat_list + dog_list

        train_list, test_list = train_test_split(total_list, test_size=0.2)
        train_list, val_list = train_test_split(train_list, test_size=0.2)
        print('train list', len(train_list))
        print('test list', len(test_list))
        print('val list', len(val_list))
        return train_list, test_list, val_list


# data Augumentation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])


class dataset(torch.utils.data.Dataset):

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform

    # dataset length
    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    # load an one of images
    def __getitem__(self, idx):
        img_path, label = self.file_list[idx]
        img = Image.open(img_path).convert('RGB')
        img_transformed = self.transform(img)
        return img_transformed, label



if __name__ == "__main__":
    # Training settings
    batch_size = 64
    epochs = 20
    lr = 3e-5
    gamma = 0.7
    seed = 42

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    torch.manual_seed(1234)
    if device == 'cuda':
        torch.cuda.manual_seed_all(1234)

    print(device)

    load_data = LoadData()

    train_list, test_list, val_list = load_data.data()

    train_data = dataset(train_list, transform=transform)
    test_data = dataset(test_list, transform=transform)
    val_data = dataset(val_list, transform=transform)

    train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)
    model = ViT(
        image_size=224,
        patch_size=32,
        num_classes=2,
        dim=128,
        depth=12,
        heads=8,
        mlp_dim=1024,
        dropout=0.1,
        emb_dropout=0.1,
    ).to(device)

    # loss function
    criterion = nn.CrossEntropyLoss()
    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # scheduler
    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

    epochs = 20

    for epoch in range(epochs):
        epoch_loss = 0
        epoch_accuracy = 0

        for data, label in train_loader:
            data = data.to(device)
            label = label.to(device)

            output = model(data)
            loss = criterion(output, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = ((output.argmax(dim=1) == label).float().mean())
            epoch_accuracy += acc / len(train_loader)
            epoch_loss += loss / len(train_loader)

        print('Epoch : {}, train accuracy : {}, train loss : {}'.format(epoch + 1, epoch_accuracy, epoch_loss))

        with torch.no_grad():
            epoch_val_accuracy = 0
            epoch_val_loss = 0
            for data, label in val_loader:
                data = data.to(device)
                label = label.to(device)

                val_output = model(data)
                val_loss = criterion(val_output, label)

                acc = ((val_output.argmax(dim=1) == label).float().mean())
                epoch_val_accuracy += acc / len(val_loader)
                epoch_val_loss += val_loss / len(val_loader)

            print('Epoch : {}, val_accuracy : {}, val_loss : {}'.format(epoch + 1, epoch_val_accuracy, epoch_val_loss))


cuda
train list 1920
test list 600
val list 480
Epoch : 1, train accuracy : 0.4994792342185974, train loss : 0.7245917320251465
Epoch : 1, val_accuracy : 0.490234375, val_loss : 0.7057166695594788
Epoch : 2, train accuracy : 0.5265625715255737, train loss : 0.6961603164672852
Epoch : 2, val_accuracy : 0.546875, val_loss : 0.6814125776290894
Epoch : 3, train accuracy : 0.5187500715255737, train loss : 0.6930896043777466
Epoch : 3, val_accuracy : 0.515625, val_loss : 0.6940701603889465
Epoch : 4, train accuracy : 0.5375000238418579, train loss : 0.6871858835220337
Epoch : 4, val_accuracy : 0.53515625, val_loss : 0.6902409791946411
Epoch : 5, train accuracy : 0.5598958730697632, train loss : 0.6862000823020935
Epoch : 5, val_accuracy : 0.54296875, val_loss : 0.6816489696502686
Epoch : 6, train accuracy : 0.5598958730697632, train loss : 0.6801536083221436
Epoch : 6, val_accuracy : 0.537109375, val_loss : 0.6803664565086365
Epoch : 7, train accuracy : 0.55364590883255, train loss : 0.68091