https://www.analyticsvidhya.com/blog/2021/06/how-to-load-kaggle-datasets-directly-into-google-colab/

In [1]:
#data is a CSV file with one column denoting names and another denoting the corresponding JPEG

#ViT (Adam)

#tutorial to use: https://theaisummer.com/hugging-face-vit/
#uses CIFAR10 

#Resnet 50 (David)

#CvT (Vivian)

Load Dataset :(

Part I. ViT (Adam)

In [1]:
import numpy as np
import pandas as pd

import shutil

import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
from torch.nn.modules import Module
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor, Resize
import torchvision.transforms as transforms
import torchvision.models as models

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import tqdm.notebook as t

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [2]:
main_path = './WondersOfTheWorld'
dir_list = os.listdir(main_path)

# Report how many entries each sub-folder of the dataset root contains.
for path in dir_list:
    entry = os.path.join(main_path, path)
    # Stray files in the root (e.g. .DS_Store) are skipped: calling
    # os.listdir on a regular file raises NotADirectoryError.
    if not os.path.isdir(entry):
        continue
    print(f'{path} : {len(os.listdir(entry))}')

Train : 1
Valid : 1


In [3]:
# Training configuration shared by the cells below.
BATCH_SIZE = 8
EPOCHS = 10
t_size = 16
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
def get_train_val_image_list(main_path, train_size, val_size):
    """Split the files of every class folder into a train list and a validation list.

    Each sub-directory of ``main_path`` is treated as one class. Class folders
    and the files inside them are visited in sorted order, so the split is
    reproducible across runs and machines (``os.listdir`` alone returns
    entries in arbitrary order).

    Args:
        main_path: Directory that contains one sub-directory per class.
        train_size: Number of files per class assigned to the train split.
        val_size: Number of files per class assigned to the validation split;
            they are taken immediately after the train files.

    Returns:
        ``(train_set, val_set)``: two dicts mapping the class folder name to a
        list of file paths. A class with fewer than ``train_size + val_size``
        files simply gets shorter lists.
    """
    train_set = dict()
    val_set = dict()

    for path in t.tqdm(sorted(os.listdir(main_path))):
        class_dir = os.path.join(main_path, path)
        # Skip stray files in the root; os.listdir would raise on them.
        if not os.path.isdir(class_dir):
            continue

        file_list = sorted(os.listdir(class_dir))
        train_list = file_list[:train_size]
        val_list = file_list[train_size:train_size + val_size]

        train_set[path] = [os.path.join(class_dir, n) for n in train_list]
        val_set[path] = [os.path.join(class_dir, n) for n in val_list]

    return train_set, val_set
    
def get_class_list(train_set):
    """Build the lookup tables between class names and integer indices.

    Indices are assigned in the iteration order of ``train_set``'s keys.

    Args:
        train_set: Mapping whose keys are the class names.

    Returns:
        ``(idx_to_classes, classes_to_idx)``: a dict from index to class name
        and its inverse.
    """
    idx_to_classes = dict(enumerate(train_set.keys()))
    classes_to_idx = {name: position for position, name in idx_to_classes.items()}
    return idx_to_classes, classes_to_idx
def get_average_height_width(train_set):
    """Average the first two ``shape`` dimensions over every image in ``train_set``.

    Args:
        train_set: Mapping from class name to a list of images. An image is
            either an object exposing ``.shape`` (e.g. a NumPy array) or a
            file path, which is loaded with ``matplotlib.image.imread`` first,
            so the path lists returned by ``get_train_val_image_list`` can be
            passed directly.

    Returns:
        ``(mean of shape[0], mean of shape[1])`` truncated to ints, i.e.
        (height, width) for row-major image arrays. ``(0, 0)`` is returned
        when there are no images at all.
    """
    total_files = 0
    total_height = 0
    total_width = 0

    for key, img_list in t.tqdm(train_set.items()):
        for img in img_list:
            if isinstance(img, (str, os.PathLike)):
                # Paths carry no shape information; read the image first.
                img = mpimg.imread(img)
            total_height += img.shape[0]
            total_width += img.shape[1]
            total_files += 1

    # Guard against dividing by zero when no image was seen.
    if total_files == 0:
        return 0, 0

    return int(total_height / total_files), int(total_width / total_files)

In [5]:
class WoWDataset(Dataset):
    """Wrap an indexable ``(image, class_index)`` dataset: resize/augment the image and one-hot encode the label.

    Args:
        data: Indexable dataset whose items are ``(image, label)`` pairs,
            where ``label`` is an integer class index.
        num_classes: Length of the one-hot label vector.
        image_size: ``(height, width)`` every image is resized to.
        augment: When True (the default, matching the previous behavior) a
            random rotation of up to 180 degrees and a Gaussian blur follow
            the resize. Pass False for validation/test data so evaluation is
            deterministic.
    """

    def __init__(self, data, num_classes, image_size=(16, 16), augment=True):
        self.data = data
        self.num_classes = num_classes

        # Build the pipeline once instead of on every __getitem__ call.
        steps = [transforms.Resize(list(image_size))]
        if augment:
            steps.append(transforms.RandomRotation(180))
            steps.append(transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 5)))
        self.transform = transforms.Compose(steps)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Index the wrapped dataset exactly once: with ImageFolder each access
        # reads and decodes the image from disk.
        sample = self.data[idx]
        img, label = sample[0], sample[1]

        o_label = torch.zeros(self.num_classes, dtype=torch.float)
        o_label[label] = 1
        return self.transform(img), o_label

In [6]:
# Read the train and validation splits from disk; every image is converted
# to a tensor by the same ToTensor transform.
to_tensor = ToTensor()
train_data = ImageFolder(root='./WondersOfTheWorld/Train', transform=to_tensor)
val_data = ImageFolder(root='./WondersOfTheWorld/Valid', transform=to_tensor)

In [7]:
# Wrap both splits so labels come out as 12-element one-hot vectors.
NUM_CLASSES = 12
train_dataset = WoWDataset(data=train_data, num_classes=NUM_CLASSES)
val_dataset = WoWDataset(data=val_data, num_classes=NUM_CLASSES)

In [8]:
# Batch both splits; only the training loader is shuffled.
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
val_dl = DataLoader(val_dataset, batch_size=BATCH_SIZE, pin_memory=True)

Create Model/Import Model

In [9]:
class VITModel(Module):
    """Image classifier: a pretrained ConvNeXt-Large backbone followed by three linear layers and a sigmoid.

    Despite the class name, the backbone built in ``__init__`` is
    ``torchvision.models.convnext_large``, not a Vision Transformer.
    The network emits 12 per-class probabilities and is trained with binary
    cross-entropy against one-hot labels.
    """

    def __init__(self, num_channels):
        """Build the network.

        Args:
            num_channels: Accepted for interface compatibility; it is not used
                when constructing the layers.
        """
        super(VITModel, self).__init__()

        self.loss_fn = nn.BCELoss(reduction='mean')

        # Every batch is resized to 32x32 before it reaches the backbone.
        self.transform1 = torch.nn.Sequential(
            transforms.Resize((32,32))
        )

        self.convnext_large = models.convnext_large(pretrained=True)
        # Registered for compatibility; not used in the forward path.
        self.flatten1 = nn.Flatten()
        # NOTE(review): there is no activation between these linear layers, so
        # together they are equivalent to a single linear map — confirm intended.
        self.linear1 = nn.Linear(1000,1000)
        self.linear2 = nn.Linear(1000,1000)
        self.linear3 = nn.Linear(1000,12)
        self.sigmoid1 = nn.Sigmoid()

    def _device(self):
        """Return the device the model's parameters live on (CPU if it has none)."""
        try:
            return next(self.parameters()).device
        except StopIteration:
            return torch.device('cpu')

    def _probabilities(self, x):
        """Run the shared path: resize -> backbone -> linear head -> sigmoid."""
        output = self.transform1(x)
        output = self.convnext_large(output)
        output = self.linear1(output)
        output = self.linear2(output)
        output = self.linear3(output)
        return self.sigmoid1(output)

    @staticmethod
    def _mean(values):
        """Arithmetic mean of a list; NaN for an empty list instead of dividing by zero."""
        return sum(values) / len(values) if values else float('nan')

    @staticmethod
    def _batch_metrics(label, output):
        """Return (accuracy, precision, recall, f1) for one batch, using argmax over the class axis."""
        y_true = label.detach().argmax(dim=1).cpu().numpy()
        y_pred = output.detach().argmax(dim=1).cpu().numpy()
        return (
            accuracy_score(y_true, y_pred),
            precision_score(y_true, y_pred, average='macro', zero_division=1),
            recall_score(y_true, y_pred, average='macro', zero_division=1),
            f1_score(y_true, y_pred, average='macro', zero_division=1),
        )

    def forward(self, x, y):
        """Compute the loss and the per-class probabilities for a batch.

        Args:
            x: Batch of images.
            y: One-hot float targets with the same shape as the model output.

        Returns:
            ``(loss, output)``: the mean BCE loss and the sigmoid outputs.
        """
        output = self._probabilities(x)
        loss = self.loss_fn(output, y)

        return loss, output

    def predict(self, x):
        """Return the predicted class index for every image in ``x`` as a NumPy array.

        Uses the same preprocessing as ``forward``, runs in eval mode without
        gradient tracking, and restores the previous train/eval mode afterwards.
        The input is moved to the model's device and the result is brought
        back to the CPU, so this works for models on CUDA as well.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                output = self._probabilities(x.to(self._device()))
        finally:
            self.train(was_training)

        return np.argmax(output.cpu().numpy(), axis=1)

    def train_using_train_dl(self, optimizer, train_dl):
        """Train for one pass over ``train_dl``.

        Args:
            optimizer: Optimizer stepped once per batch.
            train_dl: Iterable of ``(image, one_hot_label)`` batches.

        Returns:
            ``(loss, accuracy, f1, precision, recall)``, each averaged over the
            batches (NaN when the loader yields no batch).
        """
        self.train()

        losses = []
        acc = []
        pre = []
        rec = []
        f1 = []

        total = len(train_dl)
        device = self._device()
        mean = self._mean

        for idx, (image, label) in enumerate(train_dl):
            image = image.to(device)
            label = label.to(device)

            optimizer.zero_grad()

            loss, output = self(image, label)
            losses.append(loss.item())
            loss.backward()

            optimizer.step()

            batch_acc, batch_pre, batch_rec, batch_f1 = self._batch_metrics(label, output)
            acc.append(batch_acc)
            pre.append(batch_pre)
            rec.append(batch_rec)
            f1.append(batch_f1)

            # '\r' keeps the running averages on a single, overwritten line.
            print(f'{idx} / {total} - Loss : {mean(losses):0.4f}, Accuracy : {mean(acc):0.4f}, F1 : {mean(f1):0.4f} P : {mean(pre):0.4f}, R : {mean(rec):0.4f}', end='\r')

        return mean(losses), mean(acc), mean(f1), mean(pre), mean(rec)

    def evaluate_using_val_dl(self, val_dl):
        """Evaluate on ``val_dl`` in eval mode without gradient tracking.

        Args:
            val_dl: Iterable of ``(image, one_hot_label)`` batches.

        Returns:
            ``(loss, accuracy, f1, precision, recall)``, each averaged over the
            batches (NaN when the loader yields no batch).
        """
        self.eval()

        losses = []
        acc = []
        pre = []
        rec = []
        f1 = []
        total = len(val_dl)
        device = self._device()
        mean = self._mean

        with torch.no_grad():

            for idx, (image, label) in enumerate(val_dl):
                image = image.to(device)
                label = label.to(device)

                loss, output = self(image, label)
                losses.append(loss.item())

                batch_acc, batch_pre, batch_rec, batch_f1 = self._batch_metrics(label, output)
                acc.append(batch_acc)
                pre.append(batch_pre)
                rec.append(batch_rec)
                f1.append(batch_f1)

                print(f'{idx} / {total} - Val_Loss : {mean(losses):0.4f}, Val_Accuracy : {mean(acc):0.4f}, Val_F1 : {mean(f1):0.4f}, Val_P : {mean(pre):0.4f}, Val_R : {mean(rec):0.4f}', end='\r')
            print('', end='\r')
        return mean(losses), mean(acc), mean(f1), mean(pre), mean(rec)

In [10]:
# Instantiate the classifier defined above.
model = VITModel(num_channels=3)



Train Model

In [11]:
# Move the model to its device before building the optimizer, so the optimizer
# is constructed over the parameters as they will be used during training.
model = model.to(DEVICE)
# NOTE(review): eps=0.1 is far larger than Adam's default (1e-8) — confirm it is intentional.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, eps=0.1)
1

1

In [12]:
def train_model(epochs, model, optimizer, train_dl, val_dl):
    """Run the train/validate cycle for ``epochs`` epochs and collect per-epoch metrics.

    Args:
        epochs: Number of epochs to run.
        model: Object providing ``train_using_train_dl(optimizer, train_dl)``
            and ``evaluate_using_val_dl(val_dl)``, each returning
            ``(loss, accuracy, f1, precision, recall)``.
        optimizer: Passed through to ``model.train_using_train_dl``.
        train_dl: Training batches.
        val_dl: Validation batches.

    Returns:
        Dict of lists with one entry per epoch, keyed by ``train_loss``,
        ``val_loss``, ``train_accuracy``, ``val_accuracy``, ``F1``, ``val_F1``,
        ``P``, ``val_P``, ``Recall`` and ``val_Recall``.
    """
    # Key order mirrors the (loss, accuracy, f1, precision, recall) tuples.
    train_keys = ('train_loss', 'train_accuracy', 'F1', 'P', 'Recall')
    val_keys = ('val_loss', 'val_accuracy', 'val_F1', 'val_P', 'val_Recall')

    history = {}
    for train_key, val_key in zip(train_keys, val_keys):
        history[train_key] = []
        history[val_key] = []

    epoch = 0
    while epoch < epochs:
        print(f'EPOCH : {epoch + 1} / {epochs}')

        train_stats = model.train_using_train_dl(optimizer, train_dl)
        val_stats = model.evaluate_using_val_dl(val_dl)

        for key, value in zip(train_keys, train_stats):
            history[key].append(value)
        for key, value in zip(val_keys, val_stats):
            history[key].append(value)

        train_loss, train_acc, train_f1, train_pre, train_re = train_stats
        val_loss, val_acc, val_f1, val_pre, val_re = val_stats

        print(f'Loss : {train_loss:0.4f}, Accuracy : {train_acc:0.4f}, F1 : {train_f1:0.4f}, P :{train_pre:0.4f}, R : {train_re:0.4f}')
        print(f'Val_Loss : {val_loss:0.4f}, Val_Accuracy : {val_acc:0.4f}, Val_F1 : {val_f1:0.4f}, Val_P :{val_pre:0.4f}, Val_R : {val_re:0.4f}')

        epoch += 1

    return history

In [13]:
# Run the full training schedule and display the collected history.
out = train_model(
    epochs=EPOCHS,
    model=model,
    optimizer=optimizer,
    train_dl=train_dl,
    val_dl=val_dl,
)
out

EPOCH : 1 / 10
Loss : 0.0314, Accuracy : 0.9878, F1 : 0.9816, P :0.9844, R : 0.9967 Val_P : 1.0000, Val_R : 1.0000
Val_Loss : 0.0008, Val_Accuracy : 1.0000, Val_F1 : 1.0000, Val_P :1.0000, Val_R : 1.0000
EPOCH : 2 / 10
Loss : 0.0008, Accuracy : 1.0000, F1 : 1.0000, P :1.0000, R : 1.0000 Val_P : 1.0000, Val_R : 1.0000
Val_Loss : 0.0004, Val_Accuracy : 1.0000, Val_F1 : 1.0000, Val_P :1.0000, Val_R : 1.0000
EPOCH : 3 / 10
53 / 225 - Loss : 0.0008, Accuracy : 1.0000, F1 : 1.0000 P : 1.0000, R : 1.0000

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Pull the per-epoch loss and accuracy curves out of the training history.
train_loss, test_loss = out['train_loss'], out['val_loss']
train_acc, test_acc = out['train_accuracy'], out['val_accuracy']

In [None]:
# The x-axis follows the number of recorded epochs instead of a fixed 10, so
# the plot still works when EPOCHS changes or training stops early.
plt.plot(range(len(train_loss)), train_loss, label = "Train Loss");
plt.plot(range(len(test_loss)), test_loss, label = "Test Loss");
plt.title("VIT Loss Each Epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Same curves on a log scale. The x-axis follows the number of recorded
# epochs instead of a fixed 10.
plt.plot(range(len(train_loss)), np.log(train_loss), label = "Train Log-Loss");
plt.plot(range(len(test_loss)), np.log(test_loss), label = "Test Log-Loss");
plt.title("VIT Log Loss Each Epoch")
plt.xlabel("Epoch")
plt.ylabel("Log-Loss")
plt.legend()
plt.show()

In [None]:
# The x-axis follows the number of recorded epochs instead of a fixed 10, so
# the plot still works when EPOCHS changes or training stops early.
plt.plot(range(len(train_acc)), train_acc, label = "Train Accuracy");
plt.plot(range(len(test_acc)), test_acc, label = "Test Accuracy");
plt.title("VIT Accuracy Each Epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()