In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import os
import json
import numpy as np
import torch
import torchvision
from torch.utils.data import random_split
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import f1_score, accuracy_score
from collections import defaultdict
from time import time
from tqdm import tqdm
from datetime import datetime
import matplotlib.pyplot as plt

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
class BatchNorm(nn.Module):
    """Batch normalization over the (N, H, W) axes of a 4-D ``(N, C, H, W)`` input.

    In training mode the per-channel batch mean and (unbiased) variance are used
    to normalize the input and are folded into exponential running averages:
    ``running = (1 - momentum) * running + momentum * batch``.  In eval mode the
    running averages are used instead.

    The running averages are registered buffers (``curr_mean`` / ``curr_var``,
    shape ``(1, C, 1, 1)``) so that they
      * are saved and restored through ``state_dict()``,
      * follow the module across ``.to(device)`` calls, and
      * survive ``nn.DataParallel`` replication, because they are updated in place.
    The running variance starts at 1 so an untrained module in eval mode does
    not divide by ``sqrt(eps)``.

    Args:
        num_features: number of channels ``C``.
        eps: constant added to the variance for numerical stability.
        momentum: weight of the current batch statistics in the running average.
        affine: when true, apply the per-channel scale ``gamma`` and shift ``beta``.
    """

    def __init__(self,num_features,eps = 1e-5,momentum = 0.1,affine = True):
        super(BatchNorm,self).__init__()
        self.affine = affine
        self.momentum = momentum
        # Plain float (as GroupNorm does) so no CPU tensor is mixed with device tensors.
        self.eps = eps
        self.num_features = num_features
        self.register_buffer('curr_mean', torch.zeros(1, num_features, 1, 1))
        self.register_buffer('curr_var', torch.ones(1, num_features, 1, 1))
        self.gamma = nn.Parameter(torch.ones(self.num_features))
        self.beta = nn.Parameter(torch.zeros(self.num_features))

    def forward(self,x):
        if self.training:
            mean = x.mean(dim = (0,2,3),keepdim = True)
            var = x.var(dim = (0,2,3),unbiased = True,keepdim = True)
            with torch.no_grad():
                # In-place updates keep the buffers shared with the base module
                # when this instance is a DataParallel replica.
                self.curr_mean.mul_(1 - self.momentum).add_(mean.detach(), alpha = self.momentum)
                self.curr_var.mul_(1 - self.momentum).add_(var.detach(), alpha = self.momentum)
        else:
            mean = self.curr_mean
            var = self.curr_var
        x = (x - mean) / torch.sqrt(var + self.eps)
        if self.affine:
            x = x * self.gamma.view(1,-1,1,1) + self.beta.view(1,-1,1,1)
        return x

    

class InstanceNorm(nn.Module):
    """Normalize every (sample, channel) plane of an ``(N, C, H, W)`` input on its own.

    Statistics are taken over the spatial axes (2, 3) only, so the layer behaves
    the same in training and eval mode and keeps no running averages.

    Args:
        num_features: number of channels ``C`` (size of ``gamma`` and ``beta``).
        eps: constant added to the variance before the square root.
        momentum: accepted for signature parity with the batch-based layers; unused.
        affine: when true, apply the per-channel scale ``gamma`` and shift ``beta``.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
        super().__init__()
        self.affine = affine
        self.eps = torch.tensor(eps)
        self.num_features = num_features
        self.gamma = nn.Parameter(torch.ones(self.num_features))
        self.beta = nn.Parameter(torch.zeros(self.num_features))

    def forward(self, x):
        spatial_axes = (2, 3)
        centered = x - x.mean(dim=spatial_axes, keepdim=True)
        scale = torch.sqrt(x.var(dim=spatial_axes, keepdim=True) + self.eps)
        normalized = centered / scale
        if not self.affine:
            return normalized
        # Broadcast the per-channel parameters over batch and spatial axes.
        return normalized * self.gamma.view(1, -1, 1, 1) + self.beta.view(1, -1, 1, 1)


    
class BatchInstanceNorm(nn.Module):
    """Fixed-ratio blend of batch normalization and instance normalization.

    The output is ``rho * batch_normalized + (1 - rho) * instance_normalized``
    for a 4-D ``(N, C, H, W)`` input, optionally followed by a per-channel
    affine transform.  The batch branch uses batch statistics in training mode
    and exponential running averages in eval mode; the instance branch always
    uses the statistics of the current input.

    The running averages are registered buffers (``curr_mean`` / ``curr_var``,
    shape ``(1, C, 1, 1)``) updated in place, so they are part of
    ``state_dict()``, follow ``.to(device)``, and are not lost when the module
    runs as an ``nn.DataParallel`` replica.  The running variance starts at 1.

    Args:
        num_features: number of channels ``C``.
        momentum: weight of the current batch statistics in the running average.
        eps: constant added to variances for numerical stability.
        affine: when true, apply the per-channel scale ``gamma`` and shift ``beta``.
        rho: weight of the batch-normalized branch (a constant, not learned).
    """

    def __init__(self,num_features,momentum = 0.1,eps = 1e-5,affine = True,rho = 0.5):
        super(BatchInstanceNorm,self).__init__()
        self.affine = affine
        self.momentum = momentum
        # Plain float (as GroupNorm does) so no CPU tensor is mixed with device tensors.
        self.eps = eps
        self.num_features = num_features
        self.register_buffer('curr_mean', torch.zeros(1, num_features, 1, 1))
        self.register_buffer('curr_var', torch.ones(1, num_features, 1, 1))
        self.gamma = nn.Parameter(torch.ones(self.num_features))
        self.beta = nn.Parameter(torch.zeros(self.num_features))
        self.rho = rho

    def forward(self,x):
        if self.training:
            batch_mean = x.mean(dim = (0,2,3),keepdim = True)
            batch_var = x.var(dim = (0,2,3),unbiased = True,keepdim = True)
            with torch.no_grad():
                # In-place so the update reaches the base module under DataParallel.
                self.curr_mean.mul_(1 - self.momentum).add_(batch_mean.detach(), alpha = self.momentum)
                self.curr_var.mul_(1 - self.momentum).add_(batch_var.detach(), alpha = self.momentum)
        else:
            batch_mean = self.curr_mean
            batch_var = self.curr_var
        x_batch = (x - batch_mean) / torch.sqrt(batch_var + self.eps)

        # Instance branch: per-sample, per-channel statistics over H and W.
        instance_mean = x.mean(dim = (2,3),keepdim = True)
        instance_var = x.var(dim = (2,3),keepdim = True)
        x_instance = (x - instance_mean) / torch.sqrt(instance_var + self.eps)

        x = self.rho * x_batch + (1 - self.rho) * x_instance
        if self.affine:
            x = x * self.gamma.view(1,-1,1,1) + self.beta.view(1,-1,1,1)
        return x

class LayerNorm(nn.Module):
    """Normalize each sample of an ``(N, C, H, W)`` input over all of its C, H, W values.

    One mean and one variance are computed per sample (axes 1, 2, 3).  The
    optional affine transform is still per channel: ``gamma`` and ``beta`` both
    have ``num_features`` entries.

    Args:
        num_features: number of channels ``C``.
        eps: constant added to the variance before the square root.
        affine: when true, apply the per-channel scale ``gamma`` and shift ``beta``.
    """

    def __init__(self, num_features, eps=1e-5, affine=True):
        super().__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = torch.tensor(eps)
        self.gamma = nn.Parameter(torch.ones(self.num_features))
        self.beta = nn.Parameter(torch.zeros(self.num_features))

    def forward(self, x):
        per_sample_axes = (1, 2, 3)
        sample_mean = x.mean(dim=per_sample_axes, keepdim=True)
        sample_var = x.var(dim=per_sample_axes, keepdim=True)
        normalized = (x - sample_mean) / torch.sqrt(sample_var + self.eps)
        if not self.affine:
            return normalized
        weight = self.gamma.view(1, -1, 1, 1)
        bias = self.beta.view(1, -1, 1, 1)
        return normalized * weight + bias

class GroupNorm(nn.Module):
    """Group normalization for 4-D ``(N, C, H, W)`` inputs.

    The channels are split into ``num_groups`` contiguous groups and each
    (sample, group) slice is normalized with its own mean and variance.  No
    running statistics are kept, so training and eval behave the same.

    Args:
        num_features: number of channels ``C``.
        num_groups: number of channel groups; must divide ``num_features``.
        eps: constant added to the variance before the square root.
        affine: when true, apply the per-channel scale ``gamma`` and shift ``beta``.

    Raises:
        ValueError: if ``num_groups`` is not positive or does not divide
            ``num_features`` (otherwise group boundaries would cut through
            channels and mix unrelated values).
    """

    def __init__(self,num_features,num_groups = 4,eps = 1e-5,affine = True):
        super(GroupNorm,self).__init__()
        if num_groups <= 0 or num_features % num_groups != 0:
            raise ValueError(
                f"num_features ({num_features}) must be divisible by a positive "
                f"num_groups ({num_groups})"
            )
        self.eps = eps
        self.num_groups = num_groups
        self.affine = affine
        self.num_features = num_features
        self.gamma = nn.Parameter(torch.ones(self.num_features))
        self.beta = nn.Parameter(torch.zeros(self.num_features))

    def forward(self,x):
        N,C,H,W = x.shape
        # reshape (not view) so non-contiguous inputs are accepted too.
        grouped = x.reshape(N,self.num_groups,-1)
        mean = grouped.mean(dim = 2,keepdim = True)
        var = grouped.var(dim = 2,keepdim = True)
        grouped = (grouped - mean) / torch.sqrt(var + self.eps)
        x = grouped.reshape(N,C,H,W)
        if self.affine:
            x = x * self.gamma.view(1,-1,1,1) + self.beta.view(1,-1,1,1)
        return x
        
class NoNorm(nn.Module):
    """Pass-through layer: stands in where a normalization module is expected but none is wanted."""

    # No state of its own, so the inherited nn.Module constructor is sufficient.

    def forward(self, x):
        """Return the input unchanged."""
        return x

In [None]:
def normalization(dim,norm_type):
    """Build the normalization layer selected by ``norm_type`` for ``dim`` channels.

    Supported keys:
        'default' -> torch's ``nn.BatchNorm2d``
        'nn'      -> ``NoNorm`` (no normalization)
        'bn'      -> ``BatchNorm``
        'in'      -> ``InstanceNorm``
        'bin'     -> ``BatchInstanceNorm``
        'ln'      -> ``LayerNorm``
        'gn'      -> ``GroupNorm``

    Args:
        dim: number of channels the layer will normalize.
        norm_type: one of the keys above.

    Returns:
        A freshly constructed ``nn.Module``.

    Raises:
        ValueError: if ``norm_type`` is not a supported key.  (Falling through
            and returning ``None`` would only fail later, inside the model.)
    """
    if norm_type == 'default':
        return nn.BatchNorm2d(dim)
    elif norm_type == 'nn':
        return NoNorm()
    elif norm_type == 'bn':
        return BatchNorm(num_features = dim)
    elif norm_type == 'in':
        return InstanceNorm(num_features = dim)
    elif norm_type == 'bin':
        return BatchInstanceNorm(num_features = dim)
    elif norm_type == 'ln':
        return LayerNorm(num_features = dim)
    elif norm_type == 'gn':
        return GroupNorm(num_features = dim)
    raise ValueError(
        f"unknown norm_type {norm_type!r}; expected one of "
        "'default', 'nn', 'bn', 'in', 'bin', 'ln', 'gn'"
    )
    

In [None]:
# Prefer the GPU when one is visible to torch; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f'using device: {device}')

In [None]:
class Residual_Block(nn.Module):
    """Two 3x3 convolutions with a skip connection.

    Main path: conv -> norm -> ReLU -> conv -> norm.  The skip path is the
    identity (an empty ``nn.Sequential``) unless the block changes the spatial
    stride or the channel count, in which case it is a strided 3x3 convolution
    followed by a normalization layer so that both paths have matching shapes.
    The two paths are summed and passed through a final ReLU.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the block.
        norm_type: key understood by ``normalization``.
        stride: stride of the first convolution (and of the skip convolution).
    """

    def __init__(self, in_channels, out_channels, norm_type="default", stride=1):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.norm_type = norm_type
        self.stride = stride

        # Layers are created in the same order as they are used, which also
        # fixes the parameter order and the order of random initialisation.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = normalization(out_channels, norm_type)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = normalization(out_channels, norm_type)
        self.relu = nn.ReLU()

        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            projection = [
                nn.Conv2d(in_channels, out_channels, kernel_size=3,
                          stride=stride, padding=1, bias=False),
                normalization(out_channels, norm_type),
            ]
        else:
            projection = []
        self.downsample = nn.Sequential(*projection)

    def forward(self, x):
        shortcut = self.downsample(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += shortcut
        return self.relu(out)
        
class ResNet(nn.Module):
    """Three-stage residual network for 3-channel images.

    Layout: a 3x3 stem convolution (16 channels) followed by three stages of
    ``n`` residual blocks each with 16, 32 and 64 channels; stages two and
    three halve the spatial resolution.  The final feature map is averaged
    over its spatial extent and fed to a linear classifier.

    Global pooling is adaptive, so the network accepts any input resolution.
    For 256x256 inputs this is equivalent to the fixed 64x64 average pool
    (256 / 2 / 2 = 64) that the classifier's 64 input features assume.

    Args:
        n: number of residual blocks per stage.
        r: size of the output layer (number of classes).
        norm_type: key understood by ``normalization``; used for every norm layer.
    """

    def __init__(self,n,r,norm_type = "default"):
        super(ResNet,self).__init__()
        self.n = n
        self.r = r
        self.norm_type = norm_type
        # Tracks the channel count entering the next block while stages are built.
        self.in_channels = 16
        self.conv1 = nn.Conv2d(3,16,kernel_size = 3,stride = 1,padding = 1,bias = False)
        self.bn1 = normalization(16,norm_type)
        self.relu = nn.ReLU()
        self.layer1 = self.create_layer(16,self.n)
        self.layer2 = self.create_layer(32,self.n,stride = 2)
        self.layer3 = self.create_layer(64,self.n,stride = 2)
        # Collapse whatever spatial size is left to 1x1 instead of assuming 64x64.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(64,self.r)

    def create_layer(self,channels,n,stride = 1):
        """Build one stage: a (possibly strided) first block plus ``n - 1`` stride-1 blocks.

        Updates ``self.in_channels`` to ``channels`` so the next stage starts
        from the right width.
        """
        layers = [Residual_Block(self.in_channels,channels,self.norm_type,stride)]
        self.in_channels = channels
        for _ in range(1,n):
            layers.append(Residual_Block(self.in_channels,channels,self.norm_type))
        return nn.Sequential(*layers)

    def forward(self,x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avg_pool(x)
        # (N, 64, 1, 1) -> (N, 64)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [None]:
class Bird_dataset():
    """Thin map-style wrapper around a ``datasets.ImageFolder`` rooted at ``path``.

    Length and indexing are delegated to the wrapped folder dataset, which is
    kept in ``self.data``; ``transform`` is handed to it unchanged.
    """

    def __init__(self, path, transform=None):
        self.data = datasets.ImageFolder(root=path, transform=transform)

    def __len__(self):
        """Number of items in the wrapped dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Item ``idx`` exactly as the wrapped dataset returns it."""
        item = self.data[idx]
        return item
# Preprocessing shared by every split: resize to 256x256, convert to a tensor,
# then normalize the three channels with 0.5 / 0.5.
transform = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5)),
])
def get_data(path):
    """Create the train, validation and test datasets found under ``path``.

    ``path`` must contain ``train``, ``val`` and ``test`` sub-directories; each
    is wrapped in a ``Bird_dataset`` that uses the module-level ``transform``.

    Returns:
        ``(train_data, val_data, test_data)``.
    """
    splits = ('train', 'val', 'test')
    train_data, val_data, test_data = (
        Bird_dataset(path + '/' + split, transform) for split in splits
    )
    return train_data, val_data, test_data
def get_loader(train_data,val_data,test_data,batch_size = 32,num_workers = 4):
    """Wrap the three datasets in ``DataLoader`` objects.

    Only the training loader shuffles; validation and test keep dataset order.
    All three share ``batch_size`` and ``num_workers``.

    Returns:
        ``(train_loader, val_loader, test_loader)``.
    """
    shared = {'batch_size': batch_size, 'num_workers': num_workers}
    plan = ((train_data, True), (val_data, False), (test_data, False))
    return tuple(
        DataLoader(dataset, shuffle = should_shuffle, **shared)
        for dataset, should_shuffle in plan
    )

In [None]:
# Build the three datasets from the train/, val/ and test/ folders under Birds_25.
train_data,val_data,test_data = get_data('/kaggle/input/bird-data/Birds_25')

In [None]:
# Batches of 64 images, loaded by 4 worker processes; only the training loader shuffles.
train_loader,val_loader,test_loader = get_loader(train_data,val_data,test_data,batch_size = 64,num_workers = 4)

In [None]:
# Output folders under /kaggle/working.  exist_ok=True keeps this cell
# re-runnable: without it a second execution raises FileExistsError.
for subdir in ('val', 'test', 'train', 'model', 'result'):
    os.makedirs(os.path.join('/kaggle/working', subdir), exist_ok = True)

In [None]:
def _dump_json(directory, filename, payload):
    """Write ``payload`` as JSON to ``directory/filename``, replacing any existing file."""
    with open(os.path.join(directory, filename), 'w') as outfile:
        json.dump(payload, outfile)


def _score(actuals, predictions):
    """Return ``(accuracy in percent, micro-F1, macro-F1)``, rounded the way they are logged."""
    accuracy = round(accuracy_score(actuals, predictions) * 100, 2)
    micro = round(f1_score(actuals, predictions, average = 'micro'), 4)
    macro = round(f1_score(actuals, predictions, average = 'macro'), 4)
    return accuracy, micro, macro


def train(norm_type = 'gn', num_epochs = 50, accum_steps = 2, run_tag = None):
    """Train a ``ResNet(n=2, r=25)`` and checkpoint it after every epoch.

    Reads the module-level ``train_loader``, ``val_loader`` and ``device``.
    Each epoch runs one pass over the training data with gradient
    accumulation, steps the cosine learning-rate schedule, evaluates on the
    validation data, and writes under ``/kaggle/working``:

      * ``model/latest_checkpoint_<tag>.pth``       - model after this epoch
      * ``model/best_model_checkpoint_<tag>.pth``   - model with the best validation accuracy
      * ``train/training_curr_state_<tag>.json``    - accuracy / epoch / best so far
      * ``train/training_best_state_<tag>.json``    - same, for the best epoch
      * ``result/{loss,accuracy,f1_score_micro,f1_score_macro}_dict_<tag>.json``

    Args:
        norm_type: normalization key passed to ``ResNet`` ('gn' by default).
        num_epochs: number of epochs; also the period of the cosine schedule.
        accum_steps: number of batches whose gradients are averaged per
            optimizer step.
        run_tag: suffix used in all output file names.  Defaults to
            ``"<norm_type>_<batch_size * accum_steps>"``, i.e. ``"gn_128"``
            for the default arguments and a batch size of 64.

    Raises:
        ValueError: if ``accum_steps`` is smaller than 1.
    """
    if accum_steps < 1:
        raise ValueError("accum_steps must be >= 1")
    if run_tag is None:
        run_tag = f"{norm_type}_{train_loader.batch_size * accum_steps}"

    model_dir = '/kaggle/working/model'
    state_dir = '/kaggle/working/train'
    result_dir = '/kaggle/working/result'

    model = ResNet(n = 2, r = 25, norm_type = norm_type)
    model = nn.DataParallel(model)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),lr = 0.1,weight_decay = 1e-4,momentum = 0.9)
    # `verbose` is deprecated in recent PyTorch releases; its default is already False.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,num_epochs)

    loss_dict = defaultdict(list)
    accuracy_dict = defaultdict(list)
    f1_dict_micro = defaultdict(list)
    f1_dict_macro = defaultdict(list)

    best_accuracy = -1
    best_accuracy_epoch = -1
    num_batches = len(train_loader)
    print("----------------------Training Starts---------------------------------")
    for epoch in range(num_epochs):
        print("\n--------------- epoch: ",epoch)

        loss = []
        predictions = []
        actuals = []

        model.train()
        # Start every epoch with clean gradients.
        optimizer.zero_grad()
        for idx,(images,labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            outs = model(images)

            error = criterion(outs,labels)
            loss.append(error.item())

            # Average (not sum) the gradients of one accumulation window so the
            # step size matches a single large batch.  The last window of an
            # epoch may be shorter than accum_steps.
            window_start = (idx // accum_steps) * accum_steps
            window_size = min(accum_steps, num_batches - window_start)
            (error / window_size).backward()
            # Also step on the final batch so no gradient leaks into the next epoch.
            if (idx + 1) % accum_steps == 0 or (idx + 1) == num_batches:
                optimizer.step()
                optimizer.zero_grad()

            # reshape(-1) always yields a list, even for a batch of one sample
            # (squeeze() would collapse that to a scalar and break extend()).
            actuals.extend(labels.reshape(-1).tolist())
            predictions.extend(torch.argmax(outs,dim = 1).reshape(-1).tolist())

        train_accuracy, train_micro, train_macro = _score(actuals, predictions)
        loss_dict["train"].append(float(np.mean(loss)))
        accuracy_dict["train"].append(train_accuracy)
        f1_dict_micro["train"].append(train_micro)
        f1_dict_macro["train"].append(train_macro)

        scheduler.step()

        print("-----------------------Validation------------------------")
        model.eval()

        loss_val = []
        predictions = []
        actuals = []

        # No gradients are needed for evaluation; skipping them saves memory and time.
        with torch.no_grad():
            for images,labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)

                outs = model(images)
                actuals.extend(labels.reshape(-1).tolist())
                predictions.extend(torch.argmax(outs,dim = 1).reshape(-1).tolist())

                error = criterion(outs,labels)
                loss_val.append(error.item())

        val_accuracy, val_micro, val_macro = _score(actuals, predictions)
        f1_dict_micro["val"].append(val_micro)
        f1_dict_macro["val"].append(val_macro)
        loss_dict["val"].append(float(np.mean(loss_val)))
        accuracy_dict["val"].append(val_accuracy)

        print("Epoch: {},Train Loss: {},Train Accuracy: {}%,Train_f1_micro: {},Train_f1_macro: {},Validation Loss: {}, Validation Accuracy: {}%,Validation_f1_micro: {}, Validation_f1_macro: {} ".format(epoch,loss_dict["train"][-1],accuracy_dict["train"][-1],f1_dict_micro["train"][-1],f1_dict_macro["train"][-1],loss_dict["val"][-1],accuracy_dict["val"][-1],f1_dict_micro["val"][-1],f1_dict_macro["val"][-1]))

        print(f"epoch: {epoch}, Saving model checkpoint")

        # NOTE(review): this pickles the whole DataParallel-wrapped model; saving
        # model.module.state_dict() would be more portable, but the format is kept
        # so existing consumers of these files keep working.
        torch.save(model,os.path.join(model_dir,f'latest_checkpoint_{run_tag}.pth'))

        if val_accuracy > best_accuracy:

            print(f"best accuracy updated = {val_accuracy} against {best_accuracy}")
            best_accuracy = val_accuracy
            best_accuracy_epoch = epoch
            state = {"accuracy":val_accuracy,"epoch":epoch,"best_accuracy":best_accuracy,"best_accuracy_epoch":best_accuracy_epoch}
            torch.save(model,os.path.join(model_dir,f'best_model_checkpoint_{run_tag}.pth'))
            _dump_json(state_dir,f'training_best_state_{run_tag}.json',state)

        # Written after the best-so-far update so the file never reports a stale best.
        curr_state = {"accuracy":val_accuracy,"epoch":epoch,"best_accuracy":best_accuracy,"best_accuracy_epoch":best_accuracy_epoch}
        _dump_json(state_dir,f'training_curr_state_{run_tag}.json',curr_state)

        _dump_json(result_dir,f'loss_dict_{run_tag}.json',loss_dict)
        _dump_json(result_dir,f'accuracy_dict_{run_tag}.json',accuracy_dict)
        _dump_json(result_dir,f'f1_score_micro_dict_{run_tag}.json',f1_dict_micro)
        _dump_json(result_dir,f'f1_score_macro_dict_{run_tag}.json',f1_dict_macro)
    return

In [None]:
# Start training; per-epoch metrics are printed and checkpoints/metric files are written under /kaggle/working.
train()