#**Importing required libraries**

In [None]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
!pip install split-folders
import splitfolders as sf
import os

In [None]:
import time
import torch
import copy
from PIL import Image
from torch import nn, optim
from torchvision import transforms, models

#**Downloading the iNaturalist dataset zip file from Google Drive**

In [None]:
# Run the Colab authentication step (google.colab.auth) before asking for credentials.
auth.authenticate_user()
# PyDrive auth object; it is handed the application-default Google credentials
# instead of running its own login flow.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# Drive client used further down in the notebook to fetch the dataset archive.
drive = GoogleDrive(gauth)

In [None]:
id = '19EA0yl7PM8i6aQTdhH0OiEyLcTsf6hmx'

In [None]:
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('nature_12K.zip')

#**Unzipping the content and distributing in train, validation and test folders**

In [None]:
!apt install unzip

In [None]:
!unzip 'nature_12K.zip'

In [None]:
# Root of the extracted dataset, the folder that gets split, and the destination
# folder for the result of the split.
road = '/content/inaturalist_12K/'
roadtrn = f"{road}train"
op = f"{road}trainvalsplit"

# Fixed-size split with a fixed seed (fixed=100), without oversampling or prefix grouping.
sf.fixed(
    roadtrn,
    op,
    seed=1337,
    fixed=100,
    oversample=False,
    group_prefix=None,
)

#**Preprocessing the images**

In [None]:
# Training samples as (normalized image tensor, integer label) pairs.
train_data = []
train_label = []  # never filled below: each label is stored next to its tensor in train_data
path = '/content/inaturalist_12K/trainvalsplit/train/'
# Sorted listing, so that position i in `items` is the integer label of that folder.
items = os.listdir(path)
items.sort()

# NOTE: despite its name this pipeline applies no augmentation; it converts a PIL
# image to a tensor and normalizes every channel with mean 0.5 and std 0.5.
data_augmentation = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]
)


for i in range(10):
    image_folder_path = path + items[i]
    image_names = os.listdir(image_folder_path)
    for each_image in image_names:
        if not each_image.endswith(".jpg"):
            continue
        full_path = image_folder_path + '/' + each_image
        # The context manager releases the file handle as soon as the image is processed.
        with Image.open(full_path) as image:
            # The 3-channel Normalize only works on RGB input, so every other mode
            # (grayscale 'L', CMYK, ...) is skipped -- and skipped *before* the
            # resize so no work is wasted on images that are dropped anyway.
            if image.mode != 'RGB':
                continue
            normalized_image = data_augmentation(image.resize((224, 224)))
        train_data.append((normalized_image, i))

In [None]:
# Validation samples as (normalized image tensor, integer label) pairs.
val_data = []
val_label = []  # never filled below: each label is stored next to its tensor in val_data
path = '/content/inaturalist_12K/trainvalsplit/val/'
# Sorted listing, so that position i in `items` is the integer label of that folder.
items = os.listdir(path)
items.sort()

# Convert a PIL image to a tensor and normalize every channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

for i in range(10):
    image_folder_path = path + items[i]
    image_names = os.listdir(image_folder_path)
    for each_image in image_names:
        if not each_image.endswith(".jpg"):
            continue
        full_path = image_folder_path + '/' + each_image
        # The context manager releases the file handle as soon as the image is processed.
        with Image.open(full_path) as image:
            # The 3-channel Normalize only works on RGB input, so every other mode
            # (grayscale 'L', CMYK, ...) is skipped -- and skipped *before* the
            # resize so no work is wasted on images that are dropped anyway.
            if image.mode != 'RGB':
                continue
            normalized_image = transform(image.resize((224, 224)))
        val_data.append((normalized_image, i))

In [None]:
# Names of the ten target classes.
# NOTE(review): presumably listed in the same sorted order as the class folders,
# so that a list index equals the integer label -- confirm against the dataset.
classes = [
    'Amphibia',
    'Animalia',
    'Arachnida',
    'Aves',
    'Fungi',
    'Insecta',
    'Mammalia',
    'Mollusca',
    'Plantae',
    'Reptilia',
]

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

#**Multiple Pre-Trained models**

In [None]:
# # Resnet50 model
def resnet():
    """Build a pretrained ResNet-50 with a new 10-way output layer.

    The first 25% of the parameter tensors (in ``parameters()`` order) are
    frozen and the rest are left trainable. The ``fc`` layer is then replaced
    by a fresh ``nn.Linear`` with 10 outputs.

    Returns:
        The modified ResNet-50 model.
    """
    percent_of_layers_freezed = 0.25
    resnet50_model = models.resnet50(pretrained=True)

    # A parameter tensor is trainable exactly when its position is at or past the cut-off.
    all_params = list(resnet50_model.parameters())
    freeze_until = int(percent_of_layers_freezed * len(all_params))
    for position, param in enumerate(all_params):
        param.requires_grad = position >= freeze_until

    # New classification head sized to the input width of the old one.
    resnet50_model.fc = nn.Linear(resnet50_model.fc.in_features, 10)
    return resnet50_model

In [None]:
# # vgg16 model
def vgg():
    """Build a pretrained VGG-16 (batch-norm variant) with a new output layer.

    Only the ``features`` part takes part in freezing: the first 50% of its
    parameter tensors are frozen and the remainder are trainable. The last
    classifier layer is swapped for a fresh ``nn.Linear`` with
    ``len(classes)`` outputs (``classes`` is the module-level list).

    Returns:
        The modified VGG-16 model.
    """
    vgg16_model = models.vgg16_bn(pretrained=True)
    percent_of_layers_freezed = 0.5

    feature_params = list(vgg16_model.features.parameters())
    n_frozen = int(percent_of_layers_freezed * len(feature_params))
    for param in feature_params[:n_frozen]:
        param.requires_grad = False
    for param in feature_params[n_frozen:]:
        param.requires_grad = True

    # Keep every classifier layer except the last and append the new head.
    *kept_layers, old_head = vgg16_model.classifier.children()
    new_head = nn.Linear(old_head.in_features, len(classes))
    vgg16_model.classifier = nn.Sequential(*kept_layers, new_head)

    return vgg16_model

In [None]:
# # Alexnet model
def alexnet():
    """Build a pretrained AlexNet with a new 10-way output layer.

    The first 25% of the parameter tensors (in ``parameters()`` order) are
    frozen and the rest are trainable. ``classifier[6]`` is replaced by a
    fresh ``nn.Linear(4096, 10)``.

    Returns:
        The modified AlexNet model.
    """
    percent_of_layers_freezed = 0.25
    alexnet_model = models.alexnet(pretrained=True)

    params = list(alexnet_model.parameters())
    cutoff = int(percent_of_layers_freezed * len(params))
    frozen, trainable = params[:cutoff], params[cutoff:]
    for param in frozen:
        param.requires_grad = False
    for param in trainable:
        param.requires_grad = True

    alexnet_model.classifier[6] = nn.Linear(4096, 10)
    return alexnet_model

In [None]:
# # Squeezenet model
def squeezenet():
    """Build a pretrained SqueezeNet 1.1 with a new 10-way output layer.

    The first 25% of the parameter tensors (in ``parameters()`` order) are
    frozen and the rest are trainable. The final 1x1 convolution of the
    classifier is replaced by one with 10 output channels.

    Returns:
        The modified SqueezeNet model.
    """
    squeezenet_model = models.squeezenet1_1(pretrained=True)
    counttotal = 0
    percent_of_layers_freezed = 0.25
    for param in squeezenet_model.parameters():
        param.requires_grad = True
        counttotal += 1
    count = 0
    for param in squeezenet_model.parameters():
        if count<int(percent_of_layers_freezed*counttotal):
            param.requires_grad = False
            count+=1

    squeezenet_model.classifier[1] = nn.Conv2d(512, 10, kernel_size=(1,1), stride=(1,1))
    # Keep the model's class-count attribute in sync with the new head: older
    # torchvision releases reshape the output with `self.num_classes`, which
    # would still be 1000 here and make the forward pass fail.
    squeezenet_model.num_classes = 10

    return squeezenet_model

In [None]:
# # densenet model
def densenet():
    """Build a pretrained DenseNet-161 with a new 10-way output layer.

    The first 75% of the parameter tensors (in ``parameters()`` order) are
    frozen and the rest are trainable. The ``classifier`` layer is replaced by
    a fresh ``nn.Linear`` with 10 outputs.

    Returns:
        The modified DenseNet-161 model.
    """
    percent_of_layers_freezed = 0.75
    densenet_model = models.densenet161(pretrained=True)

    # First pass only counts the parameter tensors; second pass sets the flags.
    total_tensors = sum(1 for _ in densenet_model.parameters())
    frozen_tensors = int(percent_of_layers_freezed * total_tensors)
    for index, param in enumerate(densenet_model.parameters()):
        param.requires_grad = not (index < frozen_tensors)

    in_width = densenet_model.classifier.in_features
    densenet_model.classifier = nn.Linear(in_width, 10)
    return densenet_model

#**Defining training function and data loaders**

In [None]:
def train_model(model, criteria, optimizer, num_epochs=5, device='cuda'):
    """Train ``model`` and leave it holding the weights of its best validation epoch.

    Every epoch runs a training pass over the module-level ``train_loader``
    followed by an evaluation pass over the module-level ``val_loader``, and
    prints the loss and accuracy (in percent) of each pass.

    Args:
        model: Network to train; it must already live on ``device``.
        criteria: Loss callable invoked as ``criteria(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of epochs to run.
        device: Device that every batch is moved to before the forward pass.

    Returns:
        None. The model is updated in place: after the last epoch the weights
        with the highest validation accuracy are loaded back into it.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(1, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs ))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # training mode
                loader = train_loader
            else:
                model.eval()   # evaluation mode
                loader = val_loader

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criteria(outputs, labels)

                    # backward + optimize only in the training phase
                    if is_training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # The loss is a batch mean, so weight it by the batch size; the
                # hit count is kept as a plain Python int.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            # Size of the dataset that was actually iterated in this phase.
            dataset_size = len(loader.dataset)
            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects / dataset_size * 100
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves.
            if not is_training and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (precision), not '{:4f}' (field width), to match the per-epoch output.
    print('Best validation acc: {:.4f}'.format(best_acc))

    # Load the best weights back into the model; nothing is returned.
    model.load_state_dict(best_model_wts)

In [None]:
# Mini-batches of 100 samples; only the training loader is created with shuffling on.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=100,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_data,
    batch_size=100,
    shuffle=False,
)

#**Training the models**

In [None]:
num_epochs = 10
# Pick exactly one backbone; the alternatives are left commented out.
model = resnet().to(device)
# model = vgg().to(device)
# model = alexnet().to(device)
# model = squeezenet().to(device)
# model = densenet().to(device)
criteria = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.96,0.999))
# Train on the same device the model was moved to. A hard-coded 'cuda' here
# would crash on a CPU-only runtime, where `device` falls back to 'cpu'.
train_model(model, criteria, optimizer, num_epochs, device)