# Mount G-Drive:


In [0]:
# Colab-only step: mount the user's Google Drive at /content/gdrive.
# The call is interactive - it prints an authorisation URL and waits for the
# code to be entered before reporting "Mounted at /content/gdrive".
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


# Config:

In [0]:
import os

# Root folder of the project data on the mounted Google Drive.
# The trailing slash is significant: every sub-path below (and the loader
# helper) is produced by plain string concatenation onto this value.
DATA_PATH = '/content/gdrive/My Drive/aptos2019/'

# Sub-folders of DATA_PATH, each written with its own trailing slash.
MODEL_PATH = f"{DATA_PATH}models/"                  # saved checkpoints
TRAIN_PATH = f"{DATA_PATH}train/"                   # training images
VALIDATION_PATH = f"{DATA_PATH}validation/"         # validation images
EVEN_TRAIN_PATH = f"{DATA_PATH}train_even/"         # presumably a class-balanced training set - TODO confirm
ADD_EVEN_TRAIN_PATH = f"{DATA_PATH}add_train_even/" # presumably an extended class-balanced set - TODO confirm



# Load Data

In [0]:
import os
import torch
import torchvision
import torchvision.transforms as transforms




# Per-channel normalisation applied as the last step of both pipelines.
# These are the mean/std values torchvision documents for its
# ImageNet-pretrained models.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: fixed-size resize, then random flips and a random
# rotation (angle drawn from [-360, 360] degrees) as augmentation, then
# tensor conversion and normalisation.
_train_steps = [
    transforms.Resize((350, 350)),
    transforms.RandomVerticalFlip(0.5),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(360),
    transforms.ToTensor(),
    normalize,
]
train_transforms = transforms.Compose(_train_steps)

# Validation pipeline: the same resize and normalisation, no augmentation.
_validation_steps = [
    transforms.Resize((350, 350)),
    transforms.ToTensor(),
    normalize,
]
validation_transforms = transforms.Compose(_validation_steps)


def create_loader(folder, transforms, batch_size = 4, shuffle = True, num_workers = 1):
    """
    Build a DataLoader over an ImageFolder dataset stored under DATA_PATH.

    Args:
        folder: sub-folder name appended directly to DATA_PATH (plain string
            concatenation, so DATA_PATH must end with a path separator).
        transforms: transform pipeline handed to ImageFolder. Note that inside
            this function the name shadows the `torchvision.transforms` module.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles the samples on every pass.
            Defaults to True (the previous hard-coded behaviour); pass False
            for evaluation data where ordering should be stable.
        num_workers: number of worker processes used for loading.
            Defaults to 1 (the previous hard-coded value).

    Returns:
        torch.utils.data.DataLoader yielding (image_batch, label_batch) pairs,
        where labels are the class indices ImageFolder derives from the
        sub-directory names.
    """
    dataset = torchvision.datasets.ImageFolder(
        root = DATA_PATH + folder,
        transform = transforms)

    return torch.utils.data.DataLoader(
        dataset,
        batch_size = batch_size,
        shuffle = shuffle,
        num_workers = num_workers)



# test:

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F


# All tensors and models in this notebook are placed on the CUDA device.
device = torch.device("cuda")

# Loader over the validation folder using the augmentation-free pipeline
# and create_loader's default batch size.
test_loader = create_loader(
    folder = 'validation/',
    transforms = validation_transforms,
)

def test(test_loader, model, device = device):
    """
    Evaluate `model` on every batch of `test_loader` and print a summary.

    The model is switched to eval mode and is left in eval mode when the
    function returns; gradients are not tracked during evaluation.

    Args:
        test_loader: iterable of (data, target) batches that also exposes a
            sized `.dataset` attribute (used as the sample count).
        model: network mapping a data batch to per-class scores. Scores may be
            raw logits or log-probabilities (see the loss note below).
        device: device the batches are moved to. Defaults to the module-level
            `device` as it was when this function was defined.

    Returns:
        Tuple (average_loss, correct, dataset_size, accuracy_percent).
    """
    model.eval()
    summed_loss = 0.0
    correct = 0
    dataset_size = len(test_loader.dataset)

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            # cross_entropy applies log_softmax before the NLL loss, so it is
            # correct for raw logits (what a model trained with
            # nn.CrossEntropyLoss emits). Because log_softmax is idempotent it
            # yields the same value as nll_loss for log-probability outputs.
            summed_loss += F.cross_entropy(output, target, reduction = 'sum').item()

            # Predicted class = index of the highest score per sample.
            pred = output.argmax(dim = 1)
            correct += (pred == target).sum().item()

    test_loss = summed_loss / dataset_size
    accuracy_percent = 100 * correct/dataset_size
    print('\nTest set: Average loss:  {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, dataset_size, accuracy_percent))
    return test_loss, correct, dataset_size, accuracy_percent


# Utils

In [0]:
import sys

def progress(count, total, epoch, suffix = ''):
    """
    Writes a 40-character progress bar to console.

    The line ends with a carriage return (no newline) so that the next call
    overwrites it in place.

    Args:
        count: number of items processed so far. Values outside [0, total]
            are clamped so the bar never under- or overflows.
        total: total number of items. A non-positive total is rendered as an
            empty bar at 0.0% instead of raising ZeroDivisionError.
        epoch: label printed in front of the bar.
        suffix: optional text printed after the percentage.
    """
    bar_len = 40

    if total > 0:
        count = min(max(count, 0), total)
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        # Nothing to measure against: show an empty bar.
        filled_len = 0
        percents = 0.0

    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('epoch %s: [%s] %s%s ...%s\r' % (epoch, bar, percents, '%', suffix))
    # Flush so the bar shows up immediately even though no newline is written.
    sys.stdout.flush()

# train:

In [0]:
import torch
import torch.optim as optim


from torch import nn
import torchvision
import copy
import numpy as np
# import visualize


# Same CUDA device object as defined in the test cell; repeated here so the
# training cell also works when run on its own. CUDA is hard-coded, so a
# GPU runtime is required.
device = torch.device("cuda")

def train(model = None, training_data_path = "train/", criterion = None, training_epochs = 4, batch_size = 32, learning_rate = 0.001, weight_decay = 0.03):
    """
    Train `model` for a fixed number of epochs, validating after each epoch.

    For every batch the function runs a forward pass, computes the loss with
    `criterion`, backpropagates and applies one Adam step. After each epoch
    the model is evaluated on the "validation/" folder via `test()`.

    Args:
        model: network to optimise in place (required).
        training_data_path: sub-folder of DATA_PATH holding the training
            images, passed to `create_loader`.
        criterion: callable `criterion(outputs, labels)` returning the loss
            tensor (required).
        training_epochs: number of passes over the training data.
        batch_size: batch size for both the training and validation loaders.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay (defaults to the previously
            hard-coded 0.03).

    Returns:
        Tuple (model, model_name): the trained model (same object that was
        passed in) and the checkpoint file name 'epoch{N}.pt' of the last
        epoch, N being zero-based. `model_name` is None when
        `training_epochs` is 0.

    Raises:
        ValueError: if `model` or `criterion` is not supplied.
    """
    if model is None or criterion is None:
        raise ValueError("train() requires both a model and a criterion")

    optimizer = optim.Adam(model.parameters(), lr = learning_rate, weight_decay = weight_decay)

    # History of (x, value) points collected for plotting; they are filled in
    # below but are not returned at present.
    graph_loss = []
    graph_accuracy = [(0,0)]
    graph_validation_loss = []

    # Both loaders are loop-invariant, so build them once instead of
    # re-scanning the image folders every epoch. The training loader still
    # reshuffles on each pass.
    validation_loader = create_loader("validation/", validation_transforms, batch_size = batch_size)
    training_loader = create_loader(training_data_path, train_transforms, batch_size = batch_size)
    number_of_files = len(training_loader.dataset)
    batches_per_epoch = number_of_files/batch_size  # float; used for graph x-positions and logging

    model_name = None
    for epoch in range(training_epochs):
        print('epoch: ', epoch + 1)
        print('')

        # test() leaves the model in eval mode, so training mode has to be
        # re-enabled at the start of every epoch; otherwise batch-norm layers
        # keep using frozen running statistics while the weights are updated.
        model.train()
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(training_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # clear gradients left over from the previous step
            optimizer.zero_grad()

            # forward pass, loss, backward pass, parameter update
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # every 10th batch: record and print the mean loss of the last 10 batches
            if i % 10 == 9:
                average_loss = running_loss/10
                graph_loss.append((epoch + i/batches_per_epoch, average_loss))
                running_loss = 0.0
                print(i, batches_per_epoch, 'loss:', average_loss)

        model_name = 'epoch{}.pt'.format(epoch)

        # Validate the result of the epoch
        test_loss, correct, dataset_size, accuracy_percent = test(validation_loader, model)
        graph_accuracy.append((epoch + 1, accuracy_percent/100))
        graph_validation_loss.append((epoch + 1, test_loss))

    return model, model_name

# Main:

In [0]:

from torch import nn   
from datetime import datetime
import torch
import argparse
import torchvision.models as models







# init model
# NOTE: despite the variable name this is a ResNeXt-101 (32x8d), not a
# DenseNet; the name is kept because it is a notebook-level global.
# NOTE(review): the pretrained classifier head keeps its original output
# size - consider replacing `Dense_NET.fc` with a layer sized to the
# dataset's class count.
Dense_NET = models.resnext101_32x8d(pretrained=True, progress=False)
Dense_NET.cuda()


# start timer
start = datetime.now()
print(start)

EPOCHS_STAGE_1 = 1
EPOCHS_STAGE_2 = 1
LR_STAGE_1 = 0.001
LR_STAGE_2 = 0.001
BATCH_SIZE = 16

# Make sure the checkpoint folder exists *before* training, so a missing
# directory cannot make torch.save fail after the training time was spent.
import os
os.makedirs(MODEL_PATH, exist_ok=True)


# Stage 1: train on the long-tailed training set.
# train() returns (model, checkpoint_name).
trained_on_long_tailed_dataset = train(
    model = Dense_NET,
    training_data_path = "train/",
    training_epochs = EPOCHS_STAGE_1,
    criterion = nn.CrossEntropyLoss(),
    learning_rate = LR_STAGE_1,
    batch_size = BATCH_SIZE)
stage1_model, stage1_checkpoint_name = trained_on_long_tailed_dataset

# save the model's state_dict after the first stage
torch.save(stage1_model.state_dict(), MODEL_PATH + "long_tailed_" + stage1_checkpoint_name)


# Stage 2 continues from the in-memory stage-1 weights (already checkpointed
# above); `model` and `Dense_NET` refer to the same object.
model = stage1_model

# now, do finetuning on the evenly distributed training set
finetuned_model = train(
    model = model,
    training_data_path = "add_train_even/",
    training_epochs = EPOCHS_STAGE_2,
    criterion = nn.CrossEntropyLoss(),
    learning_rate = LR_STAGE_2,
    batch_size = BATCH_SIZE)
stage2_model, stage2_checkpoint_name = finetuned_model

torch.save(stage2_model.state_dict(), MODEL_PATH + "finetuned_" + stage2_checkpoint_name)


print("\nOverall training and testing time: " + str(datetime.now() - start))
