 # Developer: Vajira Thambawita<br>
 # Last modified date: 18/07/2018<br>
 # ##################################

 # Description ##################<br>
 # PyTorch DenseNet-161 training (the model built in prepare_model)

#########################################

In [None]:
from __future__ import print_function, division

In [None]:
import datetime

#start = datetime.datetime.now()

In [None]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms, utils
import pickle
#from pandas_ml import ConfusionMatrix
import matplotlib as mpl
import matplotlib.pyplot as plt
import time
import os
import copy
import sys
import yaml
import pandas as pd
import numpy as np

In [None]:
import sklearn.metrics as mtc
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import itertools
from multiprocessing import Process, freeze_support
from torch.utils.tensorboard import SummaryWriter

In [None]:
from tqdm import tqdm
from torchsummary import summary
from torch.autograd import Variable

In [None]:
from dataset.Dataloader_with_path import ImageFolderWithPaths as dataset

=====================================<br>
Get and set all input parameters<br>
=====================================

In [None]:
# Command-line parser; every option below is registered on it and the result is parsed into ``opt``.
parser = argparse.ArgumentParser()

Hardware

In [None]:
# Hardware selection.
parser.add_argument("--device", default="gpu", help="Device to run the code")
parser.add_argument("--device_id", type=int, default=0, help="Index of the CUDA device to use")

# Absolute path of this script; its base name is used to name the output sub-folders.
parser.add_argument("--py_file", default=os.path.abspath(__file__),
                    help="Path of the current python file (used to name output folders)")

Directories

In [None]:
# Locations of the two data splits.
parser.add_argument(
    "--data_train_folder",
    help="Train data folder",
    default="/work/vajira/DATA/kvasir_capsule/data/new_splits/split_0",
)
parser.add_argument(
    "--data_val_folder",
    help="Validation data folder",
    default="/work/vajira/DATA/kvasir_capsule/data/new_splits/split_1",
)

# Locations that receive checkpoints/reports and tensorboard event files.
parser.add_argument(
    "--out_dir",
    help="Main output dierectory",
    default="/work/vajira/DATA/kvasir_capsule/output",
)
parser.add_argument(
    "--tensorboard_dir",
    help="Folder to save output of tensorboard",
    default="/work/vajira/DATA/kvasir_capsule/tensorboard",
)

Hyper parameters

In [None]:
# Optimisation hyper-parameters.
parser.add_argument("--bs", type=int, default=32, help="Mini batch size")
parser.add_argument("--lr", type=float, default=0.001, help="Learning rate for training")
parser.add_argument("--num_workers", type=int, default=16, help="Number of workers in dataloader")
parser.add_argument("--weight_decay", type=float, default=1e-5, help="weight decay of the optimizer")
parser.add_argument("--momentum", type=float, default=0.9, help="Momentum of SGD function")
# Settings of the ReduceLROnPlateau scheduler.
parser.add_argument("--lr_sch_factor", type=float, default=0.1, help="Factor to reduce lr in the scheduler")
parser.add_argument("--lr_sch_patience", type=int, default=10, help="Num of epochs to be patience for updating lr")
# Training stops as soon as the current learning rate is <= this value
# (the help text used to be a copy of the --lr_sch_patience one).
parser.add_argument("--lr_to_stop", type=float, default=0.00001,
                    help="Stop training once the learning rate has dropped to this value or below")

Action handling 

In [None]:
parser.add_argument("--num_epochs", type=int, default=2000, help="Numbe of epochs to train")
# parser.add_argument("--start_epoch", type=int, default=0, help="Start epoch in retraining")
# Positional argument selecting what the script does (dispatched in the __main__ section).
parser.add_argument("action", type=str, help="Select an action to run", choices=["train", "retrain", "test", "check", "prepare"])
parser.add_argument("--checkpoint_interval", type=int, default=25, help="Interval to save checkpoint models")
#parser.add_argument("--val_fold", type=str, default="0", help="Select the validation fold", choices=["fold_1", "fold_2", "fold_3"])
#parser.add_argument("--all_folds", default=["0", "1"], help="list of all folds available in data folder")
parser.add_argument("--test_checkpoint", help="Checkpoint to test or generate results")
# nargs="+" with type=float makes a command-line value arrive as a list of
# floats, like the default; without them it would be one raw string, which
# cannot be turned into the class-weight tensor.
parser.add_argument("--weights", type=float, nargs="+",
                    default=[0.0285, 1.0000, 0.1068, 0.1667, 0.0373, 0.0196, 0.0982, 0.0014, 0.0235, 0.0236, 0.0809],
                    help="Weights for class")
# Parsed at module level: the rest of the file reads its settings from ``opt``.
opt = parser.parse_args()

=========================================<br>
Device handling<br>
=========================================

In [None]:
# Use CUDA only when it is both requested (--device is anything but "cpu") and
# actually available. torch.cuda.set_device must not be called on a machine
# without a usable GPU, so it is guarded instead of being run unconditionally.
use_cuda = opt.device != "cpu" and torch.cuda.is_available()
if use_cuda:
    torch.cuda.set_device(opt.device_id)
device = torch.device("cuda" if use_cuda else "cpu")

==========================================<br>
Folder handling<br>
==========================================

Make the output folder if it does not exist

In [None]:
# Create the main output directory; exist_ok=True makes this a no-op when it is already there.
os.makedirs(opt.out_dir, exist_ok=True)

make subfolder in the output folder 

In [None]:
# Base name of this script (source file); it namespaces every output of the experiment.
# os.path.basename / os.path.join are used instead of hand-written "/" handling
# so the code does not depend on the platform's path separator.
py_file_name = os.path.basename(opt.py_file)
checkpoint_dir = os.path.join(opt.out_dir, py_file_name, "checkpoints")
os.makedirs(checkpoint_dir, exist_ok=True)

make tensorboard subdirectory for the experiment

In [None]:
# Per-experiment tensorboard folder, named after this script's file name.
tensorboard_exp_dir = os.path.join(opt.tensorboard_dir, py_file_name)
os.makedirs( tensorboard_exp_dir, exist_ok=True)

=========================================<br>
Tensorboard<br>
=========================================<br>
Initialize summary writer

In [None]:
# Module-wide tensorboard writer; the training, plotting and model-check code log through it.
writer = SummaryWriter(tensorboard_exp_dir)

#########################################################<br>
#########################################################<br>
#########################################

=========================================<br>
Prepare Data<br>
=========================================

In [None]:
def prepare_data():
    """Build the training and validation dataloaders.

    Both splits are resized/cropped to 224x224 and normalised with mean and
    std 0.5 per channel; only the training split additionally gets random
    flips and a random rotation. Folders, batch size and worker count are
    read from ``opt``.

    Returns:
        dict with keys ``"train"`` and ``"val"`` (the dataloaders) and
        ``"dataset_size"`` (a dict holding the number of samples of each
        split under the same two keys).
    """
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'validation': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }

    # Train dataset
    dataset_train = dataset(opt.data_train_folder, data_transforms["train"])

    # Validation dataset
    dataset_val = dataset(opt.data_val_folder, data_transforms["validation"])

    # Only the training loader shuffles; validation keeps the dataset order.
    dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=opt.bs,
                                                   shuffle=True, num_workers=opt.num_workers)
    dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=opt.bs,
                                                 shuffle=False, num_workers=opt.num_workers)

    train_size = len(dataset_train)
    val_size = len(dataset_val)
    print("train dataset size =", train_size)
    print("validation dataset size=", val_size)
    print("dataset train class order= ", dataset_train.class_to_idx)
    # Report the mapping of the validation set itself (this line used to print
    # the training mapping a second time, hiding any mismatch between splits).
    print("dataset val class order= ", dataset_val.class_to_idx)

    return {
        "train": dataloader_train,
        "val": dataloader_val,
        "dataset_size": {"train": train_size, "val": val_size},
    }

#######################################################################<br>
 Printing images just for testing<br>
#######################################################################


<br>
def imshow(img):<br>
    img = img / 2 + 0.5     # unnormalize<br>
    npimg = img.numpy()<br>
    plt.imshow(np.transpose(npimg, (1, 2, 0)))<br>
dataiter = iter(dataloaders['train'])<br>
sample_images, sample_labels = dataiter.next()<br>
npimg = sample_images[0].numpy()<br>
npimg = np.transpose(npimg,(1,2,0))<br>
plt.imshow(npimg[:,:, 0])<br>
plt.show()<br>
print(npimg[:, :, 0])<br>
#imshow(utils.make_grid(sample_images))<br>
input()<br>
exit()<br>


=========================================================<br>
Train model<br>
==========================================================

In [None]:
def train_model(model, optimizer, criterion, dataloaders: dict, scheduler, best_acc=0.0, start_epoch=0):
    """Run the train/validation loop and save the best model.

    Every epoch runs a training pass followed by a validation pass. Loss and
    accuracy of both are written to tensorboard. After validation the best
    weights (highest validation accuracy) are remembered, the scheduler is
    stepped with the validation loss, and training stops early once the
    learning rate read before that step is <= ``opt.lr_to_stop``. The best
    weights are handed to ``save_model`` on early stop and after the last
    epoch.

    Args:
        model: network to train (already on ``device``).
        optimizer: optimizer updating ``model``.
        criterion: loss called as ``criterion(outputs, labels)``.
        dataloaders: dict as returned by ``prepare_data`` (keys ``"train"``,
            ``"val"`` and ``"dataset_size"``).
        scheduler: scheduler stepped with the validation loss.
        best_acc: validation accuracy that has to be beaten before a model
            counts as "better".
        start_epoch: index of the first epoch; ``opt.num_epochs`` epochs are run.
    """
    # A checkpoint may hand the accuracy over as a 0-dim tensor.
    best_acc = float(best_acc)
    best_model_wts = copy.deepcopy(model.state_dict())
    # Defined up front: if no epoch ever beats best_acc these names would
    # otherwise be unbound when save_model is called.
    best_epoch = start_epoch
    best_epoch_loss = float("inf")
    best_epoch_acc = best_acc

    for epoch in range(start_epoch, start_epoch + opt.num_epochs):
        for phase in ("train", "val"):
            is_train = phase == "train"
            if is_train:
                model.train()
            else:
                model.eval()
            dataloader = dataloaders[phase]

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels, _paths in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward; gradients are tracked only in the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()
                # statistics (loss is a batch mean, so weight it by the batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            epoch_loss = running_loss / dataloaders["dataset_size"][phase]
            # Plain Python float, so no device tensor ends up in the checkpoint.
            epoch_acc = running_corrects / dataloaders["dataset_size"][phase]

            # update tensorboard writer
            writer.add_scalars("Loss", {phase: epoch_loss}, epoch)
            writer.add_scalars("Accuracy", {phase: epoch_acc}, epoch)

            if phase == "val":
                # keep best model weights
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    best_epoch = epoch
                    best_epoch_loss = epoch_loss
                    best_epoch_acc = epoch_acc
                    print("Found a better model")
                # Learning rate used during this epoch (read before the scheduler may lower it)
                lr = optimizer.param_groups[0]['lr']
                writer.add_scalar("LR", lr, epoch)
                # update the lr based on the validation loss
                scheduler.step(epoch_loss)
                # Early stop if lr is too small
                if lr <= opt.lr_to_stop:
                    print("LR reached to :", lr)
                    save_model(best_model_wts, best_epoch, best_epoch_loss, best_epoch_acc)
                    print("Best model saved")
                    print("Model exits")
                    return

            # Print output
            print('Epoch:\t  %d |Phase: \t %s | Loss:\t\t %.4f | Acc:\t %.4f '
                  % (epoch, phase, epoch_loss, epoch_acc))

    save_model(best_model_wts, best_epoch, best_epoch_loss, best_epoch_acc)
            
#===============================================
# Prepare models
#===============================================

In [None]:
def prepare_model(num_classes=11, pretrained=True):
    """Create a DenseNet-161 whose classifier outputs ``num_classes`` scores.

    Args:
        num_classes: size of the replacement classification layer
            (default 11, the value that used to be hard-coded).
        pretrained: forwarded to ``models.densenet161``; the default ``True``
            matches the previous behaviour.

    Returns:
        The model, moved to the module-level ``device``.
    """
    model = models.densenet161(pretrained=pretrained)
    # Swap the original classification head for one with the requested width.
    model.classifier = nn.Linear(model.classifier.in_features, num_classes)
    return model.to(device)

===================================<br>
Run training process<br>
===================================

In [None]:
def run_train(retrain=False):
    """Set up model, data, optimizer, loss and scheduler, then start training.

    Args:
        retrain: when true, the user is asked for a checkpoint path; the
            model weights, the epoch and the accuracy stored in that
            checkpoint are used as the starting point of the training loop.
    """
    model = prepare_model()

    dataloaders = prepare_data()

    # SGD with the momentum given on the command line (its default, 0.9,
    # equals the value that used to be hard-coded here).
    optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=opt.momentum)

    # Class-weighted cross entropy; the weights come from --weights.
    weight_tensor = torch.FloatTensor(opt.weights).to(device)
    criterion = nn.CrossEntropyLoss(weight=weight_tensor)

    # LR scheduler: lowers the lr when the monitored (validation) loss stops decreasing
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=opt.lr_sch_factor,
                                               patience=opt.lr_sch_patience, verbose=True)

    best_acc = 0.0
    start_epoch = 0
    if retrain:
        # train from a checkpoint
        checkpoint_path = input("Please enter the checkpoint path:")
        # map_location lets a checkpoint written on another device be restored on the active one.
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint["model_state_dict"])
        start_epoch = checkpoint["epoch"]
        best_acc = checkpoint["acc"]

    # call main train loop
    train_model(model, optimizer, criterion, dataloaders, scheduler,
                best_acc=best_acc, start_epoch=start_epoch)

====================================<br>
Save models<br>
====================================

In [None]:
def save_model(model_weights,  best_epoch,  best_epoch_loss, best_epoch_acc):
    """Write a checkpoint into ``checkpoint_dir``.

    The file name is this script's name followed by the epoch and accuracy;
    the file stores the epoch, the given state dict, the loss and the
    accuracy under the keys ``epoch``, ``model_state_dict``, ``loss`` and
    ``acc``.
    """
    name_suffix = "_epoch:{}_acc:{}.pt".format(best_epoch, best_epoch_acc)
    target_path = os.path.join(checkpoint_dir, py_file_name + name_suffix)

    payload = dict(
        epoch=best_epoch,
        model_state_dict=model_weights,
        loss=best_epoch_loss,
        acc=best_epoch_acc,
    )
    # save torch model
    torch.save(payload, target_path)

====================================<br>
Check model<br>
====================================

In [None]:
def check_model_graph():
    """Print a summary of the model and add its graph to tensorboard."""
    model = prepare_model()
    # torchsummary builds its dummy input on "cuda" unless told otherwise, so
    # pass the device type that is really in use (keeps this working on CPU).
    summary(model, (3, 224, 224), device=device.type)

    # add_graph below needs the model on the CPU.
    model = model.to('cpu')
    print(model)

    # Random batch used only to trace the graph; a plain tensor is enough,
    # the Variable wrapper is no longer required.
    dummy_input = torch.rand(13, 3, 224, 224)
    writer.add_graph(model, dummy_input)

==============================================<br>
 Model testing method<br>
==============================================

In [None]:
def test_model():
    """Evaluate the checkpoint given by ``--test_checkpoint`` on the validation split.

    Prints accuracy, the confusion matrix, a classification report and the
    weighted precision/recall/F1 and MCC; logs the confusion-matrix figure
    to tensorboard and writes macro/micro averaged results to
    ``<out_dir>/<script name>_evaluation.csv``.
    """
    # map_location lets a checkpoint written on another device be restored on the active one.
    checkpoint = torch.load(opt.test_checkpoint, map_location=device)
    model = prepare_model()
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    test_dataloader = prepare_data()["val"]

    # To collect data
    correct = 0
    total = 0
    all_labels_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_d = torch.tensor([], dtype=torch.long).to(device)

    with torch.no_grad():
        for inputs, labels, _paths in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            probabilities = F.softmax(model(inputs), 1)
            _, predicted = torch.max(probabilities, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_labels_d = torch.cat((all_labels_d, labels), 0)
            all_predictions_d = torch.cat((all_predictions_d, predicted), 0)

    print('copying some data back to cpu for generating confusion matrix...')
    y_true = all_labels_d.cpu().numpy()
    y_predicted = all_predictions_d.cpu().numpy()

    cm = confusion_matrix(y_true, y_predicted)  # confusion matrix

    accuracy_percent = 100.0 * correct / total if total else 0.0
    print('Accuracy of the network on the %d test images: %f %%' % (total, accuracy_percent))
    print(cm)
    print("taking class names to plot CM")
    class_names = test_dataloader.dataset.classes  # class names for plotting the confusion matrix
    print("Generating confution matrix")
    plot_confusion_matrix(cm, classes=class_names, title='my confusion matrix')

    ##################################################################
    # classification report
    #################################################################
    print(classification_report(y_true, y_predicted, target_names=class_names))

    # Each metric is computed once and reused by both printed summaries.
    weighted_precision = mtc.precision_score(y_true, y_predicted, average="weighted")
    weighted_recall = mtc.recall_score(y_true, y_predicted, average="weighted")
    weighted_f1 = mtc.f1_score(y_true, y_predicted, average="weighted")
    mcc = mtc.matthews_corrcoef(y_true, y_predicted)

    ##################################################################
    # Standard metrics for medico Task
    #################################################################
    print("Printing standard metric for medico task")
    print("Accuracy =", mtc.accuracy_score(y_true, y_predicted))
    print("Precision score =", weighted_precision)
    print("Recall score =", weighted_recall)
    print("F1 score =", weighted_f1)
    print("Specificity =")  # no specificity value is computed in this function
    print("MCC =", mcc)

    ##################################################################
    # Standard metrics for medico Task (numbered listing)
    #################################################################
    print("Printing standard metric for medico task")
    print("1. Recall score (REC) =", weighted_recall)
    print("2. Precision score (PREC) =", weighted_precision)
    print("3. Specificity (SPEC) =")
    print("5. Matthews correlation coefficient(MCC) =", mcc)
    print("6. F1 score (F1) =", weighted_f1)

    print('Finished.. ')

    #====================================================================
    # Writing to a file
    #=====================================================================

    # Keep every confusion-matrix row on a single line in the report.
    np.set_printoptions(linewidth=np.inf)
    report_path = os.path.join(opt.out_dir, "%s_evaluation.csv" % py_file_name)
    # The with-statement closes the file; no explicit close() is needed.
    with open(report_path, "w") as f:
        f.write(np.array2string(cm, separator=", "))
        f.write("\n\n\n\n")
        f.write("--- Macro Averaged Resutls ---\n")
        f.write("Precision: %s\n" % mtc.precision_score(y_true, y_predicted, average="macro"))
        f.write("Recall: %s\n" % mtc.recall_score(y_true, y_predicted, average="macro"))
        f.write("F1-Score: %s\n\n" % mtc.f1_score(y_true, y_predicted, average="macro"))

        f.write("--- Micro Averaged Resutls ---\n")
        f.write("Precision: %s\n" % mtc.precision_score(y_true, y_predicted, average="micro"))
        f.write("Recall: %s\n" % mtc.recall_score(y_true, y_predicted, average="micro"))
        f.write("F1-Score: %s\n\n" % mtc.f1_score(y_true, y_predicted, average="micro"))
        f.write("--- Other Resutls ---\n")
        f.write("MCC: %s\n" % mcc)
    print("Report generated")

    #==========================================================================

=============================================<br>
Prepare submission file with probabilities<br>
==============================================

In [None]:
def prepare_prediction_file():
    """Write per-image class probabilities of the validation split to a CSV file.

    Requires ``--bs 1``. For every image one row is produced holding the file
    name, the predicted and the actual class name, and the softmax
    probability of every class rounded to three decimals. The table is
    written to ``<out_dir>/<script name>_probabilities.csv``.
    """
    if opt.bs != 1:
        print("Please run with bs = 1")
        # sys.exit instead of the interactive-only exit(); non-zero status marks the failure.
        sys.exit(1)

    # map_location lets a checkpoint written on another device be restored on the active one.
    checkpoint = torch.load(opt.test_checkpoint, map_location=device)
    model = prepare_model()
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    test_dataloader = prepare_data()["val"]
    class_names = test_dataloader.dataset.classes

    columns = ["filename", "predicted-label", "actual-label"] + class_names
    print(pd.DataFrame(columns=columns).head())

    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    rows = []
    with torch.no_grad():
        for inputs, labels, paths in tqdm(test_dataloader):
            inputs = inputs.to(device)

            outputs = F.softmax(model(inputs), 1)
            _, predicted = torch.max(outputs, 1)

            # Batch size is 1, so squeeze() leaves one probability per class.
            probabilities = np.around(outputs.cpu().squeeze().tolist(), decimals=3)

            row = {
                "filename": os.path.basename(paths[0]),
                "predicted-label": class_names[predicted.item()],
                "actual-label": class_names[labels.item()],
            }
            row.update(zip(class_names, probabilities))
            rows.append(row)

    df = pd.DataFrame(rows, columns=columns)
    print(df.head())
    print("length of DF:", len(df))
    prob_file_name = os.path.join(opt.out_dir, "%s_probabilities.csv" % py_file_name)
    df.to_csv(prob_file_name, index=False)

########################################################<br>
Prepare submission file:<br>
########################################################

In [None]:
def prepare_submission_file(image_names, predicted_labels, max_probability, time_per_image, submit_dir, data_classes):
    """Write the submission table to ``<submit_dir>/method_3_test_output``.

    Args:
        image_names: one image name per sample.
        predicted_labels: one class index per sample; each is looked up in
            ``data_classes``.
        max_probability: one probability per sample.
        time_per_image: one time value per sample.
        submit_dir: directory the file is written to.
        data_classes: sequence mapping a class index to its name.

    The four sequences become the columns ``images``, ``labels``, ``PROB``
    and ``time``; the table is also printed.
    """
    # Built in one pass (repeated list concatenation copied the list on every step).
    predicted_label_names = [data_classes[i] for i in predicted_labels]

    submission_dataframe = pd.DataFrame(
        np.column_stack([image_names,
                         predicted_label_names,
                         max_probability,
                         time_per_image]),
        columns=['images', 'labels', 'PROB', 'time'])
    submission_dataframe.to_csv(os.path.join(submit_dir, "method_3_test_output"), index=False)
    print(submission_dataframe)
    print("successfully created submission file")
###########################################################

#########################################################<br>
 Plotting history and saving plots to the plots directory<br>
#########################################################

##########################################################<br>
Plot confusion matrix - method<br>
##########################################################

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          plt_size=(10, 10)):
    """Print the confusion matrix, draw it and log the figure to tensorboard.

    Args:
        cm: confusion matrix as a 2-D array (rows are labelled "True label",
            columns "Predicted label").
        classes: class names used as tick labels on both axes.
        normalize: when true, every row is divided by its sum before
            printing and plotting.
        title: title of the plot.
        cmap: matplotlib colormap used for the cells.
        plt_size: figure size stored in ``plt.rcParams['figure.figsize']``.
            The default is a tuple so that no mutable object is shared
            between calls.
    """
    plt.rcParams['figure.figsize'] = plt_size
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Write every cell value into the plot; cells above half of the maximum
    # get white text, the others black text.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Hand the current figure to the tensorboard writer.
    figure = plt.gcf()
    writer.add_figure("Confusion Matrix", figure)
    print("Finished confusion matrix drawing...")

In [None]:
if __name__ == '__main__':
    # Show the effective configuration. The dataloaders are not built here:
    # every action that needs data calls prepare_data() itself, so an extra
    # call would only scan the data folders a second time.
    print(vars(opt))

    try:
        # Train or retrain or inference
        if opt.action == "train":
            print("Training process is strted..!")
            run_train()
        elif opt.action == "retrain":
            print("Retrainning process is strted..!")
            run_train(retrain=True)
        elif opt.action == "test":
            print("Inference process is strted..!")
            test_model()
        elif opt.action == "check":
            check_model_graph()
            print("Check pass")
        elif opt.action == "prepare":
            prepare_prediction_file()
            print("Probability file prepared..!")
    finally:
        # Finish tensorboard writer, also when the selected action fails
        writer.close()