In [7]:
import os
import time
import torch
import torchvision
import logging
import warnings
import datetime
import itertools
import numpy as np
import pandas as pd
from glob import glob
from PIL import Image
import torch.nn as nn
from tqdm import tqdm
from random import seed
import torch.optim as optim
from torch.utils import data
from torchvision import models
import pretrainedmodels as ptm
import sklearn.metrics as skmet
import matplotlib.pyplot as plt
import torch.nn.functional as nnF
import pathlib
from imgaug import augmenters as iaa
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from sklearn.model_selection import StratifiedKFold
from torch.utils.tensorboard import SummaryWriter

In [8]:
# But first we need to tell PyTorch where to 'keep' the model 
# On GPU or on CPU 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
print('The model will run on', device)

The model will run on cpu


In [155]:
#Transforms
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=50),
    transforms.RandomRotation(degrees=90),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], 
                         [0.229, 0.224, 0.225])])

transformer=transforms.Compose([
    transforms.Resize((50,50)),
    transforms.ToTensor(), 
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [156]:
#Dataloader

#Path for training and testing directory
train_path='E:/N-Dataset/BINARY/Final/train'
valid_path = 'E:/N-Dataset/BINARY/Final/validation'
test_path='E:/N-Dataset/BINARY/Final/test'

train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=train_transform),
    batch_size=32, shuffle=True
)
valid_loader = DataLoader(
    torchvision.datasets.ImageFolder(valid_path,transform=transformer),
    batch_size=32, shuffle=True
)

test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=32, shuffle=True
)

In [157]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [158]:
print(classes)

['Abnormal', 'Healthy']


In [159]:
# Load pretrained ResNet50 Model
model = models.mobilenet_v2(pretrained=True)

In [160]:
#Optmizer and loss function
optimizer=optim.Adam(model.parameters(),lr=0.001)
loss_function=nn.CrossEntropyLoss()

In [161]:
# Freeze model parameter
for param in model.parameters():
    param.requires_grad = False

In [162]:
model=model.to(device)

In [163]:
num_ftrs=model.classifier[-1].in_features
model.classifier=nn.Sequential(
    nn.Dropout(p=0.2, inplace=False),
    nn.Linear(in_features=num_ftrs, out_features=2, bias=True))   

In [164]:
for num, (name, param) in enumerate(model.named_parameters()):
    print(num, name, param.requires_grad )
summary(model, input_size=(3, 224, 224))
print(model)

0 features.0.0.weight False
1 features.0.1.weight False
2 features.0.1.bias False
3 features.1.conv.0.0.weight False
4 features.1.conv.0.1.weight False
5 features.1.conv.0.1.bias False
6 features.1.conv.1.weight False
7 features.1.conv.2.weight False
8 features.1.conv.2.bias False
9 features.2.conv.0.0.weight False
10 features.2.conv.0.1.weight False
11 features.2.conv.0.1.bias False
12 features.2.conv.1.0.weight False
13 features.2.conv.1.1.weight False
14 features.2.conv.1.1.bias False
15 features.2.conv.2.weight False
16 features.2.conv.3.weight False
17 features.2.conv.3.bias False
18 features.3.conv.0.0.weight False
19 features.3.conv.0.1.weight False
20 features.3.conv.0.1.bias False
21 features.3.conv.1.0.weight False
22 features.3.conv.1.1.weight False
23 features.3.conv.1.1.bias False
24 features.3.conv.2.weight False
25 features.3.conv.3.weight False
26 features.3.conv.3.bias False
27 features.4.conv.0.0.weight False
28 features.4.conv.0.1.weight False
29 features.4.conv.0.1.

NameError: name 'summary' is not defined

In [165]:
class AVGMetrics (object):
    """
        This is a simple class to control the AVG for a given value. It's used to control loss and accuracy for start
        and evaluate partition
    """
    def __init__(self):
        self.sum_value = 0
        self.avg = 0
        self.count = 0
        self.values = []

    def __call__(self):
        return self.avg

    def update(self, val):
        self.values.append(val)
        self.sum_value += val
        self.count += 1
        self.avg = self.sum_value / float(self.count)

    def print (self):
        print('\nsum_value: ', self.sum_value)
        print('count: ', self.count)
        print('avg: ', self.avg)

In [166]:
class TrainHistory:
    
    def __init__(self):
        self.val_loss = list()
        self.val_acc = list()
        self.train_loss = list()
        self.train_acc = list()
        
    
    def update (self, loss_train, loss_val, acc_train, acc_val):

        self.train_loss.append(loss_train)
        self.val_loss.append(loss_val)
        self.train_acc.append(acc_train)
        self.val_acc.append(acc_val)


    def save (self, folder_path):

        path = os.path.join(folder_path, 'history')
        if not os.path.isdir(path):
            os.mkdir(path)

        print ("Saving history CSVs in {}".format(path))

        np.savetxt(os.path.join(path, "train_loss.csv"), np.asarray(self.train_loss), fmt='%.3f', delimiter=',')
        np.savetxt(os.path.join(path, "val_loss.csv"), np.asarray(self.val_loss), fmt='%.3f', delimiter=',')

        np.savetxt(os.path.join(path, "train_acc.csv"), np.asarray(self.train_acc), fmt='%.3f', delimiter=',')
        np.savetxt(os.path.join(path, "val_acc.csv"), np.asarray(self.val_acc), fmt='%.3f', delimiter=',')



    def save_plot (self, folder_path):
        path = os.path.join(folder_path, 'history')
        if not os.path.isdir(path):
            os.mkdir(path)

        epochs = [i + 1 for i in range(len(self.train_loss))]

        print("Saving history plots in {}".format(path))

        plt.plot(epochs, self.train_loss, color='r', linestyle='solid')
        plt.plot(epochs, self.val_loss, color='b', linestyle='solid')
        plt.grid(color='black', linestyle='dotted', linewidth=0.7)
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.title("Training Loss")
        plt.tight_layout()
        plt.savefig(os.path.join(path, "loss_history.jpg"), dpi=300)

        plt.figure()

        plt.plot(epochs, self.train_acc, color='r', linestyle='solid')
        plt.plot(epochs, self.val_acc, color='b', linestyle='solid')
        plt.grid(color='black', linestyle='dotted', linewidth=0.7)
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")
        plt.title("Training Accuracy")
        plt.tight_layout()
        plt.savefig(os.path.join(path, "acc_history.jpg"), dpi=200)

        plt.figure()

In [167]:
def save_model (model, folder_path, epoch, opt_fn, loss_fn, is_best, multi_gpu=False, verbose=False):

    last_check_path = os.path.join(folder_path, 'last-checkpoint')
    best_check_path = os.path.join(folder_path, 'best-checkpoint')

    if not os.path.exists(last_check_path):
        if verbose:
            print ('last-checkpoint folder does not exist. I am creating it!')
        os.mkdir(last_check_path)
    else:
        if verbose:
            print ('last-checkpoint folder exist! Perfect, I will just use it.')

    if not os.path.exists(best_check_path):
        if verbose:
            print('best-checkpoint folder does not exist. I am creating it!')
        os.mkdir(best_check_path)
    else:
        if verbose:
            print('best-checkpoint folder exist! Perfect, I will just use it.')

    info_to_save = {
        'epoch': epoch,
        'model_state_dict': model.module.state_dict() if multi_gpu else model.state_dict(),
        'optimizer_state_dict': opt_fn.state_dict(),
        'loss': loss_fn,
    }

    torch.save(info_to_save, os.path.join(last_check_path, "last-checkpoint.pth"))

    if is_best:
        torch.save(info_to_save, os.path.join(best_check_path, 'MCC.pth'))

In [168]:
def load_model (checkpoint_path, model, opt_fn=None, loss_fn=None, epoch=None):

    if not os.path.exists(checkpoint_path):
        raise Exception ("The {} does not exist!".format(checkpoint_path))

    ckpt = torch.load(checkpoint_path)
    model.load_state_dict(ckpt['model_state_dict'])

    if opt_fn is not None and loss_fn is not None:
        opt_fn.load_state_dict(ckpt['optimizer_state_dict'])
        epoch = ckpt['epoch']
        loss_fn = ckpt['loss']
        return model, opt_fn, loss_fn, epoch
    else:
        return model

In [169]:
def _check_dim (lab_real, lab_pred, mode='labels'):
    if mode == 'labels':
        if lab_real.ndim == 2:
            lab_real = lab_real.argmax(axis=1)
        if lab_pred.ndim == 2:
            lab_pred = lab_pred.argmax(axis=1)

    elif mode == 'scores':
        if lab_real.ndim == 1:
            lab_real = one_hot_encoding(lab_real)
        if lab_pred.ndim == 1:
            lab_pred = one_hot_encoding(lab_pred)

    else:
        raise Exception ('There is no mode called {}. Please, choose between score or labels'.format(mode))

    return lab_real, lab_pred

In [170]:
def accuracy (lab_real, lab_pred, verbose=False):
    # Checkin the array dimension
    lab_real, lab_pred = _check_dim (lab_real, lab_pred, mode='labels')

    acc = skmet.accuracy_score(lab_real, lab_pred)

    if verbose:
        print('- Accuracy - {:.3f}'.format(acc))

    return acc

In [171]:
def conf_matrix (lab_real, lab_pred, normalize=True):

    # Checkin the array dimension
    lab_real, lab_pred = _check_dim(lab_real, lab_pred, mode='labels')

    cm = skmet.confusion_matrix(lab_real, lab_pred)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    return cm

In [172]:
def plot_conf_matrix(cm, class_names, normalize=True, save_path=None, title='Confusion matrix', cmap=plt.cm.GnBu):

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=0)
    plt.yticks(tick_marks, class_names)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

    if isinstance(save_path, str):
        plt.savefig(save_path, dpi=200)
        plt.clf()
    elif save_path:
        plt.show()
    else:
        plt.clf()

In [173]:
class TrainHistory:
    
    def __init__(self):
        self.val_loss = list()
        self.val_acc = list()
        self.train_loss = list()
        self.train_acc = list()
        
    
    def update (self, loss_train, loss_val, acc_train, acc_val):
        """
        This function appends a new value to the loss/train loss and acc. These values are stored by epoch
        :param loss_train: the train loss of the ith epoch
        :param loss_val: the val loss of the ith epoch
        :param acc_train: the train accuracy of the ith epoch
        :param acc_val: the val accuracy of the ith epoch
        """

        self.train_loss.append(loss_train)
        self.val_loss.append(loss_val)
        self.train_acc.append(acc_train)
        self.val_acc.append(acc_val)


    def save (self, folder_path):
        """
        This function saves the loss and accuracy history as csv files
        :param folder_path: a string with the base folder path
        """

        path = os.path.join(folder_path, 'history')
        if not os.path.isdir(path):
            os.mkdir(path)

        print ("Saving history CSVs in {}".format(path))

        np.savetxt(os.path.join(path, "train_loss.csv"), np.asarray(self.train_loss), fmt='%.3f', delimiter=',')
        np.savetxt(os.path.join(path, "val_loss.csv"), np.asarray(self.val_loss), fmt='%.3f', delimiter=',')

        np.savetxt(os.path.join(path, "train_acc.csv"), np.asarray(self.train_acc), fmt='%.3f', delimiter=',')
        np.savetxt(os.path.join(path, "val_acc.csv"), np.asarray(self.val_acc), fmt='%.3f', delimiter=',')



    def save_plot (self, folder_path):
        """
        This function saves a plot of the loss and accuracy history
        :param folder_path: a string with the base folder path
        """

        path = os.path.join(folder_path, 'history')
        if not os.path.isdir(path):
            os.mkdir(path)

        epochs = [i + 1 for i in range(len(self.train_loss))]

        print("Saving history plots in {}".format(path))

        plt.plot(epochs, self.train_loss, color='r', linestyle='solid')
        plt.plot(epochs, self.val_loss, color='b', linestyle='solid')
        plt.grid(color='black', linestyle='dotted', linewidth=0.7)
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.title("Training Loss")
        plt.tight_layout()
        plt.savefig(os.path.join(path, "loss_history.jpg"), dpi=300)

        plt.figure()

        plt.plot(epochs, self.train_acc, color='r', linestyle='solid')
        plt.plot(epochs, self.val_acc, color='b', linestyle='solid')
        plt.grid(color='black', linestyle='dotted', linewidth=0.7)
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")
        plt.title("Training Accuracy")
        plt.tight_layout()
        plt.savefig(os.path.join(path, "acc_history.jpg"), dpi=200)

        plt.figure()

In [174]:
def precision_recall_report (lab_real, lab_pred, class_names=None, verbose=False, output_dict=False):

    # Checking the array dimension
    lab_real, lab_pred = _check_dim(lab_real, lab_pred, mode='labels')

    report = skmet.classification_report(lab_real, lab_pred, target_names=class_names, output_dict=output_dict)

    if verbose:
         print(report)

    return report

In [175]:
class Metrics:

    def __init__(self, metrics_names, class_names=None, options=None):

        self.metrics_names = metrics_names
        self.metrics_values = dict()        
        self.options = options
        
        self.pred_scores = None
        self.label_scores = None
        self.img_names = None
        
        self.class_names = class_names


    def compute_metrics (self):

        save_all_path = None
        # Checking if save_all is informed
        if self.options is not None:
            if "save_all_path" in self.options.keys():
                # Checking if the folder doesn't exist. If True, we must create it.
                if not os.path.isdir(self.options["save_all_path"]):
                    os.mkdir(self.options["save_all_path"])
                save_all_path = self.options["save_all_path"]

        if self.metrics_names is None:
            return None
        
        if self.metrics_names == "all":
            self.metrics_names = ["accuracy", "conf_matrix", "plot_conf_matrix", "precision_recall_report"]
        
        
        for mets in self.metrics_names:
            if mets == "accuracy":
                self.metrics_values["accuracy"] = accuracy(self.label_scores, self.pred_scores)

            
            elif mets == "conf_matrix":
                
                # Checking the options
                normalize = True
                if self.options is not None:
                    if "normalize_conf_matrix" in self.options.keys():
                        normalize = self.options["normalize_conf_matrix"]
                
                self.metrics_values["conf_matrix"] = conf_matrix(self.label_scores, self.pred_scores, normalize)
            elif mets == "plot_conf_matrix":
                
                # Checking if the class names are defined
                if self.class_names is None:
                    raise Exception ("You are trying to plot the confusion matrix without defining the classes name")
                
                # Checking the options
                save_path = None
                normalize = True
                title = "Confusion Matrix"   
                
                if self.options is not None:
                    if save_all_path is not None:
                        save_path = os.path.join(save_all_path, "conf_mat.jpg")
                    if "save_path_conf_matrix" in self.options.keys():
                        save_path = self.options["save_path_conf_matrix"]
                    if "normalize_conf_matrix" in self.options.keys():
                        normalize = self.options["normalize_conf_matrix"]
                    if "title_conf_matrix" in self.options.keys():
                        title = self.options["title_conf_matrix"]
                        
                if "conf_matrix" in self.metrics_values.keys():
                    cm = self.metrics_values["conf_matrix"]
                else:
                    cm = conf_matrix(self.label_scores, self.pred_scores, normalize)
                
                plot_conf_matrix(cm, self.class_names, normalize, save_path, title)
                
                
            elif mets == "precision_recall_report":

                self.metrics_values["precision_recall_report"] = precision_recall_report(self.label_scores,
                                                                                              self.pred_scores,
                                                                                              self.class_names)

            
    def print (self):
        """
        This method just prints the metrics on the screen
        """
        
        if self.metrics_names is None:
            print ("Since metrics name is None, there is no metric to print")
            
        else:        
            for met in self.metrics_values.keys():
                if met == "loss":
                    print ('- Loss: {:.3f}'.format(self.metrics_values[met]))
                elif met == "accuracy":
                    print ('- Accuracy: {:.3f}'.format(self.metrics_values[met]))
                elif met == "conf_matrix":
                    print('- Confusion Matrix: \n{}'.format(self.metrics_values[met]))
                elif met == "precision_recall_report":
                    print('- Precision and Recall report: \n{}'.format(self.metrics_values[met]))

    def add_metric_value (self, value_name, value):
        """
        Adding a new value from a external source into the metrics
        :param value_name (string): the key for the dict
        :param value: the value to be saved in the self.metrics_values
        """
        self.metrics_values[value_name] = value


    def update_scores (self, label_batch, pred_batch, img_name_batch=None):
        """
        The evaluation is made using batchs. So, every batch we get just a piece of the prediction. This method
        concatenate all prediction and labels in order to compute the metrics
        :param pred (np.array): an array containing part of the predictions outputed by the model
        :param label (np.array): an array contaning the true labels
        """

        if self.label_scores is None and self.pred_scores is None:
            self.label_scores = label_batch
            self.pred_scores = pred_batch
        else:
            if pred_batch is not None:
                self.pred_scores = np.concatenate((self.pred_scores, pred_batch))
            if label_batch is not None:
                self.label_scores = np.concatenate((self.label_scores, label_batch))

    def save_metrics (self, folder_path, name="metrics.txt"):
        """
        This method saves the computed metrics
        :param folder_path (string): the folder you'd like to save the metrics
        :param name (string): the file name. Default is metrics.txt
        """
        
        if self.metrics_names is None:
            print ("Since metrics name is None, there is no metric to save")
            
        else:        
            with open(os.path.join(folder_path, name), "w") as f:
    
                f.write("- METRICS REPORT -\n\n")
    
                for met in self.metrics_values.keys():
                    if met == "loss":
                        f.write('- Loss: {:.3f}\n'.format(self.metrics_values[met]))
                    elif met == "accuracy":
                        f.write('- Accuracy: {:.3f}\n'.format(self.metrics_values[met]))
                    elif met == "conf_matrix":
                        f.write('- Confusion Matrix: \n{}\n'.format(self.metrics_values[met]))
                    elif met == "precision_recall_report":
                        f.write('- Precision and Recall report: \n{}\n'.format(self.metrics_values[met]))


    def save_scores (self, folder_path=None, pred_name="predictions.csv"):
        """
        This method saves the concatenated scores in the disk
        :param folder_path (string): the folder you'd like to save the scores
        :param pred_name (string): the predictions' score file name. Default is predictions.csv
        :param labels_name (string): the labels' score file name. Default is labels.csv
        """

        if folder_path is not None:
            # Checking if the folder doesn't exist. If True, we must create it.
            if not os.path.isdir(folder_path):
                os.mkdir(folder_path)
        elif self.options is not None:
            if "save_all_path" in self.options.keys():
                folder_path = self.options["save_all_path"]
            elif "save_path_scores" in self.options.keys():
                folder_path = self.options["save_path_scores"]
            else:
                raise ("The options doesnt have any folder to save the scores")

            if 'pred_name_scores' in self.options.keys():
                pred_name = self.options['pred_name_scores']
        else:
            raise ("You must set the path to save the score eithe in options or in folder_path parameter")


        # Getting the list of classications and predict labels
        if self.class_names is not None:
            if self.label_scores is not None:
                real_labels = [self.class_names[int(l)] for l in self.label_scores]
                real_labels = np.asarray(real_labels)
                real_labels = real_labels.reshape(real_labels.shape[0], 1)

            if self.img_names is not None:
                img_names = np.asarray(self.img_names)
                img_names = img_names.reshape(img_names.shape[0], 1)

            pred_labels = [self.class_names[ps.argmax()] for ps in self.pred_scores]
            pred_labels = np.asarray(pred_labels)
            pred_labels = pred_labels.reshape(pred_labels.shape[0], 1)
        else:
            raise ("You need to inform the class names to use this function")

        if self.img_names is not None and self.label_scores is not None:
            both_data = np.concatenate((img_names, real_labels, pred_labels, self.pred_scores), axis=1)
            cols = ['image', 'REAL', 'PRED', *self.class_names]
        elif self.img_names is None and self.label_scores is not None:
            both_data = np.concatenate((real_labels, pred_labels, self.pred_scores), axis=1)
            cols = ['REAL', 'PRED', *self.class_names]
        elif self.img_names is not None and self.label_scores is None:
            both_data = np.concatenate((img_names, pred_labels, self.pred_scores), axis=1)
            cols = ['image', 'PRED', *self.class_names]
        else:
            both_data = np.concatenate((real_labels, pred_labels, self.pred_scores), axis=1)
            cols = ['REAL', 'PRED', *self.class_names]

        df = pd.DataFrame(both_data, columns=cols)
        print ("Saving the scores in {}".format(folder_path))

        df.to_csv(os.path.join(folder_path, pred_name), index=False)

In [218]:
def _config_logger(save_path, file_name):
    """
        Internal function to configure the logger

    """
    
    logger = logging.getLogger("Train-Logger")
    # Checking if the folder logs doesn't exist. If True, we must create it.
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    logger_filename = os.path.join(save_path, f"{file_name}_{str(time.time()).replace('.','')}.log")
    fhandler = logging.FileHandler(filename=logger_filename, mode='a')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fhandler.setFormatter(formatter)
    logger.addHandler(fhandler)
    logger.setLevel(logging.INFO)
    return logger


def _train_epoch (model, optimizer, loss_fn, data_loader, c_epoch, t_epoch, device=None):
  
    # setting the model to training mode
    model.train()

    print ("Training...")
    # Setting tqdm to show some information on the screen
    with tqdm(total=len(train_loader), ascii=True, desc='Epoch {}/{}: '.format(c_epoch, t_epoch), ncols=100) as t:


        # Variables to store the avg metrics
        loss_avg = AVGMetrics()
        acc_avg = AVGMetrics()

        # Getting the data from the DataLoader generator
        for i, (images,labels) in enumerate(train_loader):
            
            if torch.cuda.is_available():
                images=Variable(images.cuda())
                labels=Variable(labels.cuda())
                # Doing the forward pass
            out = model(images)
            # Computing loss function
            loss = loss_fn(out, labels)
            # Computing the accuracy
            acc = accuracy(out, labels)

            # Getting the avg metrics
            loss_avg.update(loss.item())
            acc_avg.update(acc.item())

            # Zero the parameters gradient
            optimizer.zero_grad()

            # Computing gradients and performing the update step
            loss.backward()
            optimizer.step()

            # Updating tqdm
            t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            t.update()

    return {"loss": loss_avg(), "accuracy": acc_avg()}

def fit_model (model, train_loader, valid_loader, optimizer=None, loss_fn=None, epochs=1,
               epochs_early_stop=None, save_folder=None, initial_model=None, best_metric="loss", device=None,
               schedule_lr=None, config_bot=None, model_name="CNN", resume_train=False, history_plot=True,
               val_metrics=('accuracy'), metric_early_stop=None):

    logger = _config_logger(save_folder, model_name)
    logger.info("Starting the training phase")

    if epochs_early_stop is not None:
        logger.info('Early stopping is set using the number of epochs without improvement')
    if metric_early_stop is not None:
        logger.info('Early stopping is set using the min/max metric as threshold')
    if epochs_early_stop is None and metric_early_stop is None:
        logger.info('No early stopping is set')

    history = TrainHistory()

    # Checking if we have a saved model. If we have, load it, otherwise, let's start the model from scratch
    epoch_resume = 0
    if initial_model is not None:
        logger.info("Loading the saved model in {} folder".format(initial_model))

        if resume_train:
            model, optimizer, loss_fn, epoch_resume = load_model(initial_model, model)
            logger.info("Resuming the training from epoch {} ...".format(epoch_resume))
        else:
            model = load_model(initial_model, model)

    else:
        logger.info("The model will be trained from scratch")

        # Moving the model to the given device
    model.to(device)

    # Setting data to store the best mestric
    logging.info("The best metric to get the best model will be {}".format(best_metric))
    if best_metric == 'loss':
        best_metric_value = 1000
    else:
        best_metric_value = 0
    best_flag = False

    # setting a flag for the early stop
    early_stop_count = 0
    best_epoch = 0

    # writer is used to generate the summary files to be loaded at tensorboard
    writer = SummaryWriter (os.path.join(save_folder, 'summaries'))

    
    # Let's iterate for `epoch` epochs or a tolerance.
    # It always start from epoch resume. If it's set, it starts from the last epoch the training phase was stopped,
    # otherwise, it starts from 0
    epoch = epoch_resume
    while epoch < epochs:

        # Updating epoch
        epoch += 1

        # Training and getting the metrics for one epoch
        train_metrics = _train_epoch(model, optimizer, loss_fn, train_loader, epochs, device)

        # After each epoch, we evaluate the model for the training and validation data
        val_metrics = metrics_for_eval (model, valid_loader, device, loss_fn)

        # Checking the schedule if applicable
        if isinstance(schedule_lr, torch.optim.lr_scheduler.ReduceLROnPlateau):
            schedule_lr.step(best_metric_value)
        elif isinstance(schedule_lr, torch.optim.lr_scheduler.MultiStepLR):
            schedule_lr.step(epoch)

        # Getting the current LR
        current_LR = None
        for param_group in optimizer.param_groups:
            current_LR = param_group['lr']


        writer.add_scalars('Loss', {'val-loss': val_metrics['loss'],
                                                 'start-loss': train_metrics['loss']},
                                                 epoch)

        writer.add_scalars('Accuracy', {'val-loss': val_metrics['accuracy'],
                                    'start-loss': train_metrics['accuracy']},
                                    epoch)

        history.update(train_metrics['loss'], val_metrics['loss'], train_metrics['accuracy'], val_metrics['accuracy'])


        # Getting the metrics for the training partition epoch
        train_print = "-- Loss: {:.3f}\n-- Acc: {:.3f}\n".format(train_metrics["loss"],train_metrics["accuracy"])

        # Getting the metrics for the validation partition in this epoch
        val_print = "-- Loss: {:.3f}\n-- Acc: {:.3f}\n".format(val_metrics["loss"],val_metrics["accuracy"])

        early_stop_count += 1
        new_best_print = None
        # Defining the best metric for validation
        if best_metric == 'loss':
            if val_metrics[best_metric] <= best_metric_value:
                best_metric_value = val_metrics[best_metric]
                new_best_print = '\n-- New best {}: {:.3f}'.format(best_metric, best_metric_value)
                best_flag = True
                best_epoch = epoch
                early_stop_count = 0
        else:
            if val_metrics[best_metric] >= best_metric_value:
                best_metric_value = val_metrics[best_metric]
                new_best_print = '\-- New best {}: {:.3f}'.format(best_metric, best_metric_value)
                best_flag = True
                best_epoch = epoch
                early_stop_count = 0

        # Check if it's the best model in order to save it
        if save_folder is not None:
            save_model(model, save_folder, epoch, optimizer, loss_fn, best_flag)
        best_flag = False

        # Updating the logger
        msg = "Metrics for epoch {} out of {}\n".format(epoch, epochs)
        msg += "- Train\n"
        msg += train_print + "\n"
        msg += "\n- Validation\n"
        msg += val_print + "\n"
        msg += "\n- Training info"
        msg += "\n-- Early stopping counting: {} max to stop is {}".format(early_stop_count, epochs_early_stop)
        msg += "\n-- Current LR: {}".format(current_LR)
        if new_best_print is not None:
            msg += new_best_print
        msg += "\n-- Best {} so far: {:.3f} on epoch {}\n".format(best_metric, best_metric_value, best_epoch)

        # Checking the early stop
        if epochs_early_stop is not None:
            if early_stop_count >= epochs_early_stop:
                logger.info(msg)
                logger.info("The early stop trigger was activated. The validation {} " .format(best_metric) +
                            "{:.3f} did not improved for {} epochs.".format(best_metric_value,
                                                                            epochs_early_stop) +
                            "The training phase was stopped.")

                break

        # Checking the early stop
        if metric_early_stop is not None:
            stop = False
            if best_metric == 'loss':
                if metric_early_stop >= best_metric_value:
                    stop = True
            else:
                if metric_early_stop <= best_metric_value:
                    stop = True

            if stop:
                logger.info(msg)
                logger.info("The early stop trigger was activated. The validation {} ".format(best_metric) +
                            "{:.3f} achieved the defined threshold {:.3f}.".format(best_metric_value,
                                                                            metric_early_stop) +
                            "The training phase was stopped.")
                break

        # Sending all message to the logger
        logger.info(msg)


    if history_plot:
        history.save_plot(save_folder)

    history.save(save_folder)
    print('\n')

    writer.close()

In [219]:
def metrics_for_eval (model, valid_loader, device, loss_fn):
   # setting the model to evaluation mode
    model.eval()
    print ("\nEvaluating...")
    # Setting tqdm to show some information on the screen
    
    with tqdm(total=len(valid_loader), ascii=True, ncols=100) as t:

        # Setting require_grad=False in order to dimiss the gradient computation in the graph
        with torch.no_grad():

            loss_avg = AVGMetrics()
            acc_avg = AVGMetrics()


            for i, (images,labels) in enumerate(valid_loader):
                if torch.cuda.is_available():
                    images=Variable(images.cuda())
                    labels=Variable(labels.cuda())
                    # Doing the forward pass without using meta-data
                pred_batch = model(images)

                # Computing the loss
                L = loss_fn(pred_batch, labels)
                # Computing the accuracy
                acc  = accuracy(pred_batch, labels)

                loss_avg.update(L.item())
                acc_avg.update(acc.item())
                
                labels_batch_np = labels.cpu().numpy()

                if Metrics is not None:
                    # Moving the data to CPU and converting it to numpy in order to compute the metrics
                    pred_batch_np = nnF.softmax(pred_batch,dim=1).cpu().numpy()
                    # updating the scores
                    Metrics.update_scores(labels_batch_np, pred_batch_np)

                # Updating tqdm
                t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
                t.update()

    return {"loss": loss_avg(), "accuracy": acc_avg()}


# Testing the model
def test_model (model, data_loader, checkpoint_path=None, loss_fn=None, device=None, save_pred=False,
                    partition_name='Test', metrics_to_comp=('all'), class_names=None, metrics_options=None,
                    apply_softmax=True, verbose=True, full_path_pred=None):

    # setting the model to evaluation mode
    model.eval()

    def _get_predictions (model, image):        
        with torch.no_grad():
            pred_batch = model(image)
        return pred_batch

    if checkpoint_path is not None:
        model = load_model(checkpoint_path, model)

    if device is None:
        # Setting the device
        if torch.cuda.is_available():
            device = torch.device("cuda:" + str(torch.cuda.current_device()))
        else:
            device = torch.device("cpu")

    # Moving the model to the given device
    model.to(device)

    if loss_fn is None:
        loss_fn = nn.CrossEntropyLoss()

    # Setting the metrics object
    metrics = Metrics (metrics_to_comp, class_names, metrics_options)

    print("Testing...")
    # Setting tqdm to show some information on the screen
    with tqdm(total=len(valid_loader), ascii=True, ncols=100) as t:

        loss_avg = AVGMetrics()
        acc_avg = AVGMetrics()

        for i, (images,labels) in enumerate(valid_loader):
            
            if torch.cuda.is_available():
                images=Variable(images.cuda())
                labels=Variable(labels.cuda())

                # Doing the forward pass without using meta-data
            pred_batch = _get_predictions(model, images)
            # Computing the loss
            L = loss_fn(pred_batch, labels)
            acc  = accuracy(pred_batch, labels)
            loss_avg.update(L.item())
            acc_avg.update(acc.item())
                
            labels_batch_np = labels.cpu().numpy()
            # Moving the data to CPU and converting it to numpy in order to compute the metrics
            if apply_softmax:
                pred_batch_np = nnF.softmax(pred_batch,dim=1).cpu().numpy()
            # updating the scores
            metrics.update_scores(labels_batch_np, pred_batch_np)

            # Updating tqdm
            if metrics.metrics_names is None:
                t.set_postfix(loss='{:05.3f}'.format(0.0))
                t.set_postfix(accuracy='{:05.3f}'.format(0.0))
            else:
                t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
                #t.set_postfix(accuracy='{:05.3f}'.format(acc_avg()))

            t.update()

    # Adding loss into the metric values
    metrics.add_metric_value("loss", loss_avg())

    # Getting the metrics
    metrics.compute_metrics()

    if save_pred or metrics.metrics_names is None:
        if full_path_pred is None:
            metrics.save_scores()
        else:
            _spt = full_path_pred.split('/')
            _folder = "/".join(_spt[0:-1])
            _p = _spt[-1]
            metrics.save_scores(folder_path=_folder, pred_name=_p)

    if verbose:
        print('- {} metrics:'.format(partition_name))
        metrics.print()


    return metrics.metrics_values

In [220]:
batch_size = 32
epochs = 2
best_metric = "loss"
lr_init = 0.0001
sched_factor = 0.1
sched_min_lr = 0.00001
sched_patience = 10
early_stop = 15
save_folder = "E:/Results/"

In [221]:
def main (lr_init, sched_factor, sched_min_lr, sched_patience, batch_size, epochs, early_stop, save_folder, best_metric,):

    metric_options = {
        'save_all_path': os.path.join(save_folder, "best_metrics"),
        'pred_name_scores': 'predictions_best_test.csv',
        'normalize_conf_matrix': True}
    checkpoint_best = os.path.join(save_folder, 'best-checkpoint/MCC.pth')

    ####################################################################################################################
    print("- Loading...")
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr_init)
    scheduler_lr = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=sched_factor, min_lr=sched_min_lr,
                                                                    patience=sched_patience)
    ####################################################################################################################

    print("- Starting the training phase...")
    print("-" * 50)
    fit_model (model, train_loader, valid_loader, optimizer=optimizer, loss_fn=loss_fn, epochs=epochs,
               epochs_early_stop=early_stop, save_folder=save_folder, initial_model=None, device=None, 
               schedule_lr=scheduler_lr, model_name="CNN", resume_train=False, history_plot=True,
               best_metric=best_metric)
    # Testing the validation partition
    print("- Evaluating the validation partition...")
    test_model (model, valid_loader, checkpoint_path=checkpoint_best, loss_fn=loss_fn, save_pred=True, 
                metrics_to_comp=['accuracy'], class_names=classes, metrics_options=metric_options, 
                apply_softmax=True, verbose=False)

    metric_options = {
        'save_all_path': os.path.join(save_folder, "best_metrics"),
        'pred_name_scores': 'predictions.csv',
        'plot_conf_matrix': True,
        'normalize_conf_matrix': True
    }
    print("=" * 30)

    # Testing the test partition
    print("\n- Evaluating the testing partition...")
    test_model(model, test_loader, checkpoint_path=None, metrics_to_comp='all', class_names=classes,
                   metrics_options=metric_options, save_pred=True, verbose=False)
    ####################################################################################################################

In [222]:
k = main( lr_init = lr_init , sched_factor = sched_factor, sched_min_lr = sched_min_lr, sched_patience = sched_patience ,
         batch_size = batch_size, epochs = epochs, early_stop = early_stop, save_folder = save_folder, 
         best_metric = best_metric)

INFO:Train-Logger:Starting the training phase
INFO:Train-Logger:Early stopping is set using the number of epochs without improvement
INFO:Train-Logger:The model will be trained from scratch


- Loading...
- Starting the training phase...
--------------------------------------------------


Epoch 2/None:   0%|                                                          | 0/16 [00:00<?, ?it/s]

Training...


Epoch 2/None: 100%|#####################################| 16/16 [00:10<00:00,  1.50it/s, loss=0.499]
  0%|                                                                         | 0/2 [00:00<?, ?it/s]


Evaluating...


  0%|                                                                         | 0/2 [00:00<?, ?it/s]


TypeError: update_scores() missing 1 required positional argument: 'pred_batch'