In [2]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import pandas as pd
from sklearn.metrics import confusion_matrix
import itertools
from sklearn.metrics import precision_score, recall_score, confusion_matrix, f1_score, accuracy_score, balanced_accuracy_score , mean_squared_error, r2_score
import pickle
from collections import namedtuple

In [3]:
def get_results(y_test, y_pred, model_type):
    """
    Compute evaluation metrics for a set of predictions.
    :y_test: ground-truth targets
    :y_pred: predicted targets
    :model_type: 'clf' selects the classification metrics; any other value
                 selects the regression metrics
    :return: dict mapping metric name to value
    """
    # Everything that is not explicitly a classifier is scored as a regressor.
    if model_type != 'clf':
        return {
            'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
            'R2': r2_score(y_test, y_pred),
        }

    # Classification: precision/recall/F1 are macro-averaged, plus a micro F1.
    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='macro'),
        "Recall": recall_score(y_test, y_pred, average='macro'),
        "F1 Score Macro": f1_score(y_test, y_pred, average='macro'),
        "F1 Score Micro": f1_score(y_test, y_pred, average='micro'),
        "Balanced Accuracy": balanced_accuracy_score(y_test, y_pred),
    }

In [1]:
def collate_results(r, user, label, duration, num_classes, size, model_type, n_epochs, window_size_samples):
    """
    Bundle run metadata and metric values into a single named tuple.
    :r: dict of metric values, keyed as produced for the given model_type
    :user: current user
    :label: target label
    :duration: duration value stored as-is in the tuple
    :num_classes: number of targets
    :size: size value stored as-is in the tuple
    :model_type: 'clf' for classification metrics, anything else for regression
    :n_epochs: number of epochs
    :window_size_samples: stored under the field name `window_size`
    :return: a `Results` named tuple (field set depends on model_type)
    """
    # Fields shared by both result flavours, in the order they appear in the tuple.
    shared_fields = ["user", "label", "n_epochs", "window_size", "duration", "num_classes", "size"]
    shared_values = (user, label, n_epochs, window_size_samples, duration, num_classes, size)

    # Pair each metric field name with the key it is read from in `r`.
    if model_type == 'clf':
        metric_spec = [
            ("accuracy", 'Accuracy'),
            ("bal_acc", 'Balanced Accuracy'),
            ("precision", 'Precision'),
            ("recall", 'Recall'),
            ("f1_score_macro", 'F1 Score Macro'),
            ("f1_score_micro", 'F1 Score Micro'),
        ]
    else:
        metric_spec = [("RMSE", 'RMSE'), ("R2", 'R2')]

    Results = namedtuple("Results", shared_fields + [field for field, _ in metric_spec])
    metric_values = [r[key] for _, key in metric_spec]
    return Results(*shared_values, *metric_values)

In [7]:
def plot_confusion_matrix(cm, classes, file,  normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map, print it, and save the figure.
    :cm: 2-D confusion matrix (array-like); rows are labelled "True label" and
         columns "Predicted label" on the plot
    :classes: tick labels, one per class
    :file: destination passed to `plt.savefig`
    :normalize: if True, each row is divided by its row sum before plotting
    :title: figure title
    :cmap: matplotlib colormap used for the heat map
    """
    cm = np.asarray(cm)
    plt.figure()
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples has a zero row sum; dividing would yield
        # NaN cells and a NaN colour threshold. Leave such rows at 0 instead.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # The 'd' format only accepts integers; fall back to two decimals for any
    # non-integer matrix, not just the normalised one.
    use_int_format = (not normalize) and np.issubdtype(cm.dtype, np.integer)
    fmt = 'd' if use_int_format else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass after the axis labels exist so they are accounted for.
    plt.tight_layout()
    plt.savefig(file)

In [6]:
def plot_loss_curve(clf):
    """
    Plot training/validation loss and accuracy per epoch on a twin-axis figure.
    :clf: fitted model whose `history` supports `history[:, columns]` indexing and
          provides 'epoch', 'train_loss', 'valid_loss', 'train_accuracy' and
          'valid_accuracy' (presumably a skorch-style history -- TODO confirm)
    """
    # Extract loss and accuracy values for plotting from history object
    results_columns = ['train_loss', 'valid_loss', 'train_accuracy', 'valid_accuracy']
    df = pd.DataFrame(clf.history[:, results_columns], columns=results_columns,
                      index=clf.history[:, 'epoch'])

    # Express accuracy as a percentage so it matches the "Accuracy [%]" axis label
    df = df.assign(train_accuracy_pct=100 * df.train_accuracy,
                   valid_accuracy_pct=100 * df.valid_accuracy)

    # The 'seaborn' style was renamed 'seaborn-v0_8' in newer matplotlib releases;
    # use whichever name this installation provides.
    style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
    plt.style.use(style_name)
    fig, ax1 = plt.subplots(figsize=(16, 6))
    df.loc[:, ['train_loss', 'valid_loss']].plot(
        ax=ax1, style=['-', ':'], color='tab:blue', legend=False, fontsize=14)

    ax1.tick_params(axis='y', labelcolor='tab:blue', labelsize=14)
    ax1.set_ylabel("Loss", color='tab:blue', fontsize=14)

    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

    df.loc[:, ['train_accuracy_pct', 'valid_accuracy_pct']].plot(
        ax=ax2, style=['-', ':'], color='tab:red', legend=False)
    ax2.tick_params(axis='y', labelcolor='tab:red', labelsize=14)
    ax2.set_ylabel("Accuracy [%]", color='tab:red', fontsize=14)
    # Keep at least 85 as the upper limit (room for the legend), but never shrink
    # the axis below the data: a fixed cap of 85 would hide accuracies above 85 %.
    lower, upper = ax2.get_ylim()
    ax2.set_ylim(lower, max(upper, 85))
    ax1.set_xlabel("Epoch", fontsize=14)

    # Colour encodes the metric, so the legend only explains line style,
    # using black proxy lines rather than the plotted artists.
    handles = []
    handles.append(Line2D([0], [0], color='black', linewidth=1, linestyle='-', label='Train'))
    handles.append(Line2D([0], [0], color='black', linewidth=1, linestyle=':', label='Valid'))
    plt.legend(handles, [h.get_label() for h in handles], fontsize=14)
    plt.tight_layout()

In [9]:
def plot_model(y, y_hat, user, label, file ,c='black'):
    """
    Scatter observed vs. predicted values, overlay a least-squares line, and save the figure.
    :y: observed values (array-like; flattened to 1-D)
    :y_hat: predicted values (array-like; flattened to 1-D)
    :user: current user (accepted but not used by the plot)
    :label: name of the target, used in the axis labels
    :file: destination passed to `savefig`
    :c: colour of the scatter points
    """
    # These settings make the plots readable on slides, feel free to change
    fontsize = "30"
    params = {'figure.autolayout': True,
              'legend.fontsize': fontsize,
              'figure.figsize': (12, 8),
              'axes.labelsize': fontsize,
              'axes.titlesize': fontsize,
              'xtick.labelsize': fontsize,
              'ytick.labelsize': fontsize}

    # Flatten so column-vector inputs (n, 1) work with np.polyfit and the line plot.
    y = np.asarray(y).ravel()
    y_hat = np.asarray(y_hat).ravel()

    # Apply the settings only while this figure is built, so they do not leak
    # into plots made later in the same session.
    with plt.rc_context(params):
        # Single figure; no separate plt.figure() call, which would leave an
        # extra empty figure behind on every call.
        fig, ax = plt.subplots()

        # Observed values on x, predictions on y.
        ax.scatter(y, y_hat, color=c)
        ax.set_xlabel("Observed {0}".format(label))
        ax.set_ylabel("Predicted {0}".format(label))
        ax.grid(color='lightgray', linestyle='-', linewidth=1)
        ax.set_axisbelow(True)

        # Degree-1 least-squares fit of predictions against observations.
        m, b = np.polyfit(y, y_hat, 1)
        ax.plot(y, m*y + b, color='red')
        fig.savefig(file)

In [1]:
def load_file(path):
    """
    Read a pickled object from `path` and return it.
    :path: location of the pickle file
    :return: the unpickled object
    """
    # NOTE(review): unpickling can execute arbitrary code; only use on trusted files.
    with open(path, mode='rb') as stream:
        return pickle.load(stream)

In [2]:
def save_file(path, file):
    """
    Pickle `file` to `path` using the highest available pickle protocol.
    :path: destination of the pickle file (overwritten if it exists)
    :file: object to serialise
    """
    with open(path, mode='wb') as sink:
        pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL).dump(file)