# Result comparison for AL

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [3]:
def load_mses(folder):
    """
    Read the three MSE arrays stored as .npy files inside `folder`.
    Used by the aggregation helpers below.

    Returns a tuple (test_mse, train_mse, selected_after_train_mse).
    """
    # File names in the same order as the returned tuple
    file_names = ('test_mse.npy', 'train_mse.npy', 'mse_selected_after_train.npy')
    return tuple(np.load(os.path.join(folder, name)) for name in file_names)

def load_ensemble_mses(folder, num_trails, avg_test_mse_mat=None, avg_train_mse_mat=None, avg_selected_after_train_mse_mat=None):
    """
    Aggregate the MSE matrices of several trails by averaging along axis 1.

    When `avg_test_mse_mat` is not supplied, all three matrices are loaded
    from disk through load_matrix(folder, num_trails); otherwise the three
    matrices passed in are used as they are.

    Returns a tuple (test_mean, train_mean, selected_after_train_mean).
    """
    if avg_test_mse_mat is not None:
        matrices = (avg_test_mse_mat, avg_train_mse_mat, avg_selected_after_train_mse_mat)
    else:
        matrices = load_matrix(folder, num_trails)
    # Average each matrix over its second axis
    test_mean, train_mean, selected_mean = (np.mean(mat, axis=1) for mat in matrices)
    return test_mean, train_mean, selected_mean

def load_matrix(folder, num_trails):
    """
    Collect the raw (un-averaged) MSE arrays of every trail into matrices.

    `folder` is the path of trail 0; the path of trail i is obtained by
    replacing 'trail_0' with 'trail_i' in it. The per-trail arrays are
    stacked and transposed, so for 1-D per-trail arrays the trail index ends
    up on the last axis. Keeping the individual trails lets callers compare
    runs instead of only a mean, which outliers can dominate.

    Returns a tuple (test_mat, train_mat, selected_after_train_mat) and
    prints the shape of the test matrix.
    """
    # One (test, train, selected_after_train) triple per trail
    per_trail = [
        load_mses(folder.replace('trail_0', 'trail_{}'.format(trail_idx)))
        for trail_idx in range(num_trails)
    ]
    test_mat = np.array([triple[0] for triple in per_trail]).T
    train_mat = np.array([triple[1] for triple in per_trail]).T
    selected_mat = np.array([triple[2] for triple in per_trail]).T
    print('shape = ', np.shape(test_mat))
    return test_mat, train_mat, selected_mat


In [5]:
# Number of repeated runs per configuration; run i is read from the folder
# whose name has 'trail_0' replaced by 'trail_i'
num_trails = 5
#MSE_retrain_False_bs_1024_pool_10_dx_5_x0_20_nmod_5
#folder = 'results/fig'
# NOTE: `folder` is re-bound by the loop below; this value only survives if folder_list is empty
folder = '/home/sr365/NAAL/results/fig/retrain_True'
#folder = '/home/sr365/NAAL/results/fig/retrain_False'
#folder = 'results/fig/retrain_true_al_step_50/x0_20'

#folder_list = ['/home/sr365/NAAL/results/fig/retrain_True', '/home/sr365/NAAL/results/fig/retrain_False','results/fig/retrain_true_al_step_50/x0_20']
folder_list = ['results/fig/retrain_true_al_step_50/x0_10']
for folder in folder_list:
    for file in os.listdir(folder):
        cur_folder = os.path.join(folder, file)
        # Make sure this is a folder
        if not os.path.isdir(cur_folder):
            continue
        # Only start from the trail_0 MSE folders; the matching Random folder
        # and the other trails are derived from this name
        if 'MSE' not in cur_folder or 'trail_0' not in cur_folder:
            continue

        # The reading part: per-trail matrices first, then their mean over trails
        MSE_test_mse_mat, MSE_train_mse_mat, MSE_selected_after_train_mse_mat = load_matrix(cur_folder, num_trails)
        MSE_test_mse, MSE_train_mse, MSE_selected_after_train_mse = load_ensemble_mses(cur_folder, num_trails, MSE_test_mse_mat, MSE_train_mse_mat, MSE_selected_after_train_mse_mat)

        # The Random baseline lives in the folder with 'MSE' replaced by 'Random'
        random_folder = cur_folder.replace('MSE','Random')
        Random_test_mse_mat, Random_train_mse_mat, Random_selected_after_train_mse_mat = load_matrix(random_folder, num_trails)
        RD_test_mse, RD_train_mse, RD_selected_after_train_mse = load_ensemble_mses(random_folder, num_trails, Random_test_mse_mat, Random_train_mse_mat, Random_selected_after_train_mse_mat)

        # dx and x0 are parsed from the folder name (the integers after '_dx_' and '_x0_')
        dx = int(file.split('_dx_')[-1].split('_')[0])
        x0 = int(file.split('_x0_')[-1].split('_')[0])
        print('for {}, dx = {}, x0 = {}'.format(file, dx, x0))
        # Use the xlabel with number of data points: x0 at the first entry, growing by dx per entry
        x_label = np.arange(len(MSE_test_mse)) * dx + x0
        # NOTE(review): '_trail_0_' (with trailing underscore) does not match folder
        # names that END in '_trail_0', so the suffix stays in save_name. Left
        # unchanged to keep the existing output file names stable.
        save_name = os.path.join(folder, file.replace('MSE_','').replace('_trail_0_',''))
        ##########################################
        # Plotting various MSE metrics together #
        ##########################################
        f = plt.figure(figsize=[8, 4])
        plt.plot(x_label, MSE_test_mse, '-x', c='tab:blue', label='MSE test')
        plt.plot(x_label, RD_test_mse, '-x', c='tab:orange', label='RD test')
        plt.plot(x_label, MSE_train_mse, '--x', c='tab:blue', linewidth=2, label='MSE train')
        # Colour comes from `c` only; the old '--rx' fmt also asked for red, which conflicted with it
        plt.plot(x_label, RD_train_mse, '--x', c='tab:orange', linewidth=2, label='RD train')
        plt.legend()
        plt.yscale('log')
        plt.xlabel('num_train')
        plt.ylabel('MSE')
        plt.title(save_name)
        plt.savefig(save_name+'.png')
        # Close (not just clear) the figure so figures do not pile up across iterations
        plt.close(f)

        ##################################
        # Plotting AL/Random Ratio plot #
        ##################################
        # Get the MSE/Random ratio, element-wise per step and per trail
        test_mse_ratio = MSE_test_mse_mat / Random_test_mse_mat
        # Fraction of (step, trail) entries where the MSE run has the lower test error
        percent_better = np.mean(test_mse_ratio < 1)
        # Log of the ratio: negative where MSE is better, positive where Random is better
        # (same values as the former two-branch -log(1/r) / log(r) transform)
        test_mse_ratio = np.log(test_mse_ratio)
        # Plot the ratio figure
        f = plt.figure(figsize=[8, 4])
        for i in range(num_trails):
            plt.plot(x_label, test_mse_ratio[:, i],'x')
        # Zero line = both methods equal; its legend entry reports percent_better
        plt.plot(x_label, np.zeros_like(x_label),'--r', label='ref: {:.1f}%'.format(percent_better*100))
        plt.legend()
        plt.xlabel('num_train')
        plt.ylabel('log(MSE / Random Ratio)')
        plt.title(save_name)
        plt.savefig(save_name + 'ratio_plot.png')
        plt.close(f)

        #####################################
        # Plotting Test MSE comparison plot #
        #####################################
        # Overlay the test MSE curve of every trail for both methods
        f = plt.figure(figsize=[8, 4])
        # Trail 0 is plotted separately so each method gets exactly one legend entry
        plt.plot(x_label, MSE_test_mse_mat[:, 0], '-', alpha=0.5,  c='tab:blue', label='AL')
        plt.plot(x_label, Random_test_mse_mat[:, 0], '-', alpha=0.5,  c='tab:orange', label='Random')
        for i in range(1, num_trails):
            plt.plot(x_label, MSE_test_mse_mat[:, i], '-', alpha=0.5,  c='tab:blue')
            plt.plot(x_label, Random_test_mse_mat[:, i], '-', alpha=0.5,  c='tab:orange')
        plt.xlabel('num_train')
        # This plot shows raw test MSE values, not a ratio
        plt.ylabel('Test MSE')
        plt.title(save_name)
        plt.yscale('log')
        plt.legend()
        plt.savefig(save_name + 'test_loss_overlay.png')
        plt.close(f)
        
        

shape =  (50, 5)
shape =  (50, 5)
for MSE_retrain_True_bs_1024_pool_50_dx_10_step_50_x0_10_nmod_5_trail_0, dx = 10, x0 = 10
shape =  (50, 5)
shape =  (50, 5)
for MSE_retrain_True_bs_1024_pool_200_dx_20_step_50_x0_10_nmod_5_trail_0, dx = 20, x0 = 10
shape =  (50, 5)
shape =  (50, 5)
for MSE_retrain_True_bs_1024_pool_100_dx_10_step_50_x0_10_nmod_5_trail_0, dx = 10, x0 = 10
shape =  (50, 5)
shape =  (50, 5)
for MSE_retrain_True_bs_1024_pool_50_dx_5_step_50_x0_10_nmod_5_trail_0, dx = 5, x0 = 10
shape =  (50, 5)
shape =  (50, 5)
for MSE_retrain_True_bs_1024_pool_100_dx_20_step_50_x0_10_nmod_5_trail_0, dx = 20, x0 = 10
shape =  (50, 5)
shape =  (50, 5)
for MSE_retrain_True_bs_1024_pool_25_dx_5_step_50_x0_10_nmod_5_trail_0, dx = 5, x0 = 10


<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>

<Figure size 576x288 with 0 Axes>