In [3]:
import ROOT as r
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn as skl
import pickle
from joblib import dump, load
from sklearn.metrics import ConfusionMatrixDisplay
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.gridspec as gridspec

Welcome to JupyROOT 6.18/04


# Confusion Plots

## Changing Pulse Separation, test and train

In [19]:
# Draw a 2x5 grid of k Nearest Neighbors confusion matrices, one panel per
# minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps), and save it.
fig, ax = plt.subplots(2, 5, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Figure-wide axis labels and title, positioned in figure coordinates.
# NOTE(review): imshow draws matrix rows along y, so these labels assume the
# CSV stores predictions along rows -- confirm against the code that wrote it.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'k Nearest Neighbors Confusion Matrices Trained and Tested with Varying Minimum Pulse Separation', size='30', horizontalalignment='center')

# ax.flat walks the panels row by row, matching the original (i, j) indexing.
for num, panel in enumerate(ax.flat):
    minTime = num * 0.5

    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics/confusion_knn_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Label every cell with its value. Fixed two-decimal formatting replaces
    # str(value)[:4], which corrupted values printed in scientific notation
    # (e.g. 2e-05 was shown as '2e-0') and truncated instead of rounding.
    for (row, col), value in np.ndenumerate(grid):
        panel.text(col - 0.25, row - 0.2, '{:.2f}'.format(float(value)), size=12, color='white')

    # Give each panel its own narrow colorbar on the right-hand side.
    divider = make_axes_locatable(panel)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    panel.set_title('Minimum Separation: ' + str(minTime) + ' ns', fontsize='15')
    fig.colorbar(im, cax=cax)
    # Ascending limits undo imshow's inverted y axis (row 0 at the bottom);
    # derived from the matrix so it is not tied to exactly five classes.
    panel.set_ylim(-0.5, grid.shape[0] - 0.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/knn_confusion.png', bbox_inches='tight')
plt.close(fig)

In [20]:
# Draw a 2x5 grid of Random Forest confusion matrices, one panel per minimum
# pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps), and save it.
fig, ax = plt.subplots(2, 5, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Figure-wide axis labels and title, positioned in figure coordinates.
# NOTE(review): imshow draws matrix rows along y, so these labels assume the
# CSV stores predictions along rows -- confirm against the code that wrote it.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'Random Forest Confusion Matrices Trained and Tested with Varying Minimum Pulse Separation', size='30', horizontalalignment='center')

# ax.flat walks the panels row by row, matching the original (i, j) indexing.
for num, panel in enumerate(ax.flat):
    minTime = num * 0.5

    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics/confusion_rfc_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Label every cell with its value. Fixed two-decimal formatting replaces
    # str(value)[:4], which corrupted values printed in scientific notation
    # (e.g. 2e-05 was shown as '2e-0') and truncated instead of rounding.
    for (row, col), value in np.ndenumerate(grid):
        panel.text(col - 0.25, row - 0.2, '{:.2f}'.format(float(value)), size=12, color='white')

    # Give each panel its own narrow colorbar on the right-hand side.
    divider = make_axes_locatable(panel)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    panel.set_title('Minimum Separation: ' + str(minTime) + ' ns', fontsize='15')
    fig.colorbar(im, cax=cax)
    # Ascending limits undo imshow's inverted y axis (row 0 at the bottom);
    # derived from the matrix so it is not tied to exactly five classes.
    panel.set_ylim(-0.5, grid.shape[0] - 0.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/rfc_confusion.png', bbox_inches='tight')
plt.close(fig)

In [21]:
# Draw a 2x5 grid of Multi-Layer Perceptron confusion matrices, one panel per
# minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps), and save it.
fig, ax = plt.subplots(2, 5, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Figure-wide axis labels and title, positioned in figure coordinates.
# NOTE(review): imshow draws matrix rows along y, so these labels assume the
# CSV stores predictions along rows -- confirm against the code that wrote it.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'Multi-Layer Perceptron Confusion Matrices Trained and Tested with Varying Minimum Pulse Separation', size='30', horizontalalignment='center')

# ax.flat walks the panels row by row, matching the original (i, j) indexing.
for num, panel in enumerate(ax.flat):
    minTime = num * 0.5

    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics/confusion_mlp_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Label every cell with its value. Fixed two-decimal formatting replaces
    # str(value)[:4], which corrupted values printed in scientific notation
    # (e.g. 2e-05 was shown as '2e-0') and truncated instead of rounding.
    for (row, col), value in np.ndenumerate(grid):
        panel.text(col - 0.25, row - 0.2, '{:.2f}'.format(float(value)), size=12, color='white')

    # Give each panel its own narrow colorbar on the right-hand side.
    divider = make_axes_locatable(panel)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    panel.set_title('Minimum Separation: ' + str(minTime) + ' ns', fontsize='15')
    fig.colorbar(im, cax=cax)
    # Ascending limits undo imshow's inverted y axis (row 0 at the bottom);
    # derived from the matrix so it is not tied to exactly five classes.
    panel.set_ylim(-0.5, grid.shape[0] - 0.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/mlp_confusion.png', bbox_inches='tight')
plt.close(fig)

## Changing Pulse Separation, test only

In [22]:
# Draw a 2x5 grid of k Nearest Neighbors confusion matrices read from
# metrics_00/, one panel per minimum pulse separation (0.0 ns to 4.5 ns in
# 0.5 ns steps), and save it.
fig, ax = plt.subplots(2, 5, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Figure-wide axis labels and title, positioned in figure coordinates.
# NOTE(review): imshow draws matrix rows along y, so these labels assume the
# CSV stores predictions along rows -- confirm against the code that wrote it.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'k Nearest Neighbors Confusion Matrices only Tested with Varying Minimum Pulse Separation', size='30', horizontalalignment='center')

# ax.flat walks the panels row by row, matching the original (i, j) indexing.
for num, panel in enumerate(ax.flat):
    minTime = num * 0.5

    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics_00/confusion_knn_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Label every cell with its value. Fixed two-decimal formatting replaces
    # str(value)[:4], which corrupted values printed in scientific notation
    # (e.g. 2e-05 was shown as '2e-0') and truncated instead of rounding.
    for (row, col), value in np.ndenumerate(grid):
        panel.text(col - 0.25, row - 0.2, '{:.2f}'.format(float(value)), size=12, color='white')

    # Give each panel its own narrow colorbar on the right-hand side.
    divider = make_axes_locatable(panel)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    panel.set_title('Minimum Separation: ' + str(minTime) + ' ns', fontsize='15')
    fig.colorbar(im, cax=cax)
    # Ascending limits undo imshow's inverted y axis (row 0 at the bottom);
    # derived from the matrix so it is not tied to exactly five classes.
    panel.set_ylim(-0.5, grid.shape[0] - 0.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/knn_confusion_00.png', bbox_inches='tight')
plt.close(fig)

In [23]:
# Draw a 2x5 grid of Random Forest confusion matrices read from metrics_00/,
# one panel per minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps),
# and save it.
fig, ax = plt.subplots(2, 5, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Figure-wide axis labels and title, positioned in figure coordinates.
# NOTE(review): imshow draws matrix rows along y, so these labels assume the
# CSV stores predictions along rows -- confirm against the code that wrote it.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'Random Forest Confusion Matrices only Tested with Varying Minimum Pulse Separation', size='30', horizontalalignment='center')

# ax.flat walks the panels row by row, matching the original (i, j) indexing.
for num, panel in enumerate(ax.flat):
    minTime = num * 0.5

    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics_00/confusion_rfc_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Label every cell with its value. Fixed two-decimal formatting replaces
    # str(value)[:4], which corrupted values printed in scientific notation
    # (e.g. 2e-05 was shown as '2e-0') and truncated instead of rounding.
    for (row, col), value in np.ndenumerate(grid):
        panel.text(col - 0.25, row - 0.2, '{:.2f}'.format(float(value)), size=12, color='white')

    # Give each panel its own narrow colorbar on the right-hand side.
    divider = make_axes_locatable(panel)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    panel.set_title('Minimum Separation: ' + str(minTime) + ' ns', fontsize='15')
    fig.colorbar(im, cax=cax)
    # Ascending limits undo imshow's inverted y axis (row 0 at the bottom);
    # derived from the matrix so it is not tied to exactly five classes.
    panel.set_ylim(-0.5, grid.shape[0] - 0.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/rfc_confusion_00.png', bbox_inches='tight')
plt.close(fig)

In [24]:
# Draw a 2x5 grid of Multi-Layer Perceptron confusion matrices read from
# metrics_00/, one panel per minimum pulse separation (0.0 ns to 4.5 ns in
# 0.5 ns steps), and save it.
fig, ax = plt.subplots(2, 5, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Figure-wide axis labels and title, positioned in figure coordinates.
# NOTE(review): imshow draws matrix rows along y, so these labels assume the
# CSV stores predictions along rows -- confirm against the code that wrote it.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'Multi-Layer Perceptron Confusion Matrices only Tested with Varying Minimum Pulse Separation', size='30', horizontalalignment='center')

# ax.flat walks the panels row by row, matching the original (i, j) indexing.
for num, panel in enumerate(ax.flat):
    minTime = num * 0.5

    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics_00/confusion_mlp_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Label every cell with its value. Fixed two-decimal formatting replaces
    # str(value)[:4], which corrupted values printed in scientific notation
    # (e.g. 2e-05 was shown as '2e-0') and truncated instead of rounding.
    for (row, col), value in np.ndenumerate(grid):
        panel.text(col - 0.25, row - 0.2, '{:.2f}'.format(float(value)), size=12, color='white')

    # Give each panel its own narrow colorbar on the right-hand side.
    divider = make_axes_locatable(panel)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    panel.set_title('Minimum Separation: ' + str(minTime) + ' ns', fontsize='15')
    fig.colorbar(im, cax=cax)
    # Ascending limits undo imshow's inverted y axis (row 0 at the bottom);
    # derived from the matrix so it is not tied to exactly five classes.
    panel.set_ylim(-0.5, grid.shape[0] - 0.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/mlp_confusion_00.png', bbox_inches='tight')
plt.close(fig)

## Changing Size of Train Set

In [159]:
# 2x4 grid of k Nearest Neighbors confusion matrices, one panel per
# training-set size (10000 to 80000 in steps of 10000), saved to ./figures.
fig, ax = plt.subplots(nrows=2, ncols=4, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Shared axis labels and overall title, placed in figure coordinates.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'k Nearest Neighbors Confusion Matrices with Varying Training Set Size', size='30', horizontalalignment='center')

# ax.flat yields the eight panels row by row (top row first).
for num, panel in enumerate(ax.flat):
    # The training-set size doubles as the CSV file suffix.
    name = str((num + 1) * 10000)

    df = pd.read_csv('metrics_train_size/confusion_knn_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Narrow colorbar axis attached to the right of the panel.
    cax = make_axes_locatable(panel).append_axes("right", size="5%", pad=0.05)
    fig.colorbar(im, cax=cax)

    panel.set_title('size: ' + name + ' islands', fontsize='15')
    # Ascending limits put row 0 at the bottom instead of imshow's default top.
    panel.set_ylim(-0.5, 4.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/knn_confusion_train_size.png', bbox_inches='tight')
plt.close(fig)

In [160]:
# 2x4 grid of Random Forest confusion matrices, one panel per training-set
# size (10000 to 80000 in steps of 10000), saved to ./figures.
fig, ax = plt.subplots(nrows=2, ncols=4, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Shared axis labels and overall title, placed in figure coordinates.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'Random Forest Confusion Matrices with Varying Training Set Size', size='30', horizontalalignment='center')

# ax.flat yields the eight panels row by row (top row first).
for num, panel in enumerate(ax.flat):
    # The training-set size doubles as the CSV file suffix.
    name = str((num + 1) * 10000)

    df = pd.read_csv('metrics_train_size/confusion_rfc_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Narrow colorbar axis attached to the right of the panel.
    cax = make_axes_locatable(panel).append_axes("right", size="5%", pad=0.05)
    fig.colorbar(im, cax=cax)

    panel.set_title('size: ' + name + ' islands', fontsize='15')
    # Ascending limits put row 0 at the bottom instead of imshow's default top.
    panel.set_ylim(-0.5, 4.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/rfc_confusion_train_size.png', bbox_inches='tight')
plt.close(fig)

In [161]:
# 2x4 grid of Multi-Layer Perceptron confusion matrices, one panel per
# training-set size (10000 to 80000 in steps of 10000), saved to ./figures.
fig, ax = plt.subplots(nrows=2, ncols=4, sharex=False, sharey=True)
fig.set_size_inches(20, 10)

# Shared axis labels and overall title, placed in figure coordinates.
fig.text(0.5, 0, 'Truth Number of Pulses', size='30', horizontalalignment='center')
fig.text(-0.03, 0.5, 'Predicted Number of Pulses', size='30', verticalalignment='center', rotation=90)
fig.text(0.5, 1.01, 'Multi-Layer Perceptron Confusion Matrices with Varying Training Set Size', size='30', horizontalalignment='center')

# ax.flat yields the eight panels row by row (top row first).
for num, panel in enumerate(ax.flat):
    # The training-set size doubles as the CSV file suffix.
    name = str((num + 1) * 10000)

    df = pd.read_csv('metrics_train_size/confusion_mlp_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    im = panel.imshow(grid)

    # Narrow colorbar axis attached to the right of the panel.
    cax = make_axes_locatable(panel).append_axes("right", size="5%", pad=0.05)
    fig.colorbar(im, cax=cax)

    panel.set_title('size: ' + name + ' islands', fontsize='15')
    # Ascending limits put row 0 at the bottom instead of imshow's default top.
    panel.set_ylim(-0.5, 4.5)

fig.tight_layout()
fig.subplots_adjust(wspace=0.2, hspace=0)
fig.savefig('./figures/mlp_confusion_train_size.png', bbox_inches='tight')
plt.close(fig)

# Scatter Plots

## Changing Pulse Separation, test and train

In [194]:
# One line per minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps),
# plotting the diagonal of the k Nearest Neighbors confusion matrix against
# the pulse count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
separations = [step * 0.5 for step in range(10)]

for minTime in separations:
    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics/confusion_knn_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=str(minTime) + ' ns', marker='o')

ax.set_title('k Nearest Neighbors Accuracy Trained and Tested \n with Varying Minimum Pulse Separation', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/knn_CPS_both.png', bbox_inches='tight')
plt.close(fig)

In [195]:
# One line per minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps),
# plotting the diagonal of the Random Forest confusion matrix against the
# pulse count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
separations = [step * 0.5 for step in range(10)]

for minTime in separations:
    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics/confusion_rfc_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=str(minTime) + ' ns', marker='o')

ax.set_title('Random Forest Accuracy Trained and Tested \n with Varying Minimum Pulse Separation', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/rfc_CPS_both.png', bbox_inches='tight')
plt.close(fig)

In [196]:
# One line per minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps),
# plotting the diagonal of the Multi-Layer Perceptron confusion matrix
# against the pulse count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
separations = [step * 0.5 for step in range(10)]

for minTime in separations:
    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics/confusion_mlp_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=str(minTime) + ' ns', marker='o')

ax.set_title('Multi-Layer Perceptron Accuracy Trained and Tested \n with Varying Minimum Pulse Separation', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/mlp_CPS_both.png', bbox_inches='tight')
plt.close(fig)

## Changing Pulse Separation, test only

In [197]:
# One line per minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps),
# plotting the diagonal of the k Nearest Neighbors confusion matrix read from
# metrics_00/ against the pulse count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
separations = [step * 0.5 for step in range(10)]

for minTime in separations:
    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics_00/confusion_knn_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=str(minTime) + ' ns', marker='o')

ax.set_title('k Nearest Neighbors Accuracy only Tested \n with Varying Minimum Pulse Separation', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/knn_CPS_00.png', bbox_inches='tight')
plt.close(fig)

In [198]:
# One line per minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps),
# plotting the diagonal of the Random Forest confusion matrix read from
# metrics_00/ against the pulse count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
separations = [step * 0.5 for step in range(10)]

for minTime in separations:
    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics_00/confusion_rfc_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=str(minTime) + ' ns', marker='o')

ax.set_title('Random Forest Accuracy only Tested \n with Varying Minimum Pulse Separation', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/rfc_CPS_00.png', bbox_inches='tight')
plt.close(fig)

In [199]:
# One line per minimum pulse separation (0.0 ns to 4.5 ns in 0.5 ns steps),
# plotting the diagonal of the Multi-Layer Perceptron confusion matrix read
# from metrics_00/ against the pulse count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
separations = [step * 0.5 for step in range(10)]

for minTime in separations:
    # File suffix is the separation in tenths of a ns, zero-padded to two
    # digits: '00', '05', '10', ..., '45'.
    name = '{:02d}'.format(int(minTime * 10))

    df = pd.read_csv('metrics_00/confusion_mlp_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=str(minTime) + ' ns', marker='o')

ax.set_title('Multi-Layer Perceptron Accuracy only Tested \n with Varying Minimum Pulse Separation', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/mlp_CPS_00.png', bbox_inches='tight')
plt.close(fig)

## Changing Size of Train Set

In [215]:
# One line per training-set size (1000 to 8000 in steps of 1000), plotting the
# diagonal of the k Nearest Neighbors confusion matrix against the pulse
# count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
sizes = [thousands * 1000 for thousands in range(1, 9)]

for size in sizes:
    name = str(size)
    # Legend text carries a thousands separator, e.g. '1,000'.
    i_name = '{:,}'.format(size)

    df = pd.read_csv('metrics_train_size/confusion_knn_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=i_name + ' islands', marker='o')

ax.set_title('k Nearest Neighbors Accuracy with \n Varying Training Set Size', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/knn_train_size_small.png', bbox_inches='tight')
plt.close(fig)

In [216]:
# One line per training-set size (1000 to 8000 in steps of 1000), plotting the
# diagonal of the Random Forest confusion matrix against the pulse count; the
# figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
sizes = [thousands * 1000 for thousands in range(1, 9)]

for size in sizes:
    name = str(size)
    # Legend text carries a thousands separator, e.g. '1,000'.
    i_name = '{:,}'.format(size)

    df = pd.read_csv('metrics_train_size/confusion_rfc_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=i_name + ' islands', marker='o')

ax.set_title('Random Forest Accuracy with \n Varying Training Set Size', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/rfc_train_size_small.png', bbox_inches='tight')
plt.close(fig)

In [217]:
# One line per training-set size (1000 to 8000 in steps of 1000), plotting the
# diagonal of the Multi-Layer Perceptron confusion matrix against the pulse
# count; the figure is saved to ./figures.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 10)

nPulses = [0, 1, 2, 3, 4]
sizes = [thousands * 1000 for thousands in range(1, 9)]

for size in sizes:
    name = str(size)
    # Legend text carries a thousands separator, e.g. '1,000'.
    i_name = '{:,}'.format(size)

    df = pd.read_csv('metrics_train_size/confusion_mlp_' + name + '.csv', sep=',', index_col=0)
    grid = np.array(df.values)

    # Diagonal entries are the cells whose row and column index agree.
    data = np.diagonal(grid)

    ax.plot(nPulses, data, label=i_name + ' islands', marker='o')

ax.set_title('Multi-Layer Perceptron Accuracy with \n Varying Training Set Size', size=20)
ax.set_xlabel('True Number of Pulses', size=20)
# NOTE(review): the label says '%' but the axis spans 0-1.05 -- confirm units.
ax.set_ylabel('Accuracy %', size=20)
ax.set_xticks([0, 1, 2, 3, 4])
ax.set_ylim(0, 1.05)
fig.legend(loc=(0.1, 0.1), fontsize='large')
fig.savefig('./figures/mlp_train_size_small.png', bbox_inches='tight')
plt.close(fig)