# Analysis of results of the Q-learner using Gremlin and SWEET

The other notebook is focused on the Gremlin results, whereas this is focused on how
well the Q-learner performed.

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib notebook
from datetime import datetime as dt

from scipy.stats import wilcoxon, levene

sns.set_theme() # default Seaborn theme

from analytics import consolidate_csv_as_df, consolidate_learner_csv_as_df


# Load the data
First load, organize, and clean up the data for both the SWEET and UNSWEETENED runs.

## Un-SWEETENED data


In [2]:
q_learner_unsweetened_runs = consolidate_learner_csv_as_df(Path('nonsweet_output').glob('*progress.csv'), infer_run=True)

reading nonsweet_output/run6_gremlin-enhanced-iter17_q_learner_progress.csv
Adding run number 6
Adding iteration number 17
New dataframe length 2000
reading nonsweet_output/run6_gremlin-enhanced-iter3_q_learner_progress.csv
Adding run number 6
Adding iteration number 3
New dataframe length 2000
reading nonsweet_output/run1_gremlin-enhanced-iter7_q_learner_progress.csv
Adding run number 1
Adding iteration number 7
New dataframe length 2000
reading nonsweet_output/run5_gremlin-enhanced-iter8_q_learner_progress.csv
Adding run number 5
Adding iteration number 8
New dataframe length 2000
reading nonsweet_output/run5_gremlin-enhanced-iter6_q_learner_progress.csv
Adding run number 5
Adding iteration number 6
New dataframe length 2000
reading nonsweet_output/run9_gremlin-enhanced-iter2_q_learner_progress.csv
Adding run number 9
Adding iteration number 2
New dataframe length 2000
reading nonsweet_output/run2_gremlin-enhanced-iter14_q_learner_progress.csv
Adding run number 2
Adding iteration num

In [3]:
q_learner_unsweetened_runs

Unnamed: 0,Episode,Reward,TrainTime,Position,Velocity,Angle,RotationalVelocity,run,iteration
0,0,230.0,0.017528,-0.048255,-0.031692,-0.011041,-0.030790,6,17
1,1,179.0,0.013066,0.015853,0.026159,0.009649,-0.040193,6,17
2,2,209.0,0.015771,0.015853,0.026159,0.009649,-0.040193,6,17
3,3,206.0,0.015397,0.015853,0.026159,0.009649,-0.040193,6,17
4,4,47.0,0.003608,0.015853,0.026159,0.009649,-0.040193,6,17
...,...,...,...,...,...,...,...,...,...
1995,1995,57.0,0.002754,0.047366,-0.023219,-0.014872,0.018819,5,5
1996,1996,44.0,0.002160,0.047366,-0.023219,-0.014872,0.018819,5,5
1997,1997,45.0,0.002194,0.047366,-0.023219,-0.014872,0.018819,5,5
1998,1998,45.0,0.002161,0.047366,-0.023219,-0.014872,0.018819,5,5


In [4]:
q_learner_unsweetened_runs['type'] = 'notsweet'

We can see the episodes reset between iterations.

In [5]:
q_learner_unsweetened_runs.groupby(['run','iteration']).Episode.agg(['min','max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max
run,iteration,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,4999
0,1,0,1999
0,2,0,1999
0,3,0,1999
0,4,0,1999
...,...,...,...
9,13,0,1999
9,14,0,1999
9,15,0,1999
9,16,0,1999


As you can see, iteration 0 has 5k episodes, and the rest 2k.  And the episode numbers reset with each new iteration.  We need to serialize the episode numbering by run.

In [6]:
def normalize_episode(row):
    """ add new episode that continues range for entire run """
    if row.iteration == 0:
        return row.Episode
    else:
        return (row.iteration - 1) * 2000 + 5000 + row.Episode

In [7]:
q_learner_unsweetened_runs['corrected_episode'] = q_learner_unsweetened_runs.apply(normalize_episode, axis=1)

In [8]:
q_learner_unsweetened_runs.groupby(['run','iteration']).corrected_episode.agg(['min','max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max
run,iteration,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,4999
0,1,5000,6999
0,2,7000,8999
0,3,9000,10999
0,4,11000,12999
...,...,...,...
9,13,29000,30999
9,14,31000,32999
9,15,33000,34999
9,16,35000,36999


## SWEETened data


In [9]:
q_learner_sweetened_runs = consolidate_learner_csv_as_df(Path('sweet_output').glob('*progress.csv'), infer_run=True)

reading sweet_output/run6_gremlin-enhanced-iter17_q_learner_progress.csv
Adding run number 6
Adding iteration number 17
New dataframe length 2000
reading sweet_output/run6_gremlin-enhanced-iter3_q_learner_progress.csv
Adding run number 6
Adding iteration number 3
New dataframe length 2000
reading sweet_output/run1_gremlin-enhanced-iter7_q_learner_progress.csv
Adding run number 1
Adding iteration number 7
New dataframe length 2000
reading sweet_output/run5_gremlin-enhanced-iter8_q_learner_progress.csv
Adding run number 5
Adding iteration number 8
New dataframe length 2000
reading sweet_output/run5_gremlin-enhanced-iter6_q_learner_progress.csv
Adding run number 5
Adding iteration number 6
New dataframe length 2000
reading sweet_output/run9_gremlin-enhanced-iter2_q_learner_progress.csv
Adding run number 9
Adding iteration number 2
New dataframe length 2000
reading sweet_output/run2_gremlin-enhanced-iter14_q_learner_progress.csv
Adding run number 2
Adding iteration number 14
New dataframe 

In [10]:
q_learner_sweetened_runs['type'] = 'sweet'

In [11]:
q_learner_sweetened_runs['corrected_episode'] = q_learner_sweetened_runs.apply(normalize_episode, axis=1)

In [12]:
q_learner_sweetened_runs.groupby(['run','iteration']).corrected_episode.agg(['min','max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max
run,iteration,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,4999
0,1,5000,6999
0,2,7000,8999
0,3,9000,10999
0,4,11000,12999
...,...,...,...
9,13,29000,30999
9,14,31000,32999
9,15,33000,34999
9,16,35000,36999


## Merge data

Merge the two dataframes.


In [13]:
q_learner_runs = pd.concat([q_learner_sweetened_runs, q_learner_unsweetened_runs])

In [14]:
# We just want a subset
q_learner_runs_subset = q_learner_runs[q_learner_runs.corrected_episode % 100 == 0].copy()

# Compute best-so-far for all the runs by run type


In [15]:
q_learner_runs_subset.sort_values(by=['type','run','corrected_episode'], inplace=True)

In [16]:
q_learner_runs_subset.reset_index(drop=False, inplace=True) # because all the indices will be messed up due to concat

In [17]:
q_learner_runs_subset = q_learner_runs_subset[['type','run','corrected_episode','Reward']].copy()

In [18]:
q_learner_runs_subset['best_so_far'] = q_learner_runs_subset.groupby(['type','run']).Reward.cummax()

In [19]:
q_learner_runs_subset

Unnamed: 0,type,run,corrected_episode,Reward,best_so_far
0,notsweet,0,0,35.0,35.0
1,notsweet,0,100,20.0,35.0
2,notsweet,0,200,20.0,35.0
3,notsweet,0,300,22.0,35.0
4,notsweet,0,400,20.0,35.0
...,...,...,...,...,...
7795,sweet,9,38500,58.0,178.0
7796,sweet,9,38600,64.0,178.0
7797,sweet,9,38700,111.0,178.0
7798,sweet,9,38800,76.0,178.0


In [20]:
q_learner_runs_subset.corrected_episode.max()

38900

In [21]:
not_sweet_best = q_learner_runs_subset.query("corrected_episode == 38900 and type == 'notsweet'").best_so_far

In [22]:
sweet_best = q_learner_runs_subset.query("corrected_episode == 38900 and type == 'sweet'").best_so_far

In [23]:
wilcoxon(not_sweet_best, sweet_best, alternative='greater')



WilcoxonResult(statistic=8.0, pvalue=0.13666083914614907)

In [24]:
levene(not_sweet_best, sweet_best)

LeveneResult(statistic=1.82331991491905, pvalue=0.19365341670603076)

# Visualizations

Now we get to look at the data.  We're interested in comparing the bestp-so-far
curves of the two sets of experiments.  We may also be interested in generating
a boxplot showing the relative time the best solution was found first.


In [25]:
q_learner_impact_plot = sns.lineplot(data=q_learner_runs_subset,
                        x='corrected_episode', y='best_so_far',
                        hue='type', ci=95,
                        ).set(title='Impact of SWEET on Q-Learner training performance',
                             xlabel='Episode', ylabel='Mean of best-so-far fitnesses')

<IPython.core.display.Javascript object>

In [26]:
plt.legend(title='Run type', labels=['Basic Async','SWEET'])

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fbf5c1909d0>

In [27]:
plt.savefig('gremlin_cart_pole_bsf.png')

<IPython.core.display.Javascript object>

In [28]:
plt.savefig('gremlin_cart_pole_bsf.pdf')

<IPython.core.display.Javascript object>

In [29]:
plt.clf()

<IPython.core.display.Javascript object>

We want to compare the final scores. Use box plots to denote the distribution between the two.

In [30]:
final_box_plot = sns.boxplot(data=q_learner_runs_subset[q_learner_runs_subset.corrected_episode == 38900],
                             y='best_so_far', x='type').set(title='Comparison of final Q-learner scores', ylabel='final scores')

<IPython.core.display.Javascript object>

In [31]:
plt.xticks(plt.xticks()[0], ['Basic Async', 'SWEET'])

<IPython.core.display.Javascript object>

ValueError: The number of FixedLocator locations (6), usually from a call to set_ticks, does not match the number of ticklabels (2).

In [32]:
plt.savefig('gremlin_cart_pole_final_score_boxplot.png')

<IPython.core.display.Javascript object>

In [33]:
plt.savefig('gremlin_cart_pole_final_score_boxplot.pdf')

<IPython.core.display.Javascript object>

In [41]:
df = q_learner_runs.copy()
df = df[['type','run','corrected_episode','Reward','TrainTime']]
#df = df[df.corrected_episode % 100 == 0].copy()
df.sort_values(by=['type','run','corrected_episode'], inplace=True)

df['best_so_far'] = df.groupby(['type','run']).Reward.cummax()
df['CumulTime'] = df.groupby(['type','run']).apply(lambda g: g.TrainTime.cumsum().reset_index()).iloc[:,1].values
df['CumulTime'] = df['CumulTime'] * 1000
df['CumulTime'] = df.CumulTime.apply(lambda t: dt.fromtimestamp(t))
df = df.groupby(['type','run']).apply(lambda g: g.set_index('CumulTime').resample('S').ffill().reset_index())
df['CumulTime'] = df['CumulTime'] - df['CumulTime'].min()
df['CumulTime'] = df['CumulTime'].apply(lambda t: t.seconds)
df = df[df.CumulTime % 100 == 0].copy().drop(['type','run'],axis=1).reset_index()
df['CumulTime'] = df.CumulTime / 1000



In [43]:

G = sns.lineplot(data=df[df.CumulTime > 0.2], x='CumulTime', y = 'best_so_far', hue='type', ci=95)
#G.axes.set_xticklabels([l.get_text()  if idx % 100 ==0 else '' for idx, l in enumerate(G.axes.get_xticklabels())])
G.legend().set_title('')
plt.xlabel('Time (s)')
plt.ylabel('Best Fitness So Far')
plt.savefig('gremlin_cart_pole_time_curve.pdf')
plt.savefig('gremlin_cart_pole_time_curve.png')
plt.close()



<IPython.core.display.Javascript object>