In [3]:
import pandas
import numpy
import random

In [4]:
df = pandas.DataFrame.from_csv('alpha_gamma_experiments_two_sets_15_experiments.csv')
df

Unnamed: 0,alpha,gamma,rewards_per_action
0,0.9,0.3,0.461435
1,0.9,0.3,0.451306
2,0.9,0.3,0.429459
3,0.9,0.3,0.438609
4,0.9,0.3,0.439223
5,0.9,0.3,0.4371
6,0.9,0.3,0.44456
7,0.9,0.3,0.452696
8,0.9,0.3,0.470704
9,0.9,0.3,0.442216


In [5]:
df[df['alpha']== 0.9][df['gamma'] == 0.5]

Unnamed: 0,alpha,gamma,rewards_per_action
15,0.9,0.5,0.434538
16,0.9,0.5,0.448658
17,0.9,0.5,0.463374
18,0.9,0.5,0.437126
19,0.9,0.5,0.443238
20,0.9,0.5,0.435405
21,0.9,0.5,0.44206
22,0.9,0.5,0.43617
23,0.9,0.5,0.435386
24,0.9,0.5,0.451591


In [6]:
def setting_df(alpha=0, gamma=0):
    return df[df['alpha'] == alpha][df['gamma'] == gamma]

In [7]:
def avg_alp_gam(alpha=0, gamma=0):
    return setting_df(alpha=alpha, gamma=gamma)['rewards_per_action']
    

In [8]:
avg_alp_gam(alpha=0.9, gamma=0.5).mean()

0.44142449705166664

In [9]:
avg_alp_gam(alpha=0.9, gamma=0.3).mean()

0.44560205024213329

### Note: 
I ran `30` experiments, `1` for each combination of alpha and gamma values, where the best action taken was always the action that had the max Q value between the next_waypoint or None, considering the state of the learning agent.  I picked the settings in which the the reward per action (RPA) was highest and lowest.  I wanted to see whether or not these settings give significantly different results. Given that one run of 100 trials for alpha=`0.9`, gamma=`0.3` resulted in `2.149222` RPA and alpha=`0.9`, gamma=`0.5` in `2.446097` RPA, which is slightly higher than the latter, I expected the average of the set of rewards per action for alpha=`0.9` and gamma=`0.5` to be higher than the set for alpha=`0.9` and gamma=`0.3`, but that does not seem to be the case.  This suggests that changing the alpha and gamma values, considering the current the "best action" decision criteria, might be of little consequence.

In [10]:
def significance_test(p=0.05):
    group_1 = avg_alp_gam(alpha=0.9, gamma=0.5)
    group_2 = avg_alp_gam(alpha=0.9, gamma=0.3)
    
    diff_mean = group_1.mean() - group_2.mean()
    
    group_1_size = group_1.size
    group_2_size = group_2.size
    
    num_experiments = 10000
    rewards_per_action_values = df['rewards_per_action'].values
    
    for i in range(num_experiments):
        random.shuffle(rewards_per_action_values)
        new_group_1 = rewards_per_action_values[0:group_1_size]
        new_group_2 = rewards_per_action_values[group_1_size:]

In [11]:
def significance_test(p=0.05):
    group_1 = avg_alp_gam(alpha=0.9, gamma=0.5)
    group_2 = avg_alp_gam(alpha=0.9, gamma=0.3)
    
    diff_mean = group_1.mean() - group_2.mean()
    
    group_1_size = group_1.size
    group_2_size = group_2.size
    
    num_experiments = 10000
    rewards_per_action_values = df['rewards_per_action'].values
    count_pass_test = 0
    
    for i in range(num_experiments):
        random.shuffle(rewards_per_action_values)
        new_group_1 = rewards_per_action_values[0:group_1_size]
        new_group_2 = rewards_per_action_values[group_1_size:]
        
        if new_group_1.mean() - new_group_2.mean() >= diff_mean:
            count_pass_test += 1
            
    result = float(count_pass_test) / num_experiments
    
    if result < p:
        print "Passed the significance level of p < {}, got {}".format(p, result)
    else:
        print "Failed to pass the significance level of p < {}, got {}".format(p, result)

In [12]:
# Running this significance test several times shows a consistency of rejecting the null (i.e. changing alpha and gamma values does not affect reward_per_action scores)
significance_test()

Failed to pass the significance level of p < 0.05, got 0.8633


In [13]:
# Running this significance test several times shows a consistency of rejecting the null (i.e. changing alpha and gamma values does not affect reward_per_action scores)
significance_test()

Failed to pass the significance level of p < 0.05, got 0.5928


In [14]:
# Running this significance test several times shows a consistency of rejecting the null (i.e. changing alpha and gamma values does not affect reward_per_action scores)
significance_test()

Passed the significance level of p < 0.05, got 0.0468


In [15]:
# Running this significance test several times shows a consistency of rejecting the null (i.e. changing alpha and gamma values does not affect reward_per_action scores)
significance_test()

Failed to pass the significance level of p < 0.05, got 0.5009


In [17]:
df['rewards_per_action']

0     0.461435
1     0.451306
2     0.429459
3     0.438609
4     0.439223
5     0.437100
6     0.444560
7     0.452696
8     0.470704
9     0.442216
10    0.437719
11    0.442267
12    0.447249
13    0.441105
14    0.448384
15    0.434538
16    0.448658
17    0.463374
18    0.437126
19    0.443238
20    0.435405
21    0.442060
22    0.436170
23    0.435386
24    0.451591
25    0.454698
26    0.445614
27    0.432558
28    0.435594
29    0.425357
Name: rewards_per_action, dtype: float64