# Mountaincar Environment
* Eval can start anywhere from left to goal state, vel 0 (also training). They need 71 episodes
* Modify cartpole to only have two actions -> left and right. The magnitude of the actions is much larger in the NFQ paper
* Hint to goal, which sometimes makes the agent perform worse
* Group: the magnitude of the action
* Made the forces symmetric

In [1]:
import configargparse
import torch
import torch.optim as optim
import sys
sys.path.append('../')

from environments import MountainCarEnv, Continuous_MountainCarEnv
from models.agents import NFQAgent
from models.networks import NFQNetwork, ContrastiveNFQNetwork
from util import get_logger, close_logger, load_models, make_reproducible, save_models
import matplotlib.pyplot as plt
import numpy as np
import itertools
import seaborn as sns
import tqdm
import json
from train_mountaincar import fqi, warm_start, transfer_learning
import scipy

# Running experiments

## "Structureless Test"
* The dynamics of the systems are actually the same. Do any of the algorithms learn a difference?

In [None]:
# Structureless sanity check: run contrastive FQI repeatedly and compare the
# distributions of the two performance values it returns.
num_iter = 15
perf_foreground, perf_background = [], []
for i in range(num_iter):
    print(i)
    # NOTE(review): fqi is unpacked as (background, foreground) — confirm
    # against train_mountaincar.fqi.
    perf_bg, perf_fg = fqi(
        epoch=1500,
        verbose=True,
        is_contrastive=True,
        structureless=True,
    )
    perf_foreground.append(perf_fg)
    perf_background.append(perf_bg)

# Overlay both distributions on the same axes.
for perf, label in (
    (perf_foreground, 'Foreground Performance'),
    (perf_background, 'Background Performance'),
):
    sns.distplot(perf, label=label)
plt.legend()
plt.xlabel("Average Reward Earned")
plt.title("Dynamics are the same in fg and bg environments")

## "Performance when force left is different"
* We change the gravity on the foreground environments. 

In [None]:
# Sweep the gravity setting and record, per gravity level and per method, the
# (foreground, background) performance of every repetition.
num_iter = 3
num_gravities = 5  # number of gravity levels in the sweep
methods = ('cfqi', 'fqi', 'warm_start', 'transfer_learning')

# results[level][method][repetition] -> (perf_fg, perf_bg)
results = {
    level: {method: {} for method in methods}
    for level in range(num_gravities)
}

initial_g = 0.0025
x = []
for i in range(num_iter):
    for f in range(num_gravities):
        gravity = initial_g + f*0.0005
        print(str(gravity))
        x.append(gravity)

        perf_bg, perf_fg = fqi(epoch=1500, verbose=False, is_contrastive=True, structureless=True, gravity=gravity)
        results[f]['cfqi'][i] = (perf_fg, perf_bg)

        perf_bg, perf_fg = fqi(epoch=1500, verbose=False, is_contrastive=False, structureless=True, gravity=gravity)
        results[f]['fqi'][i] = (perf_fg, perf_bg)

        perf_bg, perf_fg = warm_start(epoch=1500, verbose=False, structureless=True, gravity=gravity)
        results[f]['warm_start'][i] = (perf_fg, perf_bg)

        perf_bg, perf_fg = transfer_learning(epoch=1500, verbose=False, structureless=True, gravity=gravity)
        results[f]['transfer_learning'][i] = (perf_fg, perf_bg)

    # Checkpoint after every repetition so partial results are kept if a later
    # run is interrupted. The handle gets its own name instead of reusing the
    # loop variable `f`. Note that JSON stores the integer keys as strings and
    # the tuples as lists.
    with open('force_left_v_performance.json', 'w') as out_file:
        json.dump(results, out_file)

0.0025


 39%|███▊      | 580/1501 [04:05<06:29,  2.36it/s]
100%|██████████| 1501/1501 [13:06<00:00,  1.91it/s]


0.003


 17%|█▋        | 257/1501 [01:47<08:41,  2.38it/s]
100%|██████████| 1501/1501 [13:08<00:00,  1.90it/s]


In [None]:
# Display the collected results dictionary as the cell output.
results

In [None]:
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and the half-width of its confidence interval.

    The half-width is the standard error of the mean scaled by the two-sided
    Student-t critical value with ``n - 1`` degrees of freedom.

    Args:
        data: Sequence of numeric samples.
        confidence: Two-sided confidence level, e.g. ``0.95``.

    Returns:
        Tuple ``(mean, half_width)``; the interval is ``mean +/- half_width``.
        With fewer than two samples the half-width is undefined and comes back
        as NaN.
    """
    # `import scipy` alone does not guarantee that the `stats` submodule is
    # loaded, so import it explicitly where it is used.
    from scipy import stats

    a = np.asarray(data, dtype=float)
    n = len(a)
    m, se = np.mean(a), stats.sem(a)
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, h

def plot_performance(results, ds='bg'):
    """Plot the mean performance of each method with confidence-interval bars.

    Args:
        results: Nested mapping ``results[level][method][run]`` whose leaves
            are ``(foreground, background)`` performance pairs. ``level`` runs
            over ``0..4``; integer keys and their string form (as produced by
            a JSON round trip) are both accepted.
        ds: ``'bg'`` selects the background entry of each pair; any other
            value selects the foreground entry.
    """
    ind = 1 if ds == 'bg' else 0
    methods = [
        ('cfqi', 'CFQI'),
        ('fqi', 'FQI'),
        ('warm_start', 'Warm Start'),
        ('transfer_learning', 'Transfer Learning'),
    ]
    levels = list(range(0, 5))

    means = {key: [] for key, _ in methods}
    errs = {key: [] for key, _ in methods}
    for level in levels:
        # json.dump turns integer keys into strings, so fall back to the
        # string key when the results were reloaded from disk.
        per_level = results[level] if level in results else results[str(level)]
        for key, _ in methods:
            perf = [run[ind] for run in per_level[key].values()]
            m, h = mean_confidence_interval(perf)
            means[key].append(m)
            errs[key].append(h)

    plt.figure(figsize=(10, 4))
    # The x positions are the level indices 0..4, not the gravity values.
    for key, label in methods:
        # x/y are passed by keyword: recent seaborn releases reject them as
        # positional arguments.
        sns.scatterplot(x=levels, y=means[key], label=label)
        plt.errorbar(levels, means[key], yerr=errs[key], linestyle="None")
    if ds == 'bg':
        plt.title("Background Dataset: Performance of CFQI, FQI, Warm Start, Transfer Learning when gravity is modified")
    else:
        plt.title("Foreground Dataset: Performance of CFQI, FQI, Warm Start, Transfer Learning when gravity is modified")
    plt.xlabel("Gravity")
    plt.ylabel("Reward")
    plt.show()

In [None]:
# Plot the collected results using the 'bg' (background) entry of each pair.
plot_performance(results, ds='bg')