## Imports

In [None]:
# Set notebook up to load files from Science repo
import os
import sys
# Put the parent of the current working directory on the import path (once),
# so packages that live one level above this notebook can be imported.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Reload imported modules automatically before each cell executes, so edits to
# project source files take effect without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt

In [None]:
import math
import pickle
import random
import glob
import re
import pandas as pd
import numpy as np

In [None]:
#%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.patches import Rectangle
from matplotlib.lines import Line2D

### Import Functions

In [None]:
from science.agents import GridEnvironment, QLearningAgent_Bernoulli_greedy, QLearningAgent_Bernoulli

In [None]:
from science.draw_map import create_grid_map, add_trajectory, map_reward_estimation

In [None]:
from science.feedback_true import wrapper_true_trajectory_level_feedback, wrapper_true_state_level_feedback, calculate_deviation

## Initialize the map

In [None]:
def random_road(car_init, num_steps, grid_width, grid_height, rng=None):
    """Generate a random road that only ever moves right (+x) or up (+y).

    Starting from ``car_init[0]``, the road is extended one cell at a time.
    Each step is chosen uniformly among the moves that stay inside the grid:
    one cell in +x (if not on the right edge) or one cell in +y (if not on
    the upper edge).

    Parameters
    ----------
    car_init : sequence of [x, y]
        Only the first entry is used; it becomes the first cell of the road.
    num_steps : int
        Maximum number of moves appended after the starting cell.
    grid_width : int
        Number of cells along x; a +x move is allowed while ``x < grid_width - 1``.
    grid_height : int
        Number of cells along y; a +y move is allowed while ``y < grid_height - 1``.
    rng : numpy.random.Generator, optional
        Source of randomness for reproducible roads. When omitted, NumPy's
        global random state is used exactly as before, so ``np.random.seed``
        still controls the result.

    Returns
    -------
    numpy.ndarray
        One row per visited cell, starting with ``car_init[0]``. There are at
        most ``num_steps + 1`` rows; fewer only when the road gets stuck in
        the upper-right corner before all steps are used.
    """
    # The road always begins at the car's initial position
    road = [car_init[0]]

    for _ in range(num_steps):
        x, y = road[-1][0], road[-1][1]

        # Collect the moves that keep the road inside the grid
        candidates = []
        if x < grid_width - 1:    # not on the right edge
            candidates.append([x + 1, y])
        if y < grid_height - 1:   # not on the upper edge
            candidates.append([x, y + 1])

        # Stuck in the upper-right corner: end the road early
        if not candidates:
            break

        # Default path keeps the original global-state draw for backward compatibility
        if rng is None:
            pick = np.random.randint(0, len(candidates))
        else:
            pick = rng.integers(0, len(candidates))
        road.append(candidates[pick])

    return np.array(road)

In [None]:
# Grid dimensions, in cells
grid_width, grid_height = 10, 5
# Number of steps handed to GridEnvironment and random_road
num_steps = 10
# Start position wrapped in a list: random_road reads car_init[0]
car_init = [[0, 0]]
# Not referenced in the cells visible here; presumably used further down (TODO confirm)
num_landmarks = 4
num_iters_per_experiment = 4

In [None]:
# Re-assigns the start position; identical to the value already set in the parameter cell above
car_init = [[0, 0]]

## Run Experiments

In [140]:
# Trajectory-level feedback: run num_exp independent experiments, each with a
# freshly sampled road and a freshly constructed agent.
num_exp = 25
# One entry per experiment: deviations of every trajectory in agent_Bernoulli.exp_trajectory
reward_true_UCB_trajectory = []
# One entry per experiment: deviation of the trajectory from get_optimal_trajectory()
final_reward_true_UCB_trajectory = []

for _ in range(num_exp):
    # Fresh grid environment, target road and agent for this experiment.
    # The road is the ideal path to be completed in num_steps.
    env = GridEnvironment(grid_width, grid_height, num_steps, [0, 0])
    road = random_road(car_init, num_steps, grid_width, grid_height)
    agent_Bernoulli = QLearningAgent_Bernoulli(env, alpha_init=0.5, beta_init=0.5)

    # Run learning. The leading 5 is presumably the number of learning
    # iterations (TODO confirm); num_iters_per_experiment is not used here.
    reward_Bernoulli = agent_Bernoulli.learn(
        5, wrapper_true_trajectory_level_feedback, [], [],
        road, grid_width, grid_height, [], [], 0,
    )

    # Deviation from the road for each trajectory the agent stored
    reward_true_UCB_trajectory.append(
        [calculate_deviation(traj, road) for traj in agent_Bernoulli.exp_trajectory]
    )

    # Deviation from the road for the agent's final (optimal) trajectory
    final_trajectory = agent_Bernoulli.get_optimal_trajectory()
    final_reward_true_UCB_trajectory.append(
        calculate_deviation(final_trajectory, road)
    )
    

In [None]:
# State-level feedback: run num_exp independent experiments, each with a
# freshly sampled road and a freshly constructed agent.
num_exp = 25
# One entry per experiment: deviations of every trajectory in agent_Bernoulli.exp_trajectory
reward_true_UCB_state = []

for _ in range(num_exp):
    # Fresh grid environment, target road and agent for this experiment.
    # The road is the ideal path to be completed in num_steps.
    env = GridEnvironment(grid_width, grid_height, num_steps, [0, 0])
    road = random_road(car_init, num_steps, grid_width, grid_height)
    agent_Bernoulli = QLearningAgent_Bernoulli(env, alpha_init=0.5, beta_init=0.5)

    # Run learning. The leading 4 is presumably the number of learning
    # iterations (TODO confirm; the trajectory-level cell passes 5 instead).
    reward_Bernoulli = agent_Bernoulli.learn(
        4, wrapper_true_state_level_feedback, [], [],
        road, grid_width, grid_height, [], [], 0,
    )

    # Deviation from the road for each trajectory the agent stored
    reward_true_UCB_state.append(
        [calculate_deviation(traj, road) for traj in agent_Bernoulli.exp_trajectory]
    )

In [160]:
# One row per experiment, one column per position in that experiment's deviation list
deviation_df = pd.DataFrame(reward_true_UCB_trajectory)

# Column-wise statistics, i.e. aggregated across experiments
deviation_mean_true_traj = deviation_df.mean(axis="index")
deviation_std_true_traj = deviation_df.std(axis="index")

# Standard error of the mean: standard deviation over sqrt(number of experiments)
deviation_ste_true_traj = deviation_std_true_traj.div(np.sqrt(deviation_df.shape[0]))