In [1]:
import numpy as np
import gym
import matplotlib.pyplot as plt
from f110_gym.envs.base_classes import Integrator
import random
import os
import pandas as pd
import time
from sklearn.preprocessing import normalize
from scipy.sparse import csr_matrix
import sys
from f110_gym.envs.f110_env import F110Env

In [2]:
class Reward:
    def __init__(self, min_speed=0.5, max_speed=1.8, map_centers=None,track_width = 2.2):
        self.min_speed = min_speed
        self.max_speed = max_speed
        self.set_parameters(map_centers,track_width)
        
        # Hyperparameters
        self.epsilon = 1e-5
        self.distance_travelled = 0

        # Centering reward function
        self.func = lambda y : 2 * (np.exp(-0.017*y) - 0.5)
    
    def set_parameters(self,map_centers, track_width):
        self.map_centers = map_centers
        # Initial point and center that detrmines the position at the start of episode
        self.initial_point = np.array([[0, 0]])
        self.initial_center_idx , _ = self.__calculate_distance_from_center(self.map_centers,self.initial_point)
        self.initial_center = self.map_centers[self.initial_center_idx]

        # Race Track parameters
        self.distance_between_centers = np.hstack([[0.],np.linalg.norm(self.map_centers[:-1,:]- self.map_centers[1:,:],axis=1)])
        self.total_track_length = np.sum(self.distance_between_centers)
        self.track_width = track_width

    
    def __calculate_distance_from_center(self, centers,curr):
        distances = np.linalg.norm(centers - curr, axis=1)
        idx = np.argmin(distances)
        return idx, distances[idx]
    
    def reset(self, point):
        self.distance_travelled = 0
        self.initial_point = point
        idx , _ = self.__calculate_distance_from_center(self.map_centers,self.initial_point)
        self.initial_center_idx = idx
        self.initial_center = self.map_centers[self.initial_center_idx]

    def exponential_angle(self, angle):
        if angle <=90:
            return self.func(angle)
        return -1
    
    def progress_reward(self, curr_position, next_position):
        distance = np.linalg.norm(curr_position - next_position)
        self.distance_travelled += distance
        return self.distance_travelled / self.total_track_length

    def centering_reward(self, curr_position,next_position):
        
        position_vector = next_position - curr_position

        curr_idx, c = self.__calculate_distance_from_center(self.map_centers,curr_position)
        # print(f'Curr position: {curr_position}, Next position: {next_position} with initial center idx: {curr_idx}')

        if curr_idx == self.map_centers.shape[0] - 1:
            indices = [curr_idx-1,0]
        elif curr_idx == 0:
            indices = [self.map_centers.shape[0]-1,curr_idx+1]
        else:
            indices = [curr_idx-1, curr_idx+1]
            
        # print(f'Indices: {indices}')
        # print(np.vstack([self.map_centers[indices[0],:],self.map_centers[indices[1],:]]))

        next_idx, n = self.__calculate_distance_from_center(np.vstack([self.map_centers[indices[0],:],self.map_centers[indices[1],:]]),next_position)
        next_idx = indices[next_idx]
        # print(f'Next index is {next_idx} and distance is {n}')

        curr_center = self.map_centers[curr_idx]
        next_center = self.map_centers[next_idx]

        if curr_idx == next_idx:
            return -1
        
        # print(f'Current center: {curr_center}, Next center: {next_center}')
        center_vector = next_center - curr_center
        
        dot_product = np.dot(center_vector, position_vector)
        norm_product = np.linalg.norm(center_vector) * np.linalg.norm(position_vector)
        cosine_angle = dot_product / norm_product
        angle_rad = np.arccos(np.clip(cosine_angle, -1.0, 1.0))

        # print(f'Center vector: {center_vector} and Position vector: {position_vector}')

        return self.exponential_angle(np.degrees(angle_rad))
    
    def milestone_reward(self, next_position):
        idx, _ = self.__calculate_distance_from_center(self.map_centers,next_position)
        if idx!=self.initial_center_idx:
            travelled = np.linalg.norm(self.initial_center - next_position) / self.total_track_length
            if  travelled >= np.abs(self.distance_between_centers[idx] - self.distance_between_centers[self.initial_center_idx]) / self.total_track_length :
                self.initial_center_idx = idx
                self.initial_center = self.map_centers[idx]
                return 5
        return 0
    
    def calculate_reward(self, curr_position, next_position):
        progress_reward = self.progress_reward(curr_position, next_position)
        centering_reward = self.centering_reward(curr_position, next_position)
        milestone_reward = self.milestone_reward(next_position)
        # print(f"Distance reward: {progress_reward}, Centering reward: {centering_reward}, Milestone reward: {milestone_reward}")
        return progress_reward + centering_reward + milestone_reward

In [3]:
class IndexSelector:
    def __init__(self, num_indices):
        self.set_parameters(num_indices)
    
    def set_parameters(self, num_indices):
        self.num_indices = num_indices
        self.visited_indices = set()
        self.probabilities = np.ones(num_indices) / num_indices
    
    def select_index(self):
        if len(self.visited_indices) == self.num_indices:
            # Reset the probabilities and visited indices
            print('Visited all indices, resetting')
            self.visited_indices = set()
            self.probabilities = np.ones(self.num_indices) / self.num_indices

        # Select an index based on the current probabilities
        random_idx = np.random.choice(np.arange(self.num_indices), p=self.probabilities)

        # Update the probabilities
        self.visited_indices.add(random_idx)
        if len(self.visited_indices) < self.num_indices:
            self.probabilities[random_idx] = 0
            remaining_prob = 1 - np.sum(self.probabilities)
            self.probabilities[self.probabilities > 0] += remaining_prob / np.sum(self.probabilities > 0)

        return random_idx

In [4]:
class F1Tenth_navigation:

    def __init__(self,gym_env_code='f110_gym:f110-v0', num_agents=1, map_path=['./f1tenth_racetracks/Austin/Austin_map'], map_ext='.png', sx=0., sy=0., stheta=0., map_centers_file=None, save_path=None, track_name=None, inference=None,reward_file=None,collision_file=None):

        # Environment setup
        self.path_counter = 0
        self.sx, self.sy, self.stheta = sx, sy, stheta
        self.save_path = save_path
        self.track_name = track_name
        self.num_agents = num_agents
        self.map_path = map_path
        self.map_ext = map_ext
        self.map_centers_file = map_centers_file
    
        self.env = gym.make(gym_env_code, map=self.map_path[self.path_counter], map_ext=self.map_ext, num_agents=self.num_agents, timestep=0.01, integrator=Integrator.RK4)
        self.env.add_render_callback(self.render_callback)
       
        file = pd.read_csv(self.map_centers_file[self.path_counter])
        file.columns = ['x', 'y', 'w_r', 'w_l']
        file.index = file.index.astype(int)
        self.map_centers = file.values[:, :2]
        self.track_width = file.loc[0,'w_r'] + file.loc[0,'w_l']
        self.reward_file = reward_file
        self.collision_file = collision_file

        # Random Seed
        self.random_seed = 42
        np.random.seed(self.random_seed)

        # Environment Observation Parameters
        self.num_beams = 1080
        self.n_features = 11
        self.angle = 270

        # Action Space Parameters
        self.num_angles = 30
        self.num_speeds = 10

        # LiDAR downsampling parameters
        self.n_sectors = 30
        self.normalized_lidar = np.zeros((1,self.n_sectors))

        # State Space Parameters
        self.num_states = 2 ** self.n_features

        # Speed Parameters
        self.min_speed = 0.8
        self.max_speed = 2

        # Action Space
        self.angles_deg = np.linspace(-self.angle // 2, self.angle // 2, self.num_angles)[::-1]
        self.angles = np.radians(self.angles_deg)
        self.speeds = np.linspace(self.min_speed, self.max_speed, self.num_speeds)
    
        # State Space - Q-Table
        if inference is not None:
            self.weights = np.load(inference)
            self.num_collisions = int(inference.split('_')[-1].split('.')[0])
            print(f'Loaded Weights')
        else:
            self.weights = np.zeros((self.num_states,self.num_angles,self.num_speeds))
            self.num_collisions = 0

        # ELigibility Trace
        self.ET_IS = np.zeros((self.num_states,self.num_angles,self.num_speeds))

        # projection matrix
        if self.n_features == 10:
            zero_prob = 0.85
            one_prob = 0.15
        if self.n_features == 11:
            zero_prob = 0.8
            one_prob = 0.2
        self.projection_matrix = self.get_projection_matrix(zero_prob=zero_prob,one_prob=one_prob)

        # binary powers
        self.binary_powers = np.array([2 ** i for i in range(self.n_features)])

        # Training Variables
        self.curr_state = None
        self.next_state = None
        
        self.action_threshold_decay = 0.99997
        self.action_threshold = 0.20 * (self.action_threshold_decay ** self.num_collisions)

        # Imported Classes
        self.reward_class = Reward(min_speed=self.min_speed, max_speed=self.max_speed, map_centers=self.map_centers, track_width=self.track_width)
        self.index_selector = IndexSelector(self.map_centers.shape[0])      

        # BTSP Parameters
        self.learning_rate = 0.1
        self.discount_factor = 0.95
        self.decay_rate = 0.9

        # Reward
        self.reward = 0
        self.episode_reward = 0
        self.cumulative_reward = 0
        self.episodic_rewards = [0]
        self.delayed_reward = 0
        self.delayed_reward_counter = 0

        # Time
        self.collision_times = [0]       


    def __update_map(self):
        if self.env.renderer is not None:
            self.env.renderer.close()
        self.path_counter += 1
        if self.path_counter == len(self.map_path):
            self.path_counter = 0
        self.env.map_name = self.map_path[self.path_counter]
        self.env.update_map(f'{self.map_path[self.path_counter]}.yaml',self.map_ext)
        F110Env.renderer = None
        file = pd.read_csv(self.map_centers_file[self.path_counter])
        file.columns = ['x', 'y', 'w_r', 'w_l']
        file.index = file.index.astype(int)
        self.map_centers = file.values[:, :2]
        self.track_width = file.loc[0,'w_r'] + file.loc[0,'w_l']
        print(f'Map updated to {self.track_name[self.path_counter]}')
        
        
    def render_callback(self, env_renderer):
        e = env_renderer
        x = e.cars[0].vertices[::2]
        y = e.cars[0].vertices[1::2]
        top, bottom, left, right = max(y), min(y), min(x), max(x)
        e.score_label.x = left
        e.score_label.y = top - 700
        e.left = left - 800
        e.right = right + 800
        e.top = top + 800
        e.bottom = bottom - 800


    def get_statistical_properties(self,lidar_input,n_sectors=30):
        sector_size = np.asarray(lidar_input).shape[0] // n_sectors
        sectors = lidar_input[:sector_size * n_sectors].reshape(n_sectors, sector_size)
        return np.median(sectors, axis=1).reshape(1,-1)
    

    def binarize_vector(self,vector):
        threshold = (np.min(vector)+ np.max(vector))/2
        return np.where(vector > threshold, 1, 0)
    

    def get_projection_matrix(self,zero_prob=0.5,one_prob=0.5):
        # Generate a random matrix with values 0 and 1 based on the given probabilities [prob_0,prob_1]
        if not os.path.exists('Projection_matrices'):
            os.mkdir('Projection_matrices')
        if not os.path.exists(os.path.join('Projection_matrices', f'projection_{self.n_features}f_s{self.random_seed}.npy')):
            matrix = np.random.choice([0, 1], size=(self.n_sectors, self.n_features), p=[zero_prob,one_prob])
            np.save(os.path.join('Projection_matrices', f'projection_{self.n_features}f_s{self.random_seed}.npy'), matrix)
        else:
            matrix = np.load(os.path.join('Projection_matrices', f'projection_{self.n_features}f_s{self.random_seed}.npy'))
        return matrix
    
    def softmax(self,x, temperature=3.0):
        e_x = np.exp(x  / temperature) 
        return e_x / e_x.sum(axis=-1)

    def get_binary_representation(self,lidar_input):
        self.normalized_lidar = normalize(lidar_input,axis=1)
        return self.binarize_vector(np.dot( self.normalized_lidar,self.projection_matrix))
    

    def get_state(self, binary):
        return np.dot(binary[0], self.binary_powers)
    

    def select_action(self, state):
        random_number = np.random.rand()
        if random_number < self.action_threshold:
            angle_index = np.random.randint(0, self.num_angles)
            speed_index = np.random.randint(0, self.num_speeds)
        else:
            max_value = np.max(self.weights[state])
            max_indices = np.argwhere(self.weights[state] == max_value)
            angle_index, speed_index  = max_indices[np.random.randint(len(max_indices))]

        self.action_threshold *= self.action_threshold_decay

        return angle_index, speed_index

    def select_action_inference(self, state):
        max_indices = np.argwhere(self.weights[state] == np.max(self.weights[state]))
        angle_index, speed_index  = max_indices[np.random.randint(len(max_indices))]
        return angle_index, speed_index

    def sarsa_weight_update(self,angle_idx,speed_idx,reward):
        next_angle_idx,next_speed_idx = self.select_action(self.next_state)
        delta = reward + self.discount_factor * self.weights[self.next_state,next_angle_idx,next_speed_idx] - self.weights[self.curr_state,angle_idx,speed_idx]

        self.weights += self.learning_rate * delta * self.ET_IS
        return next_angle_idx,next_speed_idx

    def set_eligibility_trace(self,angle_idx,speed_idx):
        self.ET_IS [self.curr_state,angle_idx,speed_idx] = 1

    def decay_eligibility_trace(self):
        self.ET_IS *= self.discount_factor * self.decay_rate

    def save_reward_time(self):
        if not os.path.exists(os.path.join(self.save_path)):
            os.mkdir(os.path.join(self.save_path))
        
        if self.reward_file is not None:
            r = np.append(np.load(self.reward_file), self.episodic_rewards)
            t = np.append(np.load(self.collision_file), self.collision_times)
            np.save(os.path.join(self.save_path, f'rewards.npy'), np.array(r))
            np.save(os.path.join(self.save_path, f'times.npy'), np.array(t))
        else:
            np.save(os.path.join(self.save_path, f'rewards.npy'), np.array(self.episodic_rewards))
            np.save(os.path.join(self.save_path, f'times.npy'), np.array(self.collision_times))

    def save_weights(self):
        if not os.path.exists(os.path.join(self.save_path)):
            os.mkdir(os.path.join(self.save_path))
        np.save(os.path.join(self.save_path, f'{self.track_name[self.path_counter]}_{self.num_collisions + 1}.npy'), self.weights)
        # print(f'File saved')


    def train(self):
        try:
            obs, step_reward, done, info = self.env.reset(np.array([[self.sx, self.sy, self.stheta]]))
            lidar = obs['scans'][0]
            lidar_down_sampled = self.get_statistical_properties(lidar)
            self.curr_state = self.get_state(self.get_binary_representation(lidar_down_sampled))
            self.reward_class.reset(np.array([[self.sx, self.sy]]))
            angle_index,speed_index = self.select_action(self.curr_state)
            start_time = time.time()
            while True:
                # np.save(f'./LiDAR_scans/scan_{self.curr_state}.npy',lidar)
                steering_angle,speed = self.angles[angle_index],self.speeds[speed_index]
                curr_x = obs['poses_x'][0]
                curr_y = obs['poses_y'][0]
                obs, reward, done, info = self.env.step(np.array([[steering_angle, speed]]))
                lidar = obs['scans'][0]
                lidar_down_sampled = self.get_statistical_properties(lidar)
                self.next_state = self.get_state(self.get_binary_representation(lidar_down_sampled))

                if done:
                    self.reward =-50
                else:           
                    self.reward = self.reward_class.calculate_reward(np.array([curr_x, curr_y]), np.array([obs['poses_x'][0], obs['poses_y'][0]]))
                
                self.episode_reward += self.reward
                # self.delayed_reward += self.reward

                self.set_eligibility_trace(angle_index,speed_index)
                
                # if self.delayed_reward_counter == 5:
                #     angle_index,speed_index = self.sarsa_weight_update(angle_index,speed_index,self.delayed_reward)
                #     self.delayed_reward_counter = 0
                #     self.delayed_reward = 0
                # else:
                #     self.delayed_reward_counter += 1
                #     angle_index,speed_index = self.sarsa_weight_update(angle_index,speed_index,0)

                angle_index,speed_index = self.sarsa_weight_update(angle_index,speed_index,self.reward)
                self.decay_eligibility_trace()
                self.curr_state = self.next_state

                # Randomize the starting point after collision
                if done:
                    self.episodic_rewards.append(self.episode_reward)
                    self.episode_reward = 0
                    end_time = time.time()
                    self.collision_times.append(end_time - start_time)
                    start_time = end_time

                    self.num_collisions += 1
                    self.ET_IS.fill(0)

                    # Obtaining a new random position on the track
                    random_idx = self.index_selector.select_index()
                    n_x, n_y = self.map_centers[random_idx]
                    delta_x, delta_y = np.random.uniform(-0.75, 0.75), np.random.uniform(-0.2, 0.2)
                    n_theta = np.random.choice(self.angles)

                    # Sensing the new state
                    obs, step_reward, done, info = self.env.reset(np.array([[n_x + delta_x, n_y + delta_y, n_theta]]))
                    lidar = obs['scans'][0]
                    lidar_down_sampled = self.get_statistical_properties(lidar)
                    self.curr_state = self.get_state(self.get_binary_representation(lidar_down_sampled))
                    angle_index,speed_index = self.select_action(self.curr_state)

                    # Resetting 
                    self.reward_class.reset(np.array([[n_x + delta_x, n_y + delta_y]]))
                    
                    # Checkpoint
                    if (self.num_collisions+1) % 1000 == 0:
                        print(f'Collision: {self.num_collisions+1}, Time: {sum(self.collision_times)}, Reward: {sum(self.episodic_rewards)}')
                        self.save_reward_time()
                        self.save_weights()
                        self.episodic_rewards.clear()
                        self.collision_times.clear()
                        self.reward_file = os.path.join(self.save_path, f'rewards.npy')
                        self.collision_file = os.path.join(self.save_path, f'times.npy')
                        
                    if (self.num_collisions+1)  % 2000 == 0:
                        print(f'Training on {self.track_name[self.path_counter]} Completed')
                        self.__update_map()
                        self.reward_class.set_parameters(self.map_centers,self.track_width)
                        self.reward_class.reset(np.array([[self.sx, self.sy]]))
                        self.env.reset(np.array([[self.sx, self.sy, self.stheta]]))
                        self.index_selector.set_parameters(self.map_centers.shape[0])
                
                # self.env.render(mode='human')
        except Exception as e:
            print(e)
            self.env.renderer.close()
            # self.env.renderer = None
            self.env.close()
        finally:
            del self.env

    def inference(self):
        obs, step_reward, done, info = self.env.reset(np.array([[self.sx, self.sy, self.stheta]]))
        lidar = obs['scans'][0]
        lidar_down_sampled = self.get_statistical_properties(lidar)
        self.curr_state = self.get_state(self.get_binary_representation(lidar_down_sampled))
        self.reward_class.reset(np.array([[self.sx, self.sy]]))
        angle_index,speed_index = self.select_action_inference(self.curr_state)
        while not done:
            steering_angle,speed = self.angles[angle_index],self.speeds[speed_index]
            obs, reward, done, info = self.env.step(np.array([[steering_angle, speed]]))
            lidar = obs['scans'][0]
            lidar_down_sampled = self.get_statistical_properties(lidar)
            self.next_state = self.get_state(self.get_binary_representation(lidar_down_sampled))
            angle_index,speed_index = self.select_action_inference(self.next_state)
            self.curr_state = self.next_state
            
            self.env.render(mode='human')

In [5]:
path = './f1tenth_racetracks'
all_map_paths=[]
map_centers = []
map_names = []
track_lengths=[]
for folder in os.listdir(path):
    if folder not in ['README.md','.gitignore','convert.py','LICENSE','rename.py','.git']:
        folder_name=folder
        file_name=folder_name.replace(' ','')+'_map'
        map_center = folder_name.replace(' ','')+'_centerline.csv'
        track_lengths.append(len(pd.read_csv(f'{path}/{folder_name}/{map_center}')))
        map_names.append(folder_name)
        all_map_paths.append(f'{path}/{folder_name}/{file_name}')
        map_centers.append(f'{path}/{folder_name}/{map_center}')

track_length_list = list(zip(map_names,track_lengths))
track_length_list

[('Oschersleben', 739),
 ('Spielberg', 864),
 ('BrandsHatch', 781),
 ('MoscowRaceway', 813),
 ('Monza', 1159),
 ('Mexico City', 860),
 ('Sakhir', 1082),
 ('Austin', 1102),
 ('Budapest', 876),
 ('Melbourne', 1060),
 ('Sochi', 1169),
 ('SaoPaulo', 862),
 ('Montreal', 872),
 ('Nuerburgring', 1029),
 ('Hockenheim', 914),
 ('Shanghai', 1090),
 ('Sepang', 1108),
 ('YasMarina', 1110),
 ('Catalunya', 931),
 ('Zandvoort', 864),
 ('Spa', 1401),
 ('Silverstone', 1178)]

In [6]:
train_maps = ['MoscowRaceway','YasMarina','Sepang','Austin','Melbourne','Mexico City','Monza','Silverstone','Spa']
test_maps = [i[0] for i in track_length_list if i[0] not in train_maps]
print(f'Train Maps: {train_maps}')
print(f'Test Maps: {test_maps}')

Train Maps: ['MoscowRaceway', 'YasMarina', 'Sepang', 'Austin', 'Melbourne', 'Mexico City', 'Monza', 'Silverstone', 'Spa']
Test Maps: ['Oschersleben', 'Spielberg', 'BrandsHatch', 'Sakhir', 'Budapest', 'Sochi', 'SaoPaulo', 'Montreal', 'Nuerburgring', 'Hockenheim', 'Shanghai', 'Catalunya', 'Zandvoort']


In [None]:
global num_agents,map_path,map_ext,sx,sy,stheta,num_maps_to_train,gym_env_code,inference_file,reward_file,collision_file,counter,indices,save_path
gym_env_code='f110_gym:f110-v0'
num_agents = 1
map_ext = '.png'
sx = 0.
sy = 0.
stheta = -1
num_maps_to_train = 5
indices = [idx for idx,i in enumerate(map_names) if i in train_maps]
save_path = 'SARSA_Multiple_training/'
if not os.path.exists(save_path):
    os.mkdir(save_path)
counter = 0
inference_file = None
reward_file=None
collision_file=None
map_path_subset = [all_map_paths[i] for i in indices]
map_centers_subset = [map_centers[i] for i in indices]
map_names_subset = [map_names[i] for i in indices]

simulator = F1Tenth_navigation(gym_env_code=gym_env_code, num_agents=num_agents, map_path=map_path_subset, map_ext=map_ext, sx=sx, sy=sy, stheta=stheta, map_centers_file=map_centers_subset, save_path=save_path, track_name=map_names_subset, inference=inference_file,reward_file=reward_file,collision_file=collision_file)
simulator.train()



Visited all indices, resetting
Collision: 1000, Time: 259.8837375640869, Reward: -94723.72116566692
Visited all indices, resetting
Collision: 2000, Time: 321.93824100494385, Reward: -90055.10453426259
Training on MoscowRaceway Completed
Map updated to Monza
Collision: 3000, Time: 476.4615738391876, Reward: -58751.43249443645
Visited all indices, resetting
Collision: 4000, Time: 444.9024968147278, Reward: -68651.08235076038
Training on Monza Completed
Map updated to Mexico City
Visited all indices, resetting
Collision: 5000, Time: 819.9105112552643, Reward: -1395.3714991676497
Visited all indices, resetting
Collision: 6000, Time: 467.29459595680237, Reward: -73633.60422738781
Training on Mexico City Completed
Map updated to Austin


In [None]:
path = '/home/praneeth/shared_f1_tenth /'
folder_name= 'Nuerburgring'
folder_BTSP = 'Silverstone'

In [None]:
path = '/home/praneeth/shared_f1_tenth /'
reward_file_BTSP = f'{path}Weights_BTSP/{folder_BTSP}/rewards.npy'
time_file_BTSP = f'{path}Weights_BTSP/{folder_BTSP}/times.npy'
path_BTSP = f'{path}Weights_BTSP/{folder_BTSP}'

reward_file_normal = f'{path}Weights/{folder_name}/rewards.npy'
time_file_normal = f'{path}Weights/{folder_name}/times.npy'
path_normal = f'{path}Weights/{folder_name}'

episode_num=[0]
for file in os.listdir(path_BTSP):
    if file.startswith('weights'):
        episode_num.append(int(int(file.split('_')[1].split('.')[0])/1000))

episode_num = sorted(episode_num) 

reward = np.load(reward_file_BTSP)
times= np.load(time_file_BTSP)

reward_normal = np.load(reward_file_normal)
times_normal= np.load(time_file_normal)[:episode_num[-1]+1]

fig, ax = plt.subplots(1,1, figsize=(8,4))
# ax[0].plot(list(range(episode_num[-1]+1)),reward/10000,label='HCL')
# # ax[0].set_title('Reward Vs Trials')
# ax[0].set_xlabel('Number of Trials ($10^3$)')
# ax[0].set_ylabel('Reward ($10^4$)') 
ax.plot(list(range(episode_num[-1]+1)),times/1000,label='HCL')
# ax[1].set_title('Time Vs Trials')
ax.set_xlabel('Number of Trials ($10^3$)')
ax.set_ylabel('Time to Collisions ($10^3$) s')

# ax.plot(list(range(episode_num[-1]+1)),reward_normal[:episode_num[-1]+1]/10000,label='SARSA($\lambda$)')
ax.plot(list(range(episode_num[-1]+1)),times_normal/1000,label='SARSA($\lambda$)')
ax.legend()
ax.legend()

# fig.suptitle('BTSP')
plt.tight_layout()

In [None]:
path = '/home/praneeth/shared_f1_tenth /'
reward_file_normal = f'{path}Weights/{folder_name}/rewards.npy'
time_file_normal = f'{path}Weights/{folder_name}/times.npy'
path_normal = f'{path}Weights/{folder_name}'
episode_num=[0]
for file in os.listdir(path):
    if file.startswith('weights'):
        episode_num.append(int(int(file.split('_')[1].split('.')[0])/1000))

episode_num = sorted(episode_num) 

reward = np.load(reward_file_normal)
times= np.load(time_file_normal)

fig, ax = plt.subplots(1,2, figsize=(8,4))
ax[0].plot(list(range(episode_num[-1]+1)[:21]),reward[:21]/10000)
ax[0].set_title('Reward Vs Collisions')
ax[0].set_xlabel('Number of collisions ($10^3$)')
ax[0].set_ylabel('Reward ($10^4$)') 
ax[1].plot(list(range(episode_num[-1]+1)[:21]),times[:21]/1000)
ax[1].set_title('Time Vs Collisions')
ax[1].set_xlabel('Number of collisions ($10^3$)')
ax[1].set_ylabel('Time to collision ($10^3$) s')
fig.suptitle('SARSA($\lambda$)')
plt.tight_layout()

