In [1]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
import csv

In [2]:
from sac.sac_torch_new import Agent
from env.unicycle_2d_with_obs import Unicycle2DWithObstacle
from cbf.cbf_simplified_car import cbf_casadi

# Single environment instance used by the rest of the notebook: the training
# cell reads its observation/action spaces, resets it and steps it, and passes
# it to cbf_casadi and save_trajectory.
env = Unicycle2DWithObstacle()

In [3]:
def save_trajectory(exp,episode,obs_history,env,info):
    """Plot one episode's (x, y) path together with the obstacles and save it as a PNG.

    Parameters
    ----------
    exp
        Experiment index; used only in the output file name.
    episode
        Episode index; used in the plot title and the output file name.
    obs_history
        Sequence of observations. Elements 0 and 1 of each observation are
        plotted as the x and y coordinates respectively.
    env
        Object exposing ``obstacles``, an iterable of ``(x, y, radius)``
        triples; each one is drawn as an unfilled red circle.
    info
        Interpolated verbatim into the output file name.

    Side effects
    ------------
    Writes ``tmp/plot/exp_{exp}_epi_{episode}_{info}.png`` (relative to the
    current working directory), creating ``tmp/plot`` if it does not exist.
    The figure is always closed afterwards, even if saving fails.
    """
    import os

    output_dir = 'tmp/plot'
    # savefig does not create missing parent directories, so do it here.
    os.makedirs(output_dir, exist_ok=True)

    x_coords = [obs[0] for obs in obs_history]
    y_coords = [obs[1] for obs in obs_history]

    fig, ax = plt.subplots()
    try:
        # One line with small markers; the trajectory used to be drawn twice,
        # which overlaid a second line in a different colour on top.
        ax.plot(x_coords, y_coords, marker='o', markersize=2, linestyle='-')
        ax.set_xlabel('X Coordinate')
        ax.set_ylabel('Y Coordinate')
        ax.set_title(f'Car Trajectory - Episode {episode}')
        ax.grid(True)

        for obstacle in env.obstacles:
            x, y, radius = obstacle
            ax.add_patch(Circle((x, y), radius, fill=False, color='red'))

        # Equal aspect so the obstacle circles are not drawn as ellipses.
        ax.set_aspect('equal', adjustable='box')
        fig.savefig(f'{output_dir}/exp_{exp}_epi_{episode}_{info}.png')
    finally:
        # Close this specific figure; many figures are produced during
        # training and unclosed ones accumulate in memory.
        plt.close(fig)

In [None]:
# Train a fresh SAC agent `exp_repeat_times` times. Every action proposed by
# the agent is passed through the CBF filter before being applied to the
# environment. Per-episode scores of every experiment are written to a CSV
# file at the end (one row per experiment).
n_games = 1000
exp_repeat_times = 10
MAX_STEPS_PER_EPISODE = 500   # hard cap on the number of steps in one episode
GOAL_REACHED_TARGET = 150     # stop an experiment after this many successful episodes

# The print format does not change between episodes, so configure it once.
np.set_printoptions(formatter={'float': '{:0.2f}'.format})

total_score_history = []      # one list of per-episode scores per experiment
for j in range(exp_repeat_times):
    print('experiment',j,'start')
    agent = Agent(input_dims=env.observation_space.shape, env=env,
        n_actions=env.action_space.shape[0])
    goal_reached_num = 0
    # Per-experiment accumulators. These used to be re-created inside the
    # episode loop, so `score_history` only ever held the last episode's score
    # and `global_step` was always identical to `step`.
    score_history = []
    global_step = 0

    for i in range(n_games):
        observation_history = []
        info = {}
        observation,_ = env.reset()
        done = False
        score = 0
        step = 0
        while not done and step < MAX_STEPS_PER_EPISODE:
            observation_history.append(np.array(observation))
            action_rl = agent.choose_action(observation)
            # The action actually executed (and stored in the replay buffer)
            # is the CBF-filtered one, not the raw policy output.
            action = cbf_casadi(env,observation,action_rl)

            # The fourth value returned by env.step is ignored; episodes are
            # cut off by the step cap in the loop condition instead.
            observation_, reward, done,_, info = env.step(action)
            score += reward
            agent.remember(observation, action, reward, observation_, done)
            agent.learn(i, step, global_step, None, save_runs = False)
            observation = observation_
            step += 1
            global_step += 1
        score_history.append(score)

        print('episode', i, ', after', step, 'steps: ', info, ', last position: ', observation[:2], ', with total reward: ', np.array([score]))

        # NOTE(review): this compares `info` with a one-element *set*. It only
        # matches if env.step returns exactly that set (a dict never equals
        # it) - confirm against Unicycle2DWithObstacle.step.
        if info == {'goal_reached'}:
            goal_reached_num += 1
            save_trajectory(j,i,observation_history,env,info)
        if goal_reached_num >= GOAL_REACHED_TARGET:
            break

    total_score_history.append(score_history)

csv_file_path = "simplified_car_score_history.csv"

# Rows may differ in length because an experiment can stop early.
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(total_score_history)

In [None]:
# Learning curve: raw per-episode scores from `score_history` (as left behind
# by the training cell) with a centred moving average drawn on top.
fig, ax = plt.subplots()
ax.plot(score_history, label='Score')

# Clamp the window to the amount of data available. With fewer scores than
# the window, mode='valid' would return an array longer than the data and the
# centring offset below would become negative.
window = min(20, len(score_history))

if window > 0:
    moving_avg = np.convolve(score_history, np.ones(window)/window, mode='valid')

    # 'valid' drops window - 1 samples; shift by half of that so the average
    # is centred over the episodes it summarises.
    offset = (len(score_history) - len(moving_avg)) // 2

    ax.plot(range(offset, offset + len(moving_avg)), moving_avg, color='red', label='Moving Average')

ax.set_xlabel("Episode")
ax.set_ylabel("Score")
ax.set_title("car_13_4_1xObstacle")
ax.grid(True)
ax.legend()
# fig.savefig("car_13_4_1xObstacle.png")  # uncomment to persist the figure
plt.show()


In [None]:
# Save the current agent via Agent.save_models(). `agent` is rebound at the
# start of every experiment in the training cell, so this refers to the agent
# of the last experiment that ran.
# NOTE(review): what is written and where is decided inside sac.sac_torch_new - confirm there.
agent.save_models()

In [None]:
# Restore the agent via Agent.load_models(). Requires `agent` to already exist
# in the kernel (it is created in the training cell).
# NOTE(review): presumably reads back what save_models() wrote - confirm in sac.sac_torch_new.
agent.load_models()