In [34]:
from ddpg import ReplayBuffer, DDPGAgent, EpsilonGreedy
from fsae.envs import *
import time

In [35]:
try:
    env.close()
except:
    pass 
env = RandomTrackEnv(render_mode=None, seed=0)

In [36]:
samples = np.random.normal(0,0.5, 1000)
plt.hist(samples, bins=20, edgecolor='black')
plt.show()


In [37]:
# Initialize the agent, replay buffer, and environment
state_dim = 8 # Dimension of the state space
action_dim = 2 # Dimension of the action space
hidden_dim = 256
max_action = (1,0.6) # Maximum value of the action
num_episodes = 1000
max_steps = 25
batch_size = 500

replay_buffer = ReplayBuffer(buffer_size=50000, state_dim=state_dim, action_dim=action_dim)
# replay_buffer.load_from_csv("replay_buffers/replayBuffer_teleop_test.csv")
agent = DDPGAgent(state_dim, action_dim, hidden_dim, replay_buffer, max_action)
greedy = EpsilonGreedy(1, 0.01, 0.001, num_episodes, 0.4)
if replay_buffer.size > batch_size:
    print("Replay buffer size after load: ", replay_buffer.size, " vs Batch Size: ", batch_size)
    agent.train(batch_size)

rewards = []
avg_rewards = []
best_reward  = 0

# Training loop
for episode in range(num_episodes):
    state = env.reset(seed=episode)
    episode_reward = 0

    for step in range(max_steps):
        action = greedy.get_action(agent, state)
        # print(action)
        next_state, reward, done, _ = env.step(action)
        if done: 
            reward -= 1
        
        replay_buffer.add(state, action, reward, next_state, done)
        state = next_state
        episode_reward += reward

        if replay_buffer.size > batch_size:
            agent.train(batch_size)
            # print("Training")

        if done:
            break
    
    rewards.append(episode_reward)

    if (episode % 10 == 0):
        avg_reward = np.mean(np.asarray(rewards))
        print(f"{episode} Average Episode Reward: {avg_reward} Replay buffer size: {replay_buffer.size}")
        rewards.clear()
        avg_rewards.append(avg_reward)
        if avg_reward > best_reward:
            print("saved weights")
            agent.save_weights()
            best_reward = avg_reward

    greedy.incr_step()

agent.save_weights(best=False)


0 Average Episode Reward: 2.795785486871863 Replay buffer size: 14
saved weights
10 Average Episode Reward: 3.368977213898318 Replay buffer size: 169
saved weights
20 Average Episode Reward: 3.97177324377202 Replay buffer size: 337
saved weights
30 Average Episode Reward: 3.854186111451173 Replay buffer size: 501
40 Average Episode Reward: 2.753283592436644 Replay buffer size: 644
50 Average Episode Reward: 4.314710583100586 Replay buffer size: 824
saved weights
60 Average Episode Reward: 4.127975911177465 Replay buffer size: 1010
70 Average Episode Reward: 3.793837606575761 Replay buffer size: 1180
80 Average Episode Reward: 3.109844914225509 Replay buffer size: 1327
90 Average Episode Reward: 3.5487815816408066 Replay buffer size: 1487
100 Average Episode Reward: 4.504338764005302 Replay buffer size: 1676
saved weights
110 Average Episode Reward: 4.3227018349822215 Replay buffer size: 1861
120 Average Episode Reward: 4.062005235808198 Replay buffer size: 2034
130 Average Episode Rewa

KeyboardInterrupt: 

In [38]:
agent.save_weights(best=False)


In [39]:
replay_buffer.save_as_csv("replayBuffer_train_test.csv")

In [40]:
try:
    env.close()
except:
    pass    
env = RandomTrackEnv(render_mode='tp_camera', seed=0)



In [41]:
done = False
state = env.reset(seed=4)
agent.load_weights()

while not done:
    action = agent.get_action(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state
    print(action)

[          1    -0.24484]
[          1    -0.23563]
[          1    -0.17694]
[          1   -0.057106]
[          1    0.041586]
[          1    0.086771]
[          1     0.10193]
[          1    0.054719]
[          1  -0.0035542]
[          1   -0.022078]
[          1    -0.39872]
[          1     0.19029]
[          1     0.14764]
[          1   -0.042296]
[          1   -0.012219]
[          1    0.035005]
[          1     -0.4182]
[          1     0.26932]
[          1   -0.019044]
[          1      0.1066]
[          1    -0.44693]
[          1     0.26325]
[          1    0.087065]
[          1      -0.264]
[          1   -0.056108]
[          1    0.047416]
[          1     -0.1079]
[          1     0.03135]
[          1     0.10418]
[          1    -0.13084]
[          1    0.028727]
[          1    -0.29646]
[          1   0.0088188]
[          1     0.24401]
[          1   -0.034907]
[          1   -0.040003]
[          1    0.081847]
[          1    0.098868]
[          1

KeyboardInterrupt: 