In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np

from ddpg import ReplayBuffer, DDPGAgent, EpsilonGreedy
# Wildcard import kept last so any names it exports still take precedence, as before.
from fsae.envs import *

In [20]:
# Close any environment left over from an earlier run of this cell, then
# create a fresh one without rendering.
try:
    env.close()
except NameError:
    # First run in this kernel: `env` does not exist yet, nothing to close.
    pass
except Exception as exc:
    # Closing is best-effort, but report the failure instead of hiding it.
    # (A bare `except:` would also swallow KeyboardInterrupt/SystemExit.)
    print(f"Ignoring error while closing previous environment: {exc!r}")
env = RandomTrackEnv(render_mode=None, seed=0)

In [16]:
# Draw 1000 values from a normal distribution (mean 0, std 0.5) and show
# their distribution as a 20-bin histogram.
samples = np.random.normal(loc=0, scale=0.5, size=1000)
fig, ax = plt.subplots()
ax.hist(samples, bins=20, edgecolor='black')
plt.show()


In [26]:
# Initialize the agent, replay buffer, and exploration schedule, then train.
#
# Requires `env` to have been created by the environment cell.
state_dim = 8 # Dimension of the state space
action_dim = 2 # Dimension of the action space
hidden_dim = 256
max_action = (1,0.6) # Maximum value of the action
num_episodes = 1000
max_steps = 1000  # Upper bound on steps per episode
batch_size = 500

# Warm-start from a previously saved replay buffer and previously saved weights.
replay_buffer = ReplayBuffer(buffer_size=50000, state_dim=state_dim, action_dim=action_dim)
replay_buffer.load_from_csv("replayBuffer_train_test_50000.csv")
agent = DDPGAgent(state_dim, action_dim, hidden_dim, replay_buffer, max_action)
agent.load_weights()

# Alternative exploration schedule kept for reference:
#greedy = EpsilonGreedy(1, 0.3, 0.01, num_episodes, 0.4)
# NOTE(review): the meaning of the positional arguments is defined in
# ddpg.EpsilonGreedy -- confirm there before tuning.
greedy = EpsilonGreedy(1, 0.01, 0.01, num_episodes, 0.4)

# Only train once the buffer holds more transitions than one batch.
if replay_buffer.size > batch_size:
    print("Replay buffer size after load: ", replay_buffer.size, " vs Batch Size: ", batch_size)
    agent.train(batch_size)

rewards = []       # Episode returns accumulated since the last log line
avg_rewards = []   # One averaged return per log line
# Start at -inf so the first logged average always counts as the best so far;
# starting at 0 would never save a checkpoint if all averages were negative
# (possible because of the -1 terminal penalty below).
best_reward = float("-inf")


# Training loop
for episode in range(num_episodes):
    state = env.reset(seed=episode)
    episode_reward = 0

    for step in range(max_steps):
        action = greedy.get_action(agent, state)
        next_state, reward, done, _ = env.step(action)
        if done:
            # Extra penalty on the terminating transition.
            reward -= 1

        replay_buffer.add(state, action, reward, next_state, done)
        state = next_state
        episode_reward += reward

        if replay_buffer.size > batch_size:
            agent.train(batch_size)

        if done:
            break

    rewards.append(episode_reward)

    # Log every 10th episode, skipping episode 0. The first window therefore
    # covers episodes 0..10 (11 episodes); later windows cover 10 each.
    # The episode-0 skip is folded into the condition rather than using
    # `continue`, which used to bypass greedy.incr_step() for episode 0.
    if episode > 0 and episode % 10 == 0:
        avg_reward = np.mean(np.asarray(rewards))
        print(f"{episode}: Average Episode Reward: {avg_reward} Replay buffer size: {replay_buffer.size}")
        rewards.clear()
        avg_rewards.append(avg_reward)
        if avg_reward > best_reward:
            print("saved weights")
            agent.save_weights()
            best_reward = avg_reward

    # Advance the exploration schedule once per episode, including episode 0.
    greedy.incr_step()

# Save the final weights, passing best=False as opposed to the default call above.
agent.save_weights(best=False)


Replay buffer size after load:  48727  vs Batch Size:  500
10: Average Episode Reward: 4.248586702131805 Replay buffer size: 48924
saved weights
20: Average Episode Reward: 4.294611323310932 Replay buffer size: 49102
saved weights
30: Average Episode Reward: 5.236380642183938 Replay buffer size: 49315
saved weights
40: Average Episode Reward: 4.833428367113659 Replay buffer size: 49510
50: Average Episode Reward: 5.011837664990888 Replay buffer size: 49715
60: Average Episode Reward: 7.013601499865165 Replay buffer size: 49983
saved weights
70: Average Episode Reward: 5.903436201327796 Replay buffer size: 50000
80: Average Episode Reward: 9.94751588599089 Replay buffer size: 50000
saved weights
90: Average Episode Reward: 9.783763481364918 Replay buffer size: 50000
100: Average Episode Reward: 10.204718122635938 Replay buffer size: 50000
saved weights
110: Average Episode Reward: 9.950564131605894 Replay buffer size: 50000
120: Average Episode Reward: 7.072330378094703 Replay buffer si

KeyboardInterrupt: 

In [40]:
import csv

# Plotting rewards
# Plot the full history in `avg_rewards` (one point per logging window).
# Plotting the scalar `avg_reward` would only show the last logged value.
plt.plot(avg_rewards)
plt.xlabel('Episodes')
plt.ylabel('Average Reward')
plt.title('Average Reward per 10 Episodes')
plt.show()

# Save rewards to CSV
# NOTE: the 'Episode' column holds the zero-based index of the logging
# window, not the episode number at which the average was taken.
with open('average_rewards.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Episode', 'Average Reward'])
    for episode, reward in enumerate(avg_rewards):
        writer.writerow([episode, reward])

In [38]:
# Manually save the agent's current weights with best=False (e.g. after the
# training cell was interrupted before reaching its own final save).
# NOTE(review): presumably best=False writes to a different file than the
# default save_weights() call -- confirm in ddpg.DDPGAgent.
agent.save_weights(best=False)


In [29]:
# Persist the current replay buffer to CSV; the evaluation setup cell loads
# this same file name.
replay_buffer.save_as_csv("replayBuffer_train_test_1000_eposh.csv")

In [3]:
# Close any environment left over from an earlier cell, then create a fresh
# one rendered with the 'tp_camera' mode.
try:
    env.close()
except NameError:
    # No environment exists yet in this kernel, nothing to close.
    pass
except Exception as exc:
    # Closing is best-effort, but report the failure instead of hiding it.
    # (A bare `except:` would also swallow KeyboardInterrupt/SystemExit.)
    print(f"Ignoring error while closing previous environment: {exc!r}")
env = RandomTrackEnv(render_mode='tp_camera', seed=0)



In [38]:
# Initialize the agent and replay buffer for evaluation.
state_dim = 8 # Dimension of the state space
action_dim = 2 # Dimension of the action space
hidden_dim = 256
max_action = (1,0.6) # Maximum value of the action
# NOTE(review): num_episodes and max_steps are not read by this cell or by
# the rollout cell that follows (which runs until `done`) -- confirm whether
# they are still needed.
num_episodes = 1000
max_steps = 25
batch_size = 500

# Rebuild the buffer from the CSV saved after training and restore saved weights.
replay_buffer = ReplayBuffer(buffer_size=50000, state_dim=state_dim, action_dim=action_dim)
replay_buffer.load_from_csv("replayBuffer_train_test_1000_eposh.csv")
agent = DDPGAgent(state_dim, action_dim, hidden_dim, replay_buffer, max_action)
agent.load_weights()

# Same guard as the training cell: only train when the buffer holds more
# transitions than one batch.
if replay_buffer.size > batch_size:
    agent.train(batch_size=batch_size)

In [39]:
# Roll out one episode (seed 4) with the saved weights and no exploration
# wrapper, printing the running return every 50 steps and the total at the end.
done = False
state = env.reset(seed=4)
agent.load_weights()

total_reward = 0
counter = 0

while True:
    # Query the agent directly and advance the environment by one step.
    state, reward, done, _ = env.step(agent.get_action(state))

    total_reward += reward
    counter += 1

    if counter % 50 == 0:
        print(f'{counter} : total reward: {total_reward}')

    if done:
        break

print(f'{counter} : Finished simulation. Total reward: {total_reward}')


50 : total reward: 17.888857977443415
100 : total reward: 35.66990133411456
150 : total reward: 53.372084246898154
200 : total reward: 71.89597171081152
244 : Finished simulation. Total reward: 84.91537914557964
