In [1]:
import sys
import os
import torch
sys.path.append(os.path.abspath("/home/raggadruid/Documents/ZephyrRL-Training/"))

from src.training_algorithms.reinforce import REINFORCE
from src.env.sailboat_env import SailboatEnv
from src.utils.plotting import training_reward


import torch
import time
import numpy as np
from datetime import timedelta


pygame 2.6.1 (SDL 2.30.52, Python 3.13.1)
Hello from the pygame community. https://www.pygame.org/contribute.html




In [2]:
model_path = "/home/raggadruid/Documents/ZephyrRL-Training/src/model_saves/variable_wind_model.pth"

#Parameters
total_episodes = 20000
save_interval = 200
info_interval = 50
draw_interval = 1000



wind_settings = {
    "type": 'variable_per_epoch'
}
#env = SailboatEnv(**ENV_PARAMS)
env = SailboatEnv(wind_settings)

obs_space_dims = 8
action_space_dims = 1

agent = REINFORCE(obs_space_dims, action_space_dims)
reward_over_episodes = []

start_time = time.time()  # Start timing
# Training loop
for episode in range(total_episodes):
    episode_start_time = time.time()  # Record episode start time
    obs = env.reset()
    episode_reward = 0
    done = False
    
    while not done:
        action = agent.sample_action(obs)
        obs, reward, done, _ = env.step(action)
        agent.rewards.append(reward)
        episode_reward += reward
        
        if episode % draw_interval == 0:  # Render every 500 episodes
            time.sleep(1/60)
            env.draw()
    
    reward_over_episodes.append(episode_reward)
    agent.update()
    
    # Calculate time per episode
    episode_time = time.time() - episode_start_time  
    elapsed_time = time.time() - start_time  

    # Estimate total training time
    estimated_total_time = (elapsed_time / (episode + 1)) * total_episodes
    remaining_time = estimated_total_time - elapsed_time
    
    if episode % save_interval == 0:
        torch.save(agent.net.state_dict(), model_path)
        print(f"Saved model at episode {episode}")
    
    if episode % info_interval == 0:
        avg_reward = np.mean(reward_over_episodes[-10:])
        print(f"Episode {episode}: Reward {episode_reward:.2f}, Avg (last 10): {avg_reward:.2f}")
        print(f"Estimated Remaining: {timedelta(seconds=int(remaining_time))}")
        
        
training_reward(reward_over_episodes, 'Variable Wind Training')



Saved model at episode 0
Episode 0: Reward -1.69, Avg (last 10): -1.69
Estimated Remaining: 1 day, 3:34:55
Episode 50: Reward -1.23, Avg (last 10): -3.41
Estimated Remaining: 1:22:39
Episode 100: Reward -1.92, Avg (last 10): -2.93
Estimated Remaining: 1:02:07
Episode 150: Reward -1.08, Avg (last 10): -3.01
Estimated Remaining: 1:02:22
Saved model at episode 200
Episode 200: Reward -1.69, Avg (last 10): -0.83
Estimated Remaining: 0:59:28
Episode 250: Reward 4.99, Avg (last 10): -0.93
Estimated Remaining: 0:57:49
Episode 300: Reward -3.57, Avg (last 10): -1.58
Estimated Remaining: 0:57:08
Episode 350: Reward -2.02, Avg (last 10): -2.15
Estimated Remaining: 0:55:10
Saved model at episode 400
Episode 400: Reward -4.13, Avg (last 10): -1.91
Estimated Remaining: 0:53:26
Episode 450: Reward -3.70, Avg (last 10): -1.84
Estimated Remaining: 0:52:30
Episode 500: Reward -6.98, Avg (last 10): -0.85
Estimated Remaining: 0:51:34
Episode 550: Reward -5.24, Avg (last 10): -2.39
Estimated Remaining: 0:

KeyboardInterrupt: 

In [None]:
threshold = 10  # Replace with the value you want to use as the threshold
filtered_rewards = [reward for reward in reward_over_episodes if abs(reward) < threshold]

training_reward(filtered_rewards, 'Variable Wind Training')

print(filtered_rewards)
