In [3]:
import sys
import os
sys.path.append(os.path.abspath(".."))

from src.training_algorithms.reinforce import REINFORCE
from src.env.sailboat_env import SailboatEnv
from src.utils.plotting import training_reward


import torch
import time
import numpy as np
from datetime import timedelta


In [None]:
# Checkpoint file that receives agent.net's state_dict during training.
model_path = "../model_saves/variable_wind_model.pth"

# Training schedule; every interval below is measured in episodes.
total_episodes = 20000
save_interval = 200   # write a checkpoint
info_interval = 50    # print a progress report
draw_interval = 10    # render the episode on screen

# Wind configuration handed to the environment constructor.
wind_settings = {"type": "variable_per_epoch"}
env = SailboatEnv(wind_settings)

# Sizes passed to the agent.
# NOTE(review): presumably these must match the observation/action sizes
# produced by SailboatEnv - confirm against the environment.
obs_space_dims = 8
action_space_dims = 1
agent = REINFORCE(obs_space_dims, action_space_dims)

# Total reward of each finished episode, in order; plotted after training.
reward_over_episodes = []

# Create the checkpoint directory up front so the very first torch.save
# cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)

start_time = time.time()  # wall-clock reference for the ETA estimate

# Training loop: play one full episode, store its rewards on the agent,
# then let the agent update itself from that episode.
for episode in range(total_episodes):
    obs = env.reset()
    episode_reward = 0
    done = False
    # Decide once per episode whether this one is rendered.
    render_episode = episode % draw_interval == 0

    while not done:
        action = agent.sample_action(obs)
        obs, reward, done, _ = env.step(action)
        agent.rewards.append(reward)
        episode_reward += reward

        if render_episode:
            # Throttle to roughly 60 frames per second while drawing.
            time.sleep(1 / 60)
            env.draw()

    reward_over_episodes.append(episode_reward)
    # NOTE(review): presumably consumes agent.rewards for the policy-gradient
    # step and clears it - confirm in REINFORCE.update.
    agent.update()

    if episode % save_interval == 0:
        torch.save(agent.net.state_dict(), model_path)
        print(f"Saved model at episode {episode}")

    if episode % info_interval == 0:
        # ETA: extrapolate the mean time per finished episode to the full run.
        # Only computed here because it is only needed for the report.
        elapsed_time = time.time() - start_time
        estimated_total_time = (elapsed_time / (episode + 1)) * total_episodes
        remaining_time = estimated_total_time - elapsed_time

        avg_reward = np.mean(reward_over_episodes[-10:])
        print(f"Episode {episode}: Reward {episode_reward:.2f}, Avg (last 10): {avg_reward:.2f}")
        print(f"Estimated Remaining: {timedelta(seconds=int(remaining_time))}")

# The periodic save only fires on multiples of save_interval, so without this
# the updates made after the last such episode would never reach disk.
if total_episodes > 0 and (total_episodes - 1) % save_interval != 0:
    torch.save(agent.net.state_dict(), model_path)
    print(f"Saved final model after {total_episodes} episodes")

training_reward(reward_over_episodes, 'Variable Wind Training')

