In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))

from src.training_algorithms.reinforce import REINFORCE
from src.env.sailboat_env import SailboatEnv
from src.utils.plotting import training_reward

import torch
import time
from datetime import timedelta
import numpy as np
import matplotlib.pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


pygame 2.6.1 (SDL 2.28.4, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Candidate per-user environment overrides.
# NOTE: these are currently NOT applied -- the environment below is built with
# its defaults. To enable them, construct the env as SailboatEnv(**ENV_PARAMS)
# (TODO confirm the constructor accepts these keyword arguments).
ENV_PARAMS = {
    "wind_speed": np.random.uniform(5, 15),  # Vary wind speed
    "theta_wind": np.random.uniform(0, 2 * np.pi),  # Vary wind direction
    "max_steps": 1500  # Change episode length
}

# Training parameters
TOTAL_EPISODES = 200
SAVE_INTERVAL = 100    # Save model every X episodes
RENDER_INTERVAL = 10   # Draw the environment every X episodes
LOG_INTERVAL = 10      # Print progress every X episodes
RENDER_FPS = 60        # Frame-rate cap used while rendering

MODEL_SAVE_PATH = "../model_saves/noah_model.pth"

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first save.
_save_dir = os.path.dirname(MODEL_SAVE_PATH)
if _save_dir:
    os.makedirs(_save_dir, exist_ok=True)

# ---------------------------------------------------------------------------
# Environment and agent
# ---------------------------------------------------------------------------
env = SailboatEnv()

obs_space_dims = 8
action_space_dims = 1

agent = REINFORCE(obs_space_dims, action_space_dims)
reward_over_episodes = []  # total reward of every finished episode, in order

# ---------------------------------------------------------------------------
# Training loop
# ---------------------------------------------------------------------------
start_time = time.time()  # Start timing
for episode in range(TOTAL_EPISODES):
    render_this_episode = episode % RENDER_INTERVAL == 0
    obs = env.reset()
    episode_reward = 0
    done = False

    while not done:
        action = agent.sample_action(obs)
        obs, reward, done, _ = env.step(action)
        agent.rewards.append(reward)
        episode_reward += reward

        if render_this_episode:
            # Throttle to roughly RENDER_FPS so the animation is watchable.
            time.sleep(1 / RENDER_FPS)
            env.draw()

    reward_over_episodes.append(episode_reward)
    # Presumably performs the policy update from the rewards collected above
    # (implementation lives in src.training_algorithms.reinforce).
    agent.update()

    # Estimate remaining time from the mean duration of the episodes finished
    # so far. `episode` is 0-based, so divide by episode + 1; dividing by
    # `episode` itself raises ZeroDivisionError on the very first iteration.
    episodes_done = episode + 1
    elapsed_time = time.time() - start_time
    estimated_total_time = (elapsed_time / episodes_done) * TOTAL_EPISODES
    remaining_time = estimated_total_time - elapsed_time

    if episode % SAVE_INTERVAL == 0:
        torch.save(agent.net.state_dict(), MODEL_SAVE_PATH)
        print(f"Saved model at episode {episode}")

    if episode % LOG_INTERVAL == 0:
        avg_reward = np.mean(reward_over_episodes[-10:])
        print(f"Episode {episode}: Reward {episode_reward:.2f}, Avg (last 10): {avg_reward:.2f}")
        print(f"Estimated Remaining: {timedelta(seconds=int(remaining_time))}")

# Always persist the final weights: the periodic checkpoint only fires when
# `episode % SAVE_INTERVAL == 0`, which never includes the last episode, so
# without this the file on disk would hold a mid-training snapshot.
torch.save(agent.net.state_dict(), MODEL_SAVE_PATH)
print(f"Saved final model after {TOTAL_EPISODES} episodes")

training_reward(reward_over_episodes, 'Test Training')

Loaded saved model from ../model_saves/noah_model.pth
Saved model at episode 0
Episode 0: Reward -1.47, Avg (last 10): -1.47


: 

In [5]:
# Testing the trained model on a given state
def test_model(state_input):
    """Runs the trained RL model on a provided state."""
    if not os.path.exists(MODEL_SAVE_PATH):
        print("No trained model found.")
        return
    agent.net.load_state_dict(torch.load(MODEL_SAVE_PATH))
    state = np.array(state_input)
    action = agent.sample_action(state)
    print("Predicted action for input state:", action)
    return action

# Example usage: query the policy with one hand-written 8-value state.
# The bare call on the last line makes the notebook display the returned action.
sample_state = [
    0.5,
    0.5,
    0.01,
    0.01,
    0.02,
    0.3,
    np.pi,
    10,
]
test_model(sample_state)

Predicted action for input state: [-2.4190266]


array([-2.4190266], dtype=float32)