In [94]:
import sys
import os
import time
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
import joblib
from joblib import dump, load
import xgboost as xgb
import matplotlib.animation as animation
from typing import Dict, Any

# Add the src directory to the path
sys.path.append(os.path.abspath('../src'))
sys.path.append(os.path.abspath('..'))

# Import the BaseAgent class
from src.agents.base_agent import BaseAgent
from initial_windfields import get_initial_windfield, INITIAL_WINDFIELDS
from src.env_sailing import SailingEnv
from src.test_agent_validity import validate_agent, load_agent_class
from src.evaluation import evaluate_agent, visualize_trajectory

# Environment parameters
env = SailingEnv(**get_initial_windfield('simple_static'))
n_actions = env.action_space.n
d_s = 2054

### Define the playing function

In [95]:
def make_animation(imgs):
  """
  Makes an animation from a list of images
  Parameters
  ----------
  imgs: list of (height, width, 3) np arrays
    list of images
  Return
  -------
  ani: animation
  """
  fig, ax = plt.subplots()
  draw = []
  for i in range(len(imgs)):
    draw_i = ax.imshow(imgs[i])
    if i == 0:
      ax.imshow(imgs[0]) # Show an initial one first
    draw.append([draw_i])
  plt.close()
  ani = animation.ArtistAnimation(fig, draw, interval=200, blit=True,
                              repeat=False)
  return ani

In [96]:
def play_policy(env, pi, horizon=200, capture_rate=1):
  s, _ = env.reset()
  a = pi(s)
  imgs = []
  imgs.append(env.render())
  for tt in range(horizon):
    s, rew, term, trunc, _ = env.step(a)
    a = pi(s)
    if tt % capture_rate == 0:
      imgs.append(env.render())
    if term or trunc:
      break
  return make_animation(imgs)

In [97]:
for initial_windfield_name, initial_windfield in INITIAL_WINDFIELDS.items():
    print(initial_windfield_name)
    print(initial_windfield)

training_1
{'wind_init_params': {'base_speed': 3.0, 'base_direction': (-0.8, -0.2), 'pattern_scale': 32, 'pattern_strength': 0.3, 'strength_variation': 0.4, 'noise': 0.1}, 'wind_evol_params': {'wind_change_prob': 1.0, 'pattern_scale': 128, 'perturbation_angle_amplitude': 0.1, 'perturbation_strength_amplitude': 0.1, 'rotation_bias': 0.02, 'bias_strength': 1.0}, 'env_params': {'wind_grid_density': 25, 'wind_arrow_scale': 80, 'render_mode': 'rgb_array'}}
training_2
{'wind_init_params': {'base_speed': 3.0, 'base_direction': (-0.2, 0.8), 'pattern_scale': 128, 'pattern_strength': 0.6, 'strength_variation': 0.3, 'noise': 0.1}, 'wind_evol_params': {'wind_change_prob': 1.0, 'pattern_scale': 128, 'perturbation_angle_amplitude': 0.1, 'perturbation_strength_amplitude': 0.1, 'rotation_bias': 0.02, 'bias_strength': 1.0}, 'env_params': {'wind_grid_density': 25, 'wind_arrow_scale': 80, 'render_mode': 'rgb_array'}}
training_3
{'wind_init_params': {'base_speed': 3.0, 'base_direction': (0.2, -0.8), 'patt

## Fitted Q iteration

>Implement fitted Q iterations with random forest using uniform exploration for $\pi$.

In [112]:
# Collect a dataset 

def collect_dataset(pi= lambda x: np.random.randint(0, n_actions) , n=10000):
    """
    Collect a dataset of the form $(s_i, a_i, r_{a_i}(s_i), s'_i)_{i=1}^n$
    by running a policy that chooses its action uniformly at random

    Parameters
    ---------
    env: Environment
    pi: Policy
    n: int
        Number of samples to collect

    Return
    -----
    data: np array of size (n, 2d_s + d_a + 2)
        data collected by the random policy
        the first 4 columns are the states s_i,
        the 5th, 6th and 7th columns contain the actions a_i,
        rewards r_{a_i}(s_i) and whether the step is a termination step
        and the columns 8th-12th columns contain the states s'_i
    """
    data = []
    for initial_windfield_name, initial_windfield in INITIAL_WINDFIELDS.items():
        print(initial_windfield_name)
        env = SailingEnv(**get_initial_windfield(initial_windfield_name))
        s0, _ = env.reset()
        s = s0.copy()
        n_actions = env.action_space.n

        for i in range(n//4):
            a = pi(s)
            s2, r, done, trunc, _ = env.step(a)
            data.append(s.copy().tolist() + [a, r, done] + s2.copy().tolist())
            if done or trunc:
                s, _ = env.reset()
            else:
                s = s2.copy()

    return np.array(data)

In [101]:
INITIAL_WINDFIELDS.items()

dict_items([('training_1', {'wind_init_params': {'base_speed': 3.0, 'base_direction': (-0.8, -0.2), 'pattern_scale': 32, 'pattern_strength': 0.3, 'strength_variation': 0.4, 'noise': 0.1}, 'wind_evol_params': {'wind_change_prob': 1.0, 'pattern_scale': 128, 'perturbation_angle_amplitude': 0.1, 'perturbation_strength_amplitude': 0.1, 'rotation_bias': 0.02, 'bias_strength': 1.0}, 'env_params': {'wind_grid_density': 25, 'wind_arrow_scale': 80, 'render_mode': 'rgb_array'}}), ('training_2', {'wind_init_params': {'base_speed': 3.0, 'base_direction': (-0.2, 0.8), 'pattern_scale': 128, 'pattern_strength': 0.6, 'strength_variation': 0.3, 'noise': 0.1}, 'wind_evol_params': {'wind_change_prob': 1.0, 'pattern_scale': 128, 'perturbation_angle_amplitude': 0.1, 'perturbation_strength_amplitude': 0.1, 'rotation_bias': 0.02, 'bias_strength': 1.0}, 'env_params': {'wind_grid_density': 25, 'wind_arrow_scale': 80, 'render_mode': 'rgb_array'}}), ('training_3', {'wind_init_params': {'base_speed': 3.0, 'base_di

In [113]:
data = collect_dataset()

training_1


TypeError: SailingEnv.__init__() got an unexpected keyword argument 'env_params'

In [93]:
class FQI(BaseAgent):
    """ FQI agent"""
    
    def __init__(self, data=collect_dataset(), gamma=0.99, n_iterations=100, d_s=2054, n_actions=9):
        super().__init__()
        self.d_s = 2054
        self.data = data
        self.gamma = gamma 
        self.n_iterations = n_iterations
        self.d_s = d_s 
        self.n_actions = n_actions
        self.epsilon = 0.2
        self.model = RandomForestRegressor()
        self.pi = None
        
    
    def act(self, observation: np.ndarray) -> int:
        if self.pi is None:
            print("The Agent has not been trained")
        else:
            return int(self.pi(observation))
    
    def trainFQI(self):
        # Use the data collected before
        n = len(self.data)
        states, actions, rewards, dones, next_states = self.data[:, :2054], self.data[:, 2054], self.data[:, 2054+1], self.data[:, 2054+2], self.data[:, 2054+3:]
        X, Y = self.data[:, :2054+1], self.data[:, 2054+1]
        self.model.fit(X, Y)

        for _ in tqdm(range(self.n_iterations)):
            Qmax = np.max(
                            [
                            self.model.predict(np.column_stack([
                                            self.data[:, d_s + 3:],
                                            np.ones(n).reshape(-1, 1) * a
                                            ]))
                                for a in range(n_actions)
                            ],axis=0)
            Y = self.data[:, d_s + 1] + self.gamma * (1 - dones) * Qmax
            self.model.fit(X, Y)
        
        pi = lambda s: np.argmax(
                                [self.model.predict(np.array(s.tolist() + [a]).reshape(1, -1))[0] for a in range(n_actions)]
                                )
        self.pi = pi
        agent.save(path="models/FQI_RF")
        return pi
    
    def reset(self) -> None:
        """Reset the agent."""
        pass  # Nothing to reset in this simple agent
    
    def seed(self, seed: int = None) -> None:
        """Set the random seed."""
        self.np_random = np.random.default_rng(seed)

    def save(self, path):
        if self.model is not None:
            joblib.dump(self.model, path)
        else:
            print("No model found to save.")

    def load(self):
        try:
            self.model = joblib.load("models/FQI_RF")
        except:
            print("No saved model found.")
            self.model = None


agent = FQI()
pi_FQI = agent.trainFQI()

TypeError: SailingEnv.__init__() got an unexpected keyword argument 'env_params'

In [82]:
# Choose which training initial windfields to evaluate on
TRAINING_INITIAL_WINDFIELDS = ["training_1", "training_2", "training_3"]

# Evaluation parameters for all initial windfields
ALL_SEEDS = [42, 43, 44, 45, 46]  # Seeds to use for all evaluations
ALL_MAX_HORIZON = 200             # Maximum steps per episode

# Only run if the agent was successfully loaded
if 'agent' in locals():
    # Store results for each initial windfield
    all_results = {}
    
    print(f"Evaluating agent on {len(TRAINING_INITIAL_WINDFIELDS)} training initial windfields...")
    
    # Evaluate on each initial windfield
    for initial_windfield_name in TRAINING_INITIAL_WINDFIELDS:
        print(f"\nInitial windfield: {initial_windfield_name}")
        
        # Get the initial windfield
        initial_windfield = get_initial_windfield(initial_windfield_name)
        
        # Run the evaluation
        results = evaluate_agent(
            agent=agent,
            initial_windfield=initial_windfield,
            seeds=ALL_SEEDS,
            max_horizon=ALL_MAX_HORIZON,
            verbose=False,  # Less verbose for multiple evaluations
            render=False,
            full_trajectory=False
        )
        
        # Store results
        all_results[initial_windfield_name] = results
        
        # Print summary
        print(f"  Success Rate: {results['success_rate']:.2%}")
        print(f"  Mean Reward: {results['mean_reward']:.2f}")
        print(f"  Mean Steps: {results['mean_steps']:.1f}")
    
    # Print overall performance
    total_success = sum(r['success_rate'] for r in all_results.values()) / len(all_results)
    print("\n" + "="*50)
    print(f"OVERALL SUCCESS RATE: {total_success:.2%}")
    print("="*50)

Evaluating agent on 3 training initial windfields...

Initial windfield: training_1
  Success Rate: 100.00%
  Mean Reward: 28.51
  Mean Steps: 127.4

Initial windfield: training_2
  Success Rate: 80.00%
  Mean Reward: 55.17
  Mean Steps: 70.4

Initial windfield: training_3
  Success Rate: 100.00%
  Mean Reward: 47.44
  Mean Steps: 75.4

OVERALL SUCCESS RATE: 93.33%


In [86]:
# Set to True to enable visualization
VISUALIZE = True
MAX_HORIZON = 200

# Visualization parameters
VIZ_INITIAL_WINDFIELD_NAME = "training_3"  # Choose which initial windfield to visualize
VIZ_SEED = 42                    # Choose a single seed for visualization

#############################################
### DO NOT MODIFY BELOW THIS LINE ##########
#############################################

# Only run if visualization is enabled and agent is loaded
if VISUALIZE and 'agent' in locals():
    # Get the initial windfield with visualization parameters
    viz_initial_windfield = get_initial_windfield(VIZ_INITIAL_WINDFIELD_NAME)
    viz_initial_windfield.update({
        'env_params': {
            'wind_grid_density': 25,
            'wind_arrow_scale': 80,
            'render_mode': "rgb_array"
        }
    })
    
    print(f"Visualizing agent behavior on initial windfield: {VIZ_INITIAL_WINDFIELD_NAME}")
    print(f"Using seed: {VIZ_SEED}")
    
    # Run the evaluation with visualization enabled
    viz_results = evaluate_agent(
        agent=agent,
        initial_windfield=viz_initial_windfield,
        seeds=VIZ_SEED,
        max_horizon=MAX_HORIZON,
        verbose=False,
        render=True,
        full_trajectory=True  # Enable full trajectory for visualization
    )
    
    # Visualize the trajectory with a slider
    visualize_trajectory(viz_results, None, with_slider=True)
else:
    if 'agent' in locals():
        print("Visualization is disabled. Set VISUALIZE = True to see agent behavior.")

Visualizing agent behavior on initial windfield: training_3
Using seed: 42


interactive(children=(IntSlider(value=0, description='Step:', max=87), Output()), _dom_classes=('widget-intera…

## DQN

In [None]:
class DQNAgent(BaseAgent):
    """ """
    
    def __init__(self):
        super().__init__()
        
    
    def act(self, observation: np.ndarray) -> int:
        """ """
        pass
    
    def reset(self) -> None:
        """Reset the agent."""
        pass  # Nothing to reset in this simple agent
    
    def seed(self, seed: int = None) -> None:
        """Set the random seed."""
        self.np_random = np.random.default_rng(seed)