In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext line_profiler

In [None]:
from functools import partial

import time
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [None]:
import jax
import jax.numpy as jnp

jax.config.update('jax_platform_name', 'cpu')

import diffrax
import equinox as eqx
import optax

from haiku import PRNGSequence

In [None]:
from cmaes import CMAwM
from scipy.stats.qmc import LatinHypercube

import exciting_environments as excenvs

import exciting_exciting_systems
# from exciting_exciting_systems.models.model_utils import simulate_ahead_with_env

from exciting_exciting_systems.evaluation.plotting_utils import plot_sequence

from exciting_exciting_systems.related_work.np_reimpl.env_utils import simulate_ahead_with_env
from exciting_exciting_systems.related_work.excitation_utils import generate_aprbs, soft_penalty
from exciting_exciting_systems.related_work.np_reimpl.pendulum import Pendulum
from exciting_exciting_systems.related_work.np_reimpl.metrics import MC_uniform_sampling_distribution_approximation

---

In [None]:
# Simulation setup shared by the cells below.
# `tau` is handed to the environment here and to `plot_sequence` later on
# (presumably the sampling time in seconds -- confirm against `Pendulum`).
tau = 2e-2
batch_size = 1

env = Pendulum(batch_size=batch_size, tau=tau, max_torque=5)

In [None]:
h = 100  # number of amplitude/duration pairs that make up the example signal

# A dedicated, seeded generator keeps this example reproducible under
# "Restart Kernel -> Run All" and leaves NumPy's global random state untouched.
aprbs_rng = np.random.default_rng(0)

# Parameter vector layout: the first `h` entries are amplitudes in [-1, 1),
# the last `h` entries are integer durations in [2, 50).
# NOTE: concatenating float32 with int32 promotes the whole vector to float64,
# which is why the durations are cast back to integers below.
action_parameters = np.concatenate([
    aprbs_rng.uniform(low=-1, high=1, size=(h,)).astype(np.float32),
    aprbs_rng.integers(low=2, high=50, size=(h,), dtype=np.int32),
])

# `[None, :, None]` adds a leading and a trailing singleton axis to the signal
# (presumably the batch and action dimensions -- confirm against the env).
actions = generate_aprbs(
    amplitudes=action_parameters[:h],
    durations=np.abs(action_parameters[h:].astype(np.int32)),
)[None, :, None]

# Trailing `;` suppresses the `[<Line2D ...>]` repr in the cell output.
plt.plot(np.squeeze(actions));

In [None]:
# Initial condition for all simulations in this notebook. `obs` and `env_state`
# are also read as module-level names by later cells (`GoatsProblem._evaluate`
# and the re-simulation of the best solution).
obs, env_state = env.reset()

# Cast both with the same NumPy dtype; the original mixed `np.float32` and
# `jnp.float32`, which resolve to the same dtype but obscure that this is the
# NumPy re-implementation of the environment.
obs = obs.astype(np.float32)
env_state = env_state.astype(np.float32)

# Roll the environment forward under the example APRBS signal.
observations = simulate_ahead_with_env(env, obs, env_state, actions)

print("actions.shape:", actions.shape)
print("observations.shape:", observations.shape)

print(" \n One of the trajectories:")
fig, axs = plot_sequence(
    observations=observations[0, ...],
    actions=actions[0, ...],
    tau=tau,
    obs_labels=[r"$\theta$", r"$\omega$"],
    action_labels=[r"$u$"],
)
plt.show()

In [None]:
# Candidate amplitude levels, drawn with one-dimensional Latin hypercube
# sampling so that the levels are spread evenly over the range.
n_amplitude_levels = 1000
amplitude_sampler = LatinHypercube(d=1)

# The sampler returns values in [0, 1) with shape (n, 1); rescale to [-1, 1).
unit_samples = amplitude_sampler.random(n=n_amplitude_levels)
amplitudes = 2 * unit_samples - 1

In [None]:
# Visual sanity check of the sampled amplitude levels. Labels make the figure
# readable on its own; `plt.show()` keeps the `[<Line2D ...>]` repr out of the
# cell output.
plt.plot(amplitudes)
plt.xlabel("sample index")
plt.ylabel("amplitude level")
plt.title("Latin hypercube sampled amplitude levels")
plt.show()

In [None]:
def featurize_theta(obs):
    """Replace the angle channel of an observation by its sine and cosine.

    A raw angle is awkward to compare in the loss because it wraps around:
    angles such as 1.99 * pi and 0 describe almost the same position. The
    angle is therefore represented by sin(phi) and cos(phi) instead.

    Args:
        obs: array whose last axis holds the angle at index 0 and a second
            quantity at index 1. The angle is multiplied by pi before the
            trigonometric functions are applied (presumably because it is
            normalized to [-1, 1] -- confirm against the environment).
            Entries beyond index 1 on the last axis are ignored.

    Returns:
        Array with the same leading axes as `obs` and a last axis of size 3:
        `[sin(pi * obs[..., 0]), cos(pi * obs[..., 0]), obs[..., 1]]`.
    """
    angle = obs[..., 0] * np.pi
    velocity = obs[..., 1]
    return np.stack((np.sin(angle), np.cos(angle), velocity), axis=-1)

In [None]:
from exciting_exciting_systems.related_work.algorithms import excite_with_GOATs

In [None]:
# Settings for the library implementation of GOATs, collected in one place so
# they are easy to tweak and compare with the hand-rolled pymoo version below.
goats_settings = dict(
    n_amplitudes=100,
    env=env,
    bounds_duration=(1, 50),
    population_size=20,
    n_generations=None,
    n_support_points=1600,
    featurize=featurize_theta,
    seed=0,
    verbose=True,
)

# NOTE: this rebinds `observations` and `actions`, replacing the results of the
# random APRBS example above.
observations, actions = excite_with_GOATs(**goats_settings)

In [None]:
# Plot the trajectory stored at batch index 0 of the current
# `observations` / `actions` arrays.
first_observations = observations[0, ...]
first_actions = actions[0, ...]

fig, axs = plot_sequence(
    observations=first_observations,
    actions=first_actions,
    tau=tau,
    obs_labels=[r"$\theta$", r"$\omega$"],
    action_labels=[r"$u$"],
)
plt.show()

In [None]:
from pymoo.core.problem import ElementwiseProblem
from pymoo.core.variable import Real, Integer, Choice, Binary

from pymoo.problems import get_problem
from pymoo.optimize import minimize
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.core.mixed import MixedVariableGA

from pymoo.operators.repair.rounding import RoundingRepair
from pymoo.operators.sampling.rnd import PermutationRandomSampling, IntegerRandomSampling
from pymoo.operators.crossover.ox import OrderCrossover
from pymoo.operators.mutation.inversion import InversionMutation
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM

## GOATS:
- What is the influence of the Lehmer coding on the result of the algorithm?
- What genetic algorithm is best suited here?

In [None]:
def decode(lehmer_code: list[int]) -> list[int]:
    """Decode a Lehmer code into the permutation of ``0 .. n-1`` it encodes.

    Digit ``i`` of the code selects, by position, one of the elements that
    have not been used by digits ``0 .. i-1``; the selected element is
    appended to the permutation and removed from the pool.

    Args:
        lehmer_code: sequence of ``n`` integer digits. Digit ``i`` must
            satisfy ``0 <= digit <= n - 1 - i``.

    Returns:
        The decoded permutation as a list of ``n`` distinct Python ints.

    Raises:
        IndexError: if a digit is negative or not smaller than the number of
            elements still available. Negative digits used to be accepted
            silently (interpreted as "count from the end"), producing a
            permutation that does not correspond to a valid Lehmer code.
    """
    remaining = list(range(len(lehmer_code)))
    permutation = []
    for position, digit in enumerate(lehmer_code):
        if not 0 <= digit < len(remaining):
            raise IndexError(
                f"Lehmer digit {digit} at position {position} is outside the "
                f"valid range [0, {len(remaining) - 1}]"
            )
        # `pop` selects and removes by position in one step, avoiding the extra
        # search by value that `list.remove` performs.
        permutation.append(remaining.pop(digit))
    return permutation

In [None]:
def featurize_theta(obs):
    """Replace the angle channel of an observation by its sine and cosine.

    A raw angle is awkward to compare in the loss because it wraps around:
    angles such as 1.99 * pi and 0 describe almost the same position. The
    angle is therefore represented by sin(phi) and cos(phi) instead.

    Note:
        This re-defines (and shadows) the `featurize_theta` from an earlier
        cell of this notebook; both definitions compute the same features.

    Args:
        obs: array whose last axis holds the angle at index 0 and a second
            quantity at index 1. The angle is multiplied by pi before the
            trigonometric functions are applied (presumably because it is
            normalized to [-1, 1] -- confirm against the environment).
            Entries beyond index 1 on the last axis are ignored.

    Returns:
        Array with the same leading axes as `obs` and a last axis of size 3:
        `[sin(pi * obs[..., 0]), cos(pi * obs[..., 0]), obs[..., 1]]`.
    """
    scaled_angle = np.pi * obs[..., 0]
    features = (np.sin(scaled_angle), np.cos(scaled_angle), obs[..., 1])
    return np.stack(features, axis=-1)

In [None]:
class GoatsProblem(ElementwiseProblem):
    """Single-objective pymoo problem for optimizing an APRBS excitation signal.

    Decision vector layout (length ``2 * n_amplitudes``):

    * ``x[:n_amplitudes]`` -- Lehmer code that determines the order in which
      the fixed amplitude levels are applied (decoded with `decode`).
    * ``x[n_amplitudes:]`` -- one duration per amplitude, bounded to
      ``[1, max_duration]``.

    NOTE: `_evaluate` relies on module-level names that must exist when the
    optimization runs: `env`, `obs`, `env_state`, `support_points`, and the
    functions `decode`, `featurize_theta`, `generate_aprbs`,
    `simulate_ahead_with_env`, `soft_penalty` and
    `MC_uniform_sampling_distribution_approximation`.
    """

    def __init__(self, n_amplitudes, max_duration, **kwargs):
        """Set up the variable bounds and draw the fixed amplitude levels.

        Args:
            n_amplitudes: number of amplitude levels (and of durations).
            max_duration: upper bound for every duration variable.
            **kwargs: forwarded to `ElementwiseProblem.__init__` (for example
                `elementwise_runner`). They were previously accepted but
                silently dropped.
        """
        # Digit i of a Lehmer code may take the values 0 .. n_amplitudes-1-i,
        # so the upper bounds count down from n_amplitudes-1 to 0.
        lehmer_upper = np.arange(n_amplitudes - 1, -1, -1, dtype=float)
        duration_upper = np.ones(n_amplitudes) * max_duration

        super().__init__(
            n_var=2 * n_amplitudes,
            n_obj=1,
            xl=np.concatenate([np.zeros(n_amplitudes), np.ones(n_amplitudes)]),
            xu=np.concatenate([lehmer_upper, duration_upper]),
            **kwargs,
        )

        self.n_amplitudes = n_amplitudes

        # Amplitude levels in [-1, 1), shape (n_amplitudes, 1).
        # NOTE: the sampler is unseeded, so every instance draws new levels.
        amplitude_sampler = LatinHypercube(d=1)
        self.amplitudes = amplitude_sampler.random(n=n_amplitudes) * 2 - 1

    def _evaluate(self, x, out, *args, **kwargs):
        """Simulate the signal encoded by `x` and store its loss in ``out["F"]``.

        Args:
            x: decision vector of a single individual (see class docstring).
            out: pymoo output dict; the objective value is written to key "F".
        """
        lehmer_code = x[:self.n_amplitudes]
        durations = x[self.n_amplitudes:]

        # Order in which the fixed amplitude levels are applied.
        indices = decode(lehmer_code)
        applied_amplitudes = self.amplitudes[indices]

        actions = generate_aprbs(
            amplitudes=applied_amplitudes,
            durations=durations,
        )[None, :, None]

        # Only the trajectory at batch index 0 is evaluated.
        observations = simulate_ahead_with_env(
            env,
            obs,
            env_state,
            actions,
        )[0]

        feat_observations = featurize_theta(observations)

        # Presumably measures how well the visited feature points cover the
        # support points -- confirm against the metric's implementation.
        score = MC_uniform_sampling_distribution_approximation(
            data_points=feat_observations,
            support_points=support_points,
        )

        # Weights of the soft constraints on observations and actions.
        rho_obs = 1
        rho_act = 1
        penalty_terms = rho_obs * soft_penalty(a=observations, a_max=1) + rho_act * soft_penalty(a=actions, a_max=1)

        out["F"] = score + penalty_terms.item()

In [None]:
# Reference points for the coverage metric: Latin hypercube samples of the
# two-dimensional observation space, rescaled from [0, 1) to [-1, 1) and then
# mapped through the same featurization as the simulated observations.
support_sampler = LatinHypercube(d=2)
raw_support_points = 2 * support_sampler.random(n=1600) - 1
support_points = featurize_theta(raw_support_points)

support_points.shape

In [None]:
n_amplitudes = 100
max_duration = 50

problem = GoatsProblem(n_amplitudes=n_amplitudes, max_duration=max_duration)

# Real-valued SBX crossover and polynomial mutation, each paired with
# `RoundingRepair` (pymoo's recipe for handling discrete variables with
# real-valued operators).
rounded_crossover = SBX(prob=1.0, eta=10.0, vtype=float, repair=RoundingRepair())
rounded_mutation = PM(prob=1.0, eta=10.0, vtype=float, repair=RoundingRepair())

algorithm = GA(
    pop_size=20,
    sampling=IntegerRandomSampling(),
    crossover=rounded_crossover,
    mutation=rounded_mutation,
    eliminate_duplicates=True,
)

# No termination criterion is passed, so pymoo's default for the algorithm applies.
res = minimize(problem, algorithm, seed=3, save_history=False, verbose=True)

print("Best solution found: %s" % res.X)
print("Function value: %s" % res.F)
print("Constraint violation: %s" % res.CV)

In [None]:
# Re-simulate the best individual found by the optimizer. Its decision vector
# splits into the Lehmer-coded amplitude order and the per-amplitude durations.
best_lehmer_code = res.X[:problem.n_amplitudes]
best_durations = res.X[problem.n_amplitudes:]

indices = decode(best_lehmer_code)
applied_amplitudes = problem.amplitudes[indices]

# NOTE: rebinds the module-level `actions` and `observations`.
actions = generate_aprbs(
    amplitudes=applied_amplitudes,
    durations=best_durations,
)[None, :, None]

observations = simulate_ahead_with_env(env, obs, env_state, actions)

In [None]:
# Plot the trajectory stored at batch index 0 of the current
# `observations` / `actions` arrays.
best_observations = observations[0, ...]
best_actions = actions[0, ...]

fig, axs = plot_sequence(
    observations=best_observations,
    actions=best_actions,
    tau=tau,
    obs_labels=[r"$\theta$", r"$\omega$"],
    action_labels=[r"$u$"],
)
plt.show()

## sGOATS:

- essentially repeat GOATs for subsets of the data
- either take previous data points into account or not (I would argue that previous data points should always be taken into account, but this is still an open question)