# [  ]

In [None]:
import fatbot as fb
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import os

# Setup

### RL Algorithm

In [None]:
# Run identity and PPO rollout / optimisation sizes used by the cells below.
model_name = 'hull'        # first path component of model_path
db = fb.db.db8             # object exposing `isd` and `envF`, used to build the environments
global_isd = 'D'           # key into db.isd selecting the list of initial states
total_timesteps = 200_000  # passed to model.learn
n_steps = 2 * 2048         # passed to PPO as n_steps
batch_size = 2 * 64        # passed to PPO as batch_size
n_epochs = 20              # passed to PPO as n_epochs

### Reward Scheme

In [None]:
# Reward / environment settings. Each value is passed by keyword to db.envF
# when the training and testing environments are built.
# Passed as `scheme=`.
reward_scheme = 'hullnt'
# Passed as `delta_reward=`.
delta_reward = True
# Passed as `scan_radius=`.
scan_radius = 20
# Passed as `reset_noise=`. NOTE(review): presumably one noise scale per axis — confirm in db.envF.
reset_noise = (2.0, 2.0)

### Hyperparams

In [None]:
# Discount factor, episode horizon, and where the trained model is stored.
gamma = 0.75
horizon = 1000
model_version = 'base'
model_path = os.path.join(model_name, model_version)

# Learning-rate scheduling.
# NOTE(review): presumably REMAP linearly maps the input range (-0.2, 1) onto
# (start_lr, end_lr) — confirm against fb.REMAP.
start_lr = 0.00050
end_lr = 0.000040
lr_mapper = fb.REMAP((-0.2, 1), (start_lr, end_lr))


def lr_schedule(progress):
    """Return the learning rate for the schedule argument ``progress``.

    The argument is flipped to ``1 - progress`` and passed through
    ``lr_mapper.in2map``.

    NOTE(review): presumably ``progress`` is the remaining-progress fraction
    handed over by PPO (1 at the start of training, 0 at the end) — confirm
    against fb.PPO.
    """
    return lr_mapper.in2map(1 - progress)

# Training

### prepare

In [None]:

# Initial state distribution: uniformly sample from all states listed under
# db.isd[global_isd].
# Alternatives noted by the author:
#   [db.isd[db.isd_keys[0]]]
#   [v for k, v in db.isd.items()]
initial_state_keys = db.isd[global_isd]
print(f'Total Initial States: {len(initial_state_keys)}')

# Build the training environment (testing=False).
training_env = db.envF(
    testing=False,
    scan_radius=scan_radius,
    reset_noise=reset_noise,
    horizon=horizon,
    scheme=reward_scheme,
    delta_reward=delta_reward,
    point_list=initial_state_keys,
    state_history=False,
)

# Optional sanity check of the environment before training.
fb.check_env(training_env)

In [None]:
# Reset the training env and draw it once as a visual sanity check.
training_env.reset()
# Binding the result to `_` keeps the return value of render() from being echoed as cell output.
_ = training_env.render()

### perform training

In [None]:
# Train a PPO agent on training_env, save it to model_path and report the elapsed time.
training_start_time = fb.common.now()
print(f'Training @ [{model_path}]')
model = fb.PPO(
    policy='MlpPolicy',
    env=training_env,
    learning_rate=lr_schedule,  # callable schedule rather than a constant
    n_steps=n_steps,
    batch_size=batch_size,
    n_epochs=n_epochs,
    gamma=gamma,
    gae_lambda=0.95,
    clip_range=0.20,
    clip_range_vf=None,
    normalize_advantage=True,
    ent_coef=0.0,
    vf_coef=0.5,
    max_grad_norm=0.5,
    use_sde=False,
    sde_sample_freq=-1,
    target_kl=None,
    tensorboard_log=None,
    create_eval_env=False,
    verbose=0,
    seed=None,
    device='cpu',
    _init_setup_model=True,
    policy_kwargs=dict(
        activation_fn=nn.LeakyReLU,
        # Layer widths for the policy (pi) and value (vf) networks.
        # Other widths noted by the author: 256, 256, 256, 128, 128
        net_arch=[dict(pi=[512, 512, 300], vf=[512, 512, 300])],
    ),
)

# NOTE(review): assuming fb.PPO is Stable-Baselines3's PPO (the constructor
# signature matches), `log_interval` counts rollout iterations of n_steps
# timesteps each, not timesteps. The previous value int(0.1 * total_timesteps)
# was far larger than the number of iterations that ever run, so a log line
# could never be produced. Log roughly every 10% of training instead
# (assumes a single, non-vectorised env).
model.learn(
    total_timesteps=total_timesteps,
    log_interval=max(1, total_timesteps // (10 * n_steps)),
)
model.save(model_path)
training_end_time = fb.common.now()
print(f'Finished!, Time-Elapsed:[{training_end_time-training_start_time}]')

# Testing

In [None]:
# Load the model stored at model_path, replacing any in-memory `model`.
model = fb.PPO.load(model_path)
# Echo the loaded model object and its path as the cell output.
model, model_path

### prepare

In [None]:

# Initial state distribution: uniformly sample from all states listed under
# db.isd[global_isd].
# Alternatives noted by the author:
#   [db.isd[db.isd_keys[0]]]
#   [v for k, v in db.isd.items()]
initial_state_keys = db.isd[global_isd]
print(f'Total Initial States: {len(initial_state_keys)}')

# Build the testing environment (testing=True); all other settings match the
# values used for the training environment.
testing_env = db.envF(
    testing=True,
    scan_radius=scan_radius,
    reset_noise=reset_noise,
    horizon=horizon,
    scheme=reward_scheme,
    delta_reward=delta_reward,
    point_list=initial_state_keys,
    state_history=False,
)



### perform testing

In [None]:
# Evaluate the loaded model on testing_env for one deterministic episode,
# with rendering, video and result plots switched off.
print(f'Testing @ [{model_path}]')
average_return, total_steps, sehist, tehist = fb.TEST(
    env=testing_env,
    model=model,
    episodes=1,
    steps=0,
    deterministic=True,
    render_as='None',  # the string 'None', not the None object
    save_dpi='figure',
    make_video=False,
    video_fps=1,
    render_kwargs={
        'local_sensors': True,
        'reward_signal': True,
        'fill_bots': False,
        'state_hist_marker': 'o',
    },
    starting_state=None,
    plot_results=0,
    start_n=0,
    reverb=1,
    plot_end_states=False,
    save_states='',
    save_prefix='',
)
# NOTE(review): presumably sehist / tehist are per-step and per-episode
# histories — confirm against fb.TEST.
print(f'{average_return=}, {total_steps=}, {sehist=}, {len(tehist)=}')




In [None]:
# Reset the testing env; as the cell's last expression, the value returned by reset() is echoed.
testing_env.reset()

In [None]:
# Draw the current state of the testing env; binding to `_` keeps the return value from being echoed.
_ = testing_env.render()

In [None]:
# Apply one hand-written action: twelve float32 entries, all zero except a -1 at index 9.
# NOTE(review): the pairwise layout suggests two action components per bot — confirm against the env.
testing_env.step(
    np.array(
        [
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, -1,
            0, 0,
        ],
        dtype=np.float32,
    )
)

In [None]:
from scipy.spatial import ConvexHull, convex_hull_plot_2d

In [None]:
# Convex hull of the points in testing_env.xy.
# NOTE(review): presumably `xy` holds one 2-D position per bot — confirm against the env.
hull = ConvexHull(testing_env.xy)

# Print: number of hull vertices, number of bots, and 1 if the two are equal (else 0).
n_hull_vertices = len(hull.vertices)
print(n_hull_vertices, testing_env.N_BOTS, int(n_hull_vertices == testing_env.N_BOTS))

# convex_hull_plot_2d returns the Figure. Bind it to `_`, as this notebook
# already does for render(), so the returned object is not echoed as cell
# output in addition to the plot itself.
_ = convex_hull_plot_2d(hull)