# Example Script for the Senior Model

This is the example script for the Senior model. It builds on the previous notebooks for the Teacher, Tutor and Junior. Note that Ray must be installed and working in order to run this notebook!

In [3]:
import os
import grid2op
from lightsim2grid import LightSimBackend
import numpy as np
from pathlib import Path
import ray
import ray.tune as tune
import tensorflow as tf
from ray.tune.schedulers import PopulationBasedTraining
from ray.rllib.models import ModelCatalog
import pickle
import random
import json
# All data paths in this notebook are resolved relative to the current working directory.
example_path = os.getcwd()

Again, we first test the Grid2Op environment. Additionally, we already initialize Ray via `ray.init()`.

In [4]:
# Build the Grid2Op environment once to verify that the data set and the
# LightSim backend can be loaded.
backend = LightSimBackend()
env_path = test_data_path = Path(example_path) / "data" / "training_data_track1"
print(env_path)
env = grid2op.make(env_path, backend=backend)
env.set_id(1)
env.reset()
env.chronics_handler.get_name()
# Keep one observation around; it is reused further below to test the model.
obs = env.get_obs()

# `ray.is_initialized` is a function and has to be called. The bare function
# object is always truthy, so without the call the check would be meaningless.
if ray.is_initialized():
    ray.shutdown()
ray.init()

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

2023-05-22 21:53:02,962	INFO worker.py:1528 -- Started a local Ray instance.


0,1
Python version:,3.9.16
Ray version:,2.1.0


## Training based on the more complex Junior model.

In this example, we show the implementation of the hyper-parameter-optimized Junior model by using the
```AdvancedCustomModel```. Note that one can simply use the ```Grid2OpCustomModel``` if the simple Junior was trained.

In [5]:
# Load the pickled scaler stored next to the other example data.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
scaler_file = Path(example_path) / "data" / "scaler_junior.pkl"
with scaler_file.open("rb") as scaler_handle:
    scaler = pickle.load(scaler_handle)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Because Ray workers sometimes have trouble finding the scaler, we advise using ```ray.put()``` and ```ray.get()``` to share it.

In [6]:
# Hand the scaler to Ray via ray.put() and keep the returned reference;
# the reference (not the scaler itself) is passed on in the env config below.
ray_scal = ray.put(scaler)

Now we have to initialize both the config of the environment, as well as of the model

In [13]:
# Locations of the artefacts inside the example's data directory.
data_dir = Path(example_path) / "data"
action_path1 = data_dir / "actionspace_nminus1.npy"
action_path2 = data_dir / "actionspace_tuples.npy"
action_set = [action_path1, action_path2]
model_path = data_dir / "junior_model"
data_path = data_dir / "junior_experience"
target = model_path / "hyperparam"
hyperparam_path = data_dir / "junior_best_params.json"

# Read the Junior hyper-parameters from disk.
with open(hyperparam_path) as json_file:
    best_params = json.load(json_file)

# Set the epoch count and the kernel initializer object, then round the
# layer sizes.
best_params.update(
    epochs=1000,
    initializer=tf.keras.initializers.Orthogonal(),
)
for layer_key in ("layer1", "layer2", "layer3", "layer4"):
    best_params[layer_key] = np.round(best_params[layer_key])

print(best_params)

# Configuration handed to the Senior environment.
env_config = {
    "action_space_path": action_set,
    "env_path": env_path,
    "action_threshold": 0.95,
    'filtered_obs': True,
    'scaler': ray_scal,
}

# Configuration handed to the custom model.
model_config = {
    "path_to_junior": target,
    "custom_config": best_params,
}

{'activation': 'relu', 'batchsize': 256, 'dropout1': 0.19492379978847293, 'dropout2': 0.1259373123876904, 'initializer': <keras.initializers.initializers_v2.Orthogonal object at 0x0000014E9E2F70A0>, 'layer1': 1415.0, 'layer2': 1252.0, 'layer3': 1485.0, 'layer4': 232.0, 'learning_rate': 0.0003013467240068926, 'TRIAL_BUDGET': 11, 'epochs': 1000}


Testing whether we can load the model. Then, we initialize the model config

In [14]:
from curriculumagent.senior.rllib_execution.senior_env_rllib import SeniorEnvRllib
from curriculumagent.senior.rllib_execution.senior_model_rllib import Grid2OpCustomModel

## Testing both the environment and the model

Skip these cells if you want to start directly with the Ray optimization.

Note that you can also use these cells to check whether your scaler works.

In [15]:
# Test Environment:
env =  SeniorEnvRllib(env_config)
print(env.action_space.n)
print(env.observation_space.shape)

# Run "simulation":
done = False
while done==False:
    act = random.choice(np.arange(env.action_space.n))
    obs,b,done,d = env.step(act)
    print(act,done,b,env.step_in_env,max(obs),min(obs),env.single_env.nb_time_step)

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.



AttributeError: 'SeniorEnvRllib' object has no attribute 'actions'

model:

In [None]:
# Test Model:
model = Grid2OpCustomModel(obs_space = env.observation_space,
                           action_space = env.action_space,
                           num_outputs = env.action_space.n,
                           model_config = {},
                           path_to_junior =target,
                           custom_config = best_params,
                           name="Junior")
print(model.base_model.summary())
obs_dict = {"obs": obs.reshape(1,-1)}
out = model.forward(input_dict = obs_dict, state=1, seq_lens=None)
out

## Run Rllib experiment to collect the Senior Model

We run the senior experiment. Please adjust the number of cpus, gpus and workers based on your computational power.

In [None]:
# Register the model
ModelCatalog.register_custom_model('Senior',  Grid2OpCustomModel)

In [None]:
# Use all CPUs but one for rollout workers. `os.cpu_count()` may return None
# when the count cannot be determined, so fall back to 1 and never go below 0.
NUM_WORKER = max((os.cpu_count() or 1) - 1, 0)

In [None]:
# Population Based Training scheduler: every `perturbation_interval` training
# iterations the hyper-parameters listed below may be perturbed or resampled.
# NOTE(review): with num_samples=1 below there is only a single trial, so there
# is no population to exploit - consider raising num_samples.
pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=50,
        resample_probability=0.5,
        # Specifies the mutations of these hyperparams
        hyperparam_mutations={
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "num_sgd_iter": lambda: random.randint(3, 10),
            "vf_loss_coeff": lambda: random.uniform(0.5,1),
            "clip_param": lambda: random.uniform(0.01, 0.5),
            "gamma": lambda: random.uniform(0.975, 1),
            "entropy_coeff": lambda: 10**-random.uniform(2,5)
        })

# Run PPO on the Senior environment. Checkpoints are written every 5 iterations
# to ~/ray_results and the run stops after 100 training iterations.
tune.run(
    "PPO",
    checkpoint_freq=5,
    scheduler= pbt,
    keep_checkpoints_num =30, 
    verbose = 1,
    max_failures=3, 
    num_samples=1,
    local_dir="~/ray_results",
    stop={"training_iteration": 100},
    config={
        "env":  SeniorEnvRllib,
        "env_config":env_config,
        "num_workers":NUM_WORKER,
        "num_envs_per_worker": 1,
        "lr": 5e-5,
        "num_gpus":0.0,
        "num_cpus_per_worker": 1,
        "remote_worker_envs": False,
        # Must be the name passed to ModelCatalog.register_custom_model ('Senior');
        # an unregistered name cannot be resolved by RLlib.
        "model":{"custom_model":  "Senior","custom_model_config": model_config},
    },
)

In [None]:
# Sanity check: the scaler handle should be a Ray ObjectRef (as returned by ray.put).
isinstance(ray_scal,ray._raylet.ObjectRef)

## Select and convert checkpoint

After the training is completed, one has to collect the saved model.

Note, you have to specify the correct checkpoint dir in ```"~/ray_results"```

In [None]:
from curriculumagent.senior.rllib_execution.convert_rllib_ckpt import load_config,load_and_save_model

In [None]:
# Example trial directory - replace it with the directory of your own run.
# NOTE(review): this is an absolute "/ray_results/..." path, whereas training
# writes to "~/ray_results"; adjust accordingly.
example_dir = "/ray_results/PPO/PPO_SeniorEnvRllib_6c729_00001_1_2022-04-06_15-28-19"
# Checkpoint file inside the trial directory (checkpoint number 675 in this example).
ckpt_path = Path(example_dir) / "checkpoint_000675"/"checkpoint-675"
# Target directory for the exported Senior model.
save_path =  Path(example_path) /"senior_model"

First we load the config of the ray experiment and overwrite the class values with the actual classes:

In [None]:
# Load the experiment config that belongs to the checkpoint.
config = load_config(ckpt_path)
# Put the live objects back in place of whatever was stored in the config.
config['env_config'] = env_config
# `model_config` already has the layout of "custom_model_config"
# ({"path_to_junior": ..., "custom_config": best_params}), exactly as it was
# passed to tune.run. Assigning it to the nested "custom_config" key would
# wrap the hyper-parameters one level too deep.
config["model"]['custom_model_config'] = model_config

Now run and save the model

In [None]:
# Restore the checkpoint with the patched config and write the model to `save_path`.
# `ckpt_nr` should correspond to the checkpoint number used in `ckpt_path` (675 here).
load_and_save_model(ckpt_path = ckpt_path,config =config, save_path = save_path, ckpt_nr = 675)

## Submission:

In order to correctly submit a working agent, the tensorflow model now only needs to be placed in a submission
folder, together with the action set. An example can be found in ```my_agent_advanced.py```