In [None]:
from satellite_gym.envs.satellite_env import SatelliteEnv
# from satellite_gym.envs.satellite_env import SatelliteEnvV0 as SatelliteEnv

In [None]:
from sklearn.preprocessing import MinMaxScaler
from satellite_gym.envs.satellite_env.satellite_env import TRAIN_COLUMNS, TEST_COLUMNS

from pathlib import Path
import pandas as pd
from random import randint
import numpy as np

COLUMNS = ['id', 'sat_id', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz', 'x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']
SATELLITES_NUM = 300

# Read only the columns listed above, using `id` as the index.
df = pd.read_csv(Path('./data/train.csv'), index_col='id', usecols=COLUMNS, dtype=np.float64)

# Euclidean norms of the simulated position and velocity vectors.
# Computed on whole columns at once: the arithmetic is the same as the former
# row-wise `df.apply(..., axis=1)`, without a Python call per row.
df["orbit_sim"] = np.sqrt(df['x_sim']**2 + df['y_sim']**2 + df['z_sim']**2)
df["V_sim"] = np.sqrt(df['Vx_sim']**2 + df['Vy_sim']**2 + df['Vz_sim']**2)

columns = TEST_COLUMNS + TRAIN_COLUMNS

# Rescale every selected column to the range [-10, 10]. The scaler is fitted
# on the full frame (all satellites), not on a single satellite.
scaler = MinMaxScaler((-10,10))
df[columns] = scaler.fit_transform(df[columns])

# NOTE(review): random.randint includes both end points, so the line below
# could also return SATELLITES_NUM itself — confirm the valid id range
# before re-enabling it.
# sat_id = randint(0, SATELLITES_NUM)
sat_id = 41
# df = df[df['sat_id'] == sat_id]  # take random satellite
# df.drop('sat_id', inplace=True, axis=1)

# df.reset_index(drop=True, inplace=True)

In [None]:
# Display the satellite id selected in the data-loading cell.
sat_id

In [None]:
# Summary statistics of the frame after scaling.
df.describe()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d, Axes3D #<-- Note the capitalization! 
from satellite_gym.envs.satellite_env.satellite_env import TRAIN_COLUMNS, TEST_COLUMNS

env = SatelliteEnv(df)
COLS = ['x_sim', 'y_sim', 'z_sim']  # NOTE(review): not referenced in this cell

arr1 = env.df[TRAIN_COLUMNS].values
arr2 = env.df[TEST_COLUMNS].values

fig = plt.figure()
# Create the 3-D axes through the figure. Instantiating `Axes3D(fig)` directly
# does not attach the axes to the figure on current Matplotlib releases, which
# leaves the plot empty.
ax = fig.add_subplot(111, projection='3d')
# The first three columns of each array are used as the x / y / z coordinates.
ax.scatter(arr1[:, 0], arr1[:, 1], arr1[:, 2], marker='o')
ax.scatter(arr2[:, 0], arr2[:, 1], arr2[:, 2], marker='^')

plt.show()

In [None]:
import ray

# Start Ray with 12 CPUs and 1 GPU; `memory` and `object_store_memory`
# are both set to 20 * 1024**3.
ray.init(
    num_cpus=12,
    num_gpus=1,
    memory=20 * 1024 ** 3,
    object_store_memory=20 * 1024 ** 3,
)

In [None]:
from ray.tune.registry import register_env

# Instance kept in the notebook namespace for the inspection cells below.
env = SatelliteEnv(df, sat_id=sat_id)
# The creator is called with an env config each time RLlib needs an
# environment, so build a new SatelliteEnv per call instead of handing out the
# single instance above. `df` and `sat_id` are bound as default arguments so a
# later rebinding of those notebook globals cannot change what gets built.
register_env(
    "SatelliteEnv-v2",
    lambda env_config, _df=df, _sat_id=sat_id: SatelliteEnv(_df, sat_id=_sat_id),
)

In [None]:
# Show the DataFrame held by the environment.
env.df

In [None]:
import ray.rllib.agents.a3c as a3c
from ray.tune.logger import pretty_print

config = a3c.DEFAULT_CONFIG.copy()
# `.copy()` is shallow: give this config its own "model" dict so that enabling
# the LSTM does not also modify a3c.DEFAULT_CONFIG["model"].
config["model"] = dict(config["model"], use_lstm=True)
config["num_gpus"] = 1
config["num_workers"] = 9
config["eager"] = False
trainer = a3c.A3CTrainer(config=config, env="SatelliteEnv-v2")

num_iterations = 200
for i in range(num_iterations):
    # Perform one iteration of training the policy with A3C
    result = trainer.train()
    print(pretty_print(result))

    # Checkpoint every 100 iterations, and also after the last one so the
    # final state of the run is not lost.
    if i % 100 == 0 or i == num_iterations - 1:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)

In [None]:
import ray.rllib.agents.ppo as ppo
from ray.tune.logger import pretty_print


def on_train_result(info):
    """Pick a phase from the latest mean episode reward and push it to every env.

    `info["result"]["episode_reward_mean"]` selects the phase: above 44 -> 2,
    above 22 -> 1, anything else -> 0. The phase is then passed to
    `set_phase` on each environment of each worker in
    `info["trainer"].workers`.
    """
    mean_reward = info["result"]["episode_reward_mean"]
    phase = 2 if mean_reward > 44 else (1 if mean_reward > 22 else 0)

    def apply_to_env(env):
        return env.set_phase(phase)

    def apply_to_worker(worker):
        return worker.foreach_env(apply_to_env)

    info["trainer"].workers.foreach_worker(apply_to_worker)

config = ppo.DEFAULT_CONFIG.copy()
# `.copy()` is shallow: build a separate "model" dict so these overrides do
# not also modify ppo.DEFAULT_CONFIG["model"].
config["model"] = dict(config["model"], use_lstm=True, vf_share_layers=True)
# config["lr"] = 0.001
# config["gamma"] = 0.9
# config["lambda"] = 0.9
# config["num_gpus"] = 1
config["num_workers"] = 5
config["num_cpus_per_worker"] = 2
config["num_gpus_per_worker"] = .2
config["seed"] = 0
config["eager"] = False
# config["vf_clip_param"] = 1000.0
# config["vf_loss_coeff"] = 0.
# config["sample_batch_size"] = 200
# config["train_batch_size"] = 1000
# config["batch_mode"] = "complete_episodes"
# config["shuffle_sequences"] = False
# config["entropy_coeff"] = .5
# config["kl_coeff"] = .5
# After each training result, on_train_result pushes a phase to every env.
config["callbacks"] = { "on_train_result": on_train_result }

trainer = ppo.PPOTrainer(config=config, env="SatelliteEnv-v2")


# 401 iterations so that the `i % 100 == 0` check also fires on the last one.
for i in range(401):
    # Perform one iteration of training the policy with PPO
    result = trainer.train()
    print(pretty_print(result))

    if i % 100 == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)

In [None]:
from ray import tune
import ray.rllib.agents.ppo as ppo
from ray.tune.schedulers import AsyncHyperBandScheduler
import numpy as np

config = ppo.DEFAULT_CONFIG.copy()
# `.copy()` is shallow: build a separate "model" dict so these overrides do
# not also modify ppo.DEFAULT_CONFIG["model"].
config["model"] = dict(config["model"], use_lstm=True, vf_share_layers=True)

# Asynchronous HyperBand scheduler for the search below, maximising
# `episode_reward_mean` with `training_iteration` as the time axis.
async_hb_scheduler = AsyncHyperBandScheduler(
    time_attr='training_iteration',
    metric='episode_reward_mean',
    mode='max',
    max_t=200,
    grace_period=10,
    reduction_factor=3,
    brackets=3
)


def train(config, reporter):
    """Trainable for Tune: train PPO indefinitely, reporting every result.

    After each iteration the full result dict is forwarded to `reporter`,
    then a phase is derived from `episode_reward_mean` (above 44 -> 2,
    above 22 -> 1, otherwise 0) and passed to `set_phase` on every env of
    every worker. The loop has no exit of its own.
    """
    trainer = ppo.PPOTrainer(config=config, env="SatelliteEnv-v2")

    def broadcast_phase(phase):
        def per_worker(worker):
            return worker.foreach_env(lambda env: env.set_phase(phase))

        trainer.workers.foreach_worker(per_worker)

    while True:
        result = trainer.train()
        reporter(**result)
        mean_reward = result["episode_reward_mean"]
        broadcast_phase(2 if mean_reward > 44 else (1 if mean_reward > 22 else 0))


# Search space: grid over learning rate, discount factor, batch mode and
# sequence shuffling; the remaining keys are fixed for every trial.
search_space = {
    "env": "SatelliteEnv-v2",
#     "num_gpus": .2,
    "num_workers": 2,
#     "num_gpus_per_worker": .05,
#     "num_cpus_per_worker": .2,
#     "vf_clip_param": 1000.0,
#     "sample_batch_size": tune.grid_search([200, 400, 1000]),
#     "train_batch_size": tune.grid_search([1000, 2000]),
    "lr": tune.grid_search([0.01, 0.001, 0.0001, 0.00005]),
    "gamma": tune.grid_search(list(np.linspace(0.9, 0.99, 3))),
#     "lambda": tune.grid_search(list(np.linspace(0.9, 0.99, 3))),
#     "vf_loss_coeff": tune.grid_search(list(np.linspace(0, 1, 5))),
#     "kl_coeff": tune.grid_search([0.1, .2, .5]),
#     "entropy_coeff": tune.grid_search([0., .5, 1]),
    "eager": False,
    "seed": 0,
    "batch_mode": tune.grid_search(["truncate_episodes", "complete_episodes"]),
    "shuffle_sequences": tune.grid_search([False, True]),
    "model": config["model"],
}

# Resources requested for each trial.
trial_resources = {
    "cpu": 1,
    "gpu": .2,
    "extra_cpu": 1,
}

tune.run(
    train,
    stop={"training_iteration": 200},
    config=search_space,
    scheduler=async_hb_scheduler,
    resources_per_trial=trial_resources,
)

In [None]:
from ray.tune import Analysis
import pandas as pd
# Open the Tune results stored under ~/ray_results/PPO.
analysis = Analysis("~/ray_results/PPO")
# Let pandas show up to 100 rows before truncating the display.
pd.set_option('display.max_rows', 100)


In [None]:
# NOTE(review): this rebinds `df`, which held the satellite data until now;
# later cells that use `df` see the Tune results frame instead.
df = analysis.dataframe(metric="episode_reward_mean", mode="max")
# Experiment tag of the row with the highest episode_reward_mean.
df.sort_values("episode_reward_mean", ascending=False)["experiment_tag"].iloc[0]

In [None]:
# Experiment tag of the row with the second-highest episode_reward_mean.
df.sort_values("episode_reward_mean", ascending=False)["experiment_tag"].iloc[1]

In [None]:
# Experiment tag of the row with the third-highest episode_reward_mean.
df.sort_values("episode_reward_mean", ascending=False)["experiment_tag"].iloc[2]

In [None]:
# Experiment tag of the row with the fourth-highest episode_reward_mean.
df.sort_values("episode_reward_mean", ascending=False)["experiment_tag"].iloc[3]

In [None]:
# Shut Ray down.
# NOTE(review): cells further down still call methods on `trainer` — confirm
# they are meant to run after the shutdown.
ray.shutdown()

In [None]:
# NOTE(review): when the notebook is run top to bottom, `df` here is the Tune
# results frame assigned from analysis.dataframe(...), not the satellite data —
# confirm that this is the frame meant to be saved.
df.to_csv('trained_example.csv')

In [None]:
from pathlib import Path

# Resolve the checkpoint under the current user's home directory instead of a
# hard-coded /home/<user> path, so the cell is not tied to one account.
checkpoint_path = (
    Path.home()
    / 'ray_results'
    / 'PPO_SatelliteEnv-v2_2020-01-23_12-33-45fmpi6yi3'
    / 'checkpoint_2001'
    / 'checkpoint_2001'
)
trainer.restore(str(checkpoint_path))

In [None]:
# Export the trainer's policy model to 'trained_model_v1'.
trainer.get_policy().export_model('trained_model_v1')


In [None]:
# Peek at the first rows of `df`.
# NOTE(review): `df` was rebound to the Tune results frame in an earlier cell.
df.head()

In [None]:
# Display the currently selected satellite id.
sat_id

In [None]:
# Reset the environment and keep the value it returns.
state = env.reset()

In [None]:
import numpy as np
# obs = np.zeros((256), dtype=np.float32)
# First 256 rows of the environment frame as an array, with any length-1
# axes removed.
first_rows = env.df.iloc[:256]
obs = first_rows.values.squeeze()

In [None]:
# Shape of the first row of `df` once length-1 axes are squeezed out.
df.head(1).values.squeeze().shape

In [None]:
# Shape of the first row of the environment frame once length-1 axes are
# squeezed out.
env.df.head(1).values.squeeze().shape

In [None]:
from satellite_gym.envs.satellite_env.satellite_env import TRAIN_COLUMNS, TEST_COLUMNS
# Ask the policy for one action on the first TRAIN_COLUMNS row, starting from
# the policy's initial state.
policy = trainer.get_policy()
first_obs = np.squeeze(env.df[TRAIN_COLUMNS].head(1).values)
policy.compute_single_action(obs=first_obs, state=policy.get_initial_state())

In [None]:
true_value = env.df[['Vx', 'Vy', 'Vz']].values

policy = trainer.get_policy()
state = policy.get_initial_state()
# Select the input columns once instead of on every iteration.
train_features = env.df[TRAIN_COLUMNS]
# Collect predictions in a list and build the array once at the end. The
# previous `np.empty((1, 3))` seed left an uninitialised first row in the
# result, and `np.append` inside the loop re-copied the array on every step.
predicted_rows = []
for i in range(len(true_value)):
#     if len(predicted_value) == 1:
#         predicted_value = np.array([env.df[['Vx_sim', 'Vy_sim', 'Vz_sim']].loc[i].values])
    # NOTE(review): `.loc[i]` is label-based, so this assumes the frame's
    # index labels are 0..n-1 — confirm against how the env builds `df`.
    val = policy.compute_single_action(np.squeeze(train_features.loc[i].values), state=state)
    # val[1] is fed back as the state for the next step.
    state = val[1]
    # Keep the action components from index 3 onward as the prediction.
    predicted_rows.append(val[0][3:])

# One row per step; float64 matches what the previous np.append-based
# version produced.
if predicted_rows:
    predicted_value = np.array(predicted_rows, dtype=np.float64)
else:
    predicted_value = np.empty((0, 3))

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d, Axes3D #<-- Note the capitalization! 


fig = plt.figure()
# Create the 3-D axes through the figure. Instantiating `Axes3D(fig)` directly
# does not attach the axes to the figure on current Matplotlib releases, which
# leaves the plot empty.
ax = fig.add_subplot(111, projection='3d')
# Circles: values taken from the data; triangles: values from the policy.
ax.scatter(true_value[:, 0], true_value[:, 1], true_value[:, 2], marker='o')
ax.scatter(predicted_value[:, 0], predicted_value[:, 1], predicted_value[:, 2], marker='^')
ax.view_init(elev=10., azim=20)
plt.show()
# for ii in range(0,360,1):
#         ax.view_init(elev=10., azim=ii)
#         fig.savefig("movie/movie%d.png" % ii)


#ax + by + cz + d = 0

In [None]:
# Number of rows in `true_value`.
len(true_value)

In [None]:
# `go` is not imported anywhere in the visible notebook, so this cell raised
# NameError; bring it in here.
import plotly.graph_objects as go

# Pass each coordinate as a 1-D column rather than an (n, 1) slice.
fig = go.Figure(data=[go.Scatter3d(x=true_value[:, 0], y=true_value[:, 1], z=true_value[:, 2], mode='markers')])
# NOTE(review): static image export needs an export engine installed
# alongside plotly (kaleido or orca) — confirm one is available.
fig.write_image("figure.png")