In [None]:
# importing the necessary libraries
import gym
import myosuite
from stable_baselines3 import PPO
from gym.envs.registration import register
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from time import sleep

import os  # only needed for the optional model-path override below

# Run on the first CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# environment registration
ENV_NAME = 'Reach-v0'

# The location of the MuJoCo model file is machine-specific. Set the
# MYOFINGER_MODEL_PATH environment variable to point at a local copy of the XML;
# when it is unset (or empty) the original hard-coded location is used unchanged.
MODEL_PATH = os.environ.get('MYOFINGER_MODEL_PATH') or \
    'C:/Personal/Second year/Sem 2/Project Course/Submission/finger/myofinger_v0.xml'

# Coordinates passed as both bounds of the 'IFtip' reach range below.
# NOTE(review): identical bounds presumably pin the target to one fixed point - confirm
# against the ReachEnvV0 documentation.
IFTIP_TARGET = (0.2, 0.05, 0.20)

register(id=ENV_NAME,
        entry_point='myosuite.envs.myo.myobase.reach_v0:ReachEnvV0',
        max_episode_steps=200,
        kwargs={
            'model_path': MODEL_PATH,
            'target_reach_range': {'IFtip': (IFTIP_TARGET, IFTIP_TARGET)},
            'normalize_act': True
        }
)

In [None]:
# initialising the environment
# Use the ENV_NAME constant (the id passed to register()) rather than repeating the
# string literal, so the registered id and the created environments cannot drift apart.
env = gym.make(ENV_NAME, seed=1)
obs = env.reset()

# initialising the evaluation environment
# A second, independent instance so that periodic evaluation does not disturb the
# state of the training environment.
eval_env = gym.make(ENV_NAME, seed=1)
eval_env.reset()

# Callback to test the performance
# Evaluation results and the best model seen so far are both written under ./logs/.
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
                             log_path='./logs/', eval_freq=500,
                             deterministic=True, render=False)

In [None]:
# Per-axis trajectory buffers (x, y, z). Each list starts with a single seed value
# and is extended with one entry per simulation step later in the notebook.
observations_x, observations_y, observations_z = [0.2], [0.05], [0.20]

In [None]:
# RL model
# Train a PPO agent on the reach environment, evaluating periodically via eval_callback.
model = PPO("MlpPolicy", env, verbose=1, device=device)
model.learn(total_timesteps=100000, callback=eval_callback)
model.save("myofinger_model")
# PPO.load is a classmethod that RETURNS a new model; calling it on the instance and
# discarding the result (as before) loads nothing. Rebind `model` to the loaded agent.
model = PPO.load("myofinger_model", device=device)

# Make the cell safe to re-run: keep only the initial entry of each trajectory list so
# that repeated executions do not concatenate several rollouts into one trace.
del observations_x[1:]
del observations_y[1:]
del observations_z[1:]

# Training leaves the environment part-way through an episode, and any observation
# obtained before training is stale. Start the rollout from a fresh episode.
obs = env.reset()

# loop to carry out the simulation
done = False
while not done:
    act, _ = model.predict(obs)
    sleep(0.1)  # slow the loop down so the rendered motion can be followed
    obs, reward, done, info = env.step(act)
    # Look the site up once per step and record its three position components.
    tip_pos = env.sim.data.site('IFtip').xpos
    observations_x.append(tip_pos[0])
    observations_y.append(tip_pos[1])
    observations_z.append(tip_pos[2])
    print(done, act, obs)
    env.mj_render()

In [None]:
# Plot the recorded X values: the first stored entry as a standalone marker,
# then the full sequence as a line on the same axes.
ax = plt.gca()
ax.plot(observations_x[0], 'o')
ax.plot(observations_x)
ax.set_title('X coordinate')

In [None]:
# Plot the recorded Y values: the first stored entry as a standalone marker,
# then the full sequence as a line on the same axes.
ax = plt.gca()
ax.plot(observations_y[0], 'o')
ax.plot(observations_y)
ax.set_title('Y coordinate')

In [None]:
# Plot the recorded Z values: the first stored entry as a standalone marker,
# then the full sequence as a line on the same axes.
ax = plt.gca()
ax.plot(observations_z[0], 'o')
ax.plot(observations_z)
ax.set_title('Z coordinate')