<a href="https://colab.research.google.com/github/Roniebin/Armigo/blob/mingyu/Panda_Robot_DDPG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Robot Reacher Task using DDPG

Robot Environment: https://panda-gym.readthedocs.io/en/latest/

# Utils

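Helper functions for rendering the simulation without a physical display and viewing the recorded frames as an animation inside the notebook.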

In [None]:
# Install the apt packages python-opengl and ffmpeg; stdout and stderr are discarded.
!apt install -y python-opengl ffmpeg > /dev/null 2>&1
# Install the Python dependencies. Only panda-gym is pinned.
# NOTE(review): stable_baselines3 and sb3_contrib are unpinned. The recorded output of this
# cell shows 2.3.0 being resolved, which depends on gymnasium, while the notebook imports
# `gym` -- confirm the resolved versions are compatible and pin them.
%pip install pyvirtualdisplay panda-gym==2.0.0 stable_baselines3 sb3_contrib

Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Collecting panda-gym==2.0.0
  Downloading panda_gym-2.0.0-py3-none-any.whl (26 kB)
Collecting stable_baselines3
  Downloading stable_baselines3-2.3.0-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.1/182.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sb3_contrib
  Downloading sb3_contrib-2.3.0-py3-none-any.whl (80 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.3/80.3 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting pybullet (from panda-gym==2.0.0)
  Downloading pybullet-3.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (103.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.2/103.2 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Collecting gymnasium<0.30,>=0.28.1 (from stable_baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━

In [None]:
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1024, 768))
display.start()


from matplotlib import pyplot as plt, animation
%matplotlib inline
from IPython import display

def create_anim(frames, dpi, fps):
    """Build a matplotlib animation that plays ``frames`` one image at a time.

    Args:
        frames: Non-empty sequence of images. Each item must expose ``.shape``
            with (height, width, ...) as its first two entries and be
            accepted by ``imshow``.
        dpi: Dots per inch of the figure. The figure is sized so that one
            image pixel maps to one figure pixel at this dpi.
        fps: Playback speed in frames per second. Must be positive.

    Returns:
        The ``animation.FuncAnimation`` object.

    Raises:
        ValueError: If ``frames`` is empty or ``fps`` is not positive.
    """
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")
    if fps <= 0:
        raise ValueError("fps must be a positive number")

    height, width = frames[0].shape[:2]
    fig = plt.figure(figsize=(width / dpi, height / dpi), dpi=dpi)
    # Hold an explicit axes reference: the callbacks below run later, when the
    # "current" pyplot figure may no longer be this one.
    ax = fig.gca()
    patch = ax.imshow(frames[0])

    def setup():
        ax.axis('off')

    def animate(i):
        patch.set_data(frames[i])

    # FuncAnimation's `interval` is the delay between frames in milliseconds,
    # not a frame rate, so convert from frames per second.
    anim = animation.FuncAnimation(fig, animate, init_func=setup,
                                   frames=len(frames), interval=1000.0 / fps)
    # Deregister the figure from pyplot so repeated calls do not accumulate
    # open figures and the inline backend does not also draw a static copy.
    # The animation keeps its own reference to the figure for rendering.
    plt.close(fig)
    return anim

def display_anim(frames, dpi=72, fps=50):
    """Turn ``frames`` into an animation and return its ``to_jshtml()`` markup.

    Args:
        frames: Sequence of images, forwarded to ``create_anim``.
        dpi: Figure resolution, forwarded to ``create_anim``.
        fps: Playback speed argument, forwarded to ``create_anim``.

    Returns:
        The value produced by calling ``to_jshtml()`` on the animation; the
        notebook passes it to ``display.HTML`` for rendering.
    """
    return create_anim(frames, dpi, fps).to_jshtml()

def save_anim(frames, filename, dpi=72, fps=50):
    """Render ``frames`` as an animation and write it to ``filename``.

    Args:
        frames: Sequence of images, forwarded to ``create_anim``.
        filename: Output path handed to the animation's ``save`` method.
        dpi: Figure resolution, forwarded to ``create_anim``.
        fps: Frame rate of the written file, in frames per second.

    Returns:
        None.
    """
    anim = create_anim(frames, dpi, fps)
    # Pass fps explicitly. Without it, `save` derives the frame rate from the
    # animation's interval, so the file would not necessarily play at the
    # rate the caller asked for.
    anim.save(filename, fps=fps)


class trigger:
    """Callable switch that returns a stored value regardless of its argument.

    A new instance starts with the value ``True``; ``set`` replaces it.
    """

    def __init__(self):
        # Initial state: calling the instance yields True until set() is used.
        self._trigger = True

    def __call__(self, e):
        """Return the stored value; ``e`` is accepted but not used.

        NOTE(review): ``e`` presumably exists so the object can serve as a
        one-argument callback (e.g. keyed by episode) -- confirm with callers.
        """
        return self._trigger

    def set(self, t):
        """Replace the stored value with ``t``."""
        self._trigger = t

# Robot Environment

In [None]:
import gym
# Imported alongside gym before the Panda environment id is requested below;
# presumably this import registers the Panda environments -- TODO confirm.
import panda_gym
import pprint

# Environment instance used by the scripted demo cells that follow.
env = gym.make('PandaReach-v2')

In [None]:
# Reset the environment and print the initial observation to inspect its structure.
obs = env.reset()
print(obs)

In [None]:
# Rendered images from one episode driven by a hand-written controller.
frames = []

obs = env.reset()
done = False

while not done:
    # Use the first three entries of the observation and of the desired goal.
    # Presumably these are the current and target xyz positions -- TODO confirm.
    current_position = obs["observation"][0:3]
    desired_position = obs["desired_goal"][0:3]
    # Action proportional to the remaining offset, with a fixed gain of 5.0.
    action = 5.0 * (desired_position - current_position)
    # The frame is captured before stepping, so it shows the state the action was computed from.
    frames.append(env.render(mode = 'rgb_array'))
    obs, reward, done, info = env.step(action)

# This env is closed here; later cells that need an environment must create a new one.
env.close()

In [None]:
# Build the animation markup from the collected frames and render it as the cell output.
display.HTML(display_anim(frames))

# DDPG - Deep Deterministic Policy Gradient

## Training

In [None]:
import gym
import panda_gym
import numpy as np
from stable_baselines3 import DDPG, HerReplayBuffer
from stable_baselines3.common.noise import NormalActionNoise
from sb3_contrib.common.wrappers import TimeFeatureWrapper

# Keyword arguments forwarded to HerReplayBuffer.
# `online_sampling` is deliberately not passed. Releases that had the parameter
# defaulted it to True (the value used here before), and the refactored buffer
# no longer accepts it, so omitting it keeps the same behavior everywhere.
# NOTE(review): confirm against the installed stable_baselines3 version.
rb_kwargs = {'goal_selection_strategy' : 'future',
             'n_sampled_goal' : 4}

# Network layout for the policy: three hidden layers of 512 units, two critics.
policy_kwargs = {'net_arch' : [512, 512, 512],
                 'n_critics' : 2}

# Create the training environment first, so nothing below depends on the
# `env` left over (and already closed) from an earlier cell.
env = gym.make("PandaReach-v2")
env = TimeFeatureWrapper(env)

# Gaussian exploration noise with zero mean and sigma 0.1 per action dimension,
# sized from the environment that is actually being trained on.
n_actions = env.action_space.shape[0]
noise = NormalActionNoise(mean = np.zeros(n_actions), sigma = 0.1 * np.ones(n_actions))

model = DDPG(policy="MultiInputPolicy", env=env, replay_buffer_class=HerReplayBuffer, verbose=1,
             gamma = 0.95, batch_size= 2048, buffer_size=100000, replay_buffer_kwargs = rb_kwargs,
             learning_rate = 1e-3, action_noise = noise, policy_kwargs = policy_kwargs)
# The step budget is an integer count; 1e6 on its own is a float.
model.learn(int(1e6))
# NOTE(review): the path says "pick_place" although the environment is PandaReach.
# It is left unchanged because the testing cell loads from this same path.
model.save('pick_place/model')

## Testing

In [None]:
# Load the model from the path the training cell saved to, attaching the current env.
model = DDPG.load("pick_place/model", env = env)

# Rendered images accumulated across 10 evaluation episodes.
frames = []
for _ in range(10):
    done = False
    observation = env.reset()
    while not done:
        # Ask for the deterministic action.
        action, _states = model.predict(observation, deterministic = True)
        observation, reward, done, info = env.step(action)
        # Rendering happens after the step, so the state right after reset() is not recorded.
        frame = env.render(mode='rgb_array')
        frames.append(frame)

env.close()

In [None]:
# Build the animation markup from the evaluation frames and render it as the cell output.
display.HTML(display_anim(frames))