# Deep Reinforcement Learning for IoT Interoperability

---

## Imports

In [None]:
import warnings
warnings.filterwarnings('ignore') 
import gym
from gym.envs.registration import register
from src.PVGBaseline import PVGBaseline
from src.PVGBaselineClassic import PVGBaselineClassic
from wrappers import GloveObsWrapper, RestfulCartPoleV0Wrapper, FloorObsWrapper, TokenizeObsWrapper,RestfulAcrobotV1WrapperV1,Acrobot_v1_reward_modified

## Prepare Environment and Model

There are two simple policy gradient algorithms provided to train the extended OpenAI Gym environments (CartPole-v0, Acrobot-v1).
1. **PVGBaseline** to train on Gym environments encoded to produce IoT-like observations
2. **PVGBaselineClassic** to train on native Gym environments


Parameters used for training:

In [None]:
# Hyperparameters shared by every training cell in this notebook; each one is
# forwarded unchanged as the keyword argument of the same name to model.train().
lr = 0.0001         # learning rate
epochs = 500        # number of training epochs
batch_size = 15000  # NOTE(review): unit (env steps vs. episodes) is defined by the model's train() — confirm there

## Environment registration

Modified max_episode_steps for CartPole-v0

In [None]:
# Register gym's CartPoleEnv class under the new id "CartPole-mod-v0" with an
# episode cap of 500 steps. Every gym.make("CartPole-mod-v0") call in this
# notebook resolves to this spec.
# NOTE(review): reward_threshold is 195.0 while the step cap is 500 — confirm
# this is the intended threshold for the longer episodes.
register(
    id="CartPole-mod-v0",
    entry_point="gym.envs.classic_control:CartPoleEnv",
    reward_threshold=195.0,
    max_episode_steps=500,
)

Modified reward function for Acrobot-v1

In [None]:
# Register the project's Acrobot_v1_reward_modified class (resolved from the
# `wrappers` module via the entry-point string) under the id "Acrobot-mod-v1".
# Every gym.make("Acrobot-mod-v1") call in this notebook resolves to this spec.
register(
    id="Acrobot-mod-v1",
    entry_point="wrappers:Acrobot_v1_reward_modified",
    max_episode_steps=500,
    reward_threshold=500.0,
)

---


## IoT CartPole-v0 JSON
Initialize PVG Baseline model

In [None]:
# New PVGBaseline instance for the CartPole JSON run, built with the class's
# default constructor arguments.
model = PVGBaseline()

Create encoded environment

In [None]:
# Build the encoded CartPole environment by stacking the wrappers innermost
# first; the final `env` is the same wrapper chain as a single nested call.
env = gym.make("CartPole-mod-v0")
# NOTE(review): the meaning of the second argument (100) is defined by
# wrappers.FloorObsWrapper — confirm there.
env = FloorObsWrapper(env, 100)
# JSON media types for the cart and pole representations.
env = RestfulCartPoleV0Wrapper(
    env,
    "application/vnd.cartpole.cart+json",
    "application/vnd.cartpole.pole+json",
)
env = TokenizeObsWrapper(env)
# Path to the GloVe vector file, relative to the working directory.
env = GloveObsWrapper(env, "weights/glove.6B.50d.txt")

Train CartPole-v0 PVG model on JSON

In [None]:
# Train the current `model` on the current `env` with the shared hyperparameters.
# `returns` / `returns_std` are rebound by every training cell in this notebook,
# so copy them first if results from another section are still needed.
returns, returns_std = model.train(
    env=env,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

## IoT CartPole-v0 XML
Initialize PVG Baseline model

In [None]:
# New PVGBaseline instance for the CartPole XML run; rebinding `model` means
# this section does not reuse the object from any earlier section.
model = PVGBaseline()

Create encoded environment

In [None]:
# Build the encoded CartPole environment by stacking the wrappers innermost
# first; the final `env` is the same wrapper chain as a single nested call.
env = gym.make("CartPole-mod-v0")
# NOTE(review): the meaning of the second argument (100) is defined by
# wrappers.FloorObsWrapper — confirm there.
env = FloorObsWrapper(env, 100)
# XML media types for the cart and pole representations.
# NOTE(review): the suffix is upper-case "+XML" here, whereas the Acrobot XML
# cell uses lower-case "+xml" — confirm the wrapper matches it case-insensitively.
env = RestfulCartPoleV0Wrapper(
    env,
    "application/vnd.cartpole.cart+XML",
    "application/vnd.cartpole.pole+XML",
)
env = TokenizeObsWrapper(env)
# Path to the GloVe vector file, relative to the working directory.
env = GloveObsWrapper(env, "weights/glove.6B.50d.txt")

Train CartPole-v0 PVG model on XML

In [None]:
# Train the current `model` on the current `env` with the shared hyperparameters.
# `returns` / `returns_std` are rebound by every training cell in this notebook,
# so copy them first if results from another section are still needed.
returns, returns_std = model.train(
    env=env,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

## IoT Acrobot-v1 JSON
Initialize PVG Baseline model

In [None]:
# New PVGBaseline instance for the Acrobot JSON run; rebinding `model` means
# this section does not reuse the object from any earlier section.
model = PVGBaseline()

Create encoded environment

In [None]:
# Build the encoded Acrobot environment by stacking the wrappers innermost
# first; the final `env` is the same wrapper chain as a single nested call.
env = gym.make("Acrobot-mod-v1")
# NOTE(review): the meaning of the second argument (100) is defined by
# wrappers.FloorObsWrapper — confirm there.
env = FloorObsWrapper(env, 100)
# JSON media types passed to the Acrobot REST-style wrapper.
env = RestfulAcrobotV1WrapperV1(
    env,
    "application/vnd.acrobot.joint+json",
    "application/vnd.acrobot.joints+json",
)
env = TokenizeObsWrapper(env)
# Path to the GloVe vector file, relative to the working directory.
env = GloveObsWrapper(env, "weights/glove.6B.50d.txt")

Train Acrobot-v1 PVG model on JSON

In [None]:
# Train the current `model` on the current `env` with the shared hyperparameters.
# `returns` / `returns_std` are rebound by every training cell in this notebook,
# so copy them first if results from another section are still needed.
returns, returns_std = model.train(
    env=env,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

## IoT Acrobot-v1 XML
Initialize PVG Baseline model

In [None]:
# New PVGBaseline instance for the Acrobot XML run; rebinding `model` means
# this section does not reuse the object from any earlier section.
model = PVGBaseline()

Create encoded environment

In [None]:
# Build the encoded Acrobot environment by stacking the wrappers innermost
# first; the final `env` is the same wrapper chain as a single nested call.
env = gym.make("Acrobot-mod-v1")
# NOTE(review): the meaning of the second argument (100) is defined by
# wrappers.FloorObsWrapper — confirm there.
env = FloorObsWrapper(env, 100)
# XML media types passed to the Acrobot REST-style wrapper.
env = RestfulAcrobotV1WrapperV1(
    env,
    "application/vnd.acrobot.joint+xml",
    "application/vnd.acrobot.joints+xml",
)
env = TokenizeObsWrapper(env)
# Path to the GloVe vector file, relative to the working directory.
env = GloveObsWrapper(env, "weights/glove.6B.50d.txt")

Train Acrobot-v1 PVG model on XML

In [None]:
# Train the current `model` on the current `env` with the shared hyperparameters.
# `returns` / `returns_std` are rebound by every training cell in this notebook,
# so copy them first if results from another section are still needed.
returns, returns_std = model.train(
    env=env,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

---

## Classic CartPole-v0

Initialize PVG Baseline model for classic environment

In [None]:
# New PVGBaselineClassic instance for the un-encoded CartPole run; rebinding
# `model` means this section does not reuse the object from any earlier section.
model = PVGBaselineClassic()

Create classic CartPole environment

In [None]:
# Plain environment with no observation-encoding wrappers applied in this cell;
# "CartPole-mod-v0" is the id registered earlier in this notebook.
env = gym.make("CartPole-mod-v0")

Train PVG classic model on CartPole

In [None]:
# Train the current `model` on the current `env` with the shared hyperparameters.
# `returns` / `returns_std` are rebound by every training cell in this notebook,
# so copy them first if results from another section are still needed.
returns, returns_std = model.train(
    env=env,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

## Classic Acrobot-v1

Initialize PVG Baseline model for classic environment

In [None]:
# New PVGBaselineClassic instance for the un-encoded Acrobot run; rebinding
# `model` means this section does not reuse the object from any earlier section.
model = PVGBaselineClassic()

Create classic Acrobot environment

In [None]:
# Environment with no observation-encoding wrappers applied in this cell;
# "Acrobot-mod-v1" is the id registered earlier in this notebook.
env = gym.make("Acrobot-mod-v1")

Train PVG classic model on Acrobot

In [None]:
# Train the current `model` on the current `env` with the shared hyperparameters.
# `returns` / `returns_std` are rebound by every training cell in this notebook,
# so copy them first if results from another section are still needed.
returns, returns_std = model.train(
    env=env,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

---