Merge pull request #10 from AlexanderJYu/quadrotor_notebook

planarquadrotor notebook
MinRegret · Dec 10, 2020 · f3c3884 · f3c3884
2 parents f23be8e + 43be5bd
commit f3c3884
Show file tree

Hide file tree

Showing 2 changed files with 172 additions and 1 deletion.
diff --git a/deluca/envs/classic/_planar_quadrotor.py b/deluca/envs/classic/_planar_quadrotor.py
@@ -128,7 +128,7 @@ def reset(self):
     def step(self, u):
         self.last_u, self.h = u, self.h + 1
         self.state = self.f(self.state, u)
-        return self.state, self.c(self.state, u)
+        return self.state, self.c(self.state, u), False, {}
 
     def render(self, mode="human"):
         if self.viewer is None:

diff --git a/examples/envs/classic/PlanarQuadrotor_ilqr.ipynb b/examples/envs/classic/PlanarQuadrotor_ilqr.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import jax.numpy as jnp\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import jax\n",
+    "from jax import lax\n",
+    "from deluca.envs import PlanarQuadrotor\n",
+    "from deluca.agents import ILQR\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "W1209 02:34:56.329764 4343987648 xla_bridge.py:131] No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "c = 374122.0319286693\n",
+      "cC = 3861.0703039268997\n",
+      "iLQR : t = 2, c = 3861.070304\n",
+      "c = 3861.0703039268997\n",
+      "cC = 219.25601885420733\n",
+      "iLQR : t = 3, c = 219.256019\n",
+      "c = 219.25601885420733\n",
+      "cC = 69.37626506797743\n",
+      "iLQR : t = 4, c = 69.376265\n",
+      "c = 69.37626506797743\n",
+      "cC = 54.80947011412863\n",
+      "iLQR : t = 5, c = 54.809470\n",
+      "c = 54.80947011412863\n",
+      "cC = 52.380773285980005\n",
+      "iLQR : t = 6, c = 52.380773\n",
+      "c = 52.380773285980005\n",
+      "cC = 51.873685827995054\n",
+      "iLQR : t = 7, c = 51.873686\n",
+      "c = 51.873685827995054\n",
+      "cC = 52.30835658833143\n",
+      "iLQR : t = 8, c = 51.873686\n",
+      "c = 51.873685827995054\n",
+      "cC = 51.818456885242966\n",
+      "iLQR : t = 9, c = 51.818457\n",
+      "c = 51.818456885242966\n",
+      "cC = 52.42826316876129\n",
+      "iLQR : t = 10, c = 51.818457\n"
+     ]
+    }
+   ],
+   "source": [
+    "agent = ILQR()\n",
+    "agent.train(PlanarQuadrotor(placebo=0), 10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def loop(context, x):\n",
+    "    env, agent = context\n",
+    "    control = agent(env.state)\n",
+    "    _, reward, _, _ = env.step(control)\n",
+    "    return (env, agent), reward"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1. 1. 0. 0. 0. 0.]\n",
+      "reward_forloop = 51.818456885242966\n"
+     ]
+    }
+   ],
+   "source": [
+    " # for loop version\n",
+    "T = 100\n",
+    "env = PlanarQuadrotor(placebo=0)\n",
+    "print(env.reset())\n",
+    "reward = 0\n",
+    "for i in range(T):\n",
+    "    (env, agent), r = loop((env, agent), 0)\n",
+    "    reward += r\n",
+    "reward_forloop = reward\n",
+    "print('reward_forloop = ' + str(reward_forloop))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1. 1. 0. 0. 0. 0.]\n",
+      "reward_scan sum = 51.81845688524296\n"
+     ]
+    }
+   ],
+   "source": [
+    "# scan version\n",
+    "# env = PlanarQuadrotor(placebo=0)\n",
+    "agent.reset()\n",
+    "print(env.reset())\n",
+    "xs = jnp.array(jnp.arange(T))\n",
+    "_,reward_scan = lax.scan(loop, (env, agent), xs)\n",
+    "\n",
+    "# correctness test\n",
+    "print('reward_scan sum = ' + str(jnp.sum(reward_scan)))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}