Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix exercise 6.5 #165

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 66 additions & 141 deletions labs/notebooks/reinforcement_learning/exercise_5.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,188 +9,113 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "As a last exercise, apply what you have learned to the RNN model seen in previous days. Implement REINFORCE to replace the maximum likelihood loss used on the RNN day. For this you can modify the PolicyRNN class in lxmls/deep\\_learning/pytorch\\_models/rnn.py"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### WSJ Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load Part-of-Speech data \n",
"# Load Part-of-Speech data\n",
"from lxmls.readers.pos_corpus import PostagCorpusData\n",
"data = PostagCorpusData()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train model with Cross entropy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print( data.input_size)\n",
"print( data.output_size)"
"data = PostagCorpusData()\n",
"# Get batch iterators for train and test\n",
"batch_size = 1\n",
"train_batches = data.batches('train', batch_size=batch_size)\n",
"dev_set = data.batches('dev', batch_size=batch_size)\n",
"test_set = data.batches('test', batch_size=batch_size)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
    "# Alternative native CuDNN implementation of RNNs\n",
"from lxmls.deep_learning.pytorch_models.rnn import FastPytorchRNN\n",
"model = FastPytorchRNN(\n",
" input_size=data.input_size,\n",
" embedding_size=50,\n",
" hidden_size=20,\n",
" output_size=data.output_size,\n",
" learning_rate=0.1\n",
"reinforce = True\n",
"# Load version of the RNN\n",
"from lxmls.deep_learning.pytorch_models.rnn import PolicyRNN\n",
"model = PolicyRNN(\n",
"input_size=data.input_size,\n",
"embedding_size=50,\n",
"hidden_size=100,\n",
"output_size=data.output_size,\n",
"learning_rate=0.05,\n",
"gamma=0.8,\n",
"RL=reinforce,\n",
"maxL=data.maxL\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"num_epochs = 10"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1: dev accuracy 79.95 %\n",
"Epoch 2: dev accuracy 85.14 %\n",
"Epoch 3: dev accuracy 85.20 %\n",
"Epoch 4: dev accuracy 85.20 %\n",
"Epoch 5: dev accuracy 85.20 %\n",
"Epoch 6: dev accuracy 85.20 %\n",
"Epoch 7: dev accuracy 85.23 %\n",
"Epoch 8: dev accuracy 85.20 %\n",
"Epoch 9: dev accuracy 85.23 %\n",
"Epoch 10: dev accuracy 85.20 %\n",
"Epoch 11: dev accuracy 85.23 %\n",
"Epoch 12: dev accuracy 85.20 %\n",
"Epoch 13: dev accuracy 85.23 %\n",
"Epoch 14: dev accuracy 85.20 %\n",
"Epoch 15: dev accuracy 85.23 %\n",
"Training took 3.39 seconds per epoch\n",
"Test accuracy 84.11 %\n"
]
}
],
"source": [
"import numpy as np\n",
"import time\n",
"\n",
"# Run training\n",
"num_epochs = 15\n",
"batch_size = 1\n",
"\n",
"# Get batch iterators for train and test\n",
"train_batches = data.batches('train', batch_size=1)\n",
"dev_set = data.batches('dev', batch_size=1)\n",
"test_set = data.batches('test', batch_size=1)\n",
"train_batches = data.sample('train', batch_size=batch_size)\n",
"dev_set = data.sample('dev', batch_size=batch_size)\n",
"test_set = data.sample('test', batch_size=batch_size)\n",
"\n",
"# Epoch loop\n",
"start = time.time()\n",
"for epoch in range(num_epochs):\n",
"\n",
" # Batch loop\n",
" for batch in train_batches:\n",
" model.update(input=batch['input'], output=batch['output'])\n",
"\n",
" # Evaluation dev\n",
" is_hit = []\n",
" for batch in dev_set:\n",
" is_hit.extend(model.predict(input=batch['input']) == batch['output'])\n",
" accuracy = 100*np.mean(is_hit)\n",
"\n",
" loss = model.predict_loss(batch['input'], batch['output'])\n",
" is_hit.extend(loss)\n",
" accuracy = 100 * np.mean(is_hit)\n",
" # Inform user\n",
" print(\"Epoch %d: dev accuracy %2.2f %%\" % (epoch+1, accuracy))\n",
"\n",
"print(\"Training took %2.2f seconds per epoch\" % ((time.time() - start)/num_epochs)) \n",
" print(\"Epoch %d: dev accuracy %2.2f %%\" % (epoch + 1, accuracy))\n",
" \n",
"print(\"Training took %2.2f seconds per epoch\" % ((time.time() - start) / num_epochs))\n",
"# Evaluation test\n",
"is_hit = []\n",
"for batch in test_set:\n",
" is_hit.extend(model.predict(input=batch['input']) == batch['output'])\n",
"accuracy = 100*np.mean(is_hit)\n",
"\n",
"# Inform user\n",
"print(\"Test accuracy %2.2f %%\" % accuracy)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train model with [LOSS REQUIRING RL]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Example of sampling\n",
"print(train_batches[3]['input'])\n",
"samples, log_probs = model._sample(input=train_batches[3]['input'])\n",
"samples, log_probs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Epoch loop\n",
"start = time.time()\n",
"for epoch in range(num_epochs):\n",
"\n",
" # Batch loop\n",
" for batch in train_batches:\n",
" # TODO: Use this here to create an RL inside model.update()\n",
    "        samples, log_probs = model._sample(input=batch['input']) # sample actions and their negative log probabilities\n",
" raise NotImplementedError\n",
"\n",
" # Evaluation dev\n",
" is_hit = []\n",
" for batch in dev_set:\n",
" is_hit.extend(model.predict(input=batch['input']) == batch['output'])\n",
" accuracy = 100*np.mean(is_hit)\n",
"\n",
" # Inform user\n",
" print(\"Epoch %d: dev accuracy %2.2f %%\" % (epoch+1, accuracy))\n",
"\n",
"print(\"Training took %2.2f seconds per epoch\" % ((time.time() - start)/num_epochs)) \n",
" \n",
"# Evaluation test\n",
"is_hit = []\n",
"for batch in test_set:\n",
" is_hit.extend(model.predict(input=batch['input']) == batch['output'])\n",
"accuracy = 100*np.mean(is_hit)\n",
" is_hit.extend(model.predict_loss(batch['input'],batch['output']))\n",
"accuracy = 100 * np.mean(is_hit)\n",
"\n",
"# Inform user\n",
"print(\"Test accuracy %2.2f %%\" % accuracy)"
]
}
],
"metadata": {
"git": {
"suppress_outputs": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
Expand All @@ -206,9 +131,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
}
Loading