# REINFORCE

---

In this notebook, we will train REINFORCE with OpenAI Gym's Cartpole environment.

### 1. Import the Necessary Packages

In [1]:
import gym
gym.logger.set_level(40) # suppress warnings (please remove if gives error)
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

import torch
torch.manual_seed(0) # set random seed
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

!python -m pip install pyvirtualdisplay
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

Collecting pyvirtualdisplay
  Downloading https://files.pythonhosted.org/packages/39/37/f285403a09cc261c56b6574baace1bdcf4b8c7428c8a7239cbba137bc0eb/PyVirtualDisplay-0.2.1.tar.gz
Collecting EasyProcess (from pyvirtualdisplay)
  Downloading https://files.pythonhosted.org/packages/0d/f1/d2de7591e7dfc164d286fa16f051e6c0cf3141825586c3b04ae7cda7ac0f/EasyProcess-0.2.3.tar.gz
Building wheels for collected packages: pyvirtualdisplay, EasyProcess
  Running setup.py bdist_wheel for pyvirtualdisplay ... [?25ldone
[?25h  Stored in directory: /root/.cache/pip/wheels/d1/8c/16/1c64227974ae29c687e4cc30fd691d5c0fd40f54446dde99da
  Running setup.py bdist_wheel for EasyProcess ... [?25ldone
[?25h  Stored in directory: /root/.cache/pip/wheels/b4/c6/e3/c163b04029d8fccfd54b809802640c1af587a01be8d7a04e1a
Successfully built pyvirtualdisplay EasyProcess
Installing collected packages: EasyProcess, pyvirtualdisplay
Successfully installed EasyProcess-0.2.3 pyvirtualdisplay-0.2.1
[33mYou are using pip version

### 2. Define the Architecture of the Policy

In [2]:
env = gym.make('CartPole-v0')
env.seed(0)
print('observation space:', env.observation_space)
print('action space:', env.action_space)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Policy(nn.Module):
    def __init__(self, s_size=4, h_size=16, a_size=2):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(s_size, h_size)
        self.fc2 = nn.Linear(h_size, a_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)
    
    def act(self, state):
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        probs = self.forward(state).cpu()
        m = Categorical(probs)
        action = m.sample()
        return action.item(), m.log_prob(action)

observation space: Box(4,)
action space: Discrete(2)


### 3. Train the Agent with REINFORCE

In [4]:
policy = Policy().to(device)
optimizer = optim.Adam(policy.parameters(), lr=1e-2)

def reinforce(n_episodes=1000, max_t=1000, gamma=1.0, print_every=100):
    scores_deque = deque(maxlen=100)
    scores = []
    for i_episode in range(1, n_episodes+1):
        saved_log_probs = []
        rewards = []
        state = env.reset()
        for t in range(max_t):
            action, log_prob = policy.act(state)
            print(state)
            saved_log_probs.append(log_prob)
            state, reward, done, _ = env.step(action)
            rewards.append(reward)
            if done:
                break 
        scores_deque.append(sum(rewards))
        scores.append(sum(rewards))
        
        discounts = [gamma**i for i in range(len(rewards)+1)]
        R = sum([a*b for a,b in zip(discounts, rewards)])
        
        policy_loss = []
        for log_prob in saved_log_probs:
            policy_loss.append(-log_prob * R)
        policy_loss = torch.cat(policy_loss).sum()
        
        optimizer.zero_grad()
        policy_loss.backward()
        optimizer.step()
        
        if i_episode % print_every == 0:
            print('Episode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
        if np.mean(scores_deque)>=195.0:
            print('Environment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, np.mean(scores_deque)))
            break
        
    return scores
    
scores = reinforce()

[-0.01032251  0.04644526  0.00232086 -0.04521839]
[-0.00939361 -0.1487099   0.00141649  0.24819588]
[-0.0123678  -0.34385205  0.00638041  0.54132526]
[-0.01924485 -0.5390631   0.01720692  0.8360117 ]
[-0.03002611 -0.34418036  0.03392715  0.54878945]
[-0.03690971 -0.14955102  0.04490294  0.26698612]
[-0.03990074 -0.3452841   0.05024266  0.57348683]
[-0.04680642 -0.15090125  0.0617124   0.29704561]
[-0.04982444 -0.34684616  0.06765331  0.60853541]
[-0.05676137 -0.54284544  0.07982402  0.92173667]
[-0.06761827 -0.73894999  0.09825875  1.23840018]
[-0.08239727 -0.5452178   0.12302675  0.97804521]
[-0.09330163 -0.35194094  0.14258766  0.72639986]
[-0.10034045 -0.54871629  0.15711566  1.06034632]
[-0.11131477 -0.35598434  0.17832258  0.82081149]
[-0.11843446 -0.55303958  0.19473881  1.1638566 ]
[ 0.04123038 -0.02819456  0.0157849  -0.00548869]
[ 0.04066649  0.1666975   0.01567513 -0.29314979]
[ 0.04400044  0.3615925   0.00981213 -0.58084807]
[ 0.05123229  0.16633444 -0.00180483 -0.28509036]


[ 0.01378226 -0.21139155 -0.04000177  0.29942026]
[ 0.00955442 -0.40592117 -0.03401337  0.57922387]
[ 0.001436   -0.21033947 -0.02242889  0.27602301]
[-0.00277079 -0.01490482 -0.01690843 -0.02364877]
[-0.00306889  0.18045548 -0.01738141 -0.32161818]
[  5.40224346e-04   3.75820582e-01  -2.38137710e-02  -6.19731442e-01]
[ 0.00805664  0.18103918 -0.0362084  -0.33464277]
[ 0.01167742 -0.01354924 -0.04290126 -0.0535944 ]
[ 0.01140643 -0.20803062 -0.04397314  0.22525015]
[ 0.00724582 -0.01230869 -0.03946814 -0.08097288]
[ 0.00699965 -0.20684329 -0.0410876   0.19900115]
[ 0.00286278 -0.40135422 -0.03710757  0.47844495]
[-0.0051643  -0.59593317 -0.02753868  0.75920533]
[-0.01708296 -0.79066506 -0.01235457  1.04309706]
[-0.03289627 -0.59538124  0.00850737  0.74656153]
[-0.04480389 -0.79061953  0.0234386   1.04190951]
[-0.06061628 -0.59581661  0.04427679  0.75667571]
[-0.07253261 -0.40133198  0.05941031  0.47824779]
[-0.08055925 -0.20709691  0.06897526  0.20486557]
[-0.08470119 -0.01302566  0.07

[-0.06742733 -0.39008236  0.08079602  0.6571031 ]
[-0.07522898 -0.58623053  0.09393808  0.97409371]
[-0.08695359 -0.39248577  0.11341995  0.71233509]
[-0.09480331 -0.58898023  0.12766665  1.03845703]
[-0.10658291 -0.78554617  0.14843579  1.36833849]
[-0.12229384 -0.59256057  0.17580256  1.12552335]
[-0.13414505 -0.40012282  0.19831303  0.89273607]
[-0.0069256  -0.03365656  0.01322477 -0.0125272 ]
[-0.00759873  0.16127326  0.01297422 -0.30100839]
[-0.00437327 -0.03403118  0.00695405 -0.00426207]
[-0.00505389 -0.22925217  0.00686881  0.29060681]
[ -9.63893548e-03  -3.42288350e-02   1.26809491e-02   9.81211327e-05]
[-0.01032351  0.16070898  0.01268291 -0.28855699]
[-0.00710933 -0.03459152  0.00691177  0.00809887]
[-0.00780116 -0.22981191  0.00707375  0.3029545 ]
[-0.0123974  -0.03479148  0.01313284  0.01251085]
[-0.01309323  0.16013969  0.01338306 -0.27599975]
[-0.00989044  0.35506817  0.00786306 -0.56443179]
[-0.00278907  0.55007892 -0.00342557 -0.85462715]
[ 0.00821251  0.74524739 -0.02

[-0.01727721 -0.23595014  0.10431824  0.5015082 ]
[-0.02199621 -0.43237594  0.11434841  0.82515916]
[-0.03064373 -0.6288606   0.13085159  1.1515066 ]
[-0.04322094 -0.82542415  0.15388172  1.48219202]
[-0.05972943 -0.63247758  0.18352556  1.24125496]
[-0.07237898 -0.8294186   0.20835066  1.5853596 ]
[-0.04835337 -0.03649855  0.02275268 -0.04876648]
[-0.04908334  0.15828988  0.02177735 -0.33418479]
[-0.04591754  0.35309523  0.01509365 -0.61992149]
[-0.03885563  0.54800316  0.00269522 -0.90781271]
[-0.02789557  0.35284482 -0.01546103 -0.61428388]
[-0.02083867  0.54817936 -0.02774671 -0.91179604]
[-0.00987509  0.3534436  -0.04598263 -0.62796132]
[-0.00280622  0.15899256 -0.05854186 -0.35010719]
[  3.73635873e-04   3.54896101e-01  -6.55440007e-02  -6.60660579e-01]
[ 0.00747156  0.16074451 -0.07875721 -0.38931467]
[ 0.01068645  0.35689082 -0.08654351 -0.70575282]
[ 0.01782426  0.55309848 -0.10065856 -1.02437487]
[ 0.02888623  0.35945059 -0.12114606 -0.76491702]
[ 0.03607525  0.16618665 -0.13

[ 0.03859317 -0.15088383 -0.02474534  0.2223062 ]
[ 0.0355755   0.04458291 -0.02029922 -0.07807854]
[ 0.03646715 -0.15024225 -0.02186079  0.20813143]
[ 0.03346231  0.04518536 -0.01769816 -0.0913665 ]
[ 0.03436602  0.24055646 -0.01952549 -0.38958023]
[ 0.03917714  0.43595002 -0.02731709 -0.68835484]
[ 0.04789615  0.63144025 -0.04108419 -0.989511  ]
[ 0.06052495  0.43689162 -0.06087441 -0.71000966]
[ 0.06926278  0.63280147 -0.0750746  -1.02121614]
[ 0.08191881  0.43875572 -0.09549893 -0.7530186 ]
[ 0.09069393  0.24507116 -0.1105593  -0.49184959]
[ 0.09559535  0.05166813 -0.12039629 -0.23595452]
[ 0.09662871  0.24828605 -0.12511538 -0.56405752]
[ 0.10159443  0.44492088 -0.13639653 -0.89339223]
[ 0.11049285  0.64160283 -0.15426438 -1.22565145]
[ 0.12332491  0.44876626 -0.1787774  -0.9850056 ]
[ 0.13230023  0.64577369 -0.19847752 -1.32808787]
[ 0.01714605 -0.03015641  0.02511599 -0.03477114]
[ 0.01654293 -0.22562936  0.02442057  0.26572913]
[ 0.01203034 -0.42109119  0.02973515  0.56601339]


[-0.06840778 -0.22947365  0.02063388  0.28229039]
[-0.07299726 -0.42488374  0.02627969  0.58140907]
[-0.08149493 -0.62036387  0.03790787  0.88225335]
[-0.09390221 -0.42577672  0.05555294  0.60172457]
[-0.10241774 -0.23147406  0.06758743  0.32704435]
[-0.10704723 -0.03737619  0.07412832  0.05641787]
[-0.10779475 -0.23347843  0.07525667  0.37153718]
[-0.11246432 -0.42958436  0.08268742  0.68696806]
[-0.12105601 -0.62575083  0.09642678  1.00449482]
[-0.13357102 -0.43203983  0.11651667  0.74358369]
[-0.14221182 -0.2387023   0.13138835  0.48972219]
[-0.14698586 -0.43540924  0.14118279  0.8207566 ]
[-0.15569405 -0.24247231  0.15759792  0.5756007 ]
[-0.1605435  -0.43941152  0.16910994  0.91349079]
[-0.16933173 -0.24693081  0.18737975  0.67837002]
[-0.17427034 -0.05483782  0.20094715  0.45004205]
[  4.44442020e-02  -1.14420891e-02  -3.63672791e-06  -2.69838549e-02]
[ 0.04421536 -0.20656399 -0.00054331  0.26569792]
[ 0.04008408 -0.01143429  0.00477064 -0.02715632]
[ 0.03985539  0.18361893  0.00

[ 0.05716125 -0.19961217 -0.07772585  0.16699796]
[ 0.05316901 -0.39354046 -0.07438589  0.43418376]
[ 0.0452982  -0.19744855 -0.06570222  0.11901   ]
[ 0.04134923 -0.00144977 -0.06332202 -0.1936564 ]
[ 0.04132023  0.19451815 -0.06719515 -0.50562338]
[ 0.04521059  0.00040427 -0.07730761 -0.23485017]
[ 0.04521868 -0.19353291 -0.08200462  0.03248093]
[ 0.04134802  0.00266344 -0.081355   -0.28490696]
[ 0.04140129 -0.19120966 -0.08705314 -0.01895116]
[ 0.0375771   0.00504589 -0.08743216 -0.33778103]
[ 0.03767802 -0.18873015 -0.09418778 -0.07389956]
[ 0.03390341  0.00760705 -0.09566577 -0.39475027]
[ 0.03405555  0.20394703 -0.10356078 -0.71599646]
[ 0.03813449  0.01039933 -0.11788071 -0.45762283]
[ 0.03834248 -0.18287607 -0.12703316 -0.2042984 ]
[ 0.03468496  0.01381202 -0.13111913 -0.5342006 ]
[ 0.0349612   0.21051051 -0.14180314 -0.86515436]
[ 0.03917141  0.40724824 -0.15910623 -1.19884945]
[ 0.04731637  0.6040301  -0.18308322 -1.53687412]
[-0.00779634  0.03041203 -0.0321009  -0.03336904]


[ 0.02483445  0.00251991  0.00515007  0.02647859]
[ 0.02488485 -0.19267551  0.00567965  0.32078196]
[ 0.02103134  0.00236509  0.01209529  0.02989558]
[ 0.02107864 -0.1929282   0.0126932   0.32637002]
[ 0.01722008  0.00201075  0.0192206   0.03771685]
[ 0.01726029  0.19685188  0.01997493 -0.24884035]
[ 0.02119733  0.39168295  0.01499813 -0.53515643]
[ 0.02903099  0.19635334  0.004295   -0.23778568]
[ 0.03295806  0.0011703  -0.00046072  0.05624891]
[ 0.03298146 -0.19394504  0.00066426  0.34878644]
[ 0.02910256 -0.38907644  0.00763999  0.64167876]
[ 0.02132103 -0.19406181  0.02047357  0.3514115 ]
[ 0.0174398   0.00076309  0.0275018   0.0652542 ]
[ 0.01745506 -0.19474215  0.02880688  0.36648567]
[  1.35602171e-02  -4.11444228e-05   3.61365946e-02   8.30233855e-02]
[ 0.01355939 -0.19566197  0.03779706  0.3868849 ]
[ 0.00964615 -0.00109638  0.04553476  0.10635465]
[ 0.00962423  0.19334446  0.04766185 -0.1716217 ]
[ 0.01349112 -0.00242611  0.04422942  0.13570796]
[ 0.01344259  0.19203533  0.04

[ 0.08129184  0.56501689 -0.06174264 -0.94010657]
[ 0.09259217  0.37077908 -0.08054477 -0.66744583]
[ 0.10000776  0.17686415 -0.09389368 -0.401171  ]
[ 0.10354504 -0.01680929 -0.1019171  -0.13950469]
[ 0.10320885 -0.21033505 -0.1047072   0.11936585]
[ 0.09900215 -0.40381311 -0.10231988  0.37726578]
[ 0.09092589 -0.20739808 -0.09477457  0.05415457]
[ 0.08677793 -0.01105398 -0.09369147 -0.2668619 ]
[ 0.08655685  0.18527156 -0.09902871 -0.58756364]
[ 0.09026228  0.38163073 -0.11077999 -0.90972622]
[ 0.09789489  0.18816831 -0.12897451 -0.65381417]
[ 0.10165826  0.38482782 -0.14205079 -0.98416681]
[ 0.10935482  0.191865   -0.16173413 -0.73926057]
[ 0.11319212 -0.00069818 -0.17651934 -0.50152816]
[ 0.11317815 -0.19295015 -0.1865499  -0.26925895]
[ 0.10931915 -0.3849881  -0.19193508 -0.04072793]
[ 0.10161939 -0.18770622 -0.19274964 -0.38729979]
[ 0.09786526  0.00955441 -0.20049564 -0.73402935]
[-0.01349817 -0.01608127 -0.03706577  0.02905111]
[-0.0138198   0.17955208 -0.03648475 -0.2750922 ]


[ 0.15264482  0.97396393  0.02851776 -0.86046093]
[ 0.17212409  1.16868613  0.01130854 -1.14404238]
[ 0.19549782  1.36365853 -0.01157231 -1.43315764]
[ 0.22277099  1.16868125 -0.04023546 -1.14411348]
[ 0.24614461  0.97410737 -0.06311773 -0.86431492]
[ 0.26562676  0.77989879 -0.08040403 -0.59212642]
[ 0.28122474  0.58598904 -0.09224656 -0.3258148 ]
[ 0.29294452  0.39229327 -0.09876285 -0.06358849]
[ 0.30079038  0.58868233 -0.10003462 -0.38572565]
[ 0.31256403  0.39511224 -0.10774914 -0.12618324]
[ 0.32046627  0.20168564 -0.1102728   0.13065705]
[ 0.32449999  0.3982003  -0.10765966 -0.19467906]
[ 0.33246399  0.59468438 -0.11155324 -0.51928963]
[ 0.34435768  0.4012951  -0.12193903 -0.26373744]
[ 0.35238358  0.20810554 -0.12721378 -0.01186615]
[ 0.35654569  0.0150158  -0.12745111  0.23812793]
[ 0.35684601  0.21170631 -0.12268855 -0.09188408]
[ 0.36108013  0.01853697 -0.12452623  0.15971189]
[ 0.36145087  0.21520132 -0.12133199 -0.16951653]
[ 0.3657549   0.41183216 -0.12472232 -0.49787858]


[ 0.00415067  0.21098092 -0.0372339  -0.37611472]
[ 0.00837029  0.01640705 -0.04475619 -0.09540045]
[ 0.00869843 -0.1780458  -0.0466642   0.18283282]
[ 0.00513752  0.01771172 -0.04300755 -0.12419812]
[ 0.00549175 -0.17676854 -0.04549151  0.1546122 ]
[ 0.00195638  0.01897425 -0.04239927 -0.15206811]
[ 0.00233586 -0.17551576 -0.04544063  0.12694306]
[-0.00117445 -0.36995827 -0.04290177  0.40495082]
[-0.00857362 -0.56444639 -0.03480275  0.68380531]
[-0.01986254 -0.36885892 -0.02112664  0.38037182]
[-0.02723972 -0.17344344 -0.01351921  0.08110324]
[-0.03070859  0.02186968 -0.01189714 -0.21581419]
[-0.0302712   0.21715967 -0.01621343 -0.51222612]
[-0.025928    0.4125062  -0.02645795 -0.80997394]
[-0.01767788  0.21775656 -0.04265743 -0.52572945]
[-0.01332275  0.413452   -0.05317202 -0.83154306]
[-0.00505371  0.60925876 -0.06980288 -1.1404633 ]
[ 0.00713147  0.41511523 -0.09261214 -0.87046257]
[ 0.01543377  0.22136678 -0.11002139 -0.6082744 ]
[ 0.01986111  0.02794098 -0.12218688 -0.35217104]


[ 0.06184399  0.23134091 -0.03408589 -0.36276912]
[ 0.06647081  0.03671958 -0.04134127 -0.08102602]
[ 0.0672052  -0.1577861  -0.04296179  0.19833235]
[ 0.06404948  0.03792316 -0.03899515 -0.10758755]
[ 0.06480794 -0.15661891 -0.0411469   0.17254205]
[ 0.06167556 -0.35112854 -0.03769606  0.45196576]
[ 0.05465299 -0.54569767 -0.02865674  0.73253211]
[ 0.04373904 -0.35019171 -0.0140061   0.43096964]
[ 0.0367352  -0.15487426 -0.00538671  0.13390455]
[ 0.03363772 -0.34991864 -0.00270861  0.42488319]
[ 0.02663934 -0.15475843  0.00578905  0.13134761]
[ 0.02354418  0.04028012  0.008416   -0.15950334]
[ 0.02434978 -0.1549613   0.00522593  0.13582269]
[ 0.02125055  0.04008541  0.00794239 -0.15520696]
[ 0.02205226  0.23509275  0.00483825 -0.4453737 ]
[ 0.02675412  0.03990268 -0.00406922 -0.15116959]
[ 0.02755217  0.23508266 -0.00709262 -0.44513348]
[ 0.03225382  0.43030424 -0.01599529 -0.74004369]
[ 0.04085991  0.23540675 -0.03079616 -0.45243727]
[ 0.04556804  0.04073353 -0.0398449  -0.16961855]


[ 0.04154309  0.39494993  0.04219381 -0.52526118]
[ 0.04944209  0.1992604   0.03168858 -0.21958685]
[ 0.0534273   0.00370015  0.02729685  0.08292115]
[ 0.0535013  -0.19180224  0.02895527  0.38408977]
[ 0.04966526 -0.38732307  0.03663706  0.68575972]
[ 0.0419188  -0.19272837  0.05035226  0.40483245]
[ 0.03806423  0.00164469  0.05844891  0.12843968]
[ 0.03809712 -0.19426374  0.0610177   0.43897471]
[ 0.03421185 -0.39019383  0.0697972   0.75025098]
[ 0.02640797 -0.1961004   0.08480222  0.48032343]
[ 0.02248596 -0.39231065  0.09440868  0.79848316]
[ 0.01463975 -0.58859226  0.11037835  1.11930939]
[ 0.00286791 -0.39507764  0.13276453  0.8631893 ]
[-0.00503365 -0.59173296  0.15002832  1.19449455]
[-0.01686831 -0.78844479  0.17391821  1.53019079]
[-0.0326372  -0.59579389  0.20452203  1.29644842]
[-0.04763225  0.02435437  0.01628971  0.0205223 ]
[-0.04714517 -0.17099736  0.01670016  0.31829998]
[-0.05056511  0.02388281  0.02306616  0.03093011]
[-0.05008746 -0.1715622   0.02368476  0.33080049]


[ 0.04533814  0.01935726 -0.02110811  0.01332491]
[ 0.04572529 -0.17545572 -0.02084161  0.29927395]
[ 0.04221617 -0.37027448 -0.01485613  0.58531164]
[ 0.03481068 -0.17494763 -0.0031499   0.28798614]
[ 0.03131173  0.0202191   0.00260982 -0.00568857]
[ 0.03171611  0.21530353  0.00249605 -0.29754692]
[ 0.03602218  0.02014609 -0.00345489 -0.00407783]
[ 0.0364251  -0.17492615 -0.00353644  0.28751304]
[ 0.03292658 -0.36999749  0.00221382  0.57907851]
[ 0.02552663 -0.17490663  0.01379539  0.2870938 ]
[ 0.0220285  -0.37022258  0.01953726  0.58409555]
[ 0.01462405 -0.17537968  0.03121917  0.29763058]
[ 0.01111645 -0.37093243  0.03717178  0.59999372]
[ 0.00369781 -0.1763497   0.04917166  0.31924723]
[ 0.00017081  0.01803872  0.0555566   0.04246768]
[ 0.00053159 -0.17783407  0.05640596  0.35214879]
[-0.0030251  -0.37371088  0.06344893  0.66207128]
[-0.01049931 -0.17952645  0.07669036  0.39002221]
[-0.01408984  0.01442803  0.0844908   0.1224705 ]
[-0.01380128  0.20824425  0.08694021 -0.14240601]


[-0.00222441 -0.230322    0.06559488  0.4596989 ]
[-0.00683085 -0.0361856   0.07478885  0.18839163]
[-0.00755457 -0.23229344  0.07855669  0.50369852]
[-0.01220043 -0.03836144  0.08863066  0.23676957]
[-0.01296766 -0.23463048  0.09336605  0.55604087]
[-0.01766027 -0.04093483  0.10448687  0.2941726 ]
[-0.01847897 -0.23737923  0.11037032  0.61789542]
[-0.02322655 -0.04395801  0.12272823  0.36191196]
[-0.02410571 -0.24059117  0.12996647  0.69063478]
[-0.02891754 -0.04748923  0.14377916  0.44152739]
[-0.02986732  0.14533661  0.15260971  0.19739871]
[-0.02696059 -0.05160138  0.15655768  0.53406569]
[-0.02799262  0.14101257  0.167239    0.29451847]
[-0.02517237  0.33340456  0.17312937  0.05889622]
[-0.01850428  0.52567599  0.17430729 -0.17455325]
[-0.00799076  0.7179305   0.17081623 -0.40757791]
[ 0.00636785  0.52085041  0.16266467 -0.06628401]
[ 0.01678486  0.32381536  0.16133899  0.27297983]
[ 0.02326117  0.51631163  0.16679858  0.03521348]
[ 0.0335874   0.31923935  0.16750285  0.37553096]


[-0.01870968 -0.20436994 -0.05976337  0.22384345]
[-0.02279708 -0.008447   -0.05528651 -0.08707627]
[-0.02296602 -0.20273467 -0.05702803  0.18766401]
[-0.02702071 -0.00684515 -0.05327475 -0.12244975]
[-0.02715762  0.18899798 -0.05572375 -0.43145304]
[-0.02337766 -0.00529248 -0.06435281 -0.15684401]
[-0.02348351 -0.19943686 -0.06748969  0.11486309]
[-0.02747225 -0.39353012 -0.06519243  0.38551328]
[-0.03534285 -0.19754617 -0.05748216  0.07300884]
[-0.03929377 -0.00164926 -0.05602198 -0.23724164]
[-0.03932676  0.19422644 -0.06076682 -0.5470561 ]
[ -3.54422276e-02   8.50518981e-06  -7.17079373e-02  -2.74121365e-01]
[-0.03544206 -0.19402095 -0.07719036 -0.00488922]
[-0.03932248 -0.38795588 -0.07728815  0.26247553]
[-0.04708159 -0.58189441 -0.07203864  0.52981479]
[-0.05871948 -0.38583692 -0.06144234  0.21533027]
[-0.06643622 -0.18989285 -0.05713574 -0.09608391]
[-0.07023408  0.00599948 -0.05905742 -0.40623167]
[-0.07011409 -0.18823746 -0.06718205 -0.132737  ]
[-0.07387884  0.00777929 -0.06

[ 0.0665281   0.58830659  0.12686231  0.04843114]
[ 0.07829423  0.78140272  0.12783093 -0.20168785]
[ 0.09392229  0.58470635  0.12379717  0.12842937]
[ 0.10561641  0.38804845  0.12636576  0.45746214]
[ 0.11337738  0.19138775  0.135515    0.78715381]
[ 0.11720514  0.3844138   0.15125808  0.53998884]
[ 0.12489341  0.18752549  0.16205786  0.87624978]
[ 0.12864392  0.38011777  0.17958285  0.63858331]
[ 0.13624628  0.57234157  0.19235452  0.40739856]
[ 0.14769311  0.76429009  0.20050249  0.18099013]
[ 0.16297891  0.5669482   0.20412229  0.52962638]
[-0.00578558  0.03855995  0.04561435  0.03589916]
[-0.00501438 -0.15718544  0.04633234  0.34261779]
[-0.00815809  0.03724778  0.05318469  0.06489779]
[-0.00741313  0.23156846  0.05448265 -0.21054215]
[-0.00278176  0.42587079  0.05027181 -0.48555321]
[ 0.00573565  0.62024863  0.04056074 -0.76197793]
[ 0.01814063  0.42459212  0.02532118 -0.45681296]
[ 0.02663247  0.22912151  0.01618492 -0.1562574 ]
[ 0.0312149   0.03377161  0.01305978  0.14148723]


[ 0.04817213  0.01290466  0.01804332 -0.02794218]
[ 0.04843022 -0.18247134  0.01748448  0.27037855]
[ 0.04478079 -0.37783837  0.02289205  0.56852449]
[ 0.03722403 -0.18304486  0.03426254  0.28314046]
[ 0.03356313 -0.37863833  0.03992535  0.58642968]
[ 0.02599036 -0.57429606  0.05165394  0.89141733]
[ 0.01450444 -0.37991148  0.06948229  0.61540885]
[ 0.00690621 -0.1858256   0.08179046  0.34539386]
[ 0.0031897   0.00804341  0.08869834  0.0795821 ]
[ 0.00335057 -0.18823071  0.09028998  0.39887904]
[-0.00041405  0.00550213  0.09826756  0.13597271]
[ -3.04002778e-04  -1.90879988e-01   1.00987018e-01   4.57968941e-01]
[-0.0041216   0.00268017  0.1101464   0.19874769]
[-0.004068    0.19606846  0.11412135 -0.05725925]
[ -1.46629992e-04   3.89384857e-01   1.12976165e-01  -3.11868729e-01]
[ 0.00764107  0.19284988  0.10673879  0.01419874]
[ 0.01149806 -0.00362807  0.10702277  0.33856003]
[ 0.0114255  -0.20009721  0.11379397  0.66298227]
[ 0.00742356 -0.00672692  0.12705361  0.40818604]
[ 0.007289

[-0.14874603 -0.22716396 -0.0241176   0.01204413]
[-0.15328931 -0.42193187 -0.02387671  0.29702116]
[-0.16172795 -0.22647783 -0.01793629 -0.00309542]
[-0.16625751 -0.42133802 -0.0179982   0.28387486]
[-0.17468427 -0.22596405 -0.0123207  -0.01442982]
[-0.17920355 -0.03066759 -0.0126093  -0.31097446]
[-0.1798169   0.16463171 -0.01882879 -0.60760717]
[-0.17652426 -0.030222   -0.03098093 -0.32091375]
[-0.1771287  -0.22488936 -0.03739921 -0.03815976]
[-0.18162649 -0.41945561 -0.0381624   0.24249272]
[-0.1900156  -0.61401225 -0.03331255  0.52289818]
[-0.20229585 -0.41843766 -0.02285458  0.21990698]
[-0.2106646  -0.2229966  -0.01845644 -0.07989671]
[-0.21512453 -0.027615   -0.02005438 -0.37834508]
[-0.21567683  0.16778593 -0.02762128 -0.67728313]
[-0.21232112 -0.02694158 -0.04116694 -0.3934229 ]
[-0.21285995 -0.22145592 -0.0490354  -0.11399827]
[-0.21728907 -0.4158422  -0.05131536  0.16282006]
[-0.22560591 -0.22002463 -0.04805896 -0.14559975]
[-0.2300064  -0.02424856 -0.05097096 -0.45304869]


[-0.04734022 -0.03106206  0.00418908 -0.03513762]
[-0.04796146  0.16399957  0.00348633 -0.32649591]
[-0.04468147 -0.03117184 -0.00304359 -0.03271559]
[-0.04530491  0.16399362 -0.0036979  -0.32635724]
[-0.04202503  0.35916803 -0.01022504 -0.62020403]
[-0.03484167  0.16419036 -0.02262912 -0.33075893]
[-0.03155787  0.359627   -0.0292443  -0.63049123]
[-0.02436533  0.55514455 -0.04185413 -0.93223875]
[-0.01326244  0.36061159 -0.0604989  -0.65299637]
[-0.0060502   0.16638197 -0.07355883 -0.37996078]
[-0.00272256  0.36246721 -0.08115804 -0.69490006]
[ 0.00452678  0.16855914 -0.09505605 -0.4288296 ]
[ 0.00789796  0.3648897  -0.10363264 -0.74990037]
[ 0.01519576  0.17133817 -0.11863065 -0.49154419]
[ 0.01862252 -0.02192818 -0.12846153 -0.23847785]
[ 0.01818396  0.17477258 -0.13323109 -0.56876172]
[ 0.02167941 -0.01825387 -0.14460632 -0.32084214]
[ 0.02131433 -0.21105227 -0.15102316 -0.07702922]
[ 0.01709329 -0.01412424 -0.15256375 -0.41329124]
[ 0.0168108  -0.20679173 -0.16082957 -0.17232678]


[ 0.02974832  0.04768827  0.02638191 -0.03321518]
[ 0.03070209 -0.14780188  0.02571761  0.26767339]
[ 0.02774605  0.04694378  0.03107108 -0.01678836]
[ 0.02868493  0.24160668  0.03073531 -0.29950834]
[ 0.03351706  0.43627736  0.02474514 -0.58234168]
[ 0.04224261  0.63104403  0.01309831 -0.86712793]
[ 0.05486349  0.43574631 -0.00424425 -0.57035569]
[ 0.06357842  0.63092753 -0.01565136 -0.86437268]
[ 0.07619697  0.43602209 -0.03293881 -0.57665164]
[ 0.08491741  0.24137695 -0.04447185 -0.29452456]
[ 0.08974495  0.04691631 -0.05036234 -0.01619241]
[ 0.09068327  0.24272299 -0.05068619 -0.3243303 ]
[ 0.09553773  0.04835801 -0.05717279 -0.04805262]
[ 0.09650489 -0.1458995  -0.05813385  0.2260579 ]
[ 0.0935869   0.05000301 -0.05361269 -0.08438119]
[ 0.09458696 -0.14431107 -0.05530031  0.19091657]
[ 0.09170074  0.0515566  -0.05148198 -0.11868582]
[ 0.09273187 -0.14279137 -0.0538557   0.15732089]
[ 0.08987605 -0.33710255 -0.05070928  0.43253914]
[ 0.083134   -0.14130066 -0.0420585   0.12431171]


[ 0.04605044  0.04896529 -0.03414624 -0.04296598]
[ 0.04702975  0.24455982 -0.03500556 -0.3462238 ]
[ 0.05192094  0.04995284 -0.04193004 -0.06478169]
[ 0.05292    -0.14454364 -0.04322567  0.21438279]
[ 0.05002913  0.05116878 -0.03893801 -0.09161601]
[ 0.0510525  -0.14337406 -0.04077033  0.18853216]
[ 0.04818502 -0.33788972 -0.03699969  0.46808003]
[ 0.04142723 -0.14226512 -0.02763809  0.16396853]
[ 0.03858192 -0.33698074 -0.02435872  0.44780586]
[ 0.03184231 -0.14152284 -0.0154026   0.14754523]
[ 0.02901185  0.05381626 -0.0124517  -0.14995684]
[ 0.03008818 -0.1411252  -0.01545083  0.13877197]
[ 0.02726567 -0.33602249 -0.0126754   0.42654064]
[ 0.02054522 -0.53096263 -0.00414458  0.7152009 ]
[ 0.00992597 -0.33578355  0.01015944  0.4212163 ]
[ 0.0032103  -0.14080701  0.01858376  0.13175339]
[  3.94160796e-04  -3.36190165e-01   2.12188293e-02   4.30240832e-01]
[-0.00632964 -0.14137502  0.02982365  0.14432176]
[-0.00915714  0.05330741  0.03271008 -0.13880517]
[-0.00809099 -0.14226739  0.02

[-0.03032635  0.01473021  0.20098088  0.71209256]
[ 0.01603224  0.04014841  0.01491566  0.01291102]
[ 0.0168352  -0.15518424  0.01517388  0.31026244]
[ 0.01373152 -0.35051906  0.02137913  0.60769189]
[ 0.00672114 -0.15570244  0.03353296  0.32181871]
[ 0.00360709 -0.35128547  0.03996934  0.62488517]
[-0.00341862 -0.15674363  0.05246704  0.34505334]
[-0.00655349  0.03759423  0.05936811  0.06936592]
[-0.00580161 -0.1583264   0.06075543  0.38017309]
[-0.00896814  0.03588254  0.06835889  0.10724827]
[-0.00825048  0.22996169  0.07050385 -0.16310866]
[-0.00365125  0.03390498  0.06724168  0.15095676]
[-0.00297315  0.22800286  0.07026082 -0.11977796]
[ 0.00158691  0.42205149  0.06786526 -0.38949305]
[ 0.01002794  0.22603523  0.0600754  -0.07620842]
[ 0.01454864  0.4202468   0.05855123 -0.3493487 ]
[ 0.02295358  0.61448926  0.05156425 -0.62300887]
[ 0.03524336  0.80885475  0.03910408 -0.89901642]
[ 0.05142046  0.61322526  0.02112375 -0.59430285]
[ 0.06368496  0.41781411  0.00923769 -0.29504161]


[-0.035497   -0.21171777  0.06967769  0.54915762]
[-0.03973136 -0.01764021  0.08066084  0.27921636]
[-0.04008416  0.17624398  0.08624517  0.01302422]
[-0.03655928  0.37002998  0.08650565 -0.25124973]
[-0.02915868  0.56381697  0.08148066 -0.51544232]
[-0.01788235  0.36764781  0.07117181 -0.19823602]
[-0.01052939  0.56168343  0.06720709 -0.46764576]
[ 0.00070428  0.36567954  0.05785417 -0.15455964]
[ 0.00801787  0.169779    0.05476298  0.15579918]
[ 0.01141345 -0.02608253  0.05787897  0.46524331]
[ 0.0108918   0.16817586  0.06718383  0.19135059]
[ 0.01425532 -0.02783967  0.07101084  0.50444776]
[ 0.01369852  0.16621343  0.0810998   0.23496133]
[ 0.01702279 -0.02996794  0.08579903  0.5520834 ]
[ 0.01642343  0.16385089  0.09684069  0.28761856]
[ 0.01970045  0.35746803  0.10259306  0.02698029]
[ 0.02684981  0.16103587  0.10313267  0.35018873]
[ 0.03007053  0.35455146  0.11013644  0.09172499]
[ 0.03716156  0.15803735  0.11197094  0.41702486]
[ 0.04032231  0.35140916  0.12031144  0.16163425]


[ 0.0347875  -0.22751791 -0.07206951  0.09982919]
[ 0.03023714 -0.03144105 -0.07007293 -0.21469272]
[ 0.02960832  0.1646091  -0.07436678 -0.52863083]
[ 0.0329005  -0.02939215 -0.0849394  -0.26027696]
[ 0.03231266  0.16683316 -0.09014494 -0.57849594]
[ 0.03564933 -0.02691748 -0.10171486 -0.31551675]
[ 0.03511098 -0.22045458 -0.10802519 -0.05656417]
[ 0.03070188 -0.41387514 -0.10915647  0.20017781]
[ 0.02242438 -0.60728034 -0.10515292  0.45653068]
[ 0.01027877 -0.41084113 -0.0960223   0.13264138]
[ 0.00206195 -0.21448424 -0.09336948 -0.18872434]
[-0.00222773 -0.01815911 -0.09714396 -0.50934007]
[-0.00259092 -0.21178789 -0.10733077 -0.2487817 ]
[-0.00682667 -0.4052264  -0.1123064   0.00821167]
[-0.0149312  -0.59857354 -0.11214217  0.26345661]
[-0.02690267 -0.79193105 -0.10687303  0.51877033]
[-0.04274129 -0.5954796  -0.09649763  0.19441353]
[-0.05465088 -0.78909825 -0.09260936  0.45516406]
[-0.07043285 -0.59279725 -0.08350608  0.1347861 ]
[-0.08228879 -0.78662997 -0.08081035  0.39999977]


[ 0.01468714 -0.14879048  0.00153427  0.24340449]
[ 0.01171133  0.04630952  0.00640236 -0.04879409]
[ 0.01263752 -0.14890364  0.00542648  0.24590194]
[ 0.00965945  0.04614039  0.01034452 -0.04506442]
[ 0.01058226 -0.14912836  0.00944323  0.25086426]
[ 0.00759969 -0.34438388  0.01446051  0.54651075]
[ 0.00071201 -0.14946805  0.02539073  0.25841882]
[-0.00227735 -0.34494312  0.03055911  0.55900097]
[-0.00917621 -0.54048038  0.04173912  0.86115291]
[-0.01998582 -0.34595093  0.05896218  0.58188061]
[-0.02690484 -0.15170252  0.0705998   0.30833917]
[-0.02993889 -0.34775569  0.07676658  0.62242543]
[-0.036894   -0.15378485  0.08921509  0.35487328]
[-0.0399697   0.03996281  0.09631255  0.09160205]
[-0.03917044 -0.15639822  0.09814459  0.41305069]
[-0.04229841  0.03720551  0.10640561  0.15285083]
[-0.0415543   0.2306557   0.10946262 -0.10445905]
[-0.03694118  0.42405259  0.10737344 -0.36070036]
[-0.02846013  0.22758126  0.10015944 -0.03618342]
[-0.02390851  0.03117628  0.09943577  0.28634521]


[ 0.07041815  0.41027799 -0.06418311 -0.64105272]
[ 0.07862371  0.60623319 -0.07700416 -0.95323719]
[ 0.09074838  0.41222704 -0.0960689  -0.68570719]
[ 0.09899292  0.60854213 -0.10978305 -1.00702282]
[ 0.11116376  0.41504357 -0.1299235  -0.75073373]
[ 0.11946463  0.22192996 -0.14493818 -0.50159484]
[ 0.12390323  0.0291165  -0.15497008 -0.25786996]
[ 0.12448556 -0.16349262 -0.16012747 -0.01779764]
[ 0.12121571 -0.35599912 -0.16048343  0.22039144]
[ 0.11409572 -0.54850671 -0.1560756   0.45846243]
[ 0.10312559 -0.35156265 -0.14690635  0.12093699]
[ 0.09609434 -0.1546748  -0.14448761 -0.21424895]
[ 0.09300084  0.04218552 -0.14877259 -0.54879415]
[ 0.09384455 -0.15056784 -0.15974847 -0.30643588]
[ 0.09083319 -0.34309566 -0.16587719 -0.06808779]
[ 0.08397128 -0.53549901 -0.16723895  0.16801125]
[ 0.0732613  -0.33842722 -0.16387872 -0.17241152]
[ 0.06649276 -0.5308707  -0.16732695  0.06442116]
[ 0.05587534 -0.33379424 -0.16603853 -0.2760297 ]
[ 0.04919946 -0.52620618 -0.17155912 -0.03997242]


[ 0.02490342 -0.0188235  -0.08082077 -0.10552434]
[ 0.02452695  0.17735806 -0.08293125 -0.42257156]
[ 0.02807411 -0.01649708 -0.09138268 -0.15714207]
[ 0.02774417 -0.21019993 -0.09452553  0.10537073]
[ 0.02354017 -0.01385941 -0.09241811 -0.21557443]
[ 0.02326298 -0.20754696 -0.0967296   0.04658479]
[ 0.01911204 -0.01118063 -0.0957979  -0.27498196]
[ 0.01888843  0.18516831 -0.10129754 -0.59627648]
[ 0.02259179 -0.00840104 -0.11322307 -0.33714143]
[ 0.02242377 -0.20174499 -0.1199659  -0.08219895]
[ 0.01838887 -0.00512586 -0.12160988 -0.41019335]
[ 0.01828636  0.19149138 -0.12981375 -0.73860631]
[ 0.02211618  0.38814418 -0.14458587 -1.06916171]
[ 0.02987907  0.19519943 -0.16596911 -0.82512475]
[ 0.03378306  0.00268897 -0.1824716  -0.58889829]
[ 0.03383684 -0.18947218 -0.19424957 -0.35879134]
[ 0.03004739 -0.38137889 -0.2014254  -0.13309602]
[ 0.02241981 -0.57313147 -0.20408732  0.08989825]
[ 0.01095718 -0.76483253 -0.20228935  0.31189609]
[-0.00433947 -0.95658456 -0.19605143  0.53459089]


[-0.10634855  0.12421853  0.16291118  0.23527597]
[-0.10386418  0.31668351  0.1676167  -0.00191464]
[-0.09753051  0.50905459  0.1675784  -0.2373783 ]
[-0.08734941  0.70143551  0.16283084 -0.47286947]
[-0.0733207   0.50443358  0.15337345 -0.13361439]
[-0.06323203  0.30748536  0.15070116  0.20325528]
[-0.05708232  0.50016689  0.15476627 -0.03835262]
[-0.04707899  0.69276989  0.15399921 -0.27848415]
[-0.03322359  0.49582479  0.14842953  0.05853441]
[-0.02330709  0.29892085  0.14960022  0.39412096]
[-0.01732868  0.49163853  0.15748264  0.15209181]
[-0.00749591  0.68419609  0.16052447 -0.0870615 ]
[ 0.00618802  0.4871808   0.15878324  0.25165385]
[ 0.01593163  0.6797212   0.16381632  0.01296274]
[ 0.02952606  0.87216111  0.16407558 -0.2238857 ]
[ 0.04696928  1.06460428  0.15959786 -0.46065155]
[ 0.06826136  1.25715311  0.15038483 -0.69908138]
[ 0.09340443  1.06030152  0.1364032  -0.36308997]
[ 0.11461046  0.86353124  0.1291414  -0.0306978 ]
[ 0.13188108  0.66681657  0.12852745  0.29977921]


[ 0.06361027  0.59578985  0.00373929 -0.72402884]
[ 0.07552607  0.40061638 -0.01074129 -0.43017134]
[ 0.08353839  0.59588878 -0.01934471 -0.72622092]
[ 0.09545617  0.40103957 -0.03386913 -0.43968878]
[ 0.10347696  0.59662409 -0.04266291 -0.74285292]
[ 0.11540944  0.79230814 -0.05751997 -1.048651  ]
[ 0.13125561  0.98814416 -0.07849299 -1.35882076]
[ 0.15101849  0.79408935 -0.1056694  -1.09168804]
[ 0.16690028  0.60050654 -0.12750316 -0.83394249]
[ 0.17891041  0.79711842 -0.14418201 -1.16385077]
[ 0.19485277  0.60413735 -0.16745903 -0.91962558]
[ 0.20693552  0.41162617 -0.18585154 -0.68390141]
[ 0.21516805  0.21950433 -0.19952957 -0.45500771]
[ 0.21955813  0.02767997 -0.20862972 -0.2312531 ]
[ 0.02893066 -0.01340268  0.02558562  0.00354373]
[ 0.02866261  0.18134317  0.0256565  -0.28095809]
[ 0.03228947 -0.01413518  0.02003733  0.01970513]
[ 0.03200677  0.18069377  0.02043144 -0.26658905]
[ 0.03562065  0.37551825  0.01509966 -0.55275836]
[ 0.04313101  0.18018754  0.00404449 -0.25535656]


KeyboardInterrupt: 

### 4. Plot the Scores

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(1, len(scores)+1), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show()

### 5. Watch a Smart Agent!

In [None]:
env = gym.make('CartPole-v0')

state = env.reset()
img = plt.imshow(env.render(mode='rgb_array'))
for t in range(1000):
    action, _ = policy.act(state)
    img.set_data(env.render(mode='rgb_array')) 
    plt.axis('off')
    display.display(plt.gcf())
    display.clear_output(wait=True)
    state, reward, done, _ = env.step(action)
    if done:
        break 

env.close()