In [4]:
import gym
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch as T

In [8]:
# Create the CartPole-v1 environment; the same `env` object is reused by the
# training loop and the rendering/evaluation loop in later cells.
env = gym.make('CartPole-v1')

In [6]:
class PolicyNet(nn.Module):
    """Fully connected policy network that outputs one raw score per action.

    Architecture: input -> 128 -> 64 -> 32 -> num_actions, with ReLU after
    each hidden layer and no activation on the output layer (the caller is
    expected to turn the scores into probabilities, e.g. with a softmax).

    The module also owns its Adam optimizer (``self.optimizer``) and the
    device it lives on (``self.device``: ``cuda:0`` when available, else CPU).

    Args:
        lr: Learning rate handed to ``optim.Adam``.
        input_dims: Sequence that is unpacked into the first ``nn.Linear``
            (e.g. ``[4]`` for a 4-dimensional state vector).
        num_actions: Number of output units, one per discrete action.
    """
    def __init__(self,lr,input_dims,num_actions):
        super(PolicyNet,self).__init__()
        self.fc1 = nn.Linear(*input_dims,128)
        self.fc2 = nn.Linear(128,64)
        self.fc3 = nn.Linear(64,32)
        self.fc4 = nn.Linear(32,num_actions)

        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

        # Build the optimizer only after the parameters have been moved to
        # their final device, as recommended by the torch.optim documentation.
        self.optimizer = optim.Adam(self.parameters(),lr=lr)

    def forward(self,state):
        """Return the un-normalised action scores for a batch of states.

        Args:
            state: Float tensor whose last dimension matches ``input_dims``.

        Returns:
            Tensor with ``num_actions`` values in its last dimension.
        """
        S = F.relu(self.fc1(state))
        S = F.relu(self.fc2(S))
        S = F.relu(self.fc3(S))
        S = self.fc4(S)

        return S

In [17]:
class Reinforce_Agent():
    """Monte-Carlo policy-gradient (REINFORCE) agent for discrete actions.

    During an episode the agent records the log-probability of every sampled
    action (``action_memory``) and every reward (``reward_memory``).  Calling
    ``train()`` at the end of the episode performs one optimizer step on
    ``-sum_t G_t * log pi(a_t | s_t)`` and clears both memories.

    Args:
        lr: Learning rate forwarded to the policy network's optimizer.
        input_dims: Observation dimensions forwarded to ``PolicyNet``.
        gamma: Discount factor used when computing returns.
        num_actions: Number of discrete actions.
    """
    def __init__(self,lr,input_dims,gamma=0.99,num_actions=2):
        self.gamma = gamma
        self.lr = lr
        self.reward_memory = []   # rewards of the current episode, in order
        self.action_memory = []   # log-prob tensors of the sampled actions

        self.policy = PolicyNet(self.lr,input_dims,num_actions)

    def select_action(self,obs):
        """Sample an action for ``obs`` and remember its log-probability.

        Args:
            obs: A single observation (array-like of floats).

        Returns:
            The sampled action as a Python int.
        """
        # Convert through one contiguous float32 array; building a tensor from
        # a list of ndarrays is slow and triggers a PyTorch warning.
        state = T.as_tensor(np.asarray(obs,dtype=np.float32),
                            device=self.policy.device).unsqueeze(0)
        # Explicit dim: normalise over the action axis (implicit dim is deprecated).
        probs = F.softmax(self.policy(state),dim=-1)
        action_probs = T.distributions.Categorical(probs)
        action = action_probs.sample()
        log_probs = action_probs.log_prob(action)
        self.action_memory.append(log_probs)
        return action.item()

    def store_rewards(self,reward):
        """Append the reward received for the most recent action."""
        self.reward_memory.append(reward)

    def _discounted_returns(self):
        """Return G_t = sum_{k>=t} gamma^(k-t) * r_k for every stored step.

        Computed in a single backward pass via G_t = r_t + gamma * G_{t+1}.
        """
        returns = np.zeros(len(self.reward_memory),dtype=np.float64)
        running = 0.0
        for t in reversed(range(len(self.reward_memory))):
            running = self.reward_memory[t] + self.gamma*running
            returns[t] = running
        return returns

    def train(self):
        """Run one REINFORCE update from the stored episode, then clear it.

        Does nothing except clear the memories when no (reward, log-prob)
        pair has been recorded, instead of failing on an empty loss.
        """
        # Pair rewards with log-probs; any unmatched tail is ignored.
        steps = min(len(self.reward_memory),len(self.action_memory))
        if steps == 0:
            self.action_memory = []
            self.reward_memory = []
            return

        self.policy.optimizer.zero_grad()
        G = T.as_tensor(self._discounted_returns()[:steps],
                        dtype=T.float32,device=self.policy.device)
        # Each stored log-prob holds a single element; flatten to shape (steps,).
        log_probs = T.stack(self.action_memory[:steps]).reshape(-1)
        loss = -(G*log_probs).sum()
        loss.backward()
        self.policy.optimizer.step()

        self.action_memory = []
        self.reward_memory = []
        

In [18]:
# Train the REINFORCE agent on `env`: play one full episode per iteration,
# update the policy once at the end of the episode, and log the episode score
# together with the running mean over the last 100 episodes.
epochs = 3000
agent = Reinforce_Agent(gamma= 0.99,lr = 0.0005,input_dims=[4],num_actions =2)
scores = []
for epoch in range(epochs):
    done = False
    obs = env.reset()
    score = 0
    while not done:
        action = agent.select_action(obs)
        next_obs,reward,done,info = env.step(action)
        score+=reward
        agent.store_rewards(reward)
        obs = next_obs
    # One policy-gradient step per finished episode; this also clears the
    # agent's reward/log-prob memories for the next episode.
    agent.train()
    scores.append(score)
    avg_score = np.mean(scores[-100:])
    print('Episode: {} Score: {} average score: {}'.format(epoch,score,avg_score))

  probs = F.softmax(self.policy.forward(state))


Episode: 0 Score: 20.0 average acore: 20.0
Episode: 1 Score: 11.0 average acore: 15.5
Episode: 2 Score: 25.0 average acore: 18.666666666666668
Episode: 3 Score: 31.0 average acore: 21.75
Episode: 4 Score: 15.0 average acore: 20.4
Episode: 5 Score: 39.0 average acore: 23.5
Episode: 6 Score: 16.0 average acore: 22.428571428571427
Episode: 7 Score: 14.0 average acore: 21.375
Episode: 8 Score: 17.0 average acore: 20.88888888888889
Episode: 9 Score: 17.0 average acore: 20.5
Episode: 10 Score: 21.0 average acore: 20.545454545454547
Episode: 11 Score: 12.0 average acore: 19.833333333333332
Episode: 12 Score: 20.0 average acore: 19.846153846153847
Episode: 13 Score: 16.0 average acore: 19.571428571428573
Episode: 14 Score: 47.0 average acore: 21.4
Episode: 15 Score: 18.0 average acore: 21.1875
Episode: 16 Score: 22.0 average acore: 21.235294117647058
Episode: 17 Score: 16.0 average acore: 20.944444444444443
Episode: 18 Score: 18.0 average acore: 20.789473684210527
Episode: 19 Score: 75.0 avera

Episode: 166 Score: 171.0 average acore: 36.26
Episode: 167 Score: 86.0 average acore: 37.03
Episode: 168 Score: 80.0 average acore: 37.62
Episode: 169 Score: 104.0 average acore: 38.54
Episode: 170 Score: 34.0 average acore: 38.68
Episode: 171 Score: 37.0 average acore: 38.62
Episode: 172 Score: 60.0 average acore: 39.13
Episode: 173 Score: 89.0 average acore: 39.85
Episode: 174 Score: 36.0 average acore: 40.08
Episode: 175 Score: 119.0 average acore: 41.02
Episode: 176 Score: 34.0 average acore: 41.16
Episode: 177 Score: 114.0 average acore: 42.11
Episode: 178 Score: 60.0 average acore: 42.39
Episode: 179 Score: 42.0 average acore: 42.65
Episode: 180 Score: 64.0 average acore: 43.03
Episode: 181 Score: 23.0 average acore: 42.57
Episode: 182 Score: 55.0 average acore: 42.98
Episode: 183 Score: 71.0 average acore: 43.5
Episode: 184 Score: 109.0 average acore: 44.34
Episode: 185 Score: 40.0 average acore: 44.6
Episode: 186 Score: 40.0 average acore: 44.85
Episode: 187 Score: 74.0 averag

Episode: 342 Score: 303.0 average acore: 145.39
Episode: 343 Score: 106.0 average acore: 146.08
Episode: 344 Score: 260.0 average acore: 148.41
Episode: 345 Score: 283.0 average acore: 150.52
Episode: 346 Score: 303.0 average acore: 153.14
Episode: 347 Score: 188.0 average acore: 154.51
Episode: 348 Score: 313.0 average acore: 157.34
Episode: 349 Score: 194.0 average acore: 158.78
Episode: 350 Score: 317.0 average acore: 161.67
Episode: 351 Score: 500.0 average acore: 165.81
Episode: 352 Score: 430.0 average acore: 169.81
Episode: 353 Score: 203.0 average acore: 171.13
Episode: 354 Score: 500.0 average acore: 175.57
Episode: 355 Score: 500.0 average acore: 179.98
Episode: 356 Score: 500.0 average acore: 183.87
Episode: 357 Score: 500.0 average acore: 188.31
Episode: 358 Score: 428.0 average acore: 192.13
Episode: 359 Score: 500.0 average acore: 196.47
Episode: 360 Score: 483.0 average acore: 200.54
Episode: 361 Score: 217.0 average acore: 202.35
Episode: 362 Score: 297.0 average acore:

Episode: 514 Score: 490.0 average acore: 266.91
Episode: 515 Score: 353.0 average acore: 269.06
Episode: 516 Score: 500.0 average acore: 272.84
Episode: 517 Score: 362.0 average acore: 274.83
Episode: 518 Score: 451.0 average acore: 277.59
Episode: 519 Score: 500.0 average acore: 281.19
Episode: 520 Score: 500.0 average acore: 284.7
Episode: 521 Score: 380.0 average acore: 286.81
Episode: 522 Score: 500.0 average acore: 289.5
Episode: 523 Score: 500.0 average acore: 292.65
Episode: 524 Score: 251.0 average acore: 293.15
Episode: 525 Score: 500.0 average acore: 296.01
Episode: 526 Score: 500.0 average acore: 298.84
Episode: 527 Score: 500.0 average acore: 302.63
Episode: 528 Score: 500.0 average acore: 306.57
Episode: 529 Score: 500.0 average acore: 308.56
Episode: 530 Score: 500.0 average acore: 311.14
Episode: 531 Score: 500.0 average acore: 313.92
Episode: 532 Score: 500.0 average acore: 316.7
Episode: 533 Score: 500.0 average acore: 319.29
Episode: 534 Score: 500.0 average acore: 32

Episode: 686 Score: 241.0 average acore: 136.77
Episode: 687 Score: 242.0 average acore: 138.62
Episode: 688 Score: 229.0 average acore: 140.4
Episode: 689 Score: 276.0 average acore: 142.4
Episode: 690 Score: 325.0 average acore: 145.21
Episode: 691 Score: 292.0 average acore: 147.58
Episode: 692 Score: 340.0 average acore: 150.08
Episode: 693 Score: 335.0 average acore: 152.66
Episode: 694 Score: 329.0 average acore: 155.64
Episode: 695 Score: 296.0 average acore: 157.97
Episode: 696 Score: 337.0 average acore: 160.93
Episode: 697 Score: 423.0 average acore: 164.35
Episode: 698 Score: 500.0 average acore: 168.73
Episode: 699 Score: 390.0 average acore: 171.86
Episode: 700 Score: 336.0 average acore: 174.44
Episode: 701 Score: 490.0 average acore: 178.41
Episode: 702 Score: 500.0 average acore: 182.55
Episode: 703 Score: 500.0 average acore: 186.82
Episode: 704 Score: 500.0 average acore: 191.49
Episode: 705 Score: 500.0 average acore: 195.53
Episode: 706 Score: 500.0 average acore: 2

Episode: 858 Score: 255.0 average acore: 357.05
Episode: 859 Score: 266.0 average acore: 356.99
Episode: 860 Score: 246.0 average acore: 357.01
Episode: 861 Score: 281.0 average acore: 357.07
Episode: 862 Score: 231.0 average acore: 356.25
Episode: 863 Score: 242.0 average acore: 356.23
Episode: 864 Score: 255.0 average acore: 355.84
Episode: 865 Score: 214.0 average acore: 355.2
Episode: 866 Score: 254.0 average acore: 355.0
Episode: 867 Score: 265.0 average acore: 354.84
Episode: 868 Score: 302.0 average acore: 355.21
Episode: 869 Score: 244.0 average acore: 353.93
Episode: 870 Score: 276.0 average acore: 352.92
Episode: 871 Score: 260.0 average acore: 351.53
Episode: 872 Score: 300.0 average acore: 350.53
Episode: 873 Score: 310.0 average acore: 349.39
Episode: 874 Score: 265.0 average acore: 347.28
Episode: 875 Score: 333.0 average acore: 346.46
Episode: 876 Score: 281.0 average acore: 344.6
Episode: 877 Score: 243.0 average acore: 342.03
Episode: 878 Score: 323.0 average acore: 34

Episode: 1029 Score: 188.0 average acore: 158.93
Episode: 1030 Score: 177.0 average acore: 158.36
Episode: 1031 Score: 170.0 average acore: 157.95
Episode: 1032 Score: 199.0 average acore: 157.6
Episode: 1033 Score: 149.0 average acore: 156.77
Episode: 1034 Score: 201.0 average acore: 156.66
Episode: 1035 Score: 186.0 average acore: 156.32
Episode: 1036 Score: 189.0 average acore: 155.87
Episode: 1037 Score: 185.0 average acore: 155.43
Episode: 1038 Score: 183.0 average acore: 154.88
Episode: 1039 Score: 178.0 average acore: 154.82
Episode: 1040 Score: 189.0 average acore: 154.72
Episode: 1041 Score: 174.0 average acore: 154.42
Episode: 1042 Score: 181.0 average acore: 153.87
Episode: 1043 Score: 190.0 average acore: 154.02
Episode: 1044 Score: 181.0 average acore: 154.03
Episode: 1045 Score: 177.0 average acore: 154.02
Episode: 1046 Score: 162.0 average acore: 153.71
Episode: 1047 Score: 166.0 average acore: 153.39
Episode: 1048 Score: 166.0 average acore: 153.54
Episode: 1049 Score: 

Episode: 1197 Score: 287.0 average acore: 471.98
Episode: 1198 Score: 199.0 average acore: 470.21
Episode: 1199 Score: 381.0 average acore: 469.67
Episode: 1200 Score: 427.0 average acore: 469.48
Episode: 1201 Score: 298.0 average acore: 468.66
Episode: 1202 Score: 500.0 average acore: 469.17
Episode: 1203 Score: 500.0 average acore: 469.21
Episode: 1204 Score: 317.0 average acore: 467.84
Episode: 1205 Score: 201.0 average acore: 464.99
Episode: 1206 Score: 489.0 average acore: 465.39
Episode: 1207 Score: 500.0 average acore: 466.12
Episode: 1208 Score: 470.0 average acore: 465.82
Episode: 1209 Score: 500.0 average acore: 466.58
Episode: 1210 Score: 300.0 average acore: 465.05
Episode: 1211 Score: 380.0 average acore: 464.03
Episode: 1212 Score: 410.0 average acore: 463.13
Episode: 1213 Score: 469.0 average acore: 462.82
Episode: 1214 Score: 312.0 average acore: 460.94
Episode: 1215 Score: 273.0 average acore: 458.67
Episode: 1216 Score: 441.0 average acore: 458.08
Episode: 1217 Score:

Episode: 1365 Score: 500.0 average acore: 492.46
Episode: 1366 Score: 500.0 average acore: 492.46
Episode: 1367 Score: 500.0 average acore: 492.46
Episode: 1368 Score: 500.0 average acore: 492.46
Episode: 1369 Score: 500.0 average acore: 492.46
Episode: 1370 Score: 387.0 average acore: 491.33
Episode: 1371 Score: 500.0 average acore: 491.33
Episode: 1372 Score: 500.0 average acore: 491.33
Episode: 1373 Score: 500.0 average acore: 491.33
Episode: 1374 Score: 500.0 average acore: 491.33
Episode: 1375 Score: 500.0 average acore: 491.33
Episode: 1376 Score: 500.0 average acore: 491.33
Episode: 1377 Score: 388.0 average acore: 490.21
Episode: 1378 Score: 500.0 average acore: 490.21
Episode: 1379 Score: 500.0 average acore: 490.21
Episode: 1380 Score: 500.0 average acore: 490.21
Episode: 1381 Score: 500.0 average acore: 492.13
Episode: 1382 Score: 500.0 average acore: 492.13
Episode: 1383 Score: 500.0 average acore: 492.13
Episode: 1384 Score: 500.0 average acore: 492.13
Episode: 1385 Score:

Episode: 1534 Score: 111.0 average acore: 78.5
Episode: 1535 Score: 110.0 average acore: 76.96
Episode: 1536 Score: 109.0 average acore: 75.84
Episode: 1537 Score: 121.0 average acore: 75.09
Episode: 1538 Score: 124.0 average acore: 74.33
Episode: 1539 Score: 147.0 average acore: 73.91
Episode: 1540 Score: 103.0 average acore: 73.17
Episode: 1541 Score: 126.0 average acore: 74.01
Episode: 1542 Score: 113.0 average acore: 73.5
Episode: 1543 Score: 111.0 average acore: 73.41
Episode: 1544 Score: 104.0 average acore: 73.4
Episode: 1545 Score: 46.0 average acore: 72.87
Episode: 1546 Score: 117.0 average acore: 73.05
Episode: 1547 Score: 136.0 average acore: 74.15
Episode: 1548 Score: 129.0 average acore: 75.03
Episode: 1549 Score: 99.0 average acore: 74.91
Episode: 1550 Score: 131.0 average acore: 75.25
Episode: 1551 Score: 125.0 average acore: 75.43
Episode: 1552 Score: 113.0 average acore: 75.5
Episode: 1553 Score: 119.0 average acore: 76.28
Episode: 1554 Score: 117.0 average acore: 76.4

Episode: 1704 Score: 224.0 average acore: 116.39
Episode: 1705 Score: 223.0 average acore: 118.22
Episode: 1706 Score: 211.0 average acore: 119.33
Episode: 1707 Score: 216.0 average acore: 120.34
Episode: 1708 Score: 249.0 average acore: 121.61
Episode: 1709 Score: 285.0 average acore: 123.38
Episode: 1710 Score: 219.0 average acore: 125.25
Episode: 1711 Score: 303.0 average acore: 127.36
Episode: 1712 Score: 285.0 average acore: 129.15
Episode: 1713 Score: 321.0 average acore: 132.01
Episode: 1714 Score: 307.0 average acore: 134.06
Episode: 1715 Score: 377.0 average acore: 137.47
Episode: 1716 Score: 360.0 average acore: 139.92
Episode: 1717 Score: 355.0 average acore: 142.35
Episode: 1718 Score: 348.0 average acore: 144.68
Episode: 1719 Score: 411.0 average acore: 147.75
Episode: 1720 Score: 426.0 average acore: 150.86
Episode: 1721 Score: 430.0 average acore: 153.97
Episode: 1722 Score: 412.0 average acore: 156.93
Episode: 1723 Score: 448.0 average acore: 160.27
Episode: 1724 Score:

Episode: 1872 Score: 500.0 average acore: 476.94
Episode: 1873 Score: 500.0 average acore: 476.94
Episode: 1874 Score: 500.0 average acore: 476.94
Episode: 1875 Score: 500.0 average acore: 476.94
Episode: 1876 Score: 354.0 average acore: 475.48
Episode: 1877 Score: 500.0 average acore: 475.48
Episode: 1878 Score: 476.0 average acore: 475.24
Episode: 1879 Score: 500.0 average acore: 475.24
Episode: 1880 Score: 500.0 average acore: 475.24
Episode: 1881 Score: 500.0 average acore: 475.24
Episode: 1882 Score: 500.0 average acore: 475.24
Episode: 1883 Score: 500.0 average acore: 475.24
Episode: 1884 Score: 500.0 average acore: 475.24
Episode: 1885 Score: 500.0 average acore: 475.24
Episode: 1886 Score: 500.0 average acore: 475.24
Episode: 1887 Score: 500.0 average acore: 475.24
Episode: 1888 Score: 396.0 average acore: 474.2
Episode: 1889 Score: 500.0 average acore: 474.2
Episode: 1890 Score: 500.0 average acore: 474.2
Episode: 1891 Score: 480.0 average acore: 474.0
Episode: 1892 Score: 500

Episode: 2041 Score: 271.0 average acore: 462.21
Episode: 2042 Score: 328.0 average acore: 460.49
Episode: 2043 Score: 333.0 average acore: 458.82
Episode: 2044 Score: 500.0 average acore: 458.82
Episode: 2045 Score: 500.0 average acore: 458.82
Episode: 2046 Score: 500.0 average acore: 458.82
Episode: 2047 Score: 453.0 average acore: 458.35
Episode: 2048 Score: 498.0 average acore: 458.33
Episode: 2049 Score: 323.0 average acore: 456.56
Episode: 2050 Score: 410.0 average acore: 455.66
Episode: 2051 Score: 500.0 average acore: 455.66
Episode: 2052 Score: 427.0 average acore: 454.93
Episode: 2053 Score: 369.0 average acore: 453.62
Episode: 2054 Score: 500.0 average acore: 453.62
Episode: 2055 Score: 500.0 average acore: 453.62
Episode: 2056 Score: 500.0 average acore: 453.62
Episode: 2057 Score: 497.0 average acore: 453.59
Episode: 2058 Score: 272.0 average acore: 451.31
Episode: 2059 Score: 382.0 average acore: 450.13
Episode: 2060 Score: 500.0 average acore: 450.13
Episode: 2061 Score:

Episode: 2209 Score: 500.0 average acore: 497.12
Episode: 2210 Score: 500.0 average acore: 497.12
Episode: 2211 Score: 500.0 average acore: 497.12
Episode: 2212 Score: 500.0 average acore: 497.12
Episode: 2213 Score: 500.0 average acore: 497.12
Episode: 2214 Score: 500.0 average acore: 497.12
Episode: 2215 Score: 500.0 average acore: 497.12
Episode: 2216 Score: 500.0 average acore: 497.12
Episode: 2217 Score: 500.0 average acore: 497.12
Episode: 2218 Score: 500.0 average acore: 497.12
Episode: 2219 Score: 500.0 average acore: 497.12
Episode: 2220 Score: 500.0 average acore: 497.12
Episode: 2221 Score: 500.0 average acore: 497.12
Episode: 2222 Score: 500.0 average acore: 497.12
Episode: 2223 Score: 500.0 average acore: 497.12
Episode: 2224 Score: 500.0 average acore: 497.12
Episode: 2225 Score: 500.0 average acore: 497.12
Episode: 2226 Score: 500.0 average acore: 497.12
Episode: 2227 Score: 500.0 average acore: 497.12
Episode: 2228 Score: 500.0 average acore: 497.12
Episode: 2229 Score:

Episode: 2377 Score: 500.0 average acore: 499.3
Episode: 2378 Score: 500.0 average acore: 499.3
Episode: 2379 Score: 500.0 average acore: 499.3
Episode: 2380 Score: 500.0 average acore: 499.3
Episode: 2381 Score: 500.0 average acore: 499.3
Episode: 2382 Score: 500.0 average acore: 499.3
Episode: 2383 Score: 472.0 average acore: 499.02
Episode: 2384 Score: 500.0 average acore: 499.02
Episode: 2385 Score: 500.0 average acore: 499.02
Episode: 2386 Score: 500.0 average acore: 499.02
Episode: 2387 Score: 500.0 average acore: 499.02
Episode: 2388 Score: 500.0 average acore: 499.02
Episode: 2389 Score: 500.0 average acore: 499.02
Episode: 2390 Score: 500.0 average acore: 499.02
Episode: 2391 Score: 500.0 average acore: 499.02
Episode: 2392 Score: 500.0 average acore: 499.02
Episode: 2393 Score: 500.0 average acore: 499.02
Episode: 2394 Score: 500.0 average acore: 499.02
Episode: 2395 Score: 500.0 average acore: 499.02
Episode: 2396 Score: 500.0 average acore: 499.02
Episode: 2397 Score: 500.0

Episode: 2545 Score: 391.0 average acore: 488.62
Episode: 2546 Score: 500.0 average acore: 488.62
Episode: 2547 Score: 500.0 average acore: 488.62
Episode: 2548 Score: 474.0 average acore: 488.36
Episode: 2549 Score: 500.0 average acore: 488.36
Episode: 2550 Score: 500.0 average acore: 488.36
Episode: 2551 Score: 500.0 average acore: 488.36
Episode: 2552 Score: 500.0 average acore: 488.36
Episode: 2553 Score: 500.0 average acore: 488.36
Episode: 2554 Score: 476.0 average acore: 488.12
Episode: 2555 Score: 500.0 average acore: 488.12
Episode: 2556 Score: 500.0 average acore: 488.12
Episode: 2557 Score: 500.0 average acore: 488.12
Episode: 2558 Score: 500.0 average acore: 488.12
Episode: 2559 Score: 401.0 average acore: 487.13
Episode: 2560 Score: 343.0 average acore: 485.56
Episode: 2561 Score: 318.0 average acore: 483.74
Episode: 2562 Score: 377.0 average acore: 482.51
Episode: 2563 Score: 500.0 average acore: 482.51
Episode: 2564 Score: 456.0 average acore: 482.07
Episode: 2565 Score:

Episode: 2713 Score: 500.0 average acore: 498.13
Episode: 2714 Score: 500.0 average acore: 498.13
Episode: 2715 Score: 500.0 average acore: 498.13
Episode: 2716 Score: 500.0 average acore: 498.13
Episode: 2717 Score: 409.0 average acore: 497.22
Episode: 2718 Score: 500.0 average acore: 497.22
Episode: 2719 Score: 500.0 average acore: 497.22
Episode: 2720 Score: 500.0 average acore: 497.22
Episode: 2721 Score: 500.0 average acore: 497.22
Episode: 2722 Score: 500.0 average acore: 497.22
Episode: 2723 Score: 500.0 average acore: 497.22
Episode: 2724 Score: 500.0 average acore: 497.22
Episode: 2725 Score: 500.0 average acore: 497.22
Episode: 2726 Score: 500.0 average acore: 497.22
Episode: 2727 Score: 500.0 average acore: 497.22
Episode: 2728 Score: 500.0 average acore: 497.22
Episode: 2729 Score: 500.0 average acore: 497.22
Episode: 2730 Score: 500.0 average acore: 497.22
Episode: 2731 Score: 495.0 average acore: 497.17
Episode: 2732 Score: 500.0 average acore: 497.17
Episode: 2733 Score:

Episode: 2881 Score: 500.0 average acore: 494.28
Episode: 2882 Score: 500.0 average acore: 494.28
Episode: 2883 Score: 500.0 average acore: 494.28
Episode: 2884 Score: 500.0 average acore: 494.28
Episode: 2885 Score: 500.0 average acore: 494.28
Episode: 2886 Score: 500.0 average acore: 494.28
Episode: 2887 Score: 500.0 average acore: 494.28
Episode: 2888 Score: 500.0 average acore: 494.28
Episode: 2889 Score: 500.0 average acore: 494.28
Episode: 2890 Score: 500.0 average acore: 494.28
Episode: 2891 Score: 500.0 average acore: 494.28
Episode: 2892 Score: 500.0 average acore: 494.28
Episode: 2893 Score: 500.0 average acore: 494.28
Episode: 2894 Score: 500.0 average acore: 494.28
Episode: 2895 Score: 458.0 average acore: 493.86
Episode: 2896 Score: 500.0 average acore: 493.86
Episode: 2897 Score: 500.0 average acore: 493.86
Episode: 2898 Score: 432.0 average acore: 493.18
Episode: 2899 Score: 500.0 average acore: 493.18
Episode: 2900 Score: 500.0 average acore: 493.18
Episode: 2901 Score:

In [19]:
# Render 20 episodes played by the trained agent.
# NOTE: agent.train() is still called after every episode, so the policy keeps
# being updated here (and the agent's episode memories are cleared); the
# scores are appended to the same `scores` list used during training.
try:
    for epoch in range(20):
        done = False
        obs = env.reset()
        score = 0
        while not done:
            env.render()
            action = agent.select_action(obs)
            new_obs,reward,done,info = env.step(action)
            score += reward
            agent.store_rewards(reward)
            obs = new_obs
        agent.train()
        scores.append(score)
        print('episode: {} score: {}'.format(epoch+1,score))
finally:
    # Always release the render window, even if the loop is interrupted.
    env.close()

  probs = F.softmax(self.policy.forward(state))


episode: 0 score: 500.0
episode: 1 score: 500.0
episode: 2 score: 500.0
episode: 3 score: 500.0
episode: 4 score: 500.0
episode: 5 score: 500.0
episode: 6 score: 500.0
episode: 7 score: 500.0
episode: 8 score: 500.0
episode: 9 score: 500.0
episode: 10 score: 500.0
episode: 11 score: 500.0
episode: 12 score: 500.0
episode: 13 score: 500.0
episode: 14 score: 500.0
episode: 15 score: 500.0
episode: 16 score: 500.0
episode: 17 score: 500.0
episode: 18 score: 500.0
episode: 19 score: 500.0
