In [1]:
import gym
import torch
import torch.nn as nn
import torch.distributions as distributions
import numpy as np
import copy
import random

In [2]:
class PolicyNet(nn.Module):
    def __init__(self, env) -> None:
        super().__init__()
        self.action_dim = env.action_space.n
        self.net = nn.Sequential(
            nn.Linear(4, 100),
            nn.LeakyReLU(),
            nn.Dropout(0.6),
            nn.Linear(100, self.action_dim),
            nn.Softmax(dim=-1)
        )
    def forward(self, state):
        state = torch.tensor(state, dtype=torch.float)
        state = state.unsqueeze(0)
        return self.net(state)

In [11]:
env_name="CartPole-v1"
env = gym.make(env_name)

D = []
Q = PolicyNet(env)
Q_hat = PolicyNet(env)
gamma = 0.99
num_epi = 100
lr = 1e-5
batch_size = 64
C = 3
optimizer = torch.optim.Adam(Q.parameters(), lr=lr)


def grad_descent(loss):
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


def select_action(Q, state, epsilon=0):
    thresh = np.random.rand()
    with torch.no_grad():
        greedy_action = torch.argmax(Q(state))
    if thresh > epsilon:
        return env.action_space.sample()
    else:
        return greedy_action.item()

for i in range(num_epi):
    state = env.reset()
    done = False
    step = 0
    while not done:
        step += 1
        action = select_action(Q, state, epsilon=0.9)
        old_state = state
        state, reward, done, info = env.step(action)
        D.append((old_state, action, reward, state, done))
        batch = random.choices(D, k=batch_size)
        for ele in batch:
            if batch[-1]:
                y = ele[2]
            else:
                y = ele[2] + gamma * torch.max(Q_hat(ele[-2]))
            loss = torch.square(y - Q(ele[0])[0][ele[1]])
            grad_descent(loss)
        if step % C == 0:
            Q_hat = copy.deepcopy(Q)
    print("# Steps for Episode %d: %d" % (i, step))
    

# Steps for Episode 0: 9
# Steps for Episode 1: 13
# Steps for Episode 2: 9
# Steps for Episode 3: 8
# Steps for Episode 4: 11
# Steps for Episode 5: 10
# Steps for Episode 6: 9
# Steps for Episode 7: 9
# Steps for Episode 8: 10
# Steps for Episode 9: 8
# Steps for Episode 10: 10
# Steps for Episode 11: 10
# Steps for Episode 12: 11
# Steps for Episode 13: 9
# Steps for Episode 14: 10
# Steps for Episode 15: 8
# Steps for Episode 16: 10
# Steps for Episode 17: 10
# Steps for Episode 18: 10
# Steps for Episode 19: 9
# Steps for Episode 20: 10
# Steps for Episode 21: 8
# Steps for Episode 22: 9
# Steps for Episode 23: 9
# Steps for Episode 24: 9
# Steps for Episode 25: 11
# Steps for Episode 26: 13
# Steps for Episode 27: 10
# Steps for Episode 28: 10
# Steps for Episode 29: 10
# Steps for Episode 30: 13
# Steps for Episode 31: 9
# Steps for Episode 32: 9
# Steps for Episode 33: 9
# Steps for Episode 34: 9
# Steps for Episode 35: 9
# Steps for Episode 36: 10
# Steps for Episode 37: 9
# S