In [1]:
import gym
import random
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
class DQN(nn.Module):
    """Small fully connected network mapping a state vector to per-action values.

    Architecture: ``state_size -> 24 -> 24 -> action_size`` with ReLU after
    each of the two hidden layers and a linear output layer.

    Args:
        state_size: Number of input features (length of the state vector).
        action_size: Number of outputs (one value per discrete action).
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        # Neural Net Layers
        self.fc1 = nn.Linear(state_size, 24)
        self.fc2 = nn.Linear(24, 24)
        self.out = nn.Linear(24, action_size)
        # Start the output weights in a narrow uniform range around zero so
        # the initial outputs are small for every action.
        nn.init.uniform_(self.out.weight, -1e-3, 1e-3)

    @property
    def device(self):
        """Device on which this module's parameters currently live.

        Derived from the parameters instead of being copied from a
        module-level ``device`` variable: the class can therefore be
        instantiated before (or without) such a global being defined, and the
        value stays correct after ``.to(...)`` moves the module.
        """
        return next(self.parameters()).device

    def forward(self, x):
        """Return the network output for ``x``.

        Args:
            x: Float tensor whose last dimension equals ``state_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``action_size``.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        q = self.out(x)
        return q

In [3]:
# Evaluation agent: loads trained DQN weights and selects actions greedily
class DQNAgent:
    """Inference-only agent that acts greedily with a previously trained DQN.

    Args:
        state_size: Length of the state vector fed to the network.
        action_size: Number of discrete actions the network scores.
        device: ``torch.device`` (or device string) to run the network on.
        model_path: Path of the saved ``state_dict`` to load. Defaults to the
            checkpoint location this notebook has always used.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
        RuntimeError: If the checkpoint does not match the network layout.
    """

    def __init__(self, state_size, action_size, device,
                 model_path='./save_model/cartpole_dqn_TORCH'):
        self.state_size = state_size
        self.action_size = action_size
        self.device = device

        # Neural Network Architecture
        self.model = DQN(self.state_size, self.action_size).to(self.device)
        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # machine (and vice versa) instead of failing during deserialization.
        state_dict = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(state_dict)
        # The agent never trains, so keep the network in evaluation mode.
        self.model.eval()

    def get_action(self, state):
        """Return the greedy action for a single state.

        Args:
            state: One state as a 1-D sequence or array of ``state_size``
                numbers.

        Returns:
            ``torch.LongTensor`` of shape ``(1, 1)`` holding the index of the
            highest-valued action; call ``.item()`` to get a Python int.
        """
        # as_tensor converts an array directly instead of going through a
        # Python list of arrays; unsqueeze adds the batch dimension.
        state = torch.as_tensor(
            state, dtype=torch.float32, device=self.device
        ).unsqueeze(0)
        # Pure exploitation: no gradients are needed for action selection.
        with torch.no_grad():
            q_values = self.model(state)
        return q_values.argmax(dim=1, keepdim=True)

In [4]:
%matplotlib tk

ENV_NAME = 'CartPole-v1'
EPISODES = 5
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("DEVICE : ", device)

if __name__ == "__main__":
    # Roll out the trained agent for EPISODES rendered episodes and report
    # the return of each episode plus the average over all of them.
    env = gym.make(ENV_NAME)
    # Per-episode returns; a plain list keeps appends O(1) and the type stable.
    scores = []
    try:
        state_size = env.observation_space.shape[0]
        action_size = env.action_space.n
        print('Env Name : ',ENV_NAME)
        print('States {}, Actions {}'
                .format(state_size, action_size))

        agent = DQNAgent(state_size, action_size, device)

        for e in range(EPISODES):
            done = False
            score = 0

            state = env.reset()

            while not done:
                env.render()

                # Interact with env.
                action = agent.get_action(state)
                next_state, reward, done, info = env.step(action.item())
                state = next_state

                # Accumulate the episode return.
                score += reward

            # The loop above only exits once the episode has terminated.
            print('epi: {:3d} | score {:3.2f}'.format(e+1, score))
            scores.append(score)
    finally:
        # Always release the environment (and its render window), even when
        # an episode is interrupted by an exception.
        env.close()

    # Skip the average when no episode ran, so np.mean never sees an empty list.
    if scores:
        print('Avg. score {:4.2f}'.format(np.mean(scores)))


DEVICE :  cuda
Env Name :  CartPole-v1
States 4, Actions 2
epi:   1 | score 500.00
epi:   2 | score 500.00
epi:   3 | score 500.00
epi:   4 | score 500.00
epi:   5 | score 500.00
epi:   6 | score 500.00
epi:   7 | score 500.00
epi:   8 | score 500.00
epi:   9 | score 500.00
epi:  10 | score 500.00
Avg. score 500.00
