By: Adrian Ruvalcaba, Donatas Tamosauskas, Zaid Al Nouman

# Testing Implementation

In [2]:
import random
import pickle

import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import numpy as np
from torchsummary import summary

from dumbbells.dataset import *
from dumbbells.agent import *
from dumbbells.q_functions import *
from dumbbells.models import *
from dumbbells.trainer import *
from Trained_weights import *

%load_ext autoreload
%autoreload 2

### Let's create an agent with no trained weights

In [9]:
# --- Configuration for the fully connected DQN agent ---
ARCH = DeepFcDqn          # network class instantiated below
ENV = "CartPole-v1"       # environment identifier handed to Dataset
MEMORY_BUFFER = 1024      # second positional argument of Dataset
GAMMA = 0.999             # passed to DnnQFunction together with the network
# Forwarded to Agent in this order; presumably the epsilon-greedy
# exploration schedule -- TODO confirm against dumbbells.agent.
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200

# The dataset is built here only to read its state/action/reward spaces.
dataset = Dataset(ENV, MEMORY_BUFFER)

# The network constructor receives the first state-space dimension and the
# number of actions.
n_state_inputs = dataset.state_space.shape[0]
n_actions = dataset.action_space.n
arch = ARCH(n_state_inputs, n_actions)

q_func = DnnQFunction(arch, GAMMA)

# No weights are loaded at this point.
agent_args = (
    q_func,
    dataset.action_space,
    dataset.state_space,
    dataset.reward_space,
    EPS_START,
    EPS_END,
    EPS_DECAY,
)
agent = Agent(*agent_args)

### Now let's load pre-trained weights for our Fully Connected Neural Network!

In [11]:
# Checkpoint for the fully connected model, loaded through `agent.q`.
fnn_weights_path = "Trained_weights/FNNModel/model/weights190014-11-34"
agent.q.load_weights(fnn_weights_path)

Now let's see how our model does...

In [19]:
# Roll the current `agent` out for a fixed budget of 1000 loop iterations in
# a separate Dataset created with produce_gif=True.
data = Dataset(ENV, 256, produce_gif=True)

try:
    done = False
    state = data.reset()

    for _ in range(1000):
        if done:
            # Episode ended: start a new one. The reset consumes this
            # iteration, so no action is taken on it.
            state = data.reset()
            done = False
            continue
        # `agent.action` is given the state with an added leading dimension;
        # element [0][0] of its result is converted to a Python scalar.
        chosen = agent.action(state.unsqueeze(0))
        action = chosen[0][0].item()
        state, reward, done = data.step(action)
finally:
    # Close the environment even if the rollout raises part-way through,
    # so a failed run does not leave it open.
    data.env.close()
# Uncomment the line below to obtain a gif of the agent playing
# data.save_frames_as_gif(filename="FullyConnectedNNAgent.gif")

### Next, let's load pre-trained weights for our Convolutional Neural Network

In [25]:
# --- Configuration for the CNN DQN agent ---
# NOTE: this cell rebinds ARCH, EPS_DECAY, dataset, arch, q_func and agent,
# replacing the fully connected agent created earlier in the notebook.
ARCH = CnnDqn             # network class instantiated below
ENV = "CartPole-v1"       # environment identifier handed to HistoryDataset
MEMORY_BUFFER = 1024      # second positional argument of HistoryDataset
GAMMA = 0.999             # passed to DnnQFunction together with the network
# Forwarded to Agent in this order; presumably the epsilon-greedy
# exploration schedule -- TODO confirm against dumbbells.agent.
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 100

# The dataset is built here only to read its state/action/reward spaces.
dataset = HistoryDataset(ENV, MEMORY_BUFFER)

# The network constructor receives the first state-space dimension and the
# number of actions.
n_state_inputs = dataset.state_space.shape[0]
n_actions = dataset.action_space.n
arch = ARCH(n_state_inputs, n_actions)

q_func = DnnQFunction(arch, GAMMA)

agent_args = (
    q_func,
    dataset.action_space,
    dataset.state_space,
    dataset.reward_space,
    EPS_START,
    EPS_END,
    EPS_DECAY,
)
agent = Agent(*agent_args)

# Checkpoint for the CNN model, loaded through `agent.q`.
cnn_weights_path = "Trained_weights/CNNModel/model/weights190000-20-17"
agent.q.load_weights(cnn_weights_path)

Now let's see how the CNN performs in comparison...

In [27]:
# Roll the current `agent` out for a fixed budget of 1000 loop iterations in
# a separate HistoryDataset created with produce_gif=True.
data = HistoryDataset(ENV, 256, produce_gif=True)

try:
    done = False
    state = data.reset()

    for _ in range(1000):
        if done:
            # Episode ended: start a new one. The reset consumes this
            # iteration, so no action is taken on it.
            state = data.reset()
            done = False
            continue
        # `agent.action` is given the state with an added leading dimension;
        # element [0][0] of its result is converted to a Python scalar.
        chosen = agent.action(state.unsqueeze(0))
        action = chosen[0][0].item()
        state, reward, done = data.step(action)
finally:
    # Close the environment even if the rollout raises part-way through,
    # so a failed run does not leave it open.
    data.env.close()
# Uncomment the line below to obtain a gif of the agent playing
# data.save_frames_as_gif(filename="CNNAgent.gif")