# Load and test trained model

## Load libraries

In [1]:
import numpy as np
import torch

import torch.nn as nn
from enduro_lstm import *

In [2]:
def conf_cuda(use_cuda):
    
    if use_cuda:
        
        # torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
        is_cuda = torch.cuda.is_available()

        # If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
        if is_cuda:
            device = torch.device("cuda")
            print("GPU is available")
        else:
            device = torch.device("cpu")
            print("GPU not available, CPU used")
    else:
        device = torch.device("cpu")
        print("Selected CPU")
    return device

In [3]:
device = conf_cuda(False)

Selected CPU


In [4]:
device

device(type='cpu')

In [5]:
device.type

'cpu'

## Set configurations

In [6]:
import os
dir_path = "models/CNN_sem_bnorm_m1_f1to1030_epoch5000_H100" + "/"
arr = os.listdir(f'./{dir_path}')
for i in range(len(arr)):
    print(arr[i])

CNN_sem_bnorm_m1_f1to1030_epoch5000_H100
loss_file.txt
train_loss_arr.npz


In [7]:
model_path = dir_path + "CNN_sem_bnorm_m1_f1to1030_epoch5000_H100"

In [8]:
start_match = 1
end_match = 1

hidden_neurons = 100
zigzag = False
is_softmax = True

start_frame = 1
end_frame = 1030

In [9]:
data_path = r"../1-generate/data/"

use_cuda = False

In [10]:
if zigzag:
    output_size = 2
else:
    output_size = 9

## Load trained model

In [11]:
%load_ext autoreload
%autoreload 2

In [12]:
from setup_model import *
from setup_model_types import *

In [13]:
def load_checkpoint(model, filename='checkpoint.pth.tar'):
    
    print("=> loading checkpoint '{}'".format(filename))
    checkpoint = torch.load(filename, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])

    return model, checkpoint['optimizer']['state'][0]['step'], checkpoint['losslogger']

In [14]:
model = CNNLSTMModel(device=device, input_size=100, output_size=output_size, hidden_dim=hidden_neurons, n_layers=1)
model, last_epoch, last_logger = load_checkpoint(model, model_path)

=> loading checkpoint 'models/CNN_sem_bnorm_m1_f1to1030_epoch5000_H100/CNN_sem_bnorm_m1_f1to1030_epoch5000_H100'


In [15]:
ACTIONS_LIST = get_actions_list(zigzag=zigzag)

In [16]:
num_of_frames_arr = []
frames_arr = []
actions_arr = []

start_frame = 1
end_frame = 1000

for m in range(start_match, end_match + 1):
    
    num_of_frames, frames, actions, rewards, lifes = load_npz(data_path, m)
    frames = frames[start_frame - 1:end_frame, 30:130, 10:110]
    actions = actions[start_frame - 1:end_frame]
    
    action_one_hot = [prepare_action_data(i, ACTIONS_LIST) for i in actions]
    actions = np.array(action_one_hot)
    actions = actions.reshape(len(actions), -1)
    
    frames_arr.append(frames)
    actions_arr.append(actions)
    num_of_frames_arr.append(end_frame - start_frame + 1) 

X_train = np.array(frames_arr)/255
Y_train = np.array(actions_arr)
num_of_frames_arr = np.array(num_of_frames_arr)

X_train = torch.tensor(X_train).float()
Y_train = torch.tensor(Y_train).float()

Successfully loaded NPZ.


In [17]:
X_train = X_train.reshape(1, end_frame-start_frame+1, -1)

In [18]:
X_train.shape, Y_train.shape

(torch.Size([1, 1000, 10000]), torch.Size([1, 1000, 9]))

## Prepare cell with trained model 

In [19]:
conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(5,5))
activation = nn.ReLU()
# bnorm = nn.BatchNorm2d(num_features=16)
pool = nn.MaxPool2d(kernel_size=(2,2))

lstmcell = nn.LSTMCell(48*48*16, hidden_neurons)
linear = nn.Linear(hidden_neurons, output_size)
output = nn.Softmax()

In [20]:
output

Softmax(dim=None)

In [21]:
lstmcell.weight_ih = model.lstm.weight_ih_l0
lstmcell.weight_hh = model.lstm.weight_hh_l0
lstmcell.bias_hh = model.lstm.bias_hh_l0
lstmcell.bias_ih = model.lstm.bias_ih_l0
linear.weight = model.fc.weight
linear.bias = model.fc.bias

In [22]:
hx = torch.zeros(1, hidden_neurons)
cx = torch.zeros(1, hidden_neurons)

In [23]:
print(model.lstm.weight_ih_l0.shape)
print(model.lstm.weight_hh_l0.shape)
print(model.lstm.bias_ih_l0.shape)
print(model.lstm.bias_hh_l0.shape)
print(model.fc.weight.shape)
print(model.fc.bias.shape)

torch.Size([400, 36864])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([9, 100])
torch.Size([9])


## Testing outputs of model

In [24]:
X_train = torch.tensor(X_train).float()
Y_train = torch.tensor(Y_train).float()

  X_train = torch.tensor(X_train).float()
  Y_train = torch.tensor(Y_train).float()


In [25]:
hx = torch.zeros(1, hidden_neurons)
out_arr = []
for i in range(10):
    step_input = X_train[0][i]
    step_input = step_input.view(1, 1, 100, 100)
    step_input = model.conv1(step_input)
    print("conv:", step_input)
    step_input = model.activation(step_input)
    print("activation:", step_input)
    #step_input = model.bnorm(step_input)
    #print("bnorm:", step_input)
    step_input = model.pool(step_input)
    print("pool:", step_input)
    
    step_input = step_input.reshape(1, -1)
    hx, cx = lstmcell(step_input, (hx,cx))
    out = linear(hx)
    out = output(out)
    out_arr.append(out)

conv: tensor([[[[0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          ...,
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808]],

         [[0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          ...,
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403]],

         [[0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  

  out = output(out)


conv: tensor([[[[0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          ...,
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808]],

         [[0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          ...,
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403]],

         [[0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  

pool: tensor([[[[0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          ...,
          [0.1808, 0.1808, 0.3279,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.3279,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.3279, 0.1727,  ..., 0.1808, 0.1808, 0.1808]],

         [[0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          ...,
          [0.1403, 0.1403, 0.4159,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.4159,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.4159, 0.2305,  ..., 0.1403, 0.1403, 0.1403]],

         [[0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  

conv: tensor([[[[0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          ...,
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808],
          [0.1808, 0.1808, 0.1808,  ..., 0.1808, 0.1808, 0.1808]],

         [[0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          ...,
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403],
          [0.1403, 0.1403, 0.1403,  ..., 0.1403, 0.1403, 0.1403]],

         [[0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  ..., 0.2280, 0.2280, 0.2280],
          [0.2280, 0.2280, 0.2280,  

In [26]:
out_arr[:10]

[tensor([[9.9182e-01, 4.5343e-03, 1.7647e-04, 9.7072e-04, 2.7072e-04, 5.2381e-05,
          1.8754e-05, 1.2401e-03, 9.1518e-04]], grad_fn=<SoftmaxBackward>),
 tensor([[9.9883e-01, 6.2321e-04, 2.6093e-05, 1.4674e-04, 2.7098e-05, 2.8344e-06,
          1.3074e-06, 2.8864e-04, 5.7078e-05]], grad_fn=<SoftmaxBackward>),
 tensor([[9.9890e-01, 5.7025e-04, 3.6225e-05, 7.1913e-05, 2.1959e-05, 1.5834e-06,
          1.2475e-06, 3.7157e-04, 2.6882e-05]], grad_fn=<SoftmaxBackward>),
 tensor([[9.9730e-01, 1.7682e-03, 7.8330e-05, 7.6111e-05, 4.3274e-05, 2.3565e-06,
          2.2044e-06, 7.0234e-04, 2.7827e-05]], grad_fn=<SoftmaxBackward>),
 tensor([[9.9542e-01, 3.3559e-03, 1.1437e-04, 8.0090e-05, 6.0328e-05, 3.1790e-06,
          3.2225e-06, 9.3030e-04, 3.4194e-05]], grad_fn=<SoftmaxBackward>),
 tensor([[9.9719e-01, 1.7615e-03, 9.1617e-05, 5.3741e-05, 4.7000e-05, 2.0609e-06,
          2.1640e-06, 8.3622e-04, 1.8665e-05]], grad_fn=<SoftmaxBackward>),
 tensor([[9.9764e-01, 1.3652e-03, 8.2940e-05, 4.8146

In [27]:
model.eval()

CNNLSTMModel(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (activation): ReLU()
  (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (lstm): LSTM(36864, 100, batch_first=True)
  (fc): Linear(in_features=100, out_features=9, bias=True)
  (out): Softmax(dim=None)
)

In [28]:
out2 = model(X_train.view(1, 1000, 100, 100), torch.FloatTensor([1000]))

  out = self.out(out)


In [29]:
out2[:10]

tensor([[9.9182e-01, 4.5343e-03, 1.7647e-04, 9.7072e-04, 2.7072e-04, 5.2382e-05,
         1.8754e-05, 1.2401e-03, 9.1518e-04],
        [9.9883e-01, 6.2321e-04, 2.6093e-05, 1.4674e-04, 2.7098e-05, 2.8344e-06,
         1.3074e-06, 2.8864e-04, 5.7078e-05],
        [9.9890e-01, 5.7025e-04, 3.6225e-05, 7.1913e-05, 2.1959e-05, 1.5834e-06,
         1.2475e-06, 3.7157e-04, 2.6882e-05],
        [9.9730e-01, 1.7682e-03, 7.8330e-05, 7.6111e-05, 4.3274e-05, 2.3565e-06,
         2.2044e-06, 7.0234e-04, 2.7827e-05],
        [9.9542e-01, 3.3559e-03, 1.1437e-04, 8.0090e-05, 6.0328e-05, 3.1790e-06,
         3.2225e-06, 9.3030e-04, 3.4194e-05],
        [9.9719e-01, 1.7615e-03, 9.1617e-05, 5.3741e-05, 4.7000e-05, 2.0609e-06,
         2.1640e-06, 8.3622e-04, 1.8665e-05],
        [9.9764e-01, 1.3652e-03, 8.2940e-05, 4.8146e-05, 4.2503e-05, 1.7625e-06,
         1.8956e-06, 8.0289e-04, 1.5518e-05],
        [9.9698e-01, 1.9197e-03, 9.5038e-05, 5.9038e-05, 4.8941e-05, 2.2260e-06,
         2.4208e-06, 8.7454e-0

In [30]:
Y_train = Y_train.reshape(-1, len(ACTIONS_LIST))

In [31]:
acertou = 0
errou = 0
for i in range(1000):
    if torch.argmax(Y_train[i]) == torch.argmax(out_arr[i]):
        acertou += 1
    else:
        errou += 1
        
print(acertou)
print(errou)

IndexError: list index out of range

In [None]:
acertou/1000

## Play Gym Enduro

In [32]:
import gym
import time
from PIL import Image

In [33]:
if zigzag:
        
    ACTIONS = {
        "right": 2,
        "left": 3,
    }

else:

    ACTIONS = {
        "noop": 0,
        "accelerate": 1,
        "right": 2,
        "left": 3,
        "break": 4,
        "right_break": 5,
        "left_break": 6,
        "right_accelerate": 7,
        "left_accelerate": 8,
    }

In [34]:
y_min, y_max, x_min, x_max = 30, 130, 10, 110
shape_of_single_frame = (1, (y_max-y_min),(x_max-x_min))

In [35]:
sleep_time = 0.05

In [35]:
env = gym.make("Enduro-v0")
frame = env.reset()
reward, action, done, info = 0, 0, False, {'ale.lives': 0}

hx = torch.zeros(1, hidden_neurons)
cx = torch.zeros(1, hidden_neurons)

env.render()
time.sleep(1)

for _ in range(1000):
    
    time.sleep(sleep_time)
    env.render()
    
    frame = frame[y_min:y_max, x_min:x_max]

    frame = Image.fromarray(frame)
    frame = frame.convert("L")
    
    frame = np.asarray(frame)
    frame = torch.tensor(frame)/255
    
    frame = frame.view(1, 1, 100, 100)
    frame = model.pool(model.activation(model.conv1(frame)))
    
    frame = frame.reshape(1, -1)
    hx, cx = lstmcell(frame, (hx, cx))
    out = linear(hx)
    action = output(out)
    
    action = list(ACTIONS.values())[torch.argmax(action, axis=1)]
    print(action)
    frame, reward, done, info = env.step(action)

  action = output(out)


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


KeyboardInterrupt: 

Error in callback <bound method AutoreloadMagics.post_execute_hook of <autoreload.AutoreloadMagics object at 0x7f7ca95c2640>> (for post_execute):



KeyboardInterrupt



KeyboardInterrupt: 