# Load and test trained model

## Load libraries

In [1]:
import numpy as np
import torch
import torch.nn as nn
from enduro_lstm import *

In [2]:
device = conf_cuda(True)

GPU not available, CPU used


In [3]:
device

device(type='cpu')

In [5]:
device.type

'cpu'

## Set configurations

In [6]:
data_path = r"../1-generate/data/"
match = 45
use_cuda = False
zigzag = False

## Load trained model

In [93]:
newpath = 'models/play_m45to46_f1to1000_epoch10000_H500/' + 'play_m45to46_f1to1000_epoch10000_H500'

In [94]:
model = Model(device=device, input_size=20400, output_size=9, hidden_dim=500, n_layers=1)
model.load_state_dict(torch.load(newpath, map_location=device))
model.eval()

Model(
  (lstm): LSTM(20400, 500, batch_first=True)
  (fc): Linear(in_features=500, out_features=9, bias=True)
  (sigmoid): Sigmoid()
)

## Prepare cell with trained model 

In [97]:
lstmcell = nn.LSTMCell(20400, 500)
linear = nn.Linear(500, 9)
sigmoid = nn.Sigmoid()

In [98]:
lstmcell.weight_ih = model.lstm.weight_ih_l0
lstmcell.weight_hh = model.lstm.weight_hh_l0
lstmcell.bias_hh = model.lstm.bias_hh_l0
lstmcell.bias_ih = model.lstm.bias_ih_l0

In [99]:
linear.weight = model.fc.weight
linear.bias = model.fc.bias

In [100]:
hx = torch.zeros(1, 500)
cx = torch.zeros(1, 500)

In [101]:
print(model.lstm.weight_ih_l0.shape)
print(model.lstm.weight_hh_l0.shape)
print(model.lstm.bias_ih_l0.shape)
print(model.lstm.bias_hh_l0.shape)
print(model.fc.weight.shape)
print(model.fc.bias.shape)

torch.Size([2000, 20400])
torch.Size([2000, 500])
torch.Size([2000])
torch.Size([2000])
torch.Size([9, 500])
torch.Size([9])


## Testing outputs of model

In [102]:
ACTIONS_LIST = get_actions_list(zigzag=zigzag)

In [103]:
num_of_frames, frames, actions, rewards, lifes = load_npz(data_path, match)

Successfully loaded NPZ.


In [104]:
X_train = frames.reshape(1, len(frames) ,-1)/255

In [105]:
action_one_hot = [prepare_action_data(i, ACTIONS_LIST) for i in actions]
Y_train = np.array(action_one_hot)
Y_train = Y_train.reshape(1, len(Y_train), -1)

In [106]:
X_train = torch.tensor(X_train).float()
Y_train = torch.tensor(Y_train).float()

In [107]:
output = []
for i in range(120):
    step_input = X_train[0][i]
    step_input = step_input.reshape(1, -1)
    hx, cx = lstmcell(step_input, (hx, cx))
    out = linear(hx)
    out = sigmoid(out)
    output.append(out)

In [49]:
step_input.shape

torch.Size([1, 20400])

In [45]:
for i in range(120):
    print(str(i) + ' - ' + str(output[i]))

0 - tensor([[9.8464e-01, 6.3757e-03, 3.1810e-03, 7.0127e-03, 8.4442e-03, 6.9136e-04,
         1.1428e-03, 2.6194e-04, 3.2737e-03]], grad_fn=<SigmoidBackward>)
1 - tensor([[9.9504e-01, 1.0786e-03, 1.0695e-03, 2.8307e-03, 3.1207e-03, 1.0626e-04,
         1.4665e-04, 4.4985e-05, 1.0269e-03]], grad_fn=<SigmoidBackward>)
2 - tensor([[9.9508e-01, 1.3055e-03, 6.1178e-04, 1.7390e-03, 2.3334e-03, 7.9974e-05,
         1.5671e-04, 2.6572e-05, 6.4801e-04]], grad_fn=<SigmoidBackward>)
3 - tensor([[9.9379e-01, 2.0991e-03, 4.4174e-04, 1.3332e-03, 2.0393e-03, 7.7213e-05,
         1.8832e-04, 2.0792e-05, 4.3777e-04]], grad_fn=<SigmoidBackward>)
4 - tensor([[9.9594e-01, 8.1084e-04, 8.6053e-04, 2.2886e-03, 2.5752e-03, 7.6249e-05,
         1.0747e-04, 3.1953e-05, 8.0991e-04]], grad_fn=<SigmoidBackward>)
5 - tensor([[9.9597e-01, 8.0191e-04, 8.6148e-04, 2.3058e-03, 2.5583e-03, 7.6190e-05,
         1.0689e-04, 3.1950e-05, 8.1505e-04]], grad_fn=<SigmoidBackward>)
6 - tensor([[9.9596e-01, 7.9574e-04, 8.6478e-0

## Play Gym Enduro

In [108]:
import gym
import time
from PIL import Image

In [109]:
if zigzag:
        
    ACTIONS = {
        "right": 2,
        "left": 3,
    }

else:

    ACTIONS = {
        "noop": 0,
        "accelerate": 1,
        "right": 2,
        "left": 3,
        "break": 4,
        "right_break": 5,
        "left_break": 6,
        "right_accelerate": 7,
        "left_accelerate": 8,
    }

In [110]:
y_min, y_max, x_min, x_max = 25, 195, 20, 140
shape_of_single_frame = (1, (y_max-y_min),(x_max-x_min))

In [111]:
sleep_time = 0.05

In [112]:
env = gym.make("Enduro-v0")
frame = env.reset()
reward, action, done, info = 0, 0, False, {'ale.lives': 0}

hx = torch.zeros(1, 500)
cx = torch.zeros(1, 500)

while(True):
    
    time.sleep(sleep_time)
    env.render()
    
    frame = frame[y_min:y_max, x_min:x_max]

    frame = Image.fromarray(frame)
    frame = frame.convert("L")
    
    frame = np.asarray(frame)
    frame = frame.reshape(1, -1)
    frame = torch.tensor(frame)/255
    
    hx, cx = lstmcell(frame, (hx, cx))
    out = linear(hx)
    action = sigmoid(out)
    
    action = list(ACTIONS.values())[torch.argmax(action, axis=1)]
    frame, reward, done, info = env.step(action)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/ryo/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-112-3251920a06ec>", line 10, in <module>
    time.sleep(sleep_time)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ryo/.local/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2061, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ryo/.local/lib/python3.8/site-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/home/ryo/.local/lib/python3.8/site-packages/IPython/core

TypeError: object of type 'NoneType' has no len()