In [1]:
import os
import time
from functools import partial

import numpy as np
import numpy.random as rnd
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
# Execute the companion notebook in this kernel's namespace.
# NOTE(review): Environment, DDQNAgent and Preprocessor are used below but are
# neither defined nor imported in this notebook, so they presumably come from
# Dino_Classes.ipynb - confirm.
%run Dino_Classes.ipynb

Constants

In [3]:
# Both values are handed to DDQNAgent and Preprocessor in main().
# NOTE(review): presumably the size of the preprocessed game frame - confirm.
width, height = 80, 80

# Not referenced by any cell visible in this notebook.
len_epoch = 10 ** 8

# Size of the action set exposed by the Environment class.
num_actions = len(Environment.actions)

Args

In [4]:
# Checkpoint suffix used by play(): None loads "rex.ckpt", a value N loads
# "rex.ckpt-N".
checkpoint_nr = None

# Periods, counted in epochs, used by train(): save a checkpoint, refresh the
# game, and update the target network.
checkpoint_hz, refresh_hz, update_target_network_hz = 120, 100, 20

# Mode switch read by main(): True trains a new model, False plays a saved one.
training = True
# Not referenced by any cell visible in this notebook.
visualize = True

# Directory main() works with. Swap in the commented line to point at an
# already trained model instead of a fresh training directory.
# log_dir = './trained-model/'
log_dir = './logs/'

In [5]:
def play(agent, env, preprocessor):
    """Play game after game with a previously saved model, without exploring.

    The checkpoint name is built from the module-level ``log_dir`` and
    ``checkpoint_nr``: ``<log_dir>/rex.ckpt``, with ``-<checkpoint_nr>``
    appended when ``checkpoint_nr`` is not None. After loading it, the agent's
    ``explore_prob`` is set to 0.0 and games are played in an endless loop,
    printing the name of every chosen action and "Crash" at the end of each
    game.

    Args:
        agent: Object providing ``load(name)``, ``act(state)`` (returning a
            pair whose first item is the action index) and a writable
            ``explore_prob`` attribute.
        env: Object providing ``start_game()`` and ``do_action(action)``, both
            returning ``(frame, reward, crashed)``, and an ``actions``
            sequence indexed by action.
        preprocessor: Object providing ``process(frame)``,
            ``get_initial_state(frame)`` and ``get_updated_state(frame)``.

    Returns:
        Never returns normally; the loop runs until an exception (for example
        KeyboardInterrupt) propagates out.
    """
    # os.path.join gives the same result as plain concatenation when log_dir
    # already ends with a separator, and still builds a valid path when it
    # does not.
    name = os.path.join(log_dir, "rex.ckpt")
    if checkpoint_nr is not None:
        name = name + "-" + str(checkpoint_nr)

    agent.load(name)
    # Always use the action returned by the network path of agent.act.
    agent.explore_prob = 0.0

    while True:
        frame, _, crashed = env.start_game()
        frame = preprocessor.process(frame)
        state = preprocessor.get_initial_state(frame)

        while not crashed:
            action, _ = agent.act(state)
            # The reward is irrelevant when only playing.
            next_frame, _, crashed = env.do_action(action)
            print("action: {}".format(env.actions[action]))
            next_frame = preprocessor.process(next_frame)
            state = preprocessor.get_updated_state(next_frame)

        print("Crash")

In [6]:
def train(agent, env, preprocessor):
    """Train a new model, one game per epoch, in an endless loop.

    Each epoch plays a single game until the environment reports a crash,
    storing every transition through ``agent.remember``. After the game the
    agent replays its memory and lowers its exploration. Periodic work is
    driven by the module-level ``update_target_network_hz``, ``checkpoint_hz``
    and ``refresh_hz`` settings (all counted in epochs).

    Args:
        agent: Object providing ``update_target_network()``, ``act(state)``
            (returning ``(action, explored)``), ``remember(...)``,
            ``replay(epoch)``, ``explore_less()`` and ``save(epoch)``.
        env: Object providing ``start_game()`` and ``do_action(action)``, both
            returning ``(frame, reward, crashed)``, and ``refresh_game()``.
        preprocessor: Object providing ``process(frame)``,
            ``get_initial_state(frame)`` and ``get_updated_state(frame)``.

    Returns:
        Never returns normally; the loop runs until an exception (for example
        KeyboardInterrupt) propagates out.
    """
    agent.update_target_network()

    epoch = 0
    while True:
        epoch += 1
        print("\nEpoch: ", epoch)

        frame, _, crashed = env.start_game()
        frame = preprocessor.process(frame)
        state = preprocessor.get_initial_state(frame)

        while not crashed:
            action, explored = agent.act(state)
            next_frame, reward, crashed = env.do_action(action)
            # A trailing '*' marks an action that was chosen randomly rather
            # than produced by the network.
            action_str = Environment.actions[action] + ("*" if explored else "")
            print("action: {}\t crashed: {}".format(action_str, crashed))
            next_frame = preprocessor.process(next_frame)
            next_state = preprocessor.get_updated_state(next_frame)
            agent.remember(state, action, reward, next_state, crashed)

            state = next_state

        agent.replay(epoch)
        agent.explore_less()

        if epoch % update_target_network_hz == 0:
            agent.update_target_network()

        if epoch % checkpoint_hz == 0:
            agent.save(epoch)
            # NOTE(review): presumably gives the checkpoint write time to
            # finish before the next game starts - confirm.
            time.sleep(5)

        if epoch % refresh_hz == 0:
            env.refresh_game()


In [7]:
def main(log_dir):
    """Validate the working directory, build the components and start the run.

    The mode is taken from the module-level ``training`` flag:

    * training: ``log_dir`` must not exist yet; it is created and ``train`` is
      started.
    * not training: ``log_dir`` must already exist and ``play`` is started.

    Args:
        log_dir: Directory path; a trailing '/' is appended when missing
            before it is checked and passed to ``DDQNAgent``.

    Raises:
        SystemExit: With code 1, after printing a message, when the directory
            state does not match the selected mode.
    """
    log_dir = log_dir if log_dir.endswith('/') else log_dir + '/'
    log_dir_exists = os.path.exists(log_dir)

    # Raise SystemExit directly: inside a notebook the name `exit` is IPython's
    # exit helper rather than the plain-Python builtin, so calling it does not
    # cleanly abort the cell.
    if training and log_dir_exists:
        print("PATH FOR STORING RESULTS ALREADY EXISTS(Results will be overwritten).")
        raise SystemExit(1)
    if not training and not log_dir_exists:
        print("PATH DOES NOT EXISTS. TRAINED MODEL NOT FOUND.")
        raise SystemExit(1)
    if training:
        os.makedirs(log_dir)

    session = tf.Session()
    try:
        env = Environment("127.0.0.1", 9090)
        agent = DDQNAgent(session, num_actions, width, height, log_dir, None)
        preprocessor = Preprocessor(width, height)

        if training:
            train(agent, env, preprocessor)
        else:
            play(agent, env, preprocessor)
    finally:
        # train() and play() loop forever and only leave via an exception
        # (typically KeyboardInterrupt), so release the session here.
        session.close()

In [8]:
# Entry point: starts training or playing depending on the `training` flag.
# Both modes loop forever, so this cell runs until the kernel is interrupted.
main(log_dir)
# NOTE(review): shell command kept as a reminder; presumably an HTTP server has
# to be started separately with it - confirm what it serves:
#python3 -m http.server  8000

INFO:tensorflow:Restoring parameters from ./trained-model/rex.ckpt
Model restored: ./trained-model/rex.ckpt


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FO

action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: DOWN
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: UP
action: FORTH
action: UP
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: FORTH
action: UP
action: UP
action: F

KeyboardInterrupt: 