In [1]:
from keras.optimizers import RMSprop
import keras
import numpy as np
from model import get_predefined_model
from ddql import DDQNNGame
from preprocessing import scale_color, wrap_deepmind
import gym
import random
import matplotlib.pyplot as plt
import os
import tensorflow as tf
#!pip install gym[atari]

Using TensorFlow backend.


In [2]:
# %% Set env
WRAPPER = "DM" 
env = gym.make('SpaceInvaders-v0')
path = os.getcwd()

if WRAPPER == "DM":
    env = wrap_deepmind(env, frame_stack=True)
    INPUT_SHAPE = (84, 84, 4)
else:
    from gym.wrappers import AtariPreprocessing
    env = AtariPreprocessing(env)
    INPUT_SHAPE = (84, 84, 1)

# requirements:
# Open AI Gym (pip install gym[all])
# OpenCV
# JSAnimation - Only for Jupyter Display
# ImageIO 

In [3]:
# %% Instantiate model
# little_a

cnn = get_predefined_model("little_a", INPUT_SHAPE)

Instructions for updating:
Colocations handled automatically by placer.
6
added conv
added conv
added maxpool
added flatten
added dense
added dense
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 41, 41, 16)        1040      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 39, 39, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 19, 19, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 11552)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1478784   
Total params: 1,484,464
Trainable params: 1,484,464
Non-trainable params: 0
__________________________________________________

In [11]:
# %% Setup

# /home/usuario/Documentos/github/reinforcement-learning-ic/
# sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
path = os.getcwd()
paths = {"model":path+"/model/model.h5"}
#assert os.path.isdir(path+"/atari/model/"), "Corregir el path del modelo" 

In [12]:
exploration_max = 1.0
exploration_min = 0.1
exploration_steps = 850000
exploration_decay = (exploration_max-exploration_min)/exploration_steps


params = {"gamma":0.99, "memory_size": 900000, "batch_size": 16,
            "training_frequency": 4, "target_network_update_frequency": 40000,
            "model_persistence_update_frequency": 10000,
            "replay_start_size": 500 ,"exploration_test": 0.02,
            "exploration_max": exploration_max, 
            "exploration_min": exploration_min,
            "exploration_steps": exploration_steps, 
            "exploration_decay": exploration_decay}

# Para poder estudiar si se estaba entrenando tuve que cambiar dos parametros
# batch_size lo baje de 32 a 16
# replay_start_size lo baje de 50000 a 500
# Ademas cambio y agregue muchos mas episodios y corridas

train = True

game_model = DDQNNGame(cnn, env, paths, params, train)

# Agregue los logs a TensorBoard pero no me funciono
#keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=32, write_graph=True)

env.reset()
frameshistory = []
done = False
total_step_limit = 100000
total_run_limit = 200
render = False #True
clip = True

run = 0
total_step = 0

In [14]:
# %% Main loop
exit = 0
print("Partida número: ", run)
print(game_model._weigths_snapshot())
while exit == 0:
    
    run += 1
    current_state = env.reset()
    if WRAPPER != "DM":
        current_state = np.reshape(current_state, (84, 84, 1))

    step = 0
    score = 0
    while exit == 0:
        if total_step >= total_step_limit:
            print ("Reached total step limit of: " + str(total_step_limit))
            # No sería mejor un break?
            exit = 1
        total_step += 1
        step += 1

        if render:
            env.render()

        action = game_model.move(current_state)
        next_state, reward, terminal, info = env.step(action)
        if WRAPPER != "DM":
            next_state = np.reshape(next_state, (84, 84, 1))

        # next_state = scale_color(next_state)

        if clip:
            reward = np.sign(reward)
        score += reward
        
        game_model.remember(current_state, action, reward, next_state, terminal)
        current_state = next_state

        game_model.step_update(total_step)

        if terminal:
            # game_model.save_run(score, step, run)
            if run % 10 == 0:
                weights_snap = game_model._weigths_snapshot()
                print("Partida número: ", run)
                print("Pesos modelo base: ", weights_snap[0])
                print("Pesos modelo base: ", weights_snap[1])
                print(score)
            game_model._save_model()
            break
           
    # Corto por episodios
    if total_run_limit is not None and run >= total_run_limit:
        print ("Reached total run limit of: " + str(total_run_limit))
        exit = 1
        

Partida número:  37
([-9.708551, -49.99063, 285.0838], [-8.4394455, -46.736122, 201.50139])
Partida número:  40
Pesos modelo base:  [-9.655106, -50.0331, 290.31064]
Pesos modelo base:  [-8.4394455, -46.736122, 201.50139]
0.0
Partida número:  50
Pesos modelo base:  [-9.883835, -50.668262, 311.74274]
Pesos modelo base:  [-8.4394455, -46.736122, 201.50139]
1.0
Partida número:  60
Pesos modelo base:  [-10.154356, -51.169815, 331.70572]
Pesos modelo base:  [-8.4394455, -46.736122, 201.50139]
4.0
Partida número:  70
Pesos modelo base:  [-10.190048, -52.26076, 356.06393]
Pesos modelo base:  [-8.4394455, -46.736122, 201.50139]
9.0
Partida número:  80
Pesos modelo base:  [-10.460072, -52.764046, 380.57678]
Pesos modelo base:  [-8.4394455, -46.736122, 201.50139]
9.0
Partida número:  90
Pesos modelo base:  [-10.543872, -53.592365, 396.06177]
Pesos modelo base:  [-8.4394455, -46.736122, 201.50139]
2.0
Partida número:  100
Pesos modelo base:  [-10.53868, -54.577896, 418.5523]
Pesos modelo base:  [-

In [22]:
%load_ext tensorboard

The tensorboard module is not an IPython extension.


In [21]:
%tensorboard --logdir logs

UsageError: Line magic function `%tensorboard` not found.


In [28]:
game_model.base_model.__dict__

{'name': 'sequential_1',
 'trainable': True,
 '_is_compiled': False,
 '_expects_training_arg': False,
 '_initial_weights': None,
 'supports_masking': False,
 'optimizer': <keras.optimizers.RMSprop at 0x7f23c6129400>,
 '_updates': [],
 '_losses': [],
 '_per_input_losses': {},
 '_per_input_updates': {},
 '_layers': [<keras.engine.input_layer.InputLayer at 0x7f23c5b35320>,
  <keras.layers.convolutional.Conv2D at 0x7f23c6118198>,
  <keras.layers.convolutional.Conv2D at 0x7f23c5b35400>,
  <keras.layers.pooling.MaxPooling2D at 0x7f23c5293630>,
  <keras.layers.core.Flatten at 0x7f23c5293748>,
  <keras.layers.core.Dense at 0x7f23c52b9978>],
 '_outbound_nodes': [],
 '_inbound_nodes': [<keras.engine.base_layer.Node at 0x7f23c60f5da0>],
 '_is_graph_network': True,
 '_uses_inputs_arg': True,
 'outputs': [<tf.Tensor 'dense_1/Relu:0' shape=(?, 128) dtype=float32>],
 'inputs': [<tf.Tensor 'conv2d_1_input:0' shape=(?, 84, 84, 4) dtype=float32>],
 '_built': True,
 '_build_input_shape': None,
 '_compute

In [40]:
game_model.base_model.layers[1]._trainable_weights[0]._snapshot.__dict__

{'_op': <tf.Operation 'conv2d_2/kernel/read' type=Identity>,
 '_value_index': 0,
 '_dtype': tf.float32,
 '_tf_output': <tensorflow.python.pywrap_tensorflow_internal.TF_Output; proxy of <Swig Object of type 'TF_Output *' at 0x7f23c529e660> >,
 '_shape_val': TensorShape([Dimension(3), Dimension(3), Dimension(16), Dimension(32)]),
 '_consumers': [],
 '_id': 45}

In [None]:
# https://stackoverflow.com/questions/43715047/keras-2-x-get-weights-of-layer
import tensorflow as tf
from tensorflow.contrib.keras import layers

input_x = tf.placeholder(tf.float32, [None, 10], name='input_x')    
dense1 = layers.Dense(10, activation='relu')
y = dense1(input_x)

weights = dense1.get_weights()

In [60]:
# resp 2 https://stackoverflow.com/questions/43715047/keras-2-x-get-weights-of-layer
weigths = [] 
# lista de listas que contiene capa por capa y en cada capa contiene en el primer elemento los pesos y los bias
for layer in game_model.base_model.layers: 
    print(layer.get_config())
    weigths.append(layer.get_weights())
    weigths[4][0].sum()
    #print(layer.get_config(), layer.get_weights())

{'name': 'conv2d_1', 'trainable': True, 'batch_input_shape': (None, 84, 84, 4), 'dtype': 'float32', 'filters': 16, 'kernel_size': (4, 4), 'strides': (2, 2), 'padding': 'valid', 'data_format': 'channels_last', 'dilation_rate': (1, 1), 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}
{'name': 'conv2d_2', 'trainable': True, 'batch_input_shape': (None, 84, 84, 4), 'dtype': 'float32', 'filters': 32, 'kernel_size': (3, 3), 'strides': (1, 1), 'padding': 'valid', 'data_format': 'channels_last', 'dilation_rate': (1, 1), 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution'

In [65]:
weigths[3]

[]

In [62]:
weigths[4][0].sum()

IndexError: list index out of range