# Deep Q learning

In [1]:
import random
from time import sleep 
from engine import TetrisEngine

In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, BatchNormalization, Activation, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model

In [3]:
## use pip install --upgrade --force-reinstall  git+https://github.com/Bosmansc/tetris_openai.git
## not pip install  pip install keras-rl2, this is not compatible with tetris environment

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.policy import GreedyQPolicy
from rl.memory import SequentialMemory

In [4]:
env = TetrisEngine()

In [9]:
def build_model_conv(actions):
    
    ## edit state based on code Seyedomid
    #resize(state)
    
    # Network defined by the Deepmind paper
    model = tf.keras.models.Sequential()
    
    model.add(Conv2D(32, (3, 3), padding='same', kernel_initializer='he_uniform', 
                     kernel_constraint=max_norm(4), input_shape=(1, 8, 6)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Conv2D(64, (3, 3), padding='same', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Conv2D(64, (3, 3), padding='same', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    #model.add(MaxPooling2D(pool_size=(2,2)))
    
    
    # end of convolutional layers, start of 'hidden' dense layers 
    model.add(Flatten())
    model.add(Dense(128, kernel_initializer='he_uniform', kernel_constraint=max_norm(3)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    # Final dense layer
    model.add(Dense(actions, activation='linear'))
    
    return model

In [10]:
actions = 6
model = build_model_conv(actions)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 1, 8, 32)          1760      
_________________________________________________________________
batch_normalization_4 (Batch (None, 1, 8, 32)          128       
_________________________________________________________________
activation_4 (Activation)    (None, 1, 8, 32)          0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 1, 8, 64)          18496     
_________________________________________________________________
batch_normalization_5 (Batch (None, 1, 8, 64)          256       
_________________________________________________________________
activation_5 (Activation)    (None, 1, 8, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 1, 8, 64)         

In [11]:
def build_agent(model, actions):
    # policy = GreedyQPolicy() ## hyperparm, GreedyQPolicy is used in paper: https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2008-118.pdf
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=1000, target_model_update=1e-2)
    return dqn

In [12]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=10000, visualize=True)

Training for 10000 steps ...
Interval 1 (0 steps performed)




done, took 169.113 seconds


<tensorflow.python.keras.callbacks.History at 0x7f50c039ba30>

In [16]:
scores = dqn.test(env, nb_episodes=100, visualize=True, nb_max_episode_steps=50)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 71.600, steps: 50
Episode 2: reward: 71.600, steps: 50
Episode 3: reward: 71.600, steps: 50
Episode 4: reward: 71.600, steps: 50
Episode 5: reward: 71.600, steps: 50
Episode 6: reward: 71.600, steps: 50
Episode 7: reward: 71.600, steps: 50
Episode 8: reward: 71.600, steps: 50
Episode 9: reward: 71.600, steps: 50
Episode 10: reward: 71.600, steps: 50
Episode 11: reward: 71.600, steps: 50
Episode 12: reward: 71.600, steps: 50
Episode 13: reward: 71.600, steps: 50
Episode 14: reward: 71.600, steps: 50
Episode 15: reward: 71.600, steps: 50
Episode 16: reward: 71.600, steps: 50
Episode 17: reward: 71.600, steps: 50
Episode 18: reward: 71.600, steps: 50
Episode 19: reward: 71.600, steps: 50
Episode 20: reward: 71.600, steps: 50
Episode 21: reward: 71.600, steps: 50
Episode 22: reward: 71.600, steps: 50
Episode 23: reward: 71.600, steps: 50
Episode 24: reward: 71.600, steps: 50
Episode 25: reward: 71.600, steps: 50
Episode 26: reward: 71.600, st