In [1]:
import tensorflow as tf
import keras
from keras.models import Model, load_model
from keras.utils import multi_gpu_model
from keras.layers import *
import keras.backend as K
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('src') # Fix for jupyter
import src.emulator as emulator
import src.emulator_utils as emulator_utils
import src.emulator_vis as emulator_vis
%matplotlib inline
from IPython.display import clear_output
import time
import multiprocessing
from multiprocessing import Event
from multiprocessing import Queue
from multiprocessing import Process as Thread
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"

Using TensorFlow backend.


# Main params

In [2]:
INPUT_SIZE = (16, 16, 4) # Map size fixed to 16x16 (2 to 3 players)
N_ACTIONS = 4

# Define the Layers Blocks

In [3]:
# Convolutional Block
def conv_block(in_layer, name, filters=128, kernel_size=(3,3), bn=True, relu=True):
    l = Conv2D(filters, kernel_size, padding='same', name = name)(in_layer)
    if bn:
        l = BatchNormalization(name = name + '_bn')(l)
    if relu:
        l = Activation('relu', name = name + '_relu')(l)
    return l

# Residual Block
def residual_conv(in_layer, idx, filters=128, kernel_size=(3,3), bn=True, relu=True):
    name = 'res_' + str(idx)
    # Full conv block of pre-defined shape
    l = conv_block(in_layer, name + '_conv1', filters, kernel_size=(3,3), bn=True, relu=True)
    # Second block with skip connection
    l = Conv2D(filters, kernel_size, padding='same', name = name + '_conv2')(l)
    if bn:
        l = BatchNormalization(name = name + '_conv2_bn')(l)
    l = Concatenate()([in_layer, l]) # Skip conn.
    if relu:
        l = Activation('relu', name = name + '_relu')(l)
    return l

def value_head(in_layer):
    l = conv_block(in_layer, 'value_head', filters=1, kernel_size=(1,1))
    l = Flatten(name = 'value_flatten')(l)
    l = Dense(128, name = 'value_dense')(l)
    l = Activation('relu', name = 'value_relu')(l)
    l = Dense(1, name = 'value', activation='tanh')(l) # Value output
    return l

def policy_head(in_layer):
    l = conv_block(in_layer, 'policy_head', filters=2, kernel_size=(1,1))
    l = Flatten(name = 'policy_flatten')(l)
    l = Dense(N_ACTIONS, name = 'policy', activation='softmax')(l) # Policy output
    return l

# Model

In [4]:
def create_model(cpu = False):
    def declare_model():
        n_residual = 16

        input_layer = Input(INPUT_SIZE)
        l = conv_block(input_layer, 'conv')
        for i in range(n_residual):
            l = residual_conv(l, idx=i + 1)

        policy = policy_head(l)
        value = value_head(l)

        alphabot = Model(input_layer, [policy, value])
        return alphabot
    
    if cpu:
        with tf.device('/cpu'):
            alphabot = declare_model()
        return alphabot
    
    alphabot = declare_model()
    return alphabot

In [5]:
def copy_weights(): # Gpu is used for training, we clone its weights
    alphabot_cpu.set_weights(alphabot.get_weights)

In [6]:
alphabot = create_model()
alphabot.summary(line_length=112)

________________________________________________________________________________________________________________
Layer (type)                        Output Shape             Param #       Connected to                         
input_1 (InputLayer)                (None, 16, 16, 4)        0                                                  
________________________________________________________________________________________________________________
conv (Conv2D)                       (None, 16, 16, 128)      4736          input_1[0][0]                        
________________________________________________________________________________________________________________
conv_bn (BatchNormalization)        (None, 16, 16, 128)      512           conv[0][0]                           
________________________________________________________________________________________________________________
conv_relu (Activation)              (None, 16, 16, 128)      0             conv_bn[0][0]        

In [7]:
n_players = 2
# Simulate N games before each epoch
n_games = 10 #10_000
# Games to be stored n_games * K
k = 4
complete_history = []

num_threads = 1 #48

In [8]:
def simulate_games():
    parallel_sim() # Parallel Games
    while not history_buffer.full():
        # We have to predict until buffer is full
        
    print('Simulated', n_games)

IndentationError: expected an indented block (<ipython-input-8-3c2dd33ac844>, line 5)

In [9]:
def training_cycle():    
    # Simulate n_games
    simulate_games()

In [25]:
t = time.time()
cycles = 1

for i in range(cycles):
    training_cycle()
time.time() - t

0

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [16]:
def map_to_state(gmap, gmap_old, p_alive):
    if gmap_old == None:
        gmap_old = np.full_like(gmap, -1)
    
    n_alive = sum(p_alive[np.where(p_alive == 1)])
    states = np.empty((n_alive, *INPUT_SIZE), dtype=np.int)
    
    idx_alive = 0
    for idx, alive in enumerate(p_alive):
        if alive == 0: # Skip dead player
            continue
            
        # Player is alive, we collect its state
        states[idx_alive] = process_map(idx, gmap, gmap_old)

def process_map(idx, gmap, gmap_old):
    pov_me = np.zeros((*INPUT_SIZE[:2], 1), dtype=np.int)
    pov_me_last = np.zeros((*INPUT_SIZE[:2], 1), dtype=np.int)
    pov_not_me = np.zeros((*INPUT_SIZE[:2], 1), dtype=np.int)
    pov_not_me_last = np.zeros((*INPUT_SIZE[:2], 1), dtype=np.int)
    
    pov_me[np.where(gmap == idx)] = 1 # Set to 1 where bot is
    pov_me_last[np.where(gmap_old == idx)] = 1
    
    pov_not_me[np.where(gmap not in [idx, -1])] = 1 # Set to 1 where bot is not
    pov_not_me_last[np.where(gmap_old not in [idx, -1])] = 1
    
    return np.concatenate([pov_me, pov_me_last, pov_not_me, pov_not_me_last], axis=2)

In [10]:
def ask_predict(id, x):
    # Adds to queue id and data from process
    processable_buffer.put((id, x))

def sim():
    # Will be used for correct communication with main
    process_id = multiprocessing.current_process().name
    
    while True:
        # Simulate the game, if a prediction is needed use ask_predict
        game = emulator.Game(n_players)
        gmap = game.map # Access map manually on first step
        
        while True:
            ask_predict(process_id, ...#TODO)
            raw_actions = pipe.recv()
            print('Raw Actions ', raw_actions)
            gmap, p_alive, n_alive, score, game_end = game.step(actions)
            if game_end:
                break
        try:
            history_buffer.put_nowait(0)
        except:
            break
                    
def stop_simulation():
    global workers
    global history_buffer
    global processable_buffer
    
    if 'workers' in globals() and len(workers) != 0:
        for worker in workers:
            worker.terminate()
    workers = []
    
    # Then we empty the queues
    del history_buffer
    del processable_buffer

def parallel_sim():
    global workers
    global history_buffer
    global processable_buffer
    
    if 'workers' in globals() and len(workers) != 0:
        stop_threads()
    
    history_buffer = Queue(n_games) # This numbers can be tweaked
    processable_buffer = Queue(num_threads * n_players)
    
    workers = []
    for i in range(num_threads):
        worker = Thread(target=sim)
        worker.daemon = False
        worker.start()
        workers.append(worker)