In [1]:
import numpy as np
import math
import random
import os

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.optimizers as opt
import tensorflow.keras.layers as lr
import tensorflow.keras.metrics as metrics
from   tensorflow.keras.models import Sequential

import sklearn as sk
import matplotlib.pyplot as plt
%matplotlib inline

print("Imported packages.")
print(tf.__version__)

Imported packages.
2.1.0


# Loading data

## Loading the data generated through simulation

In [2]:
X_data_step1 = np.load(os.path.join("data", "MergedData", "terranBotInputData.npy"))
print("Loaded training data.")
print("X_data shape:", X_data_step1.shape)

Loaded training data.
X_data shape: (10194, 33)


In [3]:
X_data_step1 = sk.utils.shuffle(X_data_step1, random_state=0)
print("Shuffled data.")

Shuffled data.


# Importing step1 model

This model can predict the result of a fight. It is not perfect, but it is far faster than actually simulate the fight through the game.

We will use it as our lost function for our reinforcment learning training


In [4]:
def step1_model(input_shape, output_shape):
    model = Sequential([
        lr.Dense(units=input_shape, activation='relu', input_dim=input_shape),
        lr.Flatten(),
        lr.Dropout(0.2),
        lr.Dense(units=56, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=28, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=14, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=7, activation='relu'),
        lr.Dense(units=output_shape, activation='tanh')
    ])

    model.compile(optimizer=opt.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0000001),
              loss="mean_absolute_error",
              metrics=['accuracy'])
    return model

In [5]:
step1_model = step1_model(56, 1)
step1_model.load_weights(os.path.join("data", "Models", "model_0.1343982719605969_step1.h5"))

## Defining step2 model

This model takes the units from the Terran, and returns the Zerg units requiered to beat them.

That said, it will take an input array of shape 14 (33-5 for upgrades, divided by 2 as we don't need to give health information), and will return an output array of shape 9.

In [6]:
def create_model(input_shape, output_shape):
    model = Sequential([
        lr.Dense(units=input_shape, activation='relu', input_dim=input_shape),
        lr.Flatten(),
        lr.Dropout(0.2),
        lr.Dense(units=28, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=14, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=10, activation='relu'),
        lr.Dense(units=output_shape, activation='tanh')
    ])

    model.compile(optimizer=opt.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0000001),
              loss="mean_absolute_error",
              metrics=['accuracy'])
    return model

In [7]:
step1_model = create_model(14, 9)

In [8]:
from numba import jit

@jit
def format_array_to_step2(array):
    croped = array[:-5:]
    out = np.zeros((int(croped.shape[0]/2), ), dtype=np.float32)
    for i in range(len(out)):
        out[i] = croped[2*i]
    return out

@jit
def format_array_to_step1(array):
    out = np.zeros((array.shape[0]*2+5, ), dtype=np.float32)
    for i in range(len(array)):
        out[2*i] = array[i]
        out[2*i +1 ] = 0. if array[i] == 0 else 1.
    for i in range(5):
        out[-i] = 1 # We assume that units are not upgraded
    return out

@jit
def format_data_to_step2(data):
    out = np.zeros((data.shape[0], int((data.shape[1]-5)/2)), dtype=np.float32)
    for i in range(len(data)):
       out[i] = format_array_to_step2(data[i])
    return out

@jit
def format_data_to_step1(data):
    out = np.zeros((data.shape[0], (data.shape[1]*2)+5), dtype=np.float32)
    for i in range(len(data)):
       out[i] = format_array_to_step1(data[i])
    return out

In [9]:
# Testing format_data_to_step2
X_data_step2 = format_data_to_step2(X_data_step1)
print(X_data_step2.shape)
print(X_data_step1[0])
print(X_data_step2[0])

# Testing format_data_to_step1
X_data_step1_bk = format_data_to_step1(X_data_step2)
print(X_data_step1_bk.shape)
print(X_data_step1_bk[0])
print(X_data_step2[0])

(10194, 14)
[ 1.  1. 22.  1.  0.  0.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  2.  2.  2.  2.  2.]
[ 1. 22.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
(10194, 33)
[ 1.  1. 22.  1.  0.  0.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  1.  1.  1.  1.]
[ 1. 22.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]


# Training a simple model

## Defining model 

The API used is tensorflow.keras as it is powerful and easy to use.

The model itself only has Dense layers since there is no spacial information yet


In [10]:
def create_model(input_shape, output_shape):
    model = Sequential([
        lr.Dense(units=input_shape, activation='relu', input_dim=input_shape),
        lr.Flatten(),
        lr.Dropout(0.2),
        lr.Dense(units=14, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=14, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=14, activation='relu'),
        lr.Dropout(0.2),
        lr.Dense(units=9, activation='relu'),
        lr.Dense(units=output_shape, activation='softmax')
    ])

    model.compile(optimizer=opt.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0000001),
              loss="mean_absolute_error",
              metrics=['accuracy'])
    
    return model

In [11]:
model = create_model(X_data_step2[0].shape[0], 9)

## Defining Reinforcment Learning parameters

Taken from one of keras-rl GitHub's page example : https://github.com/keras-rl/keras-rl/blob/master/examples/dqn_atari.py

In [12]:
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor, Env
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
import gym

Using TensorFlow backend.


In [13]:
# Not done yet, but implement Processor and Environment following the link bellow
#https://github.com/keras-rl/keras-rl/blob/216c3145f3dc4d17877be26ca2185ce7db462bad/rl/core.py#L515
class CustomProcessor(Processor):
    def process_observation(self, observation):
        return observation
    
    def process_state_batch(self, batch):
        return batch
    
    def process_reward(self, reward):
        return reward

class CustomEnvironment(Env):
    def step(self, action):
        return action
    
    def seed(self, seed=None):
        return 0

In [14]:
env = CustomEnvironment()
np.random.seed(123)
env.seed(123)

0

In [15]:
memory = SequentialMemory(limit=1000000, window_length=4)
processor = CustomProcessor()

policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=1000000)
nb_actions = 1000

In [16]:
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

TypeError: len is not well defined for symbolic Tensors. (dense_16/Identity:0) Please call `x.shape` rather than `len(x)` for shape information.

## Training the model

First, there is a check on the availability of a GPU. If you have an NVIDIA GPU on your computer, make sure CUDA 10.0, 10.1 and 10.2 are installed, are they are used by tensorlflow.

In [None]:
NB_STEPS = 1000000

In [None]:
# Training Model on GPU if available
list_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(list_devices))
print("Using GPU: ", list_devices[0] if len(list_devices) > 0 else False)

In [None]:
# Training model_tilte_1
device = tf.device('/GPU:0') if len(list_devices) > 0 else tf.device('/CPU:0')
with tf.device('/GPU:0'):
    history = dqn.fit(env, callbacks=callbacks, nb_steps=NB_STEPS)

## Evaluating the model

This should be done directly through the game.

## Saving the current model

In [None]:
dqn.save_weights('data/Models/model_step2.h5')
print("Saved model.")