In [2]:
# Import libraries

import tensorflow as tf
import random
import graphical, game
import numpy as np

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [10]:
N_Rows = 10
N_Cols = 8
N_Dir = 2

All_Actions = np.arange(0,N_Rows*N_Cols*N_Dir)

N_All_Actions = len(All_Actions)
N_State = N_Rows * N_Cols + 1 # board game state + moves_left

def get_state(board, moves_left):
    state = np.zeros(len(board) - (N_Rows-1) + 1)
    
    c_state_index = 0
    for s in range(0,len(board)):
        if board[s] != '\n':
            #print(board[s])
            state[c_state_index] = ord(board[s]) - ord('a')
            state[c_state_index] = -1 if state[c_state_index] < 0 else state[c_state_index]
            c_state_index += 1
            
    state[-1] = moves_left
    return state

def get_action_from(move):
    action = np.array(move)
    
    if move[2]:
        action[2] = 1
    else:
        action[2] = 0
    
    out_action = (action[2]) * (N_Rows * N_Cols) + (action[0] * N_Rows + action[1])
    
    return out_action

def get_move_from(action):
    row_col = action % (N_Rows * N_Cols)
    
    dir = int(action / (N_Rows * N_Cols))
    
    return (int(row_col / N_Rows), row_col % N_Rows, dir >= 1)

# test action conversion
num_error_in_conversion = 0
for i in range(0,160):
    a = get_move_from(i)
    ii = get_action_from(a)
    if i != ii:
        num_error_in_conversion += 1
print("number of errors happens in action conversion: ", num_error_in_conversion)

[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159]
number of errors happens in action conversion:  0


In [11]:
class QNetwork():
    """Q-Value Estimator neural network.

    This network is used for both the Q-Network and the Target Network.
    """

    def __init__(self, scope="estimator", summaries_dir=None):
        self.scope = scope
        # Writes Tensorboard summaries to disk
        self.summary_writer = None
        with tf.variable_scope(scope):
            # Build the graph
            self._build_model()
            if summaries_dir:
                summary_dir = os.path.join(summaries_dir, "summaries_{}".format(scope))
                if not os.path.exists(summary_dir):
                    os.makedirs(summary_dir)
                self.summary_writer = tf.summary.FileWriter(summary_dir)

    def _build_model(self):
        """
        Builds the Tensorflow graph.
        """

        # Placeholders for our input
        # Our inputs are board game state with shape of (None, N_State)
        self.X_pl = tf.placeholder(shape=[None, N_State], dtype=tf.uint8, name="X")
        # The TD target value
        self.y_pl = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
        # Integer id of which action was selected
        self.actions_pl = tf.placeholder(shape=[None], dtype=tf.int32, name="actions")

        X = (tf.to_float(self.X_pl) + 1) / 5.0 # normalize input between (0,1)
        batch_size = tf.shape(self.X_pl)[0]

        # Three fully connected layers
        fully1 = tf.contrib.layers.fully_connected(X, 100, activation_fn=tf.nn.relu)      # 80 to 100
        fully2 = tf.contrib.layers.fully_connected(fully1, 120, activation_fn=tf.nn.relu) # 100 to 120
        fully3 = tf.contrib.layers.fully_connected(fully2, 140, activation_fn=tf.nn.relu) # 120 to 140

        # output layers
        self.predictions = tf.contrib.layers.fully_connected(fully3, N_All_Actions)  # 140 to 160

        # Get the predictions for the chosen actions only
        gather_indices = tf.range(batch_size) * tf.shape(self.predictions)[1] + self.actions_pl
        self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices)

        # Calculate the loss
        self.losses = tf.squared_difference(self.y_pl, self.action_predictions)
        self.loss = tf.reduce_mean(self.losses)

        # Optimizer Parameters from original paper
        self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
        self.train_op = self.optimizer.minimize(self.loss, global_step=tf.contrib.framework.get_global_step())

        # Summaries for Tensorboard
        self.summaries = tf.summary.merge([
            tf.summary.scalar("loss", self.loss),
            tf.summary.histogram("loss_hist", self.losses),
            tf.summary.histogram("q_values_hist", self.predictions),
            tf.summary.scalar("max_q_value", tf.reduce_max(self.predictions))
        ])

    def predict(self, sess, s):
        """
        Predicts action values.

        Args:
          sess: Tensorflow session
          s: State input of shape [batch_size, N_State]

        Returns:
          Tensor of shape [batch_size, N_All_Actions] containing the estimated 
          action values.
        """
        return sess.run(self.predictions, { self.X_pl: s })

    def update(self, sess, s, a, y):
        """
        Updates the estimator towards the given targets.

        Args:
          sess: Tensorflow session object
          s: State input of shape [batch_size, N_State]
          a: Chosen actions of shape [batch_size]
          y: Targets of shape [batch_size]

        Returns:
          The calculated loss on the batch.
        """
        feed_dict = { self.X_pl: s, self.y_pl: y, self.actions_pl: a }
        summaries, global_step, _, loss = sess.run(
            [self.summaries, tf.contrib.framework.get_global_step(), self.train_op, self.loss],
            feed_dict)
        if self.summary_writer:
            self.summary_writer.add_summary(summaries, global_step)
        return loss

In [12]:
class QNetworkCopier():
    """
    Copy model parameters of one estimator to another.
    """
    
    def __init__(self, estimator1, estimator2):
        """
        Defines copy-work operation graph.  
        Args:
          estimator1: Estimator to copy the paramters from
          estimator2: Estimator to copy the parameters to
        """
        e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]
        e1_params = sorted(e1_params, key=lambda v: v.name)
        e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]
        e2_params = sorted(e2_params, key=lambda v: v.name)

        self.update_ops = []
        for e1_v, e2_v in zip(e1_params, e2_params):
            op = e2_v.assign(e1_v)
            self.update_ops.append(op)
            
    def make(self, sess):
        """
        Makes copy.
        Args:
            sess: Tensorflow session instance
        """
        sess.run(self.update_ops)

In [4]:
global num_run

def ai_callback(board, score, moves_left):
    dir = random.randint(0, 1) == 0
    return (random.randint(0, 7 if dir else 6), random.randint(0, 8 if dir else 9), dir)


def transition_callback(board, move, score_delta, next_board, moves_left):
    #print(len(board))
    state = get_state(board, moves_left + 1)
    reward = score_delta
    action = get_action(move)
    n_state = get_state(next_board, moves_left)
    print(move.type)
    print(asd)
    pass # This can be used to monitor outcomes of moves

def end_of_game_callback(boards, scores, moves, final_score):
    global num_run
    num_run = num_run - 1
    if num_run == 0:
        return False
    return True # True = play another, False = Done


if __name__ == '__main__':
    global num_run
    num_run = 2
    speedup = 10.0
    g = graphical.Game(ai_callback, transition_callback, end_of_game_callback, speedup)
    g.run()


Seed: 280033793071808167617286528385611455212


AttributeError: 'tuple' object has no attribute 'type'