In [1]:
import os
import random

import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
import seaborn as sb

tf.enable_eager_execution()

In [2]:
raw_data = []
with open("Rock_Paper_Scissors_Raw.csv", encoding="utf-8") as f:
    f.readline() # header
    for line in f:
        raw_data.append([int(c) for c in line.split(",")])

In [3]:
game_data = []
current_game_id = None
current_game = []

for r in raw_data:
    if r[2] == 0 or r[3] ==0:
        continue
    if current_game_id != r[0]:
        game_data.append(current_game)
        current_game_id = r[0]
        current_game = []
    current_game.append((r[2], r[3]))

game_data = game_data[1:]

In [4]:
game_data_simulated = []
r_move = lambda : random.choice((1,2,3))

# length=1 (constant)
game_data_simulated += [[(move, r_move()) for i in range(random.randint(6,20))]
                        for move in (1,2,3) for i_g in range(1000)]

# length=2 (alternating)
game_data_simulated += [sum(([(move1, r_move()), (move2, r_move())] for i in range(random.randint(4,10))), [])
                        for move1 in (1,2,3) for move2 in (1,2,3) for i_g in range(500)]

# length=3 
game_data_simulated += [sum(([(move1, r_move()), (move2, r_move()), (move3, r_move())]
                             for i in range(random.randint(4,7))), [])
                        for move1 in (1,2,3) for move2 in (1,2,3) for move3 in (1,2,3) for i_g in range(150)]

# length=4
game_data_simulated += [sum(([(move1, r_move()), (move2, r_move()), (move3, r_move()), (move4, r_move())]
                             for i in range(random.randint(3,5))), [])
                        for move1 in (1,2,3) for move2 in (1,2,3) for move3 in (1,2,3) for move4 in (1,2,3) 
                        for i_g in range(50)]

In [5]:
def enc_game(game, invert=False):
    game_enc = []
    # we add a zero input to the beginning
    # as the model should be able to predict the first move
    game_enc.append([0] * 6)
    for p1, p2 in game:
        if invert:
            p1, p2 = p2, p1
        game_enc.append([0] * 6)
        game_enc[-1][p1 - 1] = 1
        game_enc[-1][p2 - 1 + 3] = 1
    return game_enc

In [6]:
game_data_one_hot = []
for game in game_data:
    # for real games we can use both players to train
    for invert in (True, False):
        game_data_one_hot.append(enc_game(game, invert=invert))
for game in game_data_simulated:
    # for simulated games we don't train on the player playing randomly
    game_data_one_hot.append(enc_game(game))

In [7]:
# for batchsize=1 training
# training with batchsize 1 is very slow but the resulting models at least make sense

X = [game[:-1] for game in game_data_one_hot]
Y = [[move[:3] for move in game[1:]] for game in game_data_one_hot]
X_np = [np.array(x).astype(np.float32) for x in X]
Y_np = [np.array(y).astype(np.float32) for y in Y]
dataset = tf.data.Dataset.from_generator(lambda: zip(X_np, Y_np), output_types=(tf.dtypes.float32, tf.dtypes.float32))

Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    


In [8]:
test_size = 2000
dev_size = 2000
dataset = dataset.shuffle(100000)
test_dataset = dataset.take(test_size)
dev_dataset = dataset.skip(test_size).take(dev_size)
train_dataset = dataset.skip(test_size + dev_size)
train_size = len(X) - test_size - dev_size

In [9]:
# I tried various ways to get training with batch_size > 1 to work (see rps_experiments.ipynb) 
# but there was always some kind of problem.
# Just padding gave bad results (maybe because the average game length is far below the maximum), and the
# networks trained with masking behaved very weirdly, giving exploding probabilities after a few moves.
# Thankfully the networks and dataset are small enough that training with batch_size=1 doesn't take too long (<24h).
batch_size = 1
train_dataset_batched = train_dataset.batch(batch_size, drop_remainder=True).repeat()

In [10]:
def build_deep_model(state_dims, batch_size, stateful=False):
    return tf.keras.Sequential([
        #tf.keras.layers.Masking(mask_value=-1.0, batch_input_shape=[batch_size, None, 6]),
        tf.keras.layers.SimpleRNN(state_dims[0], batch_input_shape=[batch_size, None, 6],
                                 return_sequences=True,  stateful=stateful, activation="softsign"),
    ] + [tf.keras.layers.SimpleRNN(s, return_sequences=True, stateful=stateful, activation="softsign") 
         for s in state_dims[1:]
        ] + [
        tf.keras.layers.Dense(3),
        tf.keras.layers.Softmax()
    ])

In [13]:
deep_model_3l = build_deep_model([10,10,10], 1)
opt = tf.keras.optimizers.Adam(lr=0.001)
deep_model_3l.compile(opt, loss=tf.keras.losses.categorical_crossentropy,metrics = ["accuracy"])

In [14]:
deep_model_3l.fit(train_dataset_batched, steps_per_epoch=train_size // batch_size, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1ad083ba320>

In [15]:
tf.keras.backend.set_value(deep_model_3l.optimizer.lr, 0.0001)

In [16]:
deep_model_3l.fit(train_dataset_batched, steps_per_epoch=train_size // batch_size, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1ad18b6dda0>

In [17]:
tf.keras.backend.set_value(deep_model_3l.optimizer.lr, 0.00001)

In [None]:
deep_model_3l.fit(train_dataset_batched, steps_per_epoch=train_size // batch_size, epochs=3)

Epoch 1/3

In [18]:
deep_model_3l.save_weights("deep_3l_s10_softsign_moresim.h5")

In [21]:
deep_model_3l.evaluate(dev_dataset.batch(batch_size), steps=dev_size // batch_size)



[1.0928817737363279, 0.3873162]