In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"
import numpy as np
import tensorflow as tf
tf.enable_eager_execution()

In [2]:
def build_simple_model(state_dim, batch_size, stateful=False):
    return tf.keras.Sequential([
        #tf.keras.layers.Masking(mask_value=-1.0, batch_input_shape=[batch_size, None, 6]),
        tf.keras.layers.SimpleRNN(state_dim, batch_input_shape=[batch_size, None, 6],
                                 return_sequences=True,  stateful=stateful, activation="softsign"),
        tf.keras.layers.Dense(3),
        tf.keras.layers.Softmax()
    ])

In [23]:
def build_deep_model(state_dims, batch_size, stateful=False):
    return tf.keras.Sequential([
        #tf.keras.layers.Masking(mask_value=-1.0, batch_input_shape=[batch_size, None, 6]),
        tf.keras.layers.SimpleRNN(state_dims[0], batch_input_shape=[batch_size, None, 6],
                                 return_sequences=True,  stateful=stateful, activation="softsign"),
    ] + [tf.keras.layers.SimpleRNN(s, return_sequences=True, stateful=stateful, activation="softsign") 
         for s in state_dims[1:]
        ] + [
        tf.keras.layers.Dense(3),
        tf.keras.layers.Softmax()
    ])

In [50]:
deep_model_3l = build_deep_model([10,10,10], 1, stateful=True)
deep_model_3l.load_weights("deep_3l_s10_softsign_sim.h5")
unquantized_weights = deep_model_3l.get_weights()

In [36]:
def quantize_array_8bit(arr):
    max_val = np.max(arr)
    min_val = np.min(arr)
    scale_factor = (max_val - min_val) / 255
    normalized_arr = (arr - min_val) / scale_factor
    uint8_arr = normalized_arr.astype(np.uint8)
    quantized_float_arr = uint8_arr.astype(np.float32) * scale_factor + min_val
    return uint8_arr, min_val, scale_factor, quantized_float_arr

In [63]:
quantized_weights = [quantize_array_8bit(w)[-1] for w in deep_model_3l.get_weights()]
deep_model_3l.set_weights(quantized_weights)

In [37]:
# print quantized weights for copying into model_weights.c
[(q[0].shape, q[0].reshape(-1), q[1], q[2]) for q in (quantize_array_8bit(w) for w in unquantized_weights)]

[((6, 10),
  array([187, 224, 157, 152,   0, 214, 151, 187, 157, 170, 135,  92,  55,
         237, 228,  76, 130, 252, 200,  82, 203, 161, 255, 167, 172, 157,
          22,  51,  88, 162, 161, 162, 150, 178, 148, 161,  98, 153, 123,
         145, 160, 167, 149, 178, 149, 197, 122, 154, 159, 142, 164, 175,
         149, 175, 144,  65, 123, 150, 158, 147], dtype=uint8),
  -4.629354,
  0.03016486822390089),
 ((10, 10),
  array([199,  98, 123,  53, 158, 119, 126, 170, 170, 229, 171, 171, 138,
         149,  85, 133, 148, 113,  96, 126, 209, 174, 213,  91, 200, 166,
         181, 229,  88, 138, 156, 255, 212,  56, 140, 139,  91, 116,  93,
         167,  48, 110, 160, 150, 224,  79, 220, 210,  94, 157, 132, 132,
         138, 134, 142, 126, 133, 147, 128, 146, 144,  85, 133, 119, 146,
         114, 169, 136, 173, 131,  42, 119, 148, 115, 143, 131, 239, 234,
          61, 172, 144, 155, 120, 150, 152, 205, 135, 105,  72, 148, 149,
          96,  63,   0, 176, 125, 174, 146,  99, 104], dtype=u

In [51]:
def make_input_vector(opponent_move=None, model_move=None):
    if opponent_move is None and model_move is None:
        return np.zeros((1,1,6)).astype(np.float32)
    elif opponent_move is None or model_move is None:
        raise ValueError
    move_ids = {"r": [1,0,0], "p": [0,1,0], "s": [0,0,1]}
    return np.array([[move_ids[opponent_move] + move_ids[model_move]]]).astype(np.float32)

In [87]:
deep_model_3l.reset_states()

In [88]:
deep_model_3l(make_input_vector())

<tf.Tensor: id=49491, shape=(1, 1, 3), dtype=float32, numpy=array([[[0.3558706 , 0.35276127, 0.2913682 ]]], dtype=float32)>

In [89]:
deep_model_3l.layers[0].states

[<tf.Variable 'simple_rnn_8/Variable:0' shape=(1, 10) dtype=float32, numpy=
 array([[ 0.15546353,  0.31109032,  0.37262443, -0.00143206,  0.4737557 ,
         -0.15750432,  0.2921558 ,  0.12020159, -0.16659214,  0.26985827]],
       dtype=float32)>]

In [90]:
deep_model_3l(make_input_vector("r", "p"))

<tf.Tensor: id=49684, shape=(1, 1, 3), dtype=float32, numpy=array([[[0.3299879 , 0.33203024, 0.33798188]]], dtype=float32)>

In [91]:
deep_model_3l.layers[0].states

[<tf.Variable 'simple_rnn_8/Variable:0' shape=(1, 10) dtype=float32, numpy=
 array([[ 0.5596891 ,  0.70770854,  0.46054533, -0.39471355, -0.7354919 ,
          0.67643493,  0.4983232 ,  0.726524  , -0.45127013,  0.38980037]],
       dtype=float32)>]

In [109]:
# define functions that evaluate the model using exactly the same operations
# as on the microcontroller, to check correctness of the code there

softsign = lambda x: x/(1+np.abs(x))

def numpy_eval_recurrent_layer_like_attiny(x_in, ws, state):
    int_state = state@ws[1]
    print("state@ws[1]")
    print(int_state)
    state, int_state = int_state, state
    
    int_state = x_in@ws[0]
    print("x@ws[0]")
    print(int_state)
    state = state + int_state
    
    state = state + ws[2]
    print("state+ws[2] (state bias)")
    print(state)
    state = softsign(state)
    print("softsign(state)")
    print(state)
    return state

def numpy_eval_deep_model_like_attiny(x, ws, states, temperature=1.0):
    print("LAYER 1")
    states[0] = numpy_eval_recurrent_layer_like_attiny(x, ws[:3], states[0])
    for i in range(1, len(states)):
        print(f"LAYER {i+1}")
        states[i] = numpy_eval_recurrent_layer_like_attiny(states[i-1], ws[i*3:i*3+3], states[i])
    print("OUTPUT LAYER")
    output = states[-1]@ws[-2]
    print(f"states[{i}]@W_out")
    print(output)
    output = output + ws[-1]
    print("output + out_bias")
    print(output)
    output = output * temperature
    print("output * temperature")
    print(output)
    probs = np.exp(output) / np.sum(np.exp(output),axis=-1)
    return states, probs

In [110]:
states = [np.zeros((1,10)) for i in range(3)]
ws = deep_model_3l.get_weights()
states, probs = numpy_eval_deep_model_like_attiny(np.zeros((1,6)), ws, states)
print(probs)
print()
print()
print("SECOND EVAL")
print()
states, probs = numpy_eval_deep_model_like_attiny(np.array([[1,0,0,0,1,0]]), ws, states)
print(probs)

LAYER 1
state@ws[1]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
x@ws[0]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
state+ws[2] (state bias)
[[ 0.18408149  0.45156911  0.59394157 -0.00143412  0.90025806 -0.18694972
   0.41274026  0.13662401 -0.19989267  0.36959711]]
softsign(state)
[[ 0.15546353  0.31109033  0.37262443 -0.00143206  0.47375569 -0.15750433
   0.2921558   0.12020159 -0.16659213  0.26985827]]
LAYER 2
state@ws[1]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
x@ws[0]
[[-0.29058412 -0.1792779   0.2809158  -1.10307785 -0.18096764 -1.28420414
  -1.39919403  0.42065452  0.33583989 -2.06540322]]
state+ws[2] (state bias)
[[-0.6321368   0.53675776  1.16355789 -1.19109907  1.32463735 -1.40844414
  -0.83527722  1.25983397  0.54481264 -1.97957432]]
softsign(state)
[[-0.38730626  0.34927936  0.53779836 -0.54360804  0.56982537 -0.58479419
  -0.45512319  0.55748962  0.35267231 -0.66438159]]
LAYER 3
state@ws[1]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
x@ws[0]
[[-5.66822344e-01  5.99073583e+00 -1.49520326e+00 -1.19448984e+00
  

In [56]:
def test_model(model):
    # interactively test a model
    moves = ["r", "p", "s"]
    counter_move = {"r": "p", "p": "s", "s": "r"}
    model_wins = player_wins = 0
    model.reset_states()
    prediction = model(make_input_vector())
    while True:
        player_move = input("r/p/s:")
        if player_move not in ("r", "p", "s"):
            break
        model_move = counter_move[moves[np.argmax(prediction)]]
        print(f"player plays {player_move}")
        print(f"model plays {model_move}")
        if model_move == counter_move[player_move]:
            model_wins += 1
            print(f"model wins! (model win / tot win = {100*model_wins / (model_wins + player_wins) if model_wins else 0:.2f}%)")
        elif player_move == counter_move[model_move]:
            player_wins += 1
            print(f"Player wins! (model win / tot win = {100*model_wins / (model_wins + player_wins) if model_wins else 0:.2f}%)")
        else:
            print("It's a draw!")
        print(prediction)
        prediction = model(make_input_vector(player_move, model_move))

In [57]:
test_model(deep_model_3l)

r/p/s:r
player plays r
model plays p
model wins! (model win / tot win = 100.00%)
tf.Tensor([[[0.3558687  0.34965733 0.29447404]]], shape=(1, 1, 3), dtype=float32)
r/p/s:r
player plays r
model plays p
model wins! (model win / tot win = 100.00%)
tf.Tensor([[[0.33916703 0.32936612 0.33146688]]], shape=(1, 1, 3), dtype=float32)
r/p/s:r
player plays r
model plays p
model wins! (model win / tot win = 100.00%)
tf.Tensor([[[0.41741738 0.28312516 0.2994575 ]]], shape=(1, 1, 3), dtype=float32)
r/p/s:p
player plays p
model plays p
It's a draw!
tf.Tensor([[[0.5196548  0.28064114 0.1997041 ]]], shape=(1, 1, 3), dtype=float32)
r/p/s:p
player plays p
model plays p
It's a draw!
tf.Tensor([[[0.35841417 0.2936207  0.34796515]]], shape=(1, 1, 3), dtype=float32)
r/p/s:p
player plays p
model plays p
It's a draw!
tf.Tensor([[[0.37515318 0.34275842 0.28208843]]], shape=(1, 1, 3), dtype=float32)
r/p/s:p
player plays p
model plays s
model wins! (model win / tot win = 100.00%)
tf.Tensor([[[0.3269663  0.44456187