# Set numerical variables


In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
import time

import environment as env
import briscola_players as players
import simulate_games as sim_games
import module_next_states as next_states

import importlib

# Number of rounds of a game that are used for training; recorded games are
# grouped in chunks of number_of_rounds + 1 rows further down in this file.
number_of_rounds = 20

# Suits ("semi") and ranks ("numeri") of the deck, with the points carried by
# each rank (my_points[k] belongs to numeri[k]).
semi = ['bastoni', 'coppe', 'denari', 'spade']
numeri = ['2', '3', '4', '5', '6', '7', '8', '9', '0', '1']
my_points = [0, 10, 0, 0, 0, 0, 2, 3, 4, 11]

# One row per rank, one column of ones per suit, plus the 'points' column.
my_cards = pd.DataFrame({seme: [1] * len(numeri) for seme in semi}, index=numeri)
my_cards['points'] = my_points
sorted_cards = my_cards.sort_values(by='points', ascending=False)

# Card names are rank + suit, e.g. '2bastoni'; ranks vary slowest.
cards_in_string = [numero + seme for numero in numeri for seme in semi]

# Same 40 names followed by the extra 'None' entry
# (presumably the marker for an empty slot - confirm against the encoders).
all_cards_in_strings = cards_in_string + ['None']
sorted_cards_in_string = sorted(all_cards_in_strings)
all_cards_in_strings_set = set(all_cards_in_strings)


from sklearn.preprocessing import MultiLabelBinarizer
# Binarizer over the fixed card vocabulary (the 40 card names plus 'None');
# passing `classes` pins the set and order of the output columns.
mlb = MultiLabelBinarizer(classes = sorted_cards_in_string)



## Training functions

In [4]:
# Loss and optimizer used by training_with_dense (module-level, so the Adam
# state is kept across training steps).
loss_fn = tf.keras.losses.MeanSquaredError()
optimizer_dense = tf.keras.optimizers.Adam()
# Discount factor multiplying the next-state value in the Bellman target.
gamma = .8

def training_with_dense(model, batch_of_games, target_q_val, past_loss = None):
    '''
    Run one gradient step of `model` on a batch of recorded games.

    model: callable returning a pair whose second element holds the Q-values
        of the 3 possible actions for every encoded state.
    batch_of_games: array of shape (batch_size, rounds, #features for a game);
        only the first `number_of_rounds` rounds are used.
    target_q_val: regression targets for the Q-value of the action that was
        actually played, comparable with a (batch_size, number_of_rounds)
        prediction.
    past_loss: optional list; when given, the loss tensor of this step is
        appended to it.

    Returns `past_loss` (with the new loss appended) when it was provided,
    otherwise None.
    '''
    batch_of_games = batch_of_games[:, :number_of_rounds, :]
    batch_size, rounds, n_features = batch_of_games.shape

    # One-hot marker of the card that was played, shape (batch_size, rounds, 3)
    mask = create_mask(batch_of_games)

    # One row per (game, round)
    flat_games = np.reshape(batch_of_games, (batch_size * rounds, n_features))

    # The batch still contains ['pl 2 hand 1', 'pl 2 hand 2', 'pl 2 hand 3', 'played card'];
    # before_encoding_a_game gets rid of these columns before the encoding.
    encoded_games = next_states.encode_a_game(next_states.before_encoding_a_game(flat_games))
    # (batch_size*rounds, 1, #features for encoded game)
    encoded_games = np.reshape(encoded_games, (encoded_games.shape[0], 1, encoded_games.shape[1]))

    with tf.GradientTape() as tape:
        _, all_Q_values = model(encoded_games)
        all_Q_values = tf.reshape(all_Q_values, [batch_size, rounds, 3])  # (batch_size, rounds, 3)

        # Keep only the Q-value of the action taken: the mask zeroes the other
        # two, the sum collapses the action axis -> (batch_size, rounds)
        final_q_values = tf.reduce_sum(all_Q_values * mask, axis = -1)

        # Transposed in case we want to put different weights on different rounds of a game
        transposed_final_q_val = tf.transpose(final_q_values)
        transposed_target = tf.transpose(target_q_val)
        loss = loss_fn(transposed_target, transposed_final_q_val)

    # `is not None` (rather than `!= None`) so that array-like arguments do
    # not trigger an element-wise comparison.
    if past_loss is not None:
        past_loss.append(loss)

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer_dense.apply_gradients(zip(grads, model.trainable_variables))

    # None when no history list was supplied, exactly as before
    return past_loss
    

def get_target_qvals_with_dense(model, batch_of_games):
    '''
    Compute the Bellman targets  points/120 + gamma * max_a Q(next_state, a).

    model: callable returning a pair whose second element holds the Q-values
        of the 3 possible actions for every encoded state.
    batch_of_games: array of shape (batch_size, rounds, #features for a game).
        The whole array (no round is cut off) is handed to the points and
        next-state helpers.

    Returns the targets; per the original documentation of this function the
    result has shape (batch_size, 20).
    NOTE(review): the shape returned by sim_games.get_points is not visible
    here - it has to broadcast against the (batch_size, 20) Q-value term.
    '''
    # Points for every row of the batch flattened to one row per (game, round)
    points = sim_games.get_points(np.reshape(batch_of_games, (batch_of_games.shape[0]* batch_of_games.shape[1],
                                            batch_of_games.shape[2])))
    # 120 is the total of the points in the deck (30 per suit, 4 suits)
    points = points / 120

    # (batch_size, 20, 1, #features for next_states). The third dimension 1 is deprecated
    next_state = next_states.get_new_single_state_from_a_batch_of_games(batch_of_games)

    # Encode the next states, one row per state
    to_encode_the_games = np.reshape(next_state, (next_state.shape[0]*next_state.shape[1]*next_state.shape[2],
                                                      next_state.shape[3]))
    encode_games = next_states.encode_a_game(to_encode_the_games) #(batch_size*20,  #features for encoded game)

    reshaped_encode_games_nn = tf.reshape(encode_games, (encode_games.shape[0], 1, encode_games.shape[1]))
    _, q_vals = model(reshaped_encode_games_nn)

    shape_q_vals = [next_state.shape[0], next_state.shape[1], next_state.shape[2], 3]
    all_q_vals = tf.reshape(q_vals, (shape_q_vals))

    # Best action value of every next state: max over the 3 actions,
    # shape = (batch_size, 20, 1)
    max_q_vals = tf.math.reduce_max(all_q_vals, axis = -1)

    # Average over the remaining (size 1) next-state axis, shape = (batch_size, 20)
    target_q_vals = tf.math.reduce_mean(max_q_vals, axis = -1)

    target_q_vals = points + gamma * target_q_vals #Bellman equation

    return target_q_vals

    
def create_mask(batch_of_games, rounds = None):
    '''
    Creates a one-hot mask of shape (batch_size, rounds, 3) marking which of
    the three hand slots held the card that was played.

    batch_of_games: array of shape (batch_size, n_rounds, #features) where
        columns 1-3 hold the hand and column 10 the played card.
    rounds: number of leading rounds to keep; defaults to `number_of_rounds`.

    If the entry [0,0,:] is [0.,1.,0.], then in game 0 of the batch, at
    hand 0, pl1 played the card in the second position of the hand. When the
    played card appears in several slots the first one is marked.

    Raises IndexError if, for some kept round, the played card is not in the hand.
    '''
    if rounds is None:
        rounds = number_of_rounds
    games = np.asarray(batch_of_games[:, :rounds, :])

    hands = games[:, :, 1:4]                    # (batch_size, rounds, 3)
    played = games[:, :, 10][..., np.newaxis]   # (batch_size, rounds, 1)

    # Compare every slot with the played card in one vectorised step instead
    # of looping over the rows in Python.
    is_played = np.asarray(hands == played, dtype = bool)
    if not is_played.any(axis = -1).all():
        raise IndexError('create_mask: the played card (column 10) is not in the hand (columns 1-3)')

    # argmax returns the first True along the slot axis
    slot = is_played.argmax(axis = -1)
    return np.eye(3)[slot]
    
    
def select_batch_of_games(all_games, batch_size, rounds_per_game = None):
    '''
    Groups the flat record of rounds into games and draws a batch of games.

    all_games: 2-D array with one row per round, the rounds of each game
        stored contiguously.
    batch_size: number of games to draw (without replacement).
    rounds_per_game: number of rows that make up one game; defaults to
        number_of_rounds + 1 (i.e. 21 rows per game).

    Returns an array of shape (n, rounds_per_game, #features): all the games,
    in their original order, when batch_size exceeds the number of games,
    otherwise `batch_size` randomly chosen games.

    Note that no round is dropped here: the extra last round of each game is
    still part of the result and is sliced off by the consumers that do not
    need it.

    Raises ValueError if the number of rows is not a multiple of rounds_per_game.
    '''
    if rounds_per_game is None:
        rounds_per_game = number_of_rounds + 1

    n_rows = all_games.shape[0]
    number_of_games, leftover = divmod(n_rows, rounds_per_game)
    if leftover:
        raise ValueError('select_batch_of_games: %d rows cannot be split into games of %d rounds'
                         % (n_rows, rounds_per_game))

    reshaped_games = np.reshape(all_games, (number_of_games, rounds_per_game, all_games.shape[1]))
    if batch_size > number_of_games:
        return reshaped_games

    indices = np.random.choice(number_of_games, batch_size, replace = False)
    return reshaped_games[indices,:,:]

In [8]:
## Helper needed to load the weights of a previously saved model
def my_load_weights(to_be_trained_model, location = 'location'):
    '''
    Copies the weights of the model saved at `location` into
    `to_be_trained_model` and returns `to_be_trained_model`.
    '''
    # Call the model once on a dummy input of shape (1, 1, 250) to
    # initialize its weights before they are overwritten.
    dummy_input = np.linspace(1,100,250)[np.newaxis][np.newaxis]
    to_be_trained_model(dummy_input)

    # Read the saved model and transfer its weights
    saved_weights = tf.keras.models.load_model(location).get_weights()
    to_be_trained_model.set_weights(saved_weights)

    return to_be_trained_model

# Reference model: built, compiled and filled with the weights stored under
# 'best_model_estimate_prob'. It is passed as pl_2_model in the evaluation
# games of the training loop further down.
best_model = players.MyModel() 
best_model.compile()
my_load_weights(best_model, 'best_model_estimate_prob')

<briscola_players.MyModel at 0x7f9fcdb6a850>

## Main

Sample training loop. See README for the details on how my model was trained.

In [None]:
import matplotlib.pyplot as plt

# Player classes used when generating training games
player_1 = players.DeepPlayer
player_2 = players.DeterministicPlayer


def _new_dense_model():
    '''Builds a MyModel_dense, compiles it and calls it once on a (1, 1, 250) dummy input.'''
    model = players.MyModel_dense(compute_prob_winning = False)
    model.compile()
    model(np.linspace(1,100,250)[np.newaxis][np.newaxis])
    return model


# model_copy is the network updated by the training steps; model_dense is the
# one handed to the players and refreshed from model_copy in the loop below.
model_copy = _new_dense_model()
model_dense = _new_dense_model()

partita = env.Briscola_env()

# Initial data set: 5000 recorded games
my_games = sim_games.simulate_games_and_record_data(
    5000, player_1, player_2, partita, epsilon = .2, pl_1_model = model_dense)

# Start both networks from the same weights
model_copy.set_weights(model_dense.get_weights())

batch_size = 250

# A model is saved only when its fraction of games lost vs random drops below this
top_score = 0.132

# Histories filled in by the training loop
past_loss_vs_model = []
fractions_games_lost_vs_random, fractions_games_lost_vs_det, fractions_games_lost_vs_model = [], [], []

for episode in range(5000):
    # Every 10th episode - except during the first 100 episodes of each block
    # of 500 - add 50 freshly simulated games to the training data.
    if episode%10 == 0 and episode%500 > 100:
        next_50_games = sim_games.simulate_games_and_record_data(50, player_1, player_2, partita,
                                                                 epsilon = .2, pl_1_model = model_dense)
        my_games = np.concatenate([my_games, next_50_games], axis = 0)

    # Keep only the most recent 2,100,000 rows (100,000 games of 21 rows each)
    if my_games.shape[0] > 2100000:
        my_games = my_games[-2100000:,:]

    # One training step: sample games, compute the targets, update model_copy
    batch = select_batch_of_games(my_games, batch_size)

    tar_q_val = get_target_qvals_with_dense(model_copy, batch)
    past_loss_vs_model = training_with_dense(model_copy, batch, tar_q_val, past_loss_vs_model)

    ##metrics
    # Rolling mean (100 steps) of the training loss, redrawn every 50 episodes
    if episode%50 == 0 and episode > 1:
        a = pd.DataFrame(past_loss_vs_model, columns = ['loss vs model']).rolling(window = 100).mean()
        my_plot = a.plot(figsize = (18,12))
        plt.show()

    # Every 500 episodes: hand the trained weights to the playing model and evaluate it
    if episode%500 == 0 and episode > 1:
        model_dense.set_weights(model_copy.get_weights())
        print('-----------------------------------------------')
        print('Episode:', episode)
        print('-----------------------------------------------')
        player1 = players.DeepPlayer
        randompl = players.RandomPlayer
        det_pl = players.DeterministicPlayer

        #play games
        lost_vs_random = sim_games.simulate_games(500, player1, randompl, partita, pl_1_model = model_dense)

        lost_vs_det = sim_games.simulate_games(500, player1, det_pl, partita, pl_1_model = model_dense)

        # The games against the reference model are recorded, so they are also
        # added to the training data.
        lost_vs_model_np = sim_games.simulate_games_and_record_data(500, player1, players.DeepPlayer,
                                                                    partita, pl_1_model = model_dense,
                                                                    pl_2_model = best_model)
        my_games = np.concatenate([my_games,lost_vs_model_np], axis = 0)
        lost_vs_model = sim_games.how_many_lost_games(lost_vs_model_np)

        fractions_games_lost_vs_random.append(lost_vs_random)
        fractions_games_lost_vs_det.append(lost_vs_det)
        fractions_games_lost_vs_model.append(lost_vs_model)

        #Save the model if it loses less often vs random than the best one so far
        if lost_vs_random < top_score:
            top_score = lost_vs_random
            print('-----------------------------------------------')
            print("New top score:", lost_vs_random)
            print('-----------------------------------------------')
            # NOTE(review): my_save_weights is not defined in the visible part
            # of this file - confirm it is provided by another cell.
            my_save_weights(model_dense, 'model_dense_top_accuracy_july')

        print('-----------------------------------------------')
        print('Last result vs random:', lost_vs_random)
        print('Last result vs det:', lost_vs_det)
        print('Last result vs model:', lost_vs_model)
        print('-----------------------------------------------')
        b = pd.DataFrame([fractions_games_lost_vs_random, fractions_games_lost_vs_det, fractions_games_lost_vs_model],
                         index = ['lost vs random', 'lost vs det', 'lost vs model' ]).transpose()
        my_second_plot = b.plot(figsize = (18,12))
        my_second_plot.axhline(0.4)
        my_second_plot.axhline(0.3)
        my_second_plot.axhline(0.2)
        my_second_plot.axhline(0.1)
        plt.show()

        # Smoothed view of the evaluation history. The previous guard
        # (`episode > 5000`) could never be true inside range(5000); the plot
        # is now drawn as soon as the 10-wide rolling window has enough
        # evaluations to produce a value. With 5000 episodes only 9
        # evaluations take place, so raise the episode count to see it.
        if len(fractions_games_lost_vs_random) >= 10:
            c = b.rolling(window = 10).mean()
            my_third_plot = c.plot(figsize = (18,12))
            my_third_plot.axhline(0.3)
            my_third_plot.axhline(0.2)
            my_third_plot.axhline(0.1)
            plt.show()