# Simple dice poker

In [3]:
import math
import numpy as np
from src.game import Game, RLPlayer
from src.strategy import Strategy
import pickle


## load pre-trained model from memory (optional):

In [5]:
# load model from memory (optional)
with open("strat/strat.pkl", "rb") as f:
    strat = pickle.load(f)

In [5]:
math.comb(11,1)

11

## Train a new Model:

In [6]:
print(1e5)

100000.0


In [None]:
# Train a model 
epsilon_start = 0.25
strat = Strategy(n = 2, gamma = 1, alpha = 0.5, decay_rate = 0.1, epsilon = epsilon_start)

players = [RLPlayer(strat), RLPlayer(strat)] # giving them the same strat

for i in range(int(2.5e7)): #25 mil simulations ~ 40 minutes
    #every 100.000 steps update epsilon to make it less random
    if i % 1e5 == 0:
        epsilon *=0.95 
        strat.epsilon = epsilon
    game = Game(players)
    game.simulate_game()


In [9]:
#some stats for the model
n_params = 2*(sum([math.comb(11,2)*math.comb(9+i, 0+i) for i in range(4)]))
print("Total number of learnable values: ",n_params) # the amount of distinct values
print("mean number of visits pr. state: ", 1e7/n_params)
print("Alpha convergence towards :", 1/1+0.1*(1e7/n_params))
print("Epsilon convergence towards: ", 0.25*(0.95**(2.5e7/1e5)))

print("epsilon after 20 visits to an action:",0.25*(0.97**20))

print(6e-01)

Total number of learnable values:  31460
mean number of visits pr. state:  317.86395422759057
Alpha convergence towards : 32.78639542275906
Epsilon convergence towards:  6.742816344747503e-07
epsilon after 20 visits to an action: 0.13594858573168675
0.6


## Save model (optional):

In [60]:
with open("strat/strat.pkl", "wb") as f:
    pickle.dump(strat, f)

## Testing

In [59]:
hand = [10,8]
board = [9, 9, 6]
#action: 0 -> continue, 1 -> fold
action = 0

print("number of visits: ", strat.n_action_updates[len(board)][action][strat._get_state_idx(hand, board)])
print(f"action value for action {"Continue" if action==0 else "fold"}:",strat.action_values[len(board)][action][strat._get_state_idx(hand, board)])


number of visits:  1406.0
action value for action Continue: -3.618537682921602
