In [1]:
import numpy as np
import pandas as pd
import pystan
import matplotlib.pyplot as plt
import random
import time
%matplotlib inline

In [2]:
def logit(z): return 1./(1.+np.exp(-z))

In [3]:
def generate_players(num_of_players, skill_cap):
    ## returns a list of player's skill
    return [round(random.uniform(0,skill_cap),2) for i in range(num_of_players)]

In [4]:
def generate_games_for_new_player(true_players, new_player, num_of_games,scale, style='pygm'):
    p1_list=[]
    p2_list=[]
    win_list=[]
    for i in range(num_of_games):
        p1 = len(true_players)+1
        p2 = random.sample(range(1,len(true_players)+1),1)[0]
        win_rate = logit (scale* (new_player- true_players[p2-1]))
        
        p1_list.append(p1)
        p2_list.append(p2)
        win_list.append(*random.choices([1,0],weights=[win_rate, 1-win_rate]))
#         games.append((p1,p2,*random.choices([1,-1],weights=[win_rate, 1-win_rate])))

    return p1_list, p2_list, win_list

In [34]:
def create_skill_model(skills):
    model = """
data {
  int<lower=1> N;             # Total number of players
  int<lower=1> E;             # number of games
  real<lower=0> scale;        # scale value for probability computation
  int<lower=0,upper=1> win[E]; # PA wins vs PB
  int PA[E];                  # player info between each game
  int PB[E];                  # 
}
parameters {
  vector [N] skill;           # skill values for each player
}

model{
"""
    for i in range(1, len(skills)+1):
        model += f"skill[{i}]~normal({skills[i-1]}, 3);\n"
    model += f"skill[{len(skills)+1}]~normal(25,10);\n"
    model += """for (i in 1:E){ 
    win[i] ~ bernoulli_logit( (scale)*(skill[PA[i]]-skill[PB[i]]) ); 
  }   # win probability is a logit function of skill difference 
}"""
    return model

In [6]:
true_players=generate_players(10,50)
skill_model = create_skill_model(true_players)

In [7]:
import pickle

sm = pystan.StanModel(model_code = skill_model)
#with open('skill_model_test.pkl', 'wb') as f: pickle.dump(sm, f)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_f7da18d307891872d2855febe1643eb0 NOW.


In [8]:
skill_model

'\ndata {\n  int<lower=1> N;             # Total number of players\n  int<lower=1> E;             # number of games\n  real<lower=0> scale;        # scale value for probability computation\n  int<lower=0,upper=1> win[E]; # PA wins vs PB\n  int PA[E];                  # player info between each game\n  int PB[E];                  # \n}\nparameters {\n  vector [N] skill;           # skill values for each player\n}\n\nmodel{\nskill[1]~normal(41.21, 3);\nskill[2]~normal(12.9, 3);\nskill[3]~normal(2.34, 3);\nskill[4]~normal(10.09, 3);\nskill[5]~normal(21.78, 3);\nskill[6]~normal(12.11, 3);\nskill[7]~normal(3.1, 3);\nskill[8]~normal(2.81, 3);\nskill[9]~normal(23.5, 3);\nskill[10]~normal(2.44, 3);\nskill[11]~normal(25,8);\nfor (i in 1:E){ \n    win[i] ~ bernoulli_logit( (scale)*(skill[PA[i]]-skill[PB[i]]) ); \n  }   # win probability is a logit function of skill difference \n}'

In [9]:
new_player = round(random.uniform(0,50),2)

In [10]:
num_of_games = 20
scale = 0.5
p1_list,p2_list,win_list = generate_games_for_new_player(true_players, new_player, num_of_games,scale )

In [11]:
len(p1_list), len(p2_list)

[11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11]

In [13]:
win_list

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [14]:
skill_data = {
    'N': len(true_players) + 1,
    'E': num_of_games,
    'scale': 0.5,
    'win':win_list,
    'PA': p1_list,
    'PB': p2_list,
}

In [15]:
fit = sm.sampling(data=skill_data, iter=200, chains=2)

In [16]:
samples = fit.extract()

In [21]:
samples['skill'].mean(0)

array([38.92907177, 12.85365802,  2.6025486 , 10.30073679, 21.92964736,
       12.1198313 ,  3.08185213,  2.85486205, 23.56666353,  2.30696619,
       42.16616524])

In [18]:
true_players

[41.21, 12.9, 2.34, 10.09, 21.78, 12.11, 3.1, 2.81, 23.5, 2.44]

In [19]:
new_player

46.05

# Compare

In [39]:
def compare_ngames(n_games_range, iter = 10, n_players = 10, skill_cap = 50, scale = 0.5):
    true_players=generate_players(n_players, skill_cap)
    skill_model = create_skill_model(true_players)
    sm = pystan.StanModel(model_code = skill_model)
    
    new_player = round(random.uniform(0,skill_cap),2)
    print(true_players)
    
    for num_of_games in n_games_range:
        print(num_of_games, new_player)
        diffs = []
        for _ in range(iter):
            p1_list,p2_list,win_list = generate_games_for_new_player(true_players, new_player, num_of_games, scale)
            skill_data = {
                'N': len(true_players) + 1,
                'E': num_of_games,
                'scale': 0.5,
                'win':win_list,
                'PA': p1_list,
                'PB': p2_list,
            }
            fit = sm.sampling(data=skill_data, iter=200, chains=2)
            samples = fit.extract()

            pred = samples['skill'].mean(0)[-1]
            diff = abs(pred - new_player)
            print(f'pred:{pred}, diff:{diff}')
            
            diffs.append(diff)
        print('mean:',np.mean(diffs), '\n')

In [40]:
compare_ngames([10,30,50,100], iter = 20, n_players = 50, skill_cap = 50)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9b5d79c472fcd46e0a353ce994ee6ad8 NOW.


[36.54, 24.87, 10.17, 3.37, 34.44, 0.47, 9.94, 37.15, 2.69, 20.81, 34.62, 41.91, 22.74, 41.15, 37.76, 4.34, 18.01, 13.1, 1.41, 40.09, 48.63, 33.23, 36.27, 29.19, 41.82, 19.75, 37.34, 17.52, 43.87, 16.92, 19.12, 26.54, 24.35, 9.96, 14.88, 13.83, 0.86, 18.49, 27.94, 14.48, 38.46, 3.67, 48.65, 4.69, 34.8, 21.41, 38.47, 9.22, 30.46, 41.85]
10 18.23
pred:22.59091218571879, diff:4.360912185718789
pred:17.02768716442382, diff:1.2023128355761799
pred:21.4038245280466, diff:3.173824528046598




pred:21.26818002839691, diff:3.0381800283969085
pred:15.938826909461486, diff:2.291173090538514
pred:20.67565835740111, diff:2.4456583574011113
pred:16.65771313307297, diff:1.5722868669270298
pred:21.18737435082392, diff:2.957374350823919




pred:23.670805523694018, diff:5.440805523694017
pred:17.53078744093261, diff:0.6992125590673908
pred:18.931345207725148, diff:0.7013452077251472
pred:16.69980492274071, diff:1.5301950772592896
pred:21.50450552551607, diff:3.2745055255160693
pred:20.27616965545208, diff:2.046169655452079
pred:16.791062034515303, diff:1.4389379654846977
pred:15.841174441345562, diff:2.388825558654439
pred:17.488237061046902, diff:0.741762938953098




pred:14.238555708236726, diff:3.9914442917632744




pred:19.110609180278406, diff:0.8806091802784053
pred:16.456315977548922, diff:1.7736840224510786
mean: 2.297460987486402 

30 18.23
pred:18.531200952552194, diff:0.30120095255219326
pred:21.996057342945914, diff:3.7660573429459134
pred:17.355998123024243, diff:0.8740018769757576
pred:15.448480505449673, diff:2.781519494550327
pred:18.526320363417554, diff:0.2963203634175535
pred:18.75638309003554, diff:0.5263830900355408
pred:17.49085345242916, diff:0.7391465475708401
pred:20.128158056996238, diff:1.8981580569962375
pred:21.741720247559982, diff:3.511720247559982
pred:23.060894577866076, diff:4.830894577866076
pred:19.250700683044567, diff:1.0207006830445664
pred:19.66776444662911, diff:1.4377644466291102
pred:17.97019989664768, diff:0.25980010335231896
pred:21.551003299492294, diff:3.3210032994922933
pred:19.354229552969755, diff:1.124229552969755
pred:18.775760759029442, diff:0.5457607590294415
pred:17.48421270440282, diff:0.7457872955971787
pred:19.817506782876816, diff:1.587506782



pred:18.824558061694017, diff:0.5945580616940163
pred:18.716422086258632, diff:0.4864220862586315
pred:14.93073229048322, diff:3.29926770951678




pred:17.617225967830823, diff:0.6127740321691775




pred:17.165686708689663, diff:1.064313291310338
pred:17.807527521351908, diff:0.4224724786480927




pred:18.046942217071237, diff:0.1830577829287634




pred:19.048167608800924, diff:0.8181676088009233
pred:18.167745203935013, diff:0.062254796064987517
pred:17.664673321478105, diff:0.5653266785218953




pred:17.787897998939226, diff:0.4421020010607748




pred:18.191028389850928, diff:0.03897161014907269
pred:18.011550331487147, diff:0.21844966851285363
mean: 0.9447154775246597 

