In [1]:
import numpy as np
import pandas as pd
import pystan
import matplotlib.pyplot as plt
import random
import time
%matplotlib inline

In [2]:
def logit(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    Note on naming: mathematically this is the *inverse* of the logit
    (log-odds) function -- it maps a real-valued score to a probability in
    [0, 1].  The name is kept so existing callers continue to work.

    The value is computed as ``exp(-log(1 + exp(-z)))`` via ``np.logaddexp``,
    which avoids the overflow (and the accompanying RuntimeWarning) that the
    naive ``np.exp(-z)`` produces for large negative ``z``.

    Parameters
    ----------
    z : float or array-like
        Score(s) to squash.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``z``.
    """
    return np.exp(-np.logaddexp(0., np.negative(z)))

In [3]:
def generate_players(num_of_players, skill_cap):
    """Draw random skill ratings for a pool of players.

    Each rating is sampled uniformly from ``[0, skill_cap]`` with the
    module-level ``random`` generator and rounded to two decimals.

    Returns a list of ``num_of_players`` floats; position ``k`` holds the
    skill of the player that the rest of the notebook refers to by the
    1-based id ``k + 1``.
    """
    skills = []
    for _ in range(num_of_players):
        rating = random.uniform(0, skill_cap)
        skills.append(round(rating, 2))
    return skills

In [4]:
def generate_games_for_new_player(true_players, new_player, num_of_games,scale, style='pygm'):
    """Simulate games between a newcomer and randomly chosen existing players.

    Parameters
    ----------
    true_players : list of float
        Skills of the existing players; the player with 1-based id ``k`` has
        skill ``true_players[k - 1]``.
    new_player : float
        True skill of the newcomer.
    num_of_games : int
        Number of games to simulate.
    scale : float
        Multiplier applied to the skill difference before it is passed to
        ``logit`` to obtain the newcomer's win probability.
    style : str
        Accepted for interface compatibility; not referenced in the body.

    Returns
    -------
    (p1_list, p2_list, win_list) : tuple of three lists of length ``num_of_games``
        ``p1_list`` holds the newcomer's id (``len(true_players) + 1``) for
        every game, ``p2_list`` the sampled opponent id, and ``win_list`` is
        1 when the newcomer won and 0 otherwise.
    """
    roster_size = len(true_players)
    newcomer_id = roster_size + 1            # first id past the existing players
    opponent_ids = range(1, roster_size + 1)  # 1-based ids of the existing players

    challengers, opponents, outcomes = [], [], []
    for _ in range(num_of_games):
        rival = random.sample(opponent_ids, 1)[0]
        p_win = logit(scale * (new_player - true_players[rival - 1]))
        # Bernoulli draw: 1 with probability p_win, otherwise 0.
        result = random.choices([1, 0], weights=[p_win, 1 - p_win])[0]

        challengers.append(newcomer_id)
        opponents.append(rival)
        outcomes.append(result)

    return challengers, opponents, outcomes

In [5]:
def create_skill_model(skills):
    """Assemble the Stan program text for the skill-inference model.

    The program declares ``N`` players and ``E`` games, and models each game
    outcome as ``bernoulli_logit(scale * (skill[PA] - skill[PB]))``.

    Every entry of ``skills`` becomes a ``normal(<value>, 3)`` prior on the
    matching ``skill[k]`` (1-based), and one extra parameter
    ``skill[len(skills) + 1]`` receives a ``normal(25, 8)`` prior.  Because
    the priors are written into the program text, a different ``skills`` list
    yields a different program.

    Returns the program as a single string.
    """
    # Explicit literals keep the trailing spaces of the original text visible.
    header = (
        "\n"
        "data {\n"
        "  int<lower=1> N;             # Total number of players\n"
        "  int<lower=1> E;             # number of games\n"
        "  real<lower=0> scale;        # scale value for probability computation\n"
        "  int<lower=0,upper=1> win[E]; # PA wins vs PB\n"
        "  int PA[E];                  # player info between each game\n"
        "  int PB[E];                  # \n"
        "}\n"
        "parameters {\n"
        "  vector [N] skill;           # skill values for each player\n"
        "}\n"
        "\n"
        "model{\n"
    )

    known_priors = [
        f"skill[{idx}]~normal({prior_mean}, 3);\n"
        for idx, prior_mean in enumerate(skills, start=1)
    ]
    newcomer_prior = f"skill[{len(skills)+1}]~normal(25,8);\n"

    likelihood = (
        "for (i in 1:E){ \n"
        "    win[i] ~ bernoulli_logit( (scale)*(skill[PA[i]]-skill[PB[i]]) ); \n"
        "  }   # win probability is a logit function of skill difference \n"
        "}"
    )

    return "".join([header, *known_priors, newcomer_prior, likelihood])

In [6]:
# Ten existing players with skills drawn uniformly from [0, 50].
true_players = generate_players(num_of_players=10, skill_cap=50)

# Stan program text whose priors are centred on those skills.
skill_model = create_skill_model(skills=true_players)

In [7]:
import pickle

# Compile the generated Stan program. The priors are written into the program
# text from `true_players`, so the compiled model is specific to this draw.
sm = pystan.StanModel(model_code=skill_model)

# Persist the compiled model next to the notebook.
with open('skill_model_test.pkl', 'wb') as model_file:
    pickle.dump(sm, model_file)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_c23c3a185adf00d76dcaea91ddc1c8f7 NOW.


In [8]:
# Show the generated Stan program (raw string repr) to check the per-player priors.
skill_model

'\ndata {\n  int<lower=1> N;             # Total number of players\n  int<lower=1> E;             # number of games\n  real<lower=0> scale;        # scale value for probability computation\n  int<lower=0,upper=1> win[E]; # PA wins vs PB\n  int PA[E];                  # player info between each game\n  int PB[E];                  # \n}\nparameters {\n  vector [N] skill;           # skill values for each player\n}\n\nmodel{\nskill[1]~normal(22.47, 3);\nskill[2]~normal(21.01, 3);\nskill[3]~normal(35.41, 3);\nskill[4]~normal(30.65, 3);\nskill[5]~normal(26.5, 3);\nskill[6]~normal(0.85, 3);\nskill[7]~normal(40.56, 3);\nskill[8]~normal(37.81, 3);\nskill[9]~normal(38.25, 3);\nskill[10]~normal(33.5, 3);\nskill[11]~normal(25,8);\nfor (i in 1:E){ \n    win[i] ~ bernoulli_logit( (scale)*(skill[PA[i]]-skill[PB[i]]) ); \n  }   # win probability is a logit function of skill difference \n}'

In [40]:
# True skill of the simulated newcomer: uniform on [0, 50], rounded to two
# decimals (the same recipe generate_players uses for the existing players).
new_player = round(random.uniform(0,50),2)

In [41]:
# Simulation settings. `scale` drives both the simulated outcomes and the Stan
# likelihood, so it is defined once here and reused below.
num_of_games = 1000
scale = 0.5
p1_list, p2_list, win_list = generate_games_for_new_player(
    true_players, new_player, num_of_games, scale
)

skill_data = {
    'N': len(true_players) + 1,  # existing players plus the newcomer
    'E': num_of_games,
    'scale': scale,              # must match the value used to simulate the games
    'win': win_list,
    'PA': p1_list,
    'PB': p2_list,
}

fit = sm.sampling(data=skill_data, iter=200, chains=2)
samples = fit.extract()

# Mean of the extracted `skill` draws per player; the last entry is the newcomer.
samples['skill'].mean(0)

array([22.73472125, 21.14534852, 33.92323191, 30.19909834, 26.56169446,
        0.6682659 , 40.77231298, 37.47479408, 38.62768463, 32.97764358,
       46.74489307])

In [42]:
# Skills used as prior means for the existing players, for comparison with
# the fitted skill means.
true_players

[22.47, 21.01, 35.41, 30.65, 26.5, 0.85, 40.56, 37.81, 38.25, 33.5]

In [43]:
# True skill of the newcomer; compare with the last entry of the fitted skill means.
new_player

46.75

In [13]:
# Player-A id for every simulated game (always the newcomer's id).
# NOTE(review): the saved output has 20 entries although num_of_games is set
# to 1000 -- it appears to come from an earlier run; re-run to refresh.
p1_list

[11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11,
 11]

In [14]:
# Randomly sampled opponent id (1-based index into true_players) for every game.
# NOTE(review): the saved output has 20 entries although num_of_games is set
# to 1000 -- it appears to come from an earlier run; re-run to refresh.
p2_list

[5, 10, 7, 5, 10, 2, 3, 7, 7, 2, 9, 7, 8, 7, 5, 10, 7, 3, 1, 8]

In [15]:
# Outcome of every game: 1 if the newcomer won, 0 otherwise.
# NOTE(review): the saved output has 20 entries although num_of_games is set
# to 1000 -- it appears to come from an earlier run; re-run to refresh.
win_list

[1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0]