In [1]:
import warnings
from itertools import product

import cmdstanpy
import numpy as np
import pandas as pd
from IPython.display import clear_output
# Make sure a CmdStan installation is available before any model is compiled;
# the trailing ';' keeps the call's return value out of the cell output.
cmdstanpy.install_cmdstan();

CmdStan install directory: /Users/igor.michels/.cmdstan
CmdStan version 2.34.1 already installed
Test model compilation


In [2]:
# Simulation size. Each simulated season is a double round-robin: every ordered
# pair of distinct teams plays once, i.e. NUM_TEAMS * (NUM_TEAMS - 1) games.
NUM_SEASONS = 1
NUM_TEAMS = 20

In [3]:
def generate_matchups(n_equipes, habilidades, home_force):
    """Simulate one double round-robin season under a Bradley-Terry model.

    Every ordered pair of distinct teams ``(equipe1, equipe2)`` plays exactly
    once, with ``home_force`` added to the ability of ``equipe1``. The
    probability that ``equipe1`` wins is the logistic function of
    ``habilidades[equipe1 - 1] + home_force - habilidades[equipe2 - 1]``.

    Parameters
    ----------
    n_equipes : int
        Number of teams; teams are labelled ``1 .. n_equipes``.
    habilidades : sequence of float
        Ability of each team, indexed so that team ``k`` is at position
        ``k - 1``.
    home_force : float or None
        Bonus added to the ability of ``equipe1``; ``None`` means no bonus.

    Returns
    -------
    pandas.DataFrame
        One row per match with columns ``equipe1``, ``equipe2`` and
        ``vitoria_equipe1`` (1 if ``equipe1`` won, 0 otherwise). The columns
        are present even when there are no matches (``n_equipes <= 1``).
    """
    if home_force is None:
        home_force = 0

    colunas = ['equipe1', 'equipe2', 'vitoria_equipe1']
    confrontos = []
    for equipe1, equipe2 in product(range(1, n_equipes + 1), repeat=2):
        if equipe1 == equipe2:
            continue
        diferenca = habilidades[equipe1 - 1] + home_force - habilidades[equipe2 - 1]
        # Logistic function written as exp(-log(1 + exp(-d))): algebraically
        # equal to exp(a) / (exp(a) + exp(b)) but does not overflow to
        # inf / inf = NaN when abilities are large.
        prob = np.exp(-np.logaddexp(0.0, -diferenca))
        confrontos.append({
            'equipe1': equipe1,
            'equipe2': equipe2,
            # One uniform draw per match, in match order, from NumPy's global RNG.
            'vitoria_equipe1': int(np.random.random() < prob),
        })

    # Passing the column names keeps the schema stable for an empty season.
    return pd.DataFrame(confrontos, columns=colunas)

def generate_seasons(n_equipes, habilidades, n_seasons, home_force=None):
    """Simulate ``n_seasons`` independent seasons and stack them row-wise.

    Parameters
    ----------
    n_equipes : int
        Number of teams, forwarded to ``generate_matchups``.
    habilidades : sequence of float
        Team abilities, forwarded to ``generate_matchups``.
    n_seasons : int
        How many seasons to simulate.
    home_force : float or None, optional
        Home bonus forwarded to ``generate_matchups``; defaults to ``None``.

    Returns
    -------
    pandas.DataFrame
        The per-season frames concatenated in order. Each season keeps its
        own row labels, so index values repeat across seasons. An empty
        DataFrame is returned when ``n_seasons`` is 0.
    """
    # Collect the seasons first and concatenate once: growing a DataFrame with
    # pd.concat inside the loop re-copies all previous rows on every iteration
    # and starts from an empty frame, whose dtype handling pandas is changing.
    temporadas = [
        generate_matchups(n_equipes, habilidades, home_force)
        for _ in range(n_seasons)
    ]
    if not temporadas:
        # pd.concat raises on an empty list; preserve the empty-frame result.
        return pd.DataFrame()

    return pd.concat(temporadas)

In [4]:
# Draw true abilities and simulate the matches (no home advantage).
habilidades = np.random.normal(0, 1, NUM_TEAMS)
df = generate_seasons(NUM_TEAMS, habilidades, NUM_SEASONS)

modelo_compilado = cmdstanpy.CmdStanModel(stan_file='../models/BT_model.stan', force_compile=False)
dados_modelo = {
    'num_jogos': len(df),
    'num_equipes': NUM_TEAMS,
    'equipe1': df['equipe1'].values,
    'equipe2': df['equipe2'].values,
    'vitoria_equipe1': df['vitoria_equipe1'].values,
}

ajuste = modelo_compilado.sample(data=dados_modelo, chains=4, iter_warmup=2000, iter_sampling=10000, show_console=False)

# summary() re-runs CmdStan's stansummary on every call, so compute it once.
resumo = ajuste.summary()
# Select the ability rows by name rather than by position, so extra rows in
# the summary (lp__, other parameters) cannot shift the comparison.
nomes_habilidades = [f'habilidade[{i}]' for i in range(1, NUM_TEAMS + 1)]
resumo_habilidades = resumo.loc[nomes_habilidades]

# The simulated win probabilities depend only on ability differences, so the
# true abilities are shifted to match the posterior mean of team 1 before
# being compared with the posterior intervals.
habilidades += resumo_habilidades.loc['habilidade[1]', 'Mean'] - habilidades[0]
inside_ci = (
    (habilidades > resumo_habilidades['5%'].values)
    & (habilidades < resumo_habilidades['95%'].values)
)

clear_output()

# Emitted after clear_output() so the message is not wiped. Coverage computed
# from a non-converged fit is not meaningful; 1.01 is a commonly used R_hat cut-off.
if 'R_hat' in resumo.columns and resumo['R_hat'].max() > 1.01:
    warnings.warn(
        f"Sampler may not have converged: max R_hat = {resumo['R_hat'].max():.3g}; "
        "interpret the coverage below with caution."
    )

# Fraction of teams whose (shifted) true ability lies inside the 5%-95% posterior interval.
inside_ci.mean()

0.8

In [5]:
# Draw a non-negative home advantage (|N(0, 1)|) and true abilities, then
# simulate the matches with the home bonus applied to equipe1.
home_force = abs(np.random.normal(0, 1))
habilidades = np.random.normal(0, 1, NUM_TEAMS)
df = generate_seasons(NUM_TEAMS, habilidades, NUM_SEASONS, home_force)

modelo_compilado = cmdstanpy.CmdStanModel(stan_file='../models/BT_model_2.stan', force_compile=False)
dados_modelo = {
    'num_jogos': len(df),
    'num_equipes': NUM_TEAMS,
    'equipe1': df['equipe1'].values,
    'equipe2': df['equipe2'].values,
    'vitoria_equipe1': df['vitoria_equipe1'].values,
}

ajuste = modelo_compilado.sample(data=dados_modelo, chains=4, iter_warmup=2000, iter_sampling=10000, show_console=False)

# summary() re-runs CmdStan's stansummary on every call, so compute it once.
resumo = ajuste.summary()
# Select the ability rows by name rather than by position: this summary also
# contains lp__ and home_force, and a positional slice silently depends on
# their order.
nomes_habilidades = [f'habilidade[{i}]' for i in range(1, NUM_TEAMS + 1)]
resumo_habilidades = resumo.loc[nomes_habilidades]

# The simulated win probabilities depend only on ability differences, so the
# true abilities are shifted to match the posterior mean of team 1 before
# being compared with the posterior intervals.
habilidades += resumo_habilidades.loc['habilidade[1]', 'Mean'] - habilidades[0]
inside_ci = (
    (habilidades > resumo_habilidades['5%'].values)
    & (habilidades < resumo_habilidades['95%'].values)
)

clear_output()

# Emitted after clear_output() so the message is not wiped. Very wide intervals
# from a non-converged fit trivially contain the truth, so a high coverage alone
# says nothing; 1.01 is a commonly used R_hat cut-off.
if 'R_hat' in resumo.columns and resumo['R_hat'].max() > 1.01:
    warnings.warn(
        f"Sampler may not have converged: max R_hat = {resumo['R_hat'].max():.3g}; "
        "interpret the coverage below with caution."
    )

# Fraction of teams whose (shifted) true ability lies inside the 5%-95% posterior interval.
inside_ci.mean()

1.0

In [6]:
# True home-advantage value drawn for the simulation in the previous cell,
# displayed so it can be compared with the fitted `home_force` in the summary.
home_force

0.7513962642717635

In [7]:
# Posterior summary of the most recent fit, including the MCSE, N_Eff and
# R_hat diagnostic columns for every parameter.
ajuste.summary()

Unnamed: 0,Mean,MCSE,StdDev,5%,50%,95%,N_Eff,N_Eff/s,R_hat
lp__,5215700.0,3758920.0,5729850.0,1448.98,2137430.0,13592000.0,2.32359,0.250333,4.07006
home_force,1.16126,6.11527,8.66028,-8.17792,8.26665,10.16,2.00554,0.216068,20.8021
habilidade[1],-3.95655,2.97224,4.22643,-9.0334,-0.394592,0.830463,2.02199,0.21784,9.06032
habilidade[2],-6.80452,5.23187,7.41691,-15.2288,0.343494,0.832775,2.00971,0.216516,13.8417
habilidade[3],-6.99161,5.21003,7.38564,-15.2057,-1.93976,2.36616,2.00954,0.216498,13.9628
habilidade[4],-5.16805,4.24184,6.02578,-12.324,0.357992,1.2715,2.01798,0.217408,10.0427
habilidade[5],-6.3294,4.66045,6.61453,-13.9128,-1.72253,1.97135,2.01438,0.21702,11.2647
habilidade[6],-7.12078,5.0244,7.12392,-15.2135,-0.9322,0.837623,2.01034,0.216585,13.3927
habilidade[7],-4.34294,3.37793,4.80151,-10.0901,-0.106634,0.932995,2.02048,0.217678,9.39363
habilidade[8],-4.29408,3.03916,4.32262,-9.53319,-0.170965,0.161759,2.02296,0.217944,8.86683
