In [1]:
import cmdstanpy
import numpy as np
import pandas as pd

from IPython.display import clear_output
from itertools import product
# Make sure a CmdStan toolchain is available before any model is compiled below.
# The trailing ';' suppresses the call's return value in the cell output.
cmdstanpy.install_cmdstan();

CmdStan install directory: /Users/igor.michels/.cmdstan
CmdStan version 2.34.1 already installed
Test model compilation


In [2]:
# Size of the simulated league used by every experiment in this notebook.
NUM_TEAMS: int = 20    # number of teams; team ids run from 1 to NUM_TEAMS
NUM_SEASONS: int = 1   # how many full rounds of matchups are simulated

In [3]:
def generate_matchups(n_equipes, habilidades, home_force):
    """Simulate one double round-robin of Bradley-Terry matches.

    Every ordered pair ``(equipe1, equipe2)`` with ``equipe1 != equipe2`` is
    played exactly once. ``equipe1`` is the side that receives the
    ``home_force`` bonus, and it wins with probability::

        sigmoid(habilidades[equipe1 - 1] + home_force - habilidades[equipe2 - 1])

    which is algebraically the same as
    ``exp(a1 + home) / (exp(a1 + home) + exp(a2))``.

    Parameters
    ----------
    n_equipes : int
        Number of teams; team ids run from 1 to ``n_equipes``.
    habilidades : indexable of float
        Ability of each team; entry ``i - 1`` belongs to team ``i``.
    home_force : float or None
        Additive bonus for ``equipe1``. ``None`` means no bonus (0).

    Returns
    -------
    pandas.DataFrame
        One row per match with columns ``equipe1``, ``equipe2`` and
        ``vitoria_equipe1`` (1 if ``equipe1`` won, otherwise 0). The columns
        are present even when there are fewer than two teams and therefore
        no matches.
    """
    if home_force is None:
        home_force = 0

    colunas = ['equipe1', 'equipe2', 'vitoria_equipe1']
    confrontos = []
    for equipe1, equipe2 in product(range(1, n_equipes + 1), repeat=2):
        if equipe1 == equipe2:
            continue

        vantagem = habilidades[equipe1 - 1] + home_force - habilidades[equipe2 - 1]
        # Logistic function evaluated in log space. The naive
        # exp(a) / (exp(a) + exp(b)) overflows to inf / inf = nan for large
        # abilities, and `random() < nan` is always False, which silently
        # turned the overwhelming favourite into a guaranteed loser.
        prob = np.exp(-np.logaddexp(0.0, -vantagem))

        confrontos.append({
            'equipe1': equipe1,
            'equipe2': equipe2,
            # Exactly one uniform draw per match, in match order.
            'vitoria_equipe1': int(np.random.random() < prob),
        })

    # Pinning the columns keeps the schema stable for an empty schedule.
    return pd.DataFrame(confrontos, columns=colunas)

def generate_seasons(n_equipes, habilidades, n_seasons, home_force=None):
    """Simulate ``n_seasons`` independent rounds of matchups and stack them.

    Each season is produced by one call to ``generate_matchups`` with the
    same teams, abilities and home bonus.

    Parameters
    ----------
    n_equipes : int
        Number of teams.
    habilidades : indexable of float
        Ability of each team, forwarded to ``generate_matchups``.
    n_seasons : int
        Number of seasons to simulate.
    home_force : float or None, optional
        Home bonus forwarded to ``generate_matchups``.

    Returns
    -------
    pandas.DataFrame
        All seasons concatenated row-wise. Row labels are not reset, so they
        restart at 0 for every season. An empty ``DataFrame`` is returned
        when ``n_seasons`` is zero or negative.
    """
    # Collect the seasons first and concatenate once: growing a frame with
    # pd.concat inside the loop is quadratic, and seeding it with an empty
    # DataFrame relies on pandas' deprecated handling of empty entries.
    temporadas = [
        generate_matchups(n_equipes, habilidades, home_force)
        for _ in range(n_seasons)
    ]
    if not temporadas:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()

    return pd.concat(temporadas)

In [4]:
# Experiment 1 (no home advantage): simulate a league from known abilities,
# fit the Bradley-Terry Stan model and measure how many true abilities fall
# inside the fitted 5%-95% interval.
habilidades = np.random.normal(0, 1, NUM_TEAMS)
df = generate_seasons(NUM_TEAMS, habilidades, NUM_SEASONS)

modelo_compilado = cmdstanpy.CmdStanModel(stan_file='../models/BT_model.stan', force_compile=False)
dados_modelo = {
    'num_jogos': len(df),
    'num_equipes': NUM_TEAMS,
    'equipe1': df['equipe1'].values,
    'equipe2': df['equipe2'].values,
    'vitoria_equipe1': df['vitoria_equipe1'].values,
}

ajuste = modelo_compilado.sample(data=dados_modelo, chains=4, iter_warmup=2000, iter_sampling=10000, show_console=False)

# Build the summary table once and reuse it instead of recomputing it for
# every column that is read.
resumo = ajuste.summary()

# Pick the ability rows by name rather than by position, so the comparison
# does not depend on how many other rows (lp__, extra parameters) come first.
nomes_habilidade = [f'habilidade[{i}]' for i in range(1, NUM_TEAMS + 1)]
limite_inferior = resumo.loc[nomes_habilidade, '5%'].values
limite_superior = resumo.loc[nomes_habilidade, '95%'].values

# Shift the true abilities so that team 1 coincides with its posterior mean
# before comparing. NOTE(review): presumably this is done because the
# abilities are only determined up to an additive constant - confirm against
# the Stan model.
deslocamento = resumo.loc['habilidade[1]', 'Mean'] - habilidades[0]
habilidades = habilidades + deslocamento
inside_ci = (habilidades > limite_inferior) & (habilidades < limite_superior)

# Wipes everything printed so far by this cell (including any sampler output).
clear_output()
# Fraction of teams whose shifted true ability lies inside the 5%-95% interval.
inside_ci.sum() / NUM_TEAMS

0.8

In [5]:
# Experiment 2 (with home advantage): same coverage check as before, but the
# data are simulated with a positive home bonus and fitted with the second
# Stan model.
home_force = abs(np.random.normal(0, 1))
habilidades = np.random.normal(0, 1, NUM_TEAMS)
df = generate_seasons(NUM_TEAMS, habilidades, NUM_SEASONS, home_force)

modelo_compilado = cmdstanpy.CmdStanModel(stan_file='../models/BT_model_2.stan', force_compile=False)
dados_modelo = {
    'num_jogos': len(df),
    'num_equipes': NUM_TEAMS,
    'equipe1': df['equipe1'].values,
    'equipe2': df['equipe2'].values,
    'vitoria_equipe1': df['vitoria_equipe1'].values,
}

ajuste = modelo_compilado.sample(data=dados_modelo, chains=4, iter_warmup=2000, iter_sampling=10000, show_console=False)

# Build the summary table once and reuse it instead of recomputing it for
# every column that is read.
resumo = ajuste.summary()

# Pick the ability rows by name rather than by position, so rows such as
# lp__ and home_force that precede them cannot shift the comparison.
nomes_habilidade = [f'habilidade[{i}]' for i in range(1, NUM_TEAMS + 1)]
limite_inferior = resumo.loc[nomes_habilidade, '5%'].values
limite_superior = resumo.loc[nomes_habilidade, '95%'].values

# Shift the true abilities so that team 1 coincides with its posterior mean
# before comparing. NOTE(review): presumably this is done because the
# abilities are only determined up to an additive constant - confirm against
# the Stan model.
deslocamento = resumo.loc['habilidade[1]', 'Mean'] - habilidades[0]
habilidades = habilidades + deslocamento
inside_ci = (habilidades > limite_inferior) & (habilidades < limite_superior)

# Wipes everything printed so far by this cell (including any sampler output).
# NOTE(review): the summary recorded further down in this notebook shows
# R_hat values far from 1 for this fit, so check convergence of
# BT_model_2.stan before trusting the coverage figure below.
clear_output()
# Fraction of teams whose shifted true ability lies inside the 5%-95% interval.
inside_ci.sum() / NUM_TEAMS

0.55

In [6]:
# Show the true home-advantage value that was used to simulate the data,
# for comparison with the fitted estimate.
home_force

0.6668953786312211

In [7]:
# Display the summary table of the most recent fit (`ajuste`).
ajuste.summary()

Unnamed: 0,Mean,MCSE,StdDev,5%,50%,95%,N_Eff,N_Eff/s,R_hat
lp__,10211700.0,1024090.0,3468070.0,3235220.0,10861100.0,14550600.0,11.4682,1.63319,1.73577
home_force,9.77752,0.135032,0.472997,8.70202,9.90391,10.267,12.27,1.74737,1.48917
habilidade[1],0.741591,0.990569,1.40109,-0.615599,0.986715,2.93035,2.0006,0.284905,794952.0
habilidade[2],0.434904,0.661977,0.936318,-0.473513,0.455496,1.94814,2.0006,0.284905,970354.0
habilidade[3],0.970299,0.608388,0.86052,-0.476293,1.35003,1.783,2.0006,0.284905,659283.0
habilidade[4],0.331038,0.569753,0.805873,-0.428655,0.276184,1.6564,2.0006,0.284905,978459.0
habilidade[5],1.05836,0.466768,0.660208,0.378751,1.17332,2.08531,2.0006,0.284905,873139.0
habilidade[6],-0.326321,0.551389,0.779898,-1.18777,0.120926,0.718931,2.0006,0.284905,1121008.0
habilidade[7],0.531985,0.726128,1.02705,-0.475699,1.09134,1.93734,2.0006,0.284905,636626.0
habilidade[8],0.705307,0.456132,0.645165,-0.308842,0.949321,1.46475,2.0006,0.284905,603378.0
