In [1]:
import cmdstanpy
import numpy as np
import pandas as pd

from itertools import product
cmdstanpy.install_cmdstan();

CmdStan install directory: /Users/igor.michels/.cmdstan
CmdStan version 2.34.0 already installed
Test model compilation


In [2]:
def generate_matchups(n_equipes, habilidades, home_force):
    if home_force is None: home_force = 0
    confrontos = []
    for equipe1, equipe2 in product(range(1, n_equipes + 1), repeat=2):
        if equipe1 == equipe2: continue
        prob = np.exp(habilidades[equipe1 - 1] + home_force)
        prob = prob / (np.exp(habilidades[equipe1 - 1] + home_force) + np.exp(habilidades[equipe2 - 1]))
        confrontos.append({
            'equipe1': equipe1,
            'equipe2': equipe2,
            'vitoria_equipe1': (np.random.random() < prob) * 1
        })

    df_confrontos = pd.DataFrame(confrontos)

    return df_confrontos

def generate_seasons(n_equipes, habilidades, n_seasons, home_force=None):
    df = pd.DataFrame()
    for _ in range(n_seasons):
        df = pd.concat([df, generate_matchups(n_equipes, habilidades, home_force)])
    
    return df

In [3]:
num_seasons = 5
num_equipes = 20
habilidades = np.random.normal(0, 1, num_equipes)
df = generate_seasons(num_equipes, habilidades, num_seasons)

modelo_compilado = cmdstanpy.CmdStanModel(stan_file='../models/model.stan', force_compile=False)
dados_modelo = {
    'num_jogos': len(df),
    'num_equipes': num_equipes,
    'equipe1': df['equipe1'].values,
    'equipe2': df['equipe2'].values,
    'vitoria_equipe1': df['vitoria_equipe1'].values,
}

ajuste = modelo_compilado.sample(data=dados_modelo, chains=4, iter_warmup=2000, iter_sampling=10000, show_console=False)
habilidades += ajuste.summary()['Mean']['habilidade[1]'] - habilidades[0]
inside_ci = (habilidades > ajuste.summary()['5%'].values[1:])
inside_ci *= ((habilidades < ajuste.summary()['95%'].values[1:]))
sum(inside_ci) / num_equipes

11:13:06 - cmdstanpy - INFO - compiling stan file /Users/igor.michels/Documents/FGV/BrazilianSoccerEDA/Modeling/models/model.stan to exe file /Users/igor.michels/Documents/FGV/BrazilianSoccerEDA/Modeling/models/model
11:13:10 - cmdstanpy - INFO - compiled model executable: /Users/igor.michels/Documents/FGV/BrazilianSoccerEDA/Modeling/models/model
11:13:11 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

11:13:22 - cmdstanpy - INFO - CmdStan done processing.





0.95

In [4]:
num_seasons = 10
num_equipes = 20
home_force = abs(np.random.normal(0, 1))
habilidades = np.random.normal(0, 1, num_equipes)
df = generate_seasons(num_equipes, habilidades, num_seasons, home_force)

modelo_compilado = cmdstanpy.CmdStanModel(stan_file='../models/model_2.stan', force_compile=False)
dados_modelo = {
    'num_jogos': len(df),
    'num_equipes': num_equipes,
    'equipe1': df['equipe1'].values,
    'equipe2': df['equipe2'].values,
    'vitoria_equipe1': df['vitoria_equipe1'].values,
}

ajuste = modelo_compilado.sample(data=dados_modelo, chains=4, iter_warmup=2000, iter_sampling=10000, show_console=False)
habilidades += ajuste.summary()['Mean']['habilidade[1]'] - habilidades[0]
inside_ci = (habilidades > ajuste.summary()['5%'].values[2:])
inside_ci *= ((habilidades < ajuste.summary()['95%'].values[2:]))
sum(inside_ci) / num_equipes

11:13:24 - cmdstanpy - INFO - compiling stan file /Users/igor.michels/Documents/FGV/BrazilianSoccerEDA/Modeling/models/model_2.stan to exe file /Users/igor.michels/Documents/FGV/BrazilianSoccerEDA/Modeling/models/model_2
11:13:28 - cmdstanpy - INFO - compiled model executable: /Users/igor.michels/Documents/FGV/BrazilianSoccerEDA/Modeling/models/model_2
11:13:28 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

11:14:11 - cmdstanpy - INFO - CmdStan done processing.
	Chain 1 had 433 divergent transitions (4.3%)
	Chain 2 had 362 divergent transitions (3.6%)
	Chain 3 had 446 divergent transitions (4.5%)
	Use the "diagnose()" method on the CmdStanMCMC object to see further information.





1.0

In [5]:
home_force

0.7274480028937723

In [6]:
ajuste.summary()

Unnamed: 0,Mean,MCSE,StdDev,5%,50%,95%,N_Eff,N_Eff/s,R_hat
lp__,8450680.0,3572610.0,5797180.0,148796.0,10338500.0,16440500.0,2.63308,0.031038,3.18428
home_force,-11.8468,23.7933,33.6565,-70.6512,7.5837,8.06609,2.00091,0.023586,88.0073
habilidade[1],-24.5314,30.4061,43.0094,-99.8478,0.553428,1.64622,2.00081,0.023585,90.973
habilidade[2],-26.8667,32.7204,46.2826,-107.832,-0.423655,1.64435,2.00078,0.023585,99.1494
habilidade[3],-30.7687,37.2451,52.6823,-122.851,-0.106196,0.000653201,2.00074,0.023584,113.026
habilidade[4],-25.8147,32.7459,46.3188,-106.845,1.88639,2.07076,2.00078,0.023585,97.5282
habilidade[5],-37.432,45.3743,64.1795,-149.212,-0.676457,0.791942,2.00065,0.023583,186.32
habilidade[6],-22.8208,28.2554,39.9675,-92.8612,0.0652633,0.880075,2.00083,0.023586,86.3015
habilidade[7],-33.9515,42.4692,60.0702,-138.561,1.28518,1.93478,2.00065,0.023583,185.935
habilidade[8],-29.4058,36.576,51.7358,-119.829,0.453102,0.722518,2.00074,0.023584,113.3
