In [1]:
# Patch asyncio so that an event loop can be re-entered.
# NOTE(review): presumably needed so that PyStan (imported further down as
# `stan`) can run inside Jupyter's already-running event loop — confirm.
import nest_asyncio
nest_asyncio.apply()

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm 

from model.mma_features import DataPreprocessor, SimpleFeatureExtractor, EloFeatureExtractor, BioFeatureExtractor

# Load the cleaned fight statistics table.
stats_df = pd.read_csv("data/clean_stats3.csv")
# Run the stats through the preprocessor so that each fight row also carries
# the opponent's stats in `*_opp` columns (see the columns displayed below).
DP = DataPreprocessor(stats_df)
pp_df = DP.get_preprocessed_df()
pp_df.head()

Unnamed: 0,Date,Opponent,Event,FighterResult,TSL,TSA,SSL,SSA,TSL-TSA,KD,...,SDHA_opp,SDLL_opp,SDLA_opp,TD_fails_opp,submission_rate_opp,distance_strikes_landed_opp,clinch_strikes_landed_opp,standing_strikes_opp,KD_power_opp,ground_strikes_landed_opp
0,1991-09-26,Murilo Bustamante,Desafio,L,,,,,,,...,,,,,,,,,,
1,1992-01-01,Renzo Gracie,Desafio,L,,,,,,,...,,,,,,,,,,
2,1993-08-29,Moura Moura,CP X CB,W,,,,,,,...,,,,,,,,,,
3,1993-08-29,Jose Landi-Jons,CP X CB,L,,,,,,,...,,,,,,,,,,
4,1993-11-08,Ken Shamrock,Pancrase,L,,,,,,,...,,,,,,,,,,


In [3]:
# Build the simple per-fight features from the preprocessed stats. The result
# is read from `trans_df`; per the displayed columns it holds fight counts,
# time since last fight, and `*_diff` columns comparing fighter and opponent.
simple_fe = SimpleFeatureExtractor(pp_df)
simple_fe.fit_transform_all()
simple_fe.trans_df.head()

100%|██████████████████████████████████████████████████████████████████████| 4450/4450 [00:39<00:00, 113.93it/s]


Unnamed: 0,FighterID,OpponentID,Date,total_fights,total_ufc_fights,t_since_last_fight,total_fights_opp,total_ufc_fights_opp,t_since_last_fight_opp,t_since_last_fight_diff,t_since_last_fight_log_diff,total_fights_diff,total_fights_sqrt_diff,total_ufc_fights_diff,total_ufc_fights_sqrt_diff
0,2558095/marcelo-mendes,2354059/murilo-bustamante,1991-09-26,0,0,730,0,0,730,0,0.0,0,0.0,0,0.0
1,2501396/luiz-augusto-alvareda,2354119/renzo-gracie,1992-01-01,0,0,730,0,0,730,0,0.0,0,0.0,0,0.0
2,2354132/jose-landi-jons,3107994/moura-moura,1993-08-29,0,0,730,0,0,730,0,0.0,0,0.0,0,0.0
4,2557847/takaku-fuke,2335653/ken-shamrock,1993-11-08,0,0,730,0,0,730,0,0.0,0,0.0,0,0.0
8,2335738/gerard-gordeau,2504082/kevin-rosier,1993-11-12,0,0,730,0,0,730,0,0.0,0,0.0,0,0.0


In [4]:
# Columns for which Elo-style ratings are computed, grouped by the
# EloFeatureExtractor argument they are passed to.
# NOTE(review): the exact update rule behind "real", "diff" and "binary"
# targets lives in model.mma_features — confirm there before changing a list.

# Earlier alternative, kept for reference:
# real_elo_target_cols = ["SSL", "TSL", "TDL"]
real_elo_target_cols = [
#     "fighter_result_time_left", 
    "ordinal_fighter_result",
    "submission_fighter_result",
    "tko_ko_fighter_result",
    "decision_fighter_result",
    "finish_fighter_result",
]
# In-fight statistics passed as `diff_elo_target_cols`.
diff_elo_target_cols = [
    'TSL',
    'TDL',
    'TDS',
    'SSL',
    'SM',
    'RV',
    'KD',
    'SGHL',
    'SGBL',
    'SCBL',
    'SCHL',
    'ADTB',
    'ADTM',
    'AD',
    'TD_fails',
    'distance_strikes_landed',
    'clinch_strikes_landed',
    'standing_strikes',
    'ground_strikes_landed',
]
# The win indicator is the only binary target; it is created on the fly below.
binary_elo_target_cols = ["Win"]

# `Win` is derived from FighterResult: "W" -> 1.0, "L" -> 0.0, "D" -> NaN.
# Any other result value is not in the mapping and therefore also becomes NaN.
elo_fe = EloFeatureExtractor(pp_df.assign(Win=pp_df["FighterResult"].map({"W":1.,"L":0.,"D":np.nan})), 
                             elo_alpha=0.4,
                             real_elo_target_cols = real_elo_target_cols, 
                             diff_elo_target_cols = diff_elo_target_cols,
                             binary_elo_target_cols = binary_elo_target_cols)
elo_fe.fit_transform_all()
# The Elo features are read from `elo_df` (old/new ratings, predictions and
# targets per column, as shown in the displayed header).
elo_fe.elo_df.head()

3959it [00:00, 6825.76it/s]
3959it [00:00, 7091.64it/s]
3959it [00:00, 7264.66it/s]
3962it [00:00, 6888.08it/s]
3959it [00:00, 7487.47it/s]
3959it [00:00, 6576.85it/s]
3959it [00:00, 6753.84it/s]
3959it [00:00, 6769.36it/s]
3959it [00:00, 6919.08it/s]
3959it [00:00, 7204.33it/s]
3962it [00:00, 7227.51it/s]
3959it [00:00, 7173.09it/s]
3959it [00:00, 6984.64it/s]
3959it [00:00, 7050.53it/s]
3962it [00:00, 7249.16it/s]
39390it [00:06, 6026.56it/s]
3959it [00:00, 6377.45it/s]
40119it [00:06, 6467.65it/s]
3962it [00:00, 6958.78it/s]
40119it [00:05, 7071.85it/s]
3959it [00:00, 6974.01it/s]
40119it [00:05, 6903.64it/s]
3959it [00:00, 7130.35it/s]
40119it [00:05, 6943.14it/s]
40119it [00:06, 6537.27it/s]


Unnamed: 0,FighterID,OpponentID,oldFighterEloAD,oldOpponentEloAD,predTargetAD,targetAD,Date,newFighterEloAD,newOpponentEloAD,oldEloDiffAD,...,newFighterElosubmission_fighter_result,newOpponentElosubmission_fighter_result,oldEloDiffsubmission_fighter_result,oldFighterElotko_ko_fighter_result,oldOpponentElotko_ko_fighter_result,predTargettko_ko_fighter_result,targettko_ko_fighter_result,newFighterElotko_ko_fighter_result,newOpponentElotko_ko_fighter_result,oldEloDifftko_ko_fighter_result
0,2335635/jason-delucia,2335757/trent-jenkins,0.0,0.0,0.0,1.0,1993-11-12,0.2,-0.2,0.0,...,0.2,-0.2,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0
1,2335738/gerard-gordeau,2504081/teila-tuli,0.0,0.0,0.0,0.0,1993-11-12,0.0,0.0,0.0,...,-0.128,-0.032,-0.16,0.0,0.0,0.0,1,0.2,-0.2,0.0
2,2335728/zane-frazier,2504082/kevin-rosier,0.0,0.0,0.0,0.0,1993-11-12,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-1,-0.2,0.2,0.0
3,2335738/gerard-gordeau,2504082/kevin-rosier,0.0,0.0,0.0,0.0,1993-11-12,0.0,0.0,0.0,...,-0.1024,-0.0256,-0.128,0.2,0.2,0.0,1,0.4,0.0,0.0
4,2335635/jason-delucia,2951141/scott-baker,0.2,0.0,0.2,0.267949,1994-03-11,0.21359,-0.01359,0.2,...,0.2256,-0.1936,0.032,0.0,0.0,0.0,0,0.0,0.0,0.0


In [5]:
# Join the Elo features with the simple features on the fight key. The inner
# join keeps only fights that appear in both tables.
feat_df = elo_fe.elo_df.merge(
    simple_fe.trans_df,
    on=["FighterID", "OpponentID", "Date"],
    how="inner"
)

# Add the bio-based features; the displayed frame gains age / reach / weight /
# height columns and their `*_diff` counterparts.
bio_df = pd.read_csv("data/clean_bios.csv")
bio_fe = BioFeatureExtractor(bio_df)
feat_df = bio_fe.fit_transform_all(feat_df)
feat_df.head()

Unnamed: 0,FighterID,OpponentID,oldFighterEloAD,oldOpponentEloAD,predTargetAD,targetAD,Date,newFighterEloAD,newOpponentEloAD,oldEloDiffAD,...,imp_reach_opp,imp_weight_opp,imp_height_opp,age,age_opp,age_diff,reach_diff,weight_diff,log_weight_diff,height_diff
0,2335635/jason-delucia,2335757/trent-jenkins,0.0,0.0,0.0,1.0,1993-11-12,0.2,-0.2,0.0,...,78.426958,250.0,75.854283,,,0.0,-5.273051,-64.4375,-0.298069,-4.64658
1,2335738/gerard-gordeau,2504081/teila-tuli,0.0,0.0,0.0,0.0,1993-11-12,0.0,0.0,0.0,...,78.426958,250.0,75.854283,34.646575,,0.0,0.0,0.0,0.0,0.0
2,2335728/zane-frazier,2504082/kevin-rosier,0.0,0.0,0.0,0.0,1993-11-12,0.0,0.0,0.0,...,71.634058,167.003261,69.868379,27.345205,,0.0,0.0,0.0,0.0,0.0
3,2335738/gerard-gordeau,2504082/kevin-rosier,0.0,0.0,0.0,0.0,1993-11-12,0.0,0.0,0.0,...,71.634058,167.003261,69.868379,34.646575,,0.0,0.0,0.0,0.0,0.0
4,2335635/jason-delucia,2951141/scott-baker,0.2,0.0,0.2,0.267949,1994-03-11,0.21359,-0.01359,0.2,...,78.426958,250.0,75.854283,,,0.0,-5.273051,-64.4375,-0.298069,-4.64658


In [9]:
# Load the moneyline (betting odds) columns for each fight: the opening line
# plus the closing-line columns, keyed by Date / FighterID / OpponentID.
ml_df = pd.read_csv("data/clean_stats_plus_ml.csv")[[
    "Date", "FighterID", "OpponentID", 
    'FighterOpen', 'OpponentOpen',
    'FighterCloseLeft', 'FighterCloseRight', 'OpponentCloseLeft',
    'OpponentCloseRight',
]]
# Parse Date to datetime.
# NOTE(review): presumably so it matches the dtype of feat_df["Date"] in the
# merge further down — confirm.
ml_df["Date"] = pd.to_datetime(ml_df["Date"])

def parse_american_odds(x:pd.Series):
    """Convert American moneyline odds into implied win probabilities.

    Parameters
    ----------
    x : pd.Series
        American odds. Values ``<= 0`` are priced as favourites (e.g. ``-200``)
        and values ``> 0`` as underdogs (e.g. ``+150``).

    Returns
    -------
    pd.Series
        Float series on the same index as ``x`` holding ``-x / (100 - x)`` for
        ``x <= 0`` and ``100 / (100 + x)`` for ``x > 0``. Missing odds stay
        ``NaN`` so they can be dropped downstream instead of being mistaken
        for a probability of zero.
    """
    odds = x.astype(float)
    fav_prob = -odds / (100 - odds)
    dog_prob = 100 / (100 + odds)
    # NaN fails the `<= 0` test and therefore takes `dog_prob`, which is NaN
    # for a NaN input, so missing odds propagate as NaN.
    prob = fav_prob.where(odds <= 0, dog_prob)
    # Rebuild the series so the result is unnamed, as before.
    return pd.Series(prob.to_numpy(), index=x.index)

# Implied win probability of each side, taken from the opening lines. The two
# values need not sum to one (see the displayed rows further down).
ml_df["p_fighter"] = parse_american_odds(ml_df["FighterOpen"])
ml_df["p_opponent"] = parse_american_odds(ml_df["OpponentOpen"])
# Average of the fighter's own implied probability and the complement of the
# opponent's implied probability.
ml_df["p_fighter_midpoint"] = (ml_df["p_fighter"] + 1 - ml_df["p_opponent"]) / 2
# Rescale the two implied probabilities so that fighter + opponent sum to one.
ml_df["p_fighter_implied"] = ml_df["p_fighter"] / (ml_df["p_fighter"] + ml_df["p_opponent"])
# print(feat_df.shape, ml_df.shape)
ml_df.columns

Index(['Date', 'FighterID', 'OpponentID', 'FighterOpen', 'OpponentOpen',
       'FighterCloseLeft', 'FighterCloseRight', 'OpponentCloseLeft',
       'OpponentCloseRight', 'p_fighter', 'p_opponent', 'p_fighter_midpoint',
       'p_fighter_implied'],
      dtype='object')

In [10]:
# Attach the moneyline columns to the feature table. The inner join drops every
# fight that has no matching (Date, FighterID, OpponentID) row in ml_df.
feat_ml_df = feat_df.merge(
    ml_df, 
    on=["Date", "FighterID", "OpponentID"],
    how="inner"
)
# Row/column count after the join, as a quick check of how many fights remain.
print(feat_ml_df.shape)
feat_ml_df.head()

(3220, 217)


Unnamed: 0,FighterID,OpponentID,oldFighterEloAD,oldOpponentEloAD,predTargetAD,targetAD,Date,newFighterEloAD,newOpponentEloAD,oldEloDiffAD,...,FighterOpen,OpponentOpen,FighterCloseLeft,FighterCloseRight,OpponentCloseLeft,OpponentCloseRight,p_fighter,p_opponent,p_fighter_midpoint,p_fighter_implied
0,2335629/bj-penn,2335885/jens-pulver,0.842355,0.364097,0.478257,2.645751,2007-06-23,1.275853,-0.069401,0.478257,...,-490.0,340.0,-357.0,-330.0,225.0,280.0,0.830508,0.227273,0.801618,0.785142
1,2335676/joe-lauzon,2354360/brandon-melendez,0.091024,0.0,0.091024,2.0,2007-06-23,0.472819,-0.381795,0.091024,...,-430.0,330.0,-700.0,-475.0,355.0,425.0,0.811321,0.232558,0.789381,0.777217
2,2335447/anderson-silva,2335475/nate-marquardt,0.227196,0.736307,-0.509111,-1.0,2007-07-07,0.129018,0.834485,-0.509111,...,-170.0,150.0,-145.0,-145.0,125.0,125.0,0.62963,0.4,0.614815,0.611511
3,2335302/heath-herring,2335521/antonio-rodrigo-nogueira,0.10641,0.0,0.10641,-2.645751,2007-07-07,-0.444022,0.550432,0.10641,...,470.0,-810.0,500.0,500.0,-700.0,-700.0,0.175439,0.89011,0.142664,0.164646
4,2335694/frankie-edgar,2335717/mark-bocek,0.0,0.0,0.0,1.414214,2007-07-07,0.282843,-0.282843,0.0,...,-260.0,180.0,-230.0,-230.0,190.0,190.0,0.722222,0.357143,0.68254,0.669118


In [11]:
# Model inputs. Every entry is a fighter-vs-opponent difference column:
# pre-fight Elo differences (`oldEloDiff*`), experience / layoff differences
# and bio differences. Commented-out entries are candidates that are currently
# excluded from the model.
feat_cols = [
    "oldEloDiffordinal_fighter_result", 
    "oldEloDiffsubmission_fighter_result",
    "oldEloDifftko_ko_fighter_result",
    "oldEloDiffdecision_fighter_result",
#     "oldEloDifffinish_fighter_result",
#     "oldEloDifffighter_result_time_left",
    
#     'oldEloDiffAD', 
    'oldEloDiffADTB', 
    'oldEloDiffADTM', 
    'oldEloDiffKD',
    'oldEloDiffRV', 
#     'oldEloDiffSCBL', 
#     'oldEloDiffSCHL', 
    'oldEloDiffSGBL',
    'oldEloDiffSGHL', 
    'oldEloDiffSM', 'oldEloDiffSSL', 'oldEloDiffTDL',
    'oldEloDiffTDS', 
#     'oldEloDiffTD_fails', 
    'oldEloDiffTSL',
    'oldEloDiffclinch_strikes_landed', 
    'oldEloDiffdistance_strikes_landed',
    'oldEloDiffground_strikes_landed', 
    'oldEloDiffstanding_strikes',
    
    # Elo difference on the binary win target.
    'oldEloDiffWin',
    
    # Experience / layoff differences from SimpleFeatureExtractor.
    "t_since_last_fight_log_diff", 
#     "t_since_last_fight_diff",
    "total_fights_sqrt_diff", 
    "total_ufc_fights_diff",
    
    # Bio differences from BioFeatureExtractor.
    "age_diff", "reach_diff", 
#     "weight_diff", 
    "log_weight_diff",
    "height_diff",
]

# Chronological split: fights on or before the cutoff train the model, later
# fights are held out for evaluation.
max_train_dt = pd.to_datetime("2021-07-01")

# A usable row needs every model feature, the outcome and the moneyline-implied
# probability. The models below feed logit(p_fighter_implied) to Stan as an
# offset, so a missing value there would reach the sampler as NaN.
train_df = feat_ml_df.loc[feat_ml_df["Date"] <= max_train_dt].dropna(subset=[*feat_cols, "targetWin", "p_fighter_implied"])
test_df = feat_ml_df.loc[feat_ml_df["Date"] > max_train_dt].dropna(subset=[*feat_cols, "targetWin", "p_fighter_implied"])

In [12]:
from sklearn.preprocessing import StandardScaler

# scaler = StandardScaler(with_mean=False)
# Scale each feature by its training-set second moment (mean of squares),
# without centring, and apply the same training-derived scale to the test rows.
# NOTE(review): this divides by the mean square, not its square root, so the
# result is not unit-RMS like the StandardScaler(with_mean=False) hinted at
# above — confirm that this is intended.
scale_ = train_df[feat_cols].pow(2).mean(axis=0)
X_train = train_df[feat_cols].div(scale_, axis="columns")
X_test = test_df[feat_cols].div(scale_, axis="columns")

# Binary outcome for the train and test rows.
y_train, y_test = train_df["targetWin"], test_df["targetWin"]

In [13]:
def logit(x):
    """Return the log-odds ``log(x) - log(1 - x)`` of a probability.

    Works element-wise on scalars, NumPy arrays and pandas objects.
    """
    log_p = np.log(x)
    log_not_p = np.log(1 - x)
    return log_p - log_not_p

# Log-odds of the normalised moneyline-implied win probability for the train
# and test rows.
ml_train = logit(train_df["p_fighter_implied"])
ml_test = logit(test_df["p_fighter_implied"])

In [14]:
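# Stan model: logistic regression on the feature differences, with the logit of the opening moneyline as a fixed offset.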
import stan

code = """

data {
    int<lower=0> n;                     // number of data points in training data
    int<lower=0> n2;                    // number of data points in test data
    int<lower=1> d;                     // explanatory variable dimension
    int<lower=0,upper=1> y[n];          // response variable
    real<lower=0> beta_prior_std;       // prior scale on beta

    matrix[n, d] X;                     // explanatory variable
    vector[n] ml_logit;                   // logit of the opening money line

    matrix[n2, d] X2;                   // test data
    vector[n2] ml_logit2;                 // test data

}

parameters {
    vector[d] beta;
}

transformed parameters {
    vector[n] eta;
    vector[n2] eta2;
    eta = ml_logit + (X * beta);      // linear predictor
    eta2 = ml_logit2 + (X2 * beta);   // linear predictor for test data
}

model {
    for(i in 1:d){
        beta[i] ~ normal(0, beta_prior_std);
        //beta[i] ~ cauchy(0, beta_prior_std); //prior for slopes following gelman 2008
    }

    // observation model
    y ~ bernoulli_logit(eta);
}

generated quantities {
    vector[n2] y_pred;
    y_pred = inv_logit(eta2);  // y values predicted for test data
}
"""

class SimpleSymmetricModel(object):
    """Logistic regression on top of the moneyline, sampled with Stan.

    The Stan program in the module-level ``code`` string models the win
    indicator as ``bernoulli_logit(ml_logit + X * beta)``. The logit of the
    moneyline-implied probability enters as a fixed offset and there is no
    intercept, so only the feature coefficients ``beta`` are estimated.

    Parameters
    ----------
    beta_prior_std : float
        Passed to Stan as ``beta_prior_std``, the prior scale on ``beta``.
    num_chains : int
        Forwarded to ``posterior.sample``.
    num_samples : int
        Forwarded to ``posterior.sample``.
    """

    def __init__(self, beta_prior_std=0.1, num_chains=4, num_samples=1000):
        self.beta_prior_std = float(beta_prior_std)
        self.code = code
        # Set by fit_predict / _fit.
        self.scale_ = None
        self.fit = None
        self.num_chains = num_chains
        self.num_samples = num_samples

    def _fit(self, data):
        """Build the Stan model for ``data``, sample from it and store the fit.

        The build uses a fixed ``random_seed=1``. The resulting fit object is
        kept on ``self.fit`` and also returned.
        """
        posterior = stan.build(self.code, data=data, random_seed=1)
        fit = posterior.sample(num_chains=self.num_chains, num_samples=self.num_samples)
        self.fit = fit
        return fit

    def fit_predict(self, train_df, test_df, feat_cols):
        """Fit on ``train_df`` and return predicted win probabilities for ``test_df``.

        Parameters
        ----------
        train_df : pd.DataFrame
            Must contain ``feat_cols``, ``"targetWin"`` and
            ``"p_fighter_implied"``.
        test_df : pd.DataFrame
            Must contain ``feat_cols`` and ``"p_fighter_implied"``. The outcome
            column is not needed, so unlabeled fights can be scored.
        feat_cols : list of str
            Feature columns used as the design matrix.

        Returns
        -------
        numpy.ndarray
            ``fit["y_pred"]`` averaged over axis 1, one value per test row.
            NOTE(review): this assumes axis 1 indexes the posterior draws —
            confirm against the PyStan fit layout.
        """
        # Scale by the training-set mean square, without centring, and reuse
        # the same scale for the test rows.
        # NOTE(review): this is the mean square, not its square root, so the
        # scaled features are not unit-RMS — confirm that this is intended.
        scale_ = (train_df[feat_cols]**2).mean(0)
        self.scale_ = scale_
        X_train = train_df[feat_cols] / scale_
        X_test = test_df[feat_cols] / scale_

        y_train = train_df["targetWin"]

        # Moneyline-implied probability on the log-odds scale, used as offset.
        ml_train = logit(train_df["p_fighter_implied"])
        ml_test = logit(test_df["p_fighter_implied"])

        data = {
            "n": train_df.shape[0],
            "n2": test_df.shape[0],
            "d": len(feat_cols),
            "y": y_train.astype(int).values,
            "beta_prior_std": self.beta_prior_std,
            "X": X_train.values,
            "ml_logit": ml_train.values,
            "X2": X_test.values,
            "ml_logit2": ml_test.values,
        }

        fit = self._fit(data)
        return fit["y_pred"].mean(1)

# Fit with a prior scale of 2.0 on the coefficients and num_samples=500, then
# predict the held-out fights.
sym_model = SimpleSymmetricModel(beta_prior_std=2.0, num_samples=500)
y_pred = sym_model.fit_predict(train_df, test_df, feat_cols)
# Accuracy: share of test fights where the rounded prediction equals the outcome.
(y_pred.round() == test_df["targetWin"]).mean()

[36mBuilding:[0m 0.6s
[1A[0J[36mBuilding:[0m 0.8s
[1A[0J[36mBuilding:[0m 0.9s
[1A[0J[36mBuilding:[0m 1.0s
[1A[0J[36mBuilding:[0m 1.1s
[1A[0J[36mBuilding:[0m 1.2s
[1A[0J[36mBuilding:[0m 1.3s
[1A[0J[36mBuilding:[0m 1.4s
[1A[0J[36mBuilding:[0m 1.5s
[1A[0J[36mBuilding:[0m 1.6s
[1A[0J[36mBuilding:[0m 1.7s
[1A[0J[36mBuilding:[0m 1.8s
[1A[0J[36mBuilding:[0m 1.9s
[1A[0J[36mBuilding:[0m 2.0s
[1A[0J[36mBuilding:[0m 2.1s
[1A[0J[36mBuilding:[0m 2.2s
[1A[0J[36mBuilding:[0m 2.3s
[1A[0J[36mBuilding:[0m 2.4s
[1A[0J[36mBuilding:[0m 2.5s
[1A[0J[36mBuilding:[0m 2.6s
[1A[0J[36mBuilding:[0m 2.7s
[1A[0J[36mBuilding:[0m 2.8s
[1A[0J[36mBuilding:[0m 2.9s
[1A[0J[36mBuilding:[0m 3.0s
[1A[0J[36mBuilding:[0m 3.1s
[1A[0J[36mBuilding:[0m 3.2s
[1A[0J[36mBuilding:[0m 3.3s
[1A[0J[36mBuilding:[0m 3.4s
[1A[0J[36mBuilding:[0m 3.5s
[1A[0J[36mBuilding:[0m 3.6s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:8:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core.hpp:28:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core/operator_addition.hpp:6:
  bool error = false;
       ^
    for (int i = 0; i < y1_d.size(); i++) {
                    ~ ^ ~~~~~~~~~~~


[1A[0J[36mBuilding:[0m 3.7s
[1A[0J[36mBuilding:[0m 3.8s
[1A[0J[36mBuilding:[0m 4.0s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:8:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core.hpp:53:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core/profiling.hpp:9:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim/err.hpp:50:
  int n_transitions = log_omegas.cols() - 1;
   

[1A[0J[36mBuilding:[0m 4.1s
[1A[0J[36mBuilding:[0m 4.2s
[1A[0J[36mBuilding:[0m 4.3s
[1A[0J[36mBuilding:[0m 4.4s
[1A[0J[36mBuilding:[0m 4.5s
[1A[0J[36mBuilding:[0m 4.6s
[1A[0J[36mBuilding:[0m 4.7s
[1A[0J[36mBuilding:[0m 4.8s
[1A[0J[36mBuilding:[0m 4.9s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:10:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/fun.hpp:26:
                           [a, b, digamma_ab](auto& vi) mutable {
                             ~~^
  return make_callback_var(beta_val, [a, b, digamma_ab](auto& vi) mutable {
                                      ^~


[1A[0J[36mBuilding:[0m 5.0s
[1A[0J[36mBuilding:[0m 5.1s
[1A[0J[36mBuilding:[0m 5.2s
[1A[0J[36mBuilding:[0m 5.3s
[1A[0J[36mBuilding:[0m 5.4s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:10:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/fun.hpp:55:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/fun/elt_multiply.hpp:9:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/fun/multiply.hpp:7:
In file included from /usr/local/Caskro

[1A[0J[36mBuilding:[0m 5.5s
[1A[0J[36mBuilding:[0m 5.6s
[1A[0J[36mBuilding:[0m 5.7s
[1A[0J[36mBuilding:[0m 5.8s
[1A[0J[36mBuilding:[0m 5.9s
[1A[0J[36mBuilding:[0m 6.0s
[1A[0J[36mBuilding:[0m 6.1s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:10:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/fun.hpp:124:
  for (size_t i = 2; i <= n; ++i) {
                     ~ ^  ~
In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from

[1A[0J[36mBuilding:[0m 6.2s
[1A[0J[36mBuilding:[0m 6.3s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:10:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/fun.hpp:158:
  for (Eigen::Index k = 0; k < N; ++k) {
                           ~ ^ ~
  for (Eigen::Index k = 0; k < N; ++k) {
                           ~ ^ ~


[1A[0J[36mBuilding:[0m 6.4s
[1A[0J[36mBuilding:[0m 6.5s
[1A[0J[36mBuilding:[0m 6.6s
[1A[0J[36mBuilding:[0m 6.7s
[1A[0J[36mBuilding:[0m 6.8s
[1A[0J[36mBuilding:[0m 6.9s
[1A[0J[36mBuilding:[0m 7.0s
[1A[0J[36mBuilding:[0m 7.1s
[1A[0J[36mBuilding:[0m 7.2s
[1A[0J[36mBuilding:[0m 7.3s
[1A[0J[36mBuilding:[0m 7.4s
[1A[0J[36mBuilding:[0m 7.5s
[1A[0J[36mBuilding:[0m 7.7s
[1A[0J[36mBuilding:[0m 7.8s
[1A[0J[36mBuilding:[0m 7.9s
[1A[0J[36mBuilding:[0m 8.0s
[1A[0J[36mBuilding:[0m 8.1s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:11:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/functor.hpp:16:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/functor/dae.hpp:5:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/functor/idas_integrator.hpp:5:
    for (size_t is = 0; is < 

[1A[0J[36mBuilding:[0m 8.2s
[1A[0J[36mBuilding:[0m 8.3s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:13:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim.hpp:16:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim/prob.hpp:128:
  for (int i = 0; i < M; i++) {
                  ~ ^ ~
  int n = G.rows();  // number of states
      ^
In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hw

[1A[0J[36mBuilding:[0m 8.4s
[1A[0J[36mBuilding:[0m 8.5s
[1A[0J[36mBuilding:[0m 8.6s
[1A[0J[36mBuilding:[0m 8.7s
[1A[0J[36mBuilding:[0m 8.8s
[1A[0J[36mBuilding:[0m 8.9s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:13:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim.hpp:16:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim/prob.hpp:308:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim/prob/skew_double_exponential_ccdf_log.hpp:5:
  const int size_sigma = 

[1A[0J[36mBuilding:[0m 9.0s
[1A[0J[36mBuilding:[0m 9.1s
[1A[0J[36mBuilding:[0m 9.2s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:17:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/indexing.hpp:6:
  for (int i = 0; i < idx.ns_.size(); ++i) {
                  ~ ^ ~~~~~~~~~~~~~~
  for (int i = 0; i < col_idx.ns_.size(); ++i) {
                  ~ ^ ~~~~~~~~~~~~~~~~~~
  for (int j = 0; j < col_idx.ns_.size(); ++j) {
                  ~ ^ ~~~~~~~~~~~~~~~~~~
In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:17:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-p

[1A[0J[36mBuilding:[0m 9.3s
[1A[0J[36mBuilding:[0m 9.4s
[1A[0J[36mBuilding:[0m 9.5s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:8:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core.hpp:29:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core/operator_divide_equal.hpp:5:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core/operator_division.hpp:14:
      [avi = a.vi_

[1A[0J[36mBuilding:[0m 9.6s
[1A[0J[36mBuilding:[0m 9.7s
[1A[0J[36mBuilding:[0m 9.8s
[1A[0J[36mBuilding:[0m 9.9s
[1A[0J[36mBuilding:[0m 10.0s
[1A[0J[36mBuilding:[0m 10.1s
[1A[0J[36mBuilding:[0m 10.2s
[1A[0J[36mBuilding:[0m 10.3s
[1A[0J[36mBuilding:[0m 10.4s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:8:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core.hpp:28:
      [avi = a.vi_, b](const auto& vi) mutable { avi->adj_ += vi.adj_; });
                    ^
/usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/fun/grad_inc_beta.hpp:45:43: note: in instantiation of function template specialization 'stan::math::operator+<int, nullptr>' requested 

[1A[0J[36mBuilding:[0m 10.5s
[1A[0J[36mBuilding:[0m 10.6s
[1A[0J[36mBuilding:[0m 10.7s
[1A[0J[36mBuilding:[0m 10.8s
[1A[0J[36mBuilding:[0m 10.9s
[1A[0J[36mBuilding:[0m 11.0s
[1A[0J[36mBuilding:[0m 11.1s
[1A[0J[36mBuilding:[0m 11.2s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:8:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core.hpp:53:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core/profiling.hpp:9:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim/err.hpp:4:
In file included from /usr/local/Caskroom/minicon

[1A[0J[36mBuilding:[0m 11.3s
[1A[0J[36mBuilding:[0m 11.4s
[1A[0J[36mBuilding:[0m 11.6s
[1A[0J[36mBuilding:[0m 11.7s
[1A[0J[36mBuilding:[0m 11.8s
[1A[0J[36mBuilding:[0m 11.9s


In file included from /Users/johncurcio/Library/Caches/httpstan/4.7.2/models/hwcu7qli/model_hwcu7qli.cpp:2:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/model/model_header.hpp:4:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math.hpp:19:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev.hpp:8:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core.hpp:53:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/rev/core/profiling.hpp:9:
In file included from /usr/local/Caskroom/miniconda/base/envs/sports/lib/python3.9/site-packages/httpstan/include/stan/math/prim/err.hpp:4:
In file included from /usr/local/Caskroom/minicon

[1A[0J[36mBuilding:[0m 12.0s
[1A[0J[36mBuilding:[0m 12.1s
[1A[0J[36mBuilding:[0m 12.2s
[1A[0J[36mBuilding:[0m 12.3s
[1A[0J[36mBuilding:[0m 12.4s
[1A[0J[36mBuilding:[0m 12.5s
[1A[0J[36mBuilding:[0m 12.6s
[1A[0J[36mBuilding:[0m 12.7s
[1A[0J[36mBuilding:[0m 12.8s
[1A[0J[36mBuilding:[0m 12.9s
[1A[0J[36mBuilding:[0m 13.0s
[1A[0J[36mBuilding:[0m 13.1s
[1A[0J[36mBuilding:[0m 13.2s
[1A[0J[36mBuilding:[0m 13.3s
[1A[0J[36mBuilding:[0m 13.4s
[1A[0J[36mBuilding:[0m 13.5s
[1A[0J[36mBuilding:[0m 13.6s
[1A[0J[36mBuilding:[0m 13.7s
[1A[0J[36mBuilding:[0m 13.8s
[1A[0J[36mBuilding:[0m 13.9s
[1A[0J[36mBuilding:[0m 14.0s
[1A[0J[36mBuilding:[0m 14.1s
[1A[0J[36mBuilding:[0m 14.2s
[1A[0J[36mBuilding:[0m 14.3s
[1A[0J[36mBuilding:[0m 14.4s
[1A[0J[36mBuilding:[0m 14.5s
[1A[0J[36mBuilding:[0m 14.6s
[1A[0J[36mBuilding:[0m 14.7s
[1A[0J[36mBuilding:[0m 14.8s
[1A[0J[36mBuilding:[0m 14.9s
[1A[0J[



[1A[0J[36mBuilding:[0m 17.3s
[1A[0J[36mBuilding:[0m 17.4s
[1A[0J[36mBuilding:[0m 17.5s
[1A[0J[36mBuilding:[0m 17.6s
[1A[0J[36mBuilding:[0m 17.7s
[1A[0J[36mBuilding:[0m 17.8s
[1A[0J



[36mMessages from [0m[36;1mstanc[0m[36m:[0m
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
[36mSampling:[0m   0%
[1A[0J[36mSampling:[0m   0% (1/6000)
[1A[0J[36mSampling:[0m   0% (2/6000)
[1A[0J[36mSampling:[0m   0% (3/6000)
[1A[0J[36mSampling:[0m   0% (4/6000)
[1A[0J[36mSampling:[0m   2% (103/6000)
[1A[0J[36mSampling:[0m   3% (202/6000)
[1A[0J[36mSampling:[0m   5% (301/6000)
[1A[0J[36mSampling:[0m   7% (400/6000)
[1A[0J[36mSampling:[0m   8% (500/6000)
[1A[0J[36mSampling:[0m  10% (600/6000)
[1A[0J[36mSampling:[0m  12% (700/6000)
[1A[0J[36mSampling:[0m  13% (800/6000)
[1A[0J[36mSampling:[0m  15% (900/6000)
[1A[0J[36mSampling:[0m  17% (1000/6000)
[1A[0J[36mSampling:[0m  18% (1100/6000)
[1A[0J[36mSampling:[0m  20% (1200/6000)
[1A[0J

0.6525679758308157

In [15]:
from sklearn.metrics import log_loss

# Log loss of the current predictions `y_pred` against the test labels `y_test`.
log_loss(
    y_true=y_test,
    y_pred=y_pred,
)

0.6099569250376933

In [16]:
# One row per feature: column 0 is the feature name, column 1 the mean of that
# feature's `beta` over axis 1 of the fit; rows are sorted ascending by column 1.
pd.DataFrame(
    list(zip(feat_cols, sym_model.fit["beta"].mean(axis=1)))
).sort_values(by=1)

Unnamed: 0,0,1
0,oldEloDiffordinal_fighter_result,-1.520902
22,total_ufc_fights_diff,-0.418321
17,oldEloDiffground_strikes_landed,-0.172423
14,oldEloDiffTSL,-0.06571
26,height_diff,-0.045838
20,t_since_last_fight_log_diff,-0.036273
6,oldEloDiffKD,-0.018428
5,oldEloDiffADTM,-0.009286
7,oldEloDiffRV,-0.001094
19,oldEloDiffWin,-0.000615


In [17]:
from sklearn.decomposition import PCA, KernelPCA

class PcaSymmetricModel(object):
    """Stan model fit on whitened principal components of the scaled features.

    ``fit_predict`` divides the feature columns by a per-column scale learned
    on the training frame, projects them with a whitening PCA, and hands the
    components plus the logit of ``p_fighter_implied`` to the Stan program
    stored in ``self.code``.
    """

    def __init__(self, beta_prior_std=0.1, n_pca=8, num_chains=4, num_samples=1000):
        """Record the prior scale, PCA size and sampler settings.

        Args:
            beta_prior_std: sent to Stan as ``beta_prior_std`` (cast to float).
            n_pca: ``n_components`` of the whitening PCA.
            num_chains: forwarded to ``posterior.sample``.
            num_samples: forwarded to ``posterior.sample``.
        """
        self.beta_prior_std = float(beta_prior_std)
        self.n_pca = n_pca
        # `code` is a notebook-level name; it is expected to hold the Stan
        # program string defined in an earlier cell.
        self.code = code
        self.scale_ = None  # per-feature divisor, set by fit_predict
        self.pca = PCA(n_components=n_pca, whiten=True)
        self.fit = None  # most recent Stan fit, set by _fit
        self.num_chains = num_chains
        self.num_samples = num_samples

    def _fit(self, data):
        """Build the Stan program for ``data``, sample it, and cache the fit."""
        posterior = stan.build(self.code, data=data, random_seed=1)
        fit = posterior.sample(num_chains=self.num_chains, num_samples=self.num_samples)
        self.fit = fit
        return fit

    def fit_predict(self, train_df, test_df, feat_cols):
        """Fit on ``train_df`` and return predictions for ``test_df``.

        Args:
            train_df: frame holding ``feat_cols``, ``targetWin`` and
                ``p_fighter_implied``.
            test_df: frame holding ``feat_cols`` and ``p_fighter_implied``.
                Labels are not read, so unlabeled rows can be predicted.
            feat_cols: names of the feature columns.

        Returns:
            ``fit["y_pred"].mean(1)``: one value per test row, averaged over
            axis 1 (presumably the posterior draws; confirm against PyStan).
        """
        # NOTE(review): the divisor is the mean of squares, not its square
        # root, so columns are not brought to unit RMS. Kept as-is; confirm
        # this is intended.
        scale_ = (train_df[feat_cols] ** 2).mean(0)
        self.scale_ = scale_
        X_train = train_df[feat_cols] / scale_
        X_test = test_df[feat_cols] / scale_

        # The PCA is learned on the training rows only and reused for the test rows.
        X_pca_train = self.pca.fit_transform(X_train)
        X_pca_test = self.pca.transform(X_test)

        data = {
            "n": train_df.shape[0],
            "n2": test_df.shape[0],
            "d": X_pca_train.shape[1],
            "y": train_df["targetWin"].astype(int).values,
            "beta_prior_std": self.beta_prior_std,
            "X": X_pca_train,
            "ml_logit": logit(train_df["p_fighter_implied"]).values,
            "X2": X_pca_test,
            "ml_logit2": logit(test_df["p_fighter_implied"]).values,
        }

        fit = self._fit(data)
        return fit["y_pred"].mean(1)

# Fit the 16-component PCA variant, then report accuracy and log loss of its
# predictions on the test rows.
pca_model = PcaSymmetricModel(
    n_pca=16,
    beta_prior_std=1.0,
    num_samples=500,
)
y_pred = pca_model.fit_predict(train_df, test_df, feat_cols)

# Accuracy: share of rows where the rounded prediction equals targetWin.
acc = (y_pred.round() == test_df["targetWin"]).mean()
print(f"accuracy: {acc}")
print(f"log loss: {log_loss(y_pred=y_pred, y_true=y_test)}")

[32mBuilding:[0m found in cache, done.
[36mMessages from [0m[36;1mstanc[0m[36m:[0m
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
[36mSampling:[0m   0%
[1A[0J[36mSampling:[0m   7% (400/6000)
[1A[0J[36mSampling:[0m  13% (800/6000)
[1A[0J[36mSampling:[0m  18% (1100/6000)
[1A[0J[36mSampling:[0m  22% (1300/6000)
[1A[0J[36mSampling:[0m  28% (1700/6000)
[1A[0J[36mSampling:[0m  35% (2100/6000)
[1A[0J[36mSampling:[0m  55% (3300/6000)
[1A[0J[36mSampling:[0m  77% (4600/6000)
[1A[0J[36mSampling:[0m  88% (5300/6000)
[1A[0J[36mSampling:[0m 100% (6000/6000)
[1A[0J[32mSampling:[0m 100% (6000/6000), done.
[36mMessages received during sampling:[0m
  Gradient evaluation took 0.000146 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.46

accuracy: 0.649546827794562
log loss: 0.6090773076393765


In [18]:
# Attach each fighter's gender to the train and test rows.
gender_df = pd.read_csv("data/fighter_genders.csv")

# validate="many_to_one" makes pandas raise if a FighterID occurs more than once
# in gender_df; without it a repeated ID would silently duplicate fight rows.
train_gender_df = train_df.merge(gender_df, on="FighterID", how="left", validate="many_to_one")
test_gender_df = test_df.merge(gender_df, on="FighterID", how="left", validate="many_to_one")

# Both flags should be False: every fight row found a gender.
train_gender_df["gender"].isnull().any(), test_gender_df["gender"].isnull().any()

(False, False)

In [19]:
# Hierarchical variant: one coefficient vector per gender level, each drawn around a shared mean.
import stan

hier_code = """

data {
    int<lower=0> n;                     // number of data points in training data
    int<lower=0> n2;                    // number of data points in test data
    int<lower=1> d;                     // explanatory variable dimension
    int<lower=0,upper=1> y[n];          // response variable
    real<lower=0> beta_prior_std;       // prior scale on beta mean across groups
    real<lower=0> intra_group_std;      // prior scale on beta, std dev of group's beta around mean
    
    int<lower=1> L;                     // number of levels (if this is gender, then it's just 2)
    int<lower=1,upper=L> ll[n];         // level of each fight in train
    int<lower=1,upper=L> ll2[n2];       // level of each fight in test

    matrix[n, d] X;                     // explanatory variable
    vector[n] ml_logit;                   // logit of the opening money line

    matrix[n2, d] X2;                   // test data
    vector[n2] ml_logit2;                 // test data

}

parameters {
    vector[d] beta[L];    // coefficient values for each level
    real mu[d];           // mean of coefficient values for each level
}

transformed parameters {
    //vector[n] eta;
    //vector[n2] eta2;
    //eta = ml_logit + (X * beta);      // linear predictor
    //eta2 = ml_logit2 + (X2 * beta);   // linear predictor for test data
}

model {

    mu ~ normal(0, beta_prior_std);
    for (l in 1:L)
        beta[l] ~ normal(mu, intra_group_std); // TODO magic number - more or less defines covariance btw genders

    vector[n] x_beta_ll;
    for (i in 1:n)
        x_beta_ll[i] = X[i] * beta[ll[i]];
    y ~ bernoulli_logit(ml_logit + x_beta_ll);
}

generated quantities {
    vector[n2] y_pred;
    
    for(i in 1:n2){
        y_pred[i] = inv_logit(ml_logit2[i] + X[i] * beta[ll2[i]]);
    }
    //y_pred = inv_logit(eta2);  // y values predicted for test data
}
"""

class HierarchicalSymmetricModel(SimpleSymmetricModel):
    """Per-gender coefficient model driven by the level-indexed ``hier_code``.

    ``fit_predict`` sends each fight's gender to Stan as a 1-based level index
    (``ll`` for train, ``ll2`` for test) so the Stan program can pick that
    level's coefficient vector. Sampling goes through the inherited ``_fit``.
    """

    def __init__(self, beta_prior_std=1.0, intra_group_std=0.1, num_chains=4, num_samples=100):
        """Record the prior scales and sampler settings.

        Args:
            beta_prior_std: sent to Stan as ``beta_prior_std`` (cast to float).
            intra_group_std: sent to Stan as ``intra_group_std`` (cast to float).
            num_chains: stored as ``self.num_chains``.
            num_samples: stored as ``self.num_samples``.
        """
        self.beta_prior_std = float(beta_prior_std)
        self.intra_group_std = float(intra_group_std)
        self.code = hier_code
        self.scale_ = None  # per-feature divisor, set by fit_predict
        self.fit = None  # most recent Stan fit
        self.num_chains = num_chains
        self.num_samples = num_samples

    def fit_predict(self, train_df, test_df, feat_cols):
        """Fit on ``train_df`` and return predictions for ``test_df``.

        Args:
            train_df: frame holding ``feat_cols``, ``targetWin``,
                ``p_fighter_implied`` and ``gender``.
            test_df: frame holding ``feat_cols``, ``p_fighter_implied`` and
                ``gender``. Labels are not read.
            feat_cols: names of the feature columns.

        Returns:
            ``fit["y_pred"].mean(1)``: one value per test row, averaged over
            axis 1 (presumably the posterior draws; confirm against PyStan).

        Raises:
            ValueError: if any ``gender`` value is not a known level.
        """
        # NOTE(review): the divisor is the mean of squares, not its square
        # root, so columns are not brought to unit RMS. Kept as-is; confirm
        # this is intended.
        scale_ = (train_df[feat_cols] ** 2).mean(0)
        self.scale_ = scale_
        X_train = train_df[feat_cols] / scale_
        X_test = test_df[feat_cols] / scale_

        # Stan indexes levels from 1.
        gender_levels = {"M": 1, "W": 2}
        gender_int_train = train_df["gender"].map(gender_levels)
        gender_int_test = test_df["gender"].map(gender_levels)
        # .map() yields NaN for unknown values; fail here instead of sending a
        # float/NaN array into Stan's integer `ll` / `ll2`.
        if gender_int_train.isnull().any() or gender_int_test.isnull().any():
            raise ValueError(
                f"'gender' must be one of {sorted(gender_levels)} for every row"
            )

        data = {
            "n": train_df.shape[0],
            "n2": test_df.shape[0],
            "d": len(feat_cols),
            "y": train_df["targetWin"].astype(int).values,
            "beta_prior_std": self.beta_prior_std,
            "intra_group_std": self.intra_group_std,
            "L": len(gender_levels),
            "ll": gender_int_train.astype(int).values,
            "ll2": gender_int_test.astype(int).values,
            "X": X_train.values,
            "ml_logit": logit(train_df["p_fighter_implied"]).values,
            "X2": X_test.values,
            "ml_logit2": logit(test_df["p_fighter_implied"]).values,
        }
        fit = self._fit(data)
        return fit["y_pred"].mean(1)

# Fit the level-indexed hierarchical model, then show the share of test rows
# whose rounded prediction equals targetWin.
hier_model = HierarchicalSymmetricModel(
    beta_prior_std=1.0,
    intra_group_std=0.5,
)
y_pred = hier_model.fit_predict(train_gender_df, test_gender_df, feat_cols)
(y_pred.round() == test_gender_df["targetWin"]).mean()

[36mBuilding:[0m 0.2s
[1A[0J[36mBuilding:[0m 0.3s
[1A[0J[36mBuilding:[0m 0.5s
[1A[0J[36mBuilding:[0m 0.6s
[1A[0J[36mBuilding:[0m 0.7s
[1A[0J[36mBuilding:[0m 0.8s
[1A[0J[36mBuilding:[0m 0.9s
[1A[0J[36mBuilding:[0m 1.0s
[1A[0J[36mBuilding:[0m 1.1s
[1A[0J[36mBuilding:[0m 1.2s
[1A[0J[36mBuilding:[0m 1.3s
[1A[0J[36mBuilding:[0m 1.4s
[1A[0J[36mBuilding:[0m 1.5s
[1A[0J[36mBuilding:[0m 1.6s
[1A[0J[36mBuilding:[0m 1.7s
[1A[0J[36mBuilding:[0m 1.8s
[1A[0J[36mBuilding:[0m 1.9s
[1A[0J[36mBuilding:[0m 2.0s
[1A[0J[36mBuilding:[0m 2.1s
[1A[0J[36mBuilding:[0m 2.3s
[1A[0J[36mBuilding:[0m 2.4s
[1A[0J[36mBuilding:[0m 2.5s
[1A[0J[36mBuilding:[0m 2.6s
[1A[0J[36mBuilding:[0m 2.7s
[1A[0J[36mBuilding:[0m 2.8s
[1A[0J[36mBuilding:[0m 2.9s
[1A[0J[36mBuilding:[0m 3.0s
[1A[0J[36mBuilding:[0m 3.1s
[1A[0J[36mBuilding:[0m 3.2s
[1A[0J[36mBuilding:[0m 3.3s
[1A[0J[36mBuilding:[0m 3.4s
[1A[0J[36mBui

[1A[0J[36mBuilding:[0m 26.4s
[1A[0J[36mBuilding:[0m 26.5s
[1A[0J[36mBuilding:[0m 26.7s
[1A[0J[36mBuilding:[0m 26.8s
[1A[0J[36mBuilding:[0m 26.9s
[1A[0J[36mBuilding:[0m 27.0s
[1A[0J[36mBuilding:[0m 27.1s
[1A[0J[36mBuilding:[0m 27.2s
[1A[0J[36mBuilding:[0m 27.3s
[1A[0J[36mBuilding:[0m 27.4s
[1A[0J[36mBuilding:[0m 27.5s
[1A[0J[36mBuilding:[0m 27.6s
[1A[0J[36mBuilding:[0m 27.7s
[1A[0J[36mBuilding:[0m 27.8s
[1A[0J[36mBuilding:[0m 27.9s
[1A[0J[36mBuilding:[0m 28.0s
[1A[0J[36mBuilding:[0m 28.1s
[1A[0J[36mBuilding:[0m 28.2s
[1A[0J[36mBuilding:[0m 28.3s
[1A[0J[36mBuilding:[0m 28.4s
[1A[0J[36mBuilding:[0m 28.5s
[1A[0J[36mBuilding:[0m 28.6s
[1A[0J[36mBuilding:[0m 28.7s
[1A[0J[36mBuilding:[0m 28.9s
[1A[0J[36mBuilding:[0m 29.0s
[1A[0J[36mBuilding:[0m 29.1s
[1A[0J[36mBuilding:[0m 29.2s
[1A[0J[36mBuilding:[0m 29.3s
[1A[0J[36mBuilding:[0m 29.4s
[1A[0J[36mBuilding:[0m 29.5s
[1A[0J[

[1A[0J[36mBuilding:[0m 52.3s
[1A[0J[36mBuilding:[0m 52.4s
[1A[0J[36mBuilding:[0m 52.5s
[1A[0J[36mBuilding:[0m 52.6s
[1A[0J[36mBuilding:[0m 52.7s
[1A[0J[36mBuilding:[0m 52.9s
[1A[0J[36mBuilding:[0m 53.0s
[1A[0J[36mBuilding:[0m 53.1s
[1A[0J[36mBuilding:[0m 53.2s
[1A[0J[36mBuilding:[0m 53.3s
[1A[0J[36mBuilding:[0m 53.4s
[1A[0J[36mBuilding:[0m 53.5s
[1A[0J[36mBuilding:[0m 53.6s
[1A[0J[36mBuilding:[0m 53.7s
[1A[0J[36mBuilding:[0m 53.8s
[1A[0J[36mBuilding:[0m 53.9s
[1A[0J[36mBuilding:[0m 54.0s
[1A[0J[36mBuilding:[0m 54.1s
[1A[0J[36mBuilding:[0m 54.2s
[1A[0J[36mBuilding:[0m 54.3s
[1A[0J[36mBuilding:[0m 54.4s
[1A[0J[36mBuilding:[0m 54.5s
[1A[0J[36mBuilding:[0m 54.6s
[1A[0J[36mBuilding:[0m 54.7s
[1A[0J[36mBuilding:[0m 54.8s
[1A[0J[36mBuilding:[0m 54.9s
[1A[0J[36mBuilding:[0m 55.0s
[1A[0J[36mBuilding:[0m 55.1s
[1A[0J[36mBuilding:[0m 55.2s
[1A[0J[36mBuilding:[0m 55.3s
[1A[0J[

[1A[0J[36mBuilding:[0m 78.2s
[1A[0J[36mBuilding:[0m 78.3s
[1A[0J[36mBuilding:[0m 78.4s
[1A[0J[36mBuilding:[0m 78.5s
[1A[0J[36mBuilding:[0m 78.6s
[1A[0J[36mBuilding:[0m 78.7s
[1A[0J[36mBuilding:[0m 78.8s
[1A[0J[36mBuilding:[0m 78.9s
[1A[0J[36mBuilding:[0m 79.1s
[1A[0J[36mBuilding:[0m 79.2s
[1A[0J[36mBuilding:[0m 79.3s
[1A[0J[36mBuilding:[0m 79.4s
[1A[0J[36mBuilding:[0m 79.5s
[1A[0J[36mBuilding:[0m 79.6s
[1A[0J[36mBuilding:[0m 79.7s
[1A[0J[36mBuilding:[0m 79.8s
[1A[0J[36mBuilding:[0m 79.9s
[1A[0J[36mBuilding:[0m 80.0s
[1A[0J[36mBuilding:[0m 80.1s
[1A[0J[36mBuilding:[0m 80.2s
[1A[0J[36mBuilding:[0m 80.3s
[1A[0J[36mBuilding:[0m 80.4s
[1A[0J[36mBuilding:[0m 80.5s
[1A[0J[36mBuilding:[0m 80.6s
[1A[0J[36mBuilding:[0m 80.7s
[1A[0J[36mBuilding:[0m 80.8s
[1A[0J[36mBuilding:[0m 80.9s
[1A[0J[36mBuilding:[0m 81.0s
[1A[0J[36mBuilding:[0m 81.1s
[1A[0J[36mBuilding:[0m 81.3s
[1A[0J[

[1A[0J[36mBuilding:[0m 103.9s
[1A[0J[36mBuilding:[0m 104.0s
[1A[0J[36mBuilding:[0m 104.1s
[1A[0J[36mBuilding:[0m 104.3s
[1A[0J[36mBuilding:[0m 104.4s
[1A[0J[36mBuilding:[0m 104.5s
[1A[0J[36mBuilding:[0m 104.6s
[1A[0J[36mBuilding:[0m 104.7s
[1A[0J[36mBuilding:[0m 104.8s
[1A[0J[36mBuilding:[0m 104.9s
[1A[0J[36mBuilding:[0m 105.0s
[1A[0J[36mBuilding:[0m 105.1s
[1A[0J[36mBuilding:[0m 105.2s
[1A[0J[36mBuilding:[0m 105.3s
[1A[0J[36mBuilding:[0m 105.4s
[1A[0J[36mBuilding:[0m 105.5s
[1A[0J[36mBuilding:[0m 105.6s
[1A[0J[36mBuilding:[0m 105.7s
[1A[0J[36mBuilding:[0m 105.8s
[1A[0J[36mBuilding:[0m 105.9s
[1A[0J[36mBuilding:[0m 106.0s
[1A[0J[36mBuilding:[0m 106.1s
[1A[0J[36mBuilding:[0m 106.2s
[1A[0J[36mBuilding:[0m 106.3s
[1A[0J[36mBuilding:[0m 106.4s
[1A[0J[36mBuilding:[0m 106.5s
[1A[0J[36mBuilding:[0m 106.6s
[1A[0J[36mBuilding:[0m 106.7s
[1A[0J[36mBuilding:[0m 106.8s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 129.0s
[1A[0J[36mBuilding:[0m 129.2s
[1A[0J[36mBuilding:[0m 129.3s
[1A[0J[36mBuilding:[0m 129.4s
[1A[0J[36mBuilding:[0m 129.5s
[1A[0J[36mBuilding:[0m 129.6s
[1A[0J[36mBuilding:[0m 129.7s
[1A[0J[36mBuilding:[0m 129.8s
[1A[0J[36mBuilding:[0m 129.9s
[1A[0J[36mBuilding:[0m 130.0s
[1A[0J[36mBuilding:[0m 130.1s
[1A[0J[36mBuilding:[0m 130.2s
[1A[0J[36mBuilding:[0m 130.3s
[1A[0J[36mBuilding:[0m 130.4s
[1A[0J[36mBuilding:[0m 130.5s
[1A[0J[36mBuilding:[0m 130.6s
[1A[0J[36mBuilding:[0m 130.7s
[1A[0J[36mBuilding:[0m 130.8s
[1A[0J[36mBuilding:[0m 130.9s
[1A[0J[36mBuilding:[0m 131.0s
[1A[0J[36mBuilding:[0m 131.1s
[1A[0J[36mBuilding:[0m 131.2s
[1A[0J[36mBuilding:[0m 131.3s
[1A[0J[36mBuilding:[0m 131.4s
[1A[0J[36mBuilding:[0m 131.5s
[1A[0J[36mBuilding:[0m 131.6s
[1A[0J[36mBuilding:[0m 131.7s
[1A[0J[36mBuilding:[0m 131.8s
[1A[0J[36mBuilding:[0m 131.9s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 154.1s
[1A[0J[36mBuilding:[0m 154.2s
[1A[0J[36mBuilding:[0m 154.3s
[1A[0J[36mBuilding:[0m 154.4s
[1A[0J[36mBuilding:[0m 154.5s
[1A[0J[36mBuilding:[0m 154.7s
[1A[0J[36mBuilding:[0m 154.8s
[1A[0J[36mBuilding:[0m 154.9s
[1A[0J[36mBuilding:[0m 155.0s
[1A[0J[36mBuilding:[0m 155.1s
[1A[0J[36mBuilding:[0m 155.2s
[1A[0J[36mBuilding:[0m 155.3s
[1A[0J[36mBuilding:[0m 155.4s
[1A[0J[36mBuilding:[0m 155.5s
[1A[0J[36mBuilding:[0m 155.6s
[1A[0J[36mBuilding:[0m 155.7s
[1A[0J[36mBuilding:[0m 155.8s
[1A[0J[36mBuilding:[0m 155.9s
[1A[0J[36mBuilding:[0m 156.0s
[1A[0J[36mBuilding:[0m 156.1s
[1A[0J[36mBuilding:[0m 156.2s
[1A[0J[36mBuilding:[0m 156.3s
[1A[0J[36mBuilding:[0m 156.4s
[1A[0J[36mBuilding:[0m 156.5s
[1A[0J[36mBuilding:[0m 156.6s
[1A[0J[36mBuilding:[0m 156.7s
[1A[0J[36mBuilding:[0m 156.8s
[1A[0J[36mBuilding:[0m 156.9s
[1A[0J[36mBuilding:[0m 157.0s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 179.2s
[1A[0J[36mBuilding:[0m 179.3s
[1A[0J[36mBuilding:[0m 179.4s
[1A[0J[36mBuilding:[0m 179.5s
[1A[0J[36mBuilding:[0m 179.6s
[1A[0J[36mBuilding:[0m 179.7s
[1A[0J[36mBuilding:[0m 179.8s
[1A[0J[36mBuilding:[0m 179.9s
[1A[0J[36mBuilding:[0m 180.0s
[1A[0J[36mBuilding:[0m 180.1s
[1A[0J[36mBuilding:[0m 180.2s
[1A[0J[36mBuilding:[0m 180.3s
[1A[0J[36mBuilding:[0m 180.4s
[1A[0J[36mBuilding:[0m 180.5s
[1A[0J[36mBuilding:[0m 180.6s
[1A[0J[36mBuilding:[0m 180.7s
[1A[0J[36mBuilding:[0m 180.8s
[1A[0J[36mBuilding:[0m 180.9s
[1A[0J[36mBuilding:[0m 181.0s
[1A[0J[36mBuilding:[0m 181.1s
[1A[0J[36mBuilding:[0m 181.2s
[1A[0J[36mBuilding:[0m 181.3s
[1A[0J[36mBuilding:[0m 181.4s
[1A[0J[36mBuilding:[0m 181.5s
[1A[0J[36mBuilding:[0m 181.7s
[1A[0J[36mBuilding:[0m 181.8s
[1A[0J[36mBuilding:[0m 181.9s
[1A[0J[36mBuilding:[0m 182.0s
[1A[0J[36mBuilding:[0m 182.1s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 204.2s
[1A[0J[36mBuilding:[0m 204.3s
[1A[0J[36mBuilding:[0m 204.4s
[1A[0J[36mBuilding:[0m 204.5s
[1A[0J[36mBuilding:[0m 204.6s
[1A[0J[36mBuilding:[0m 204.8s
[1A[0J[36mBuilding:[0m 204.9s
[1A[0J[36mBuilding:[0m 205.0s
[1A[0J[36mBuilding:[0m 205.1s
[1A[0J[36mBuilding:[0m 205.2s
[1A[0J[36mBuilding:[0m 205.3s
[1A[0J[36mBuilding:[0m 205.4s
[1A[0J[36mBuilding:[0m 205.5s
[1A[0J[36mBuilding:[0m 205.6s
[1A[0J[36mBuilding:[0m 205.7s
[1A[0J[36mBuilding:[0m 205.8s
[1A[0J[36mBuilding:[0m 205.9s
[1A[0J[36mBuilding:[0m 206.0s
[1A[0J[36mBuilding:[0m 206.1s
[1A[0J[36mBuilding:[0m 206.2s
[1A[0J[36mBuilding:[0m 206.3s
[1A[0J[36mBuilding:[0m 206.4s
[1A[0J[36mBuilding:[0m 206.5s
[1A[0J[36mBuilding:[0m 206.6s
[1A[0J[36mBuilding:[0m 206.7s
[1A[0J[36mBuilding:[0m 206.8s
[1A[0J[36mBuilding:[0m 207.0s
[1A[0J[36mBuilding:[0m 207.1s
[1A[0J[36mBuilding:[0m 207.2s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 229.3s
[1A[0J[36mBuilding:[0m 229.4s
[1A[0J[36mBuilding:[0m 229.5s
[1A[0J[36mBuilding:[0m 229.6s
[1A[0J[36mBuilding:[0m 229.7s
[1A[0J[36mBuilding:[0m 229.8s
[1A[0J[36mBuilding:[0m 229.9s
[1A[0J[36mBuilding:[0m 230.1s
[1A[0J[36mBuilding:[0m 230.2s
[1A[0J[36mBuilding:[0m 230.3s
[1A[0J[36mBuilding:[0m 230.4s
[1A[0J[36mBuilding:[0m 230.5s
[1A[0J[36mBuilding:[0m 230.6s
[1A[0J[36mBuilding:[0m 230.7s
[1A[0J[36mBuilding:[0m 230.8s
[1A[0J[36mBuilding:[0m 230.9s
[1A[0J[36mBuilding:[0m 231.0s
[1A[0J[36mBuilding:[0m 231.1s
[1A[0J[36mBuilding:[0m 231.2s
[1A[0J[36mBuilding:[0m 231.3s
[1A[0J[36mBuilding:[0m 231.4s
[1A[0J[36mBuilding:[0m 231.5s
[1A[0J[36mBuilding:[0m 231.6s
[1A[0J[36mBuilding:[0m 231.7s
[1A[0J[36mBuilding:[0m 231.8s
[1A[0J[36mBuilding:[0m 231.9s
[1A[0J[36mBuilding:[0m 232.0s
[1A[0J[36mBuilding:[0m 232.1s
[1A[0J[36mBuilding:[0m 232.2s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 254.4s
[1A[0J[36mBuilding:[0m 254.5s
[1A[0J[36mBuilding:[0m 254.6s
[1A[0J[36mBuilding:[0m 254.7s
[1A[0J[36mBuilding:[0m 254.8s
[1A[0J[36mBuilding:[0m 254.9s
[1A[0J[36mBuilding:[0m 255.0s
[1A[0J[36mBuilding:[0m 255.1s
[1A[0J[36mBuilding:[0m 255.2s
[1A[0J[36mBuilding:[0m 255.3s
[1A[0J[36mBuilding:[0m 255.4s
[1A[0J[36mBuilding:[0m 255.5s
[1A[0J[36mBuilding:[0m 255.6s
[1A[0J[36mBuilding:[0m 255.7s
[1A[0J[36mBuilding:[0m 255.8s
[1A[0J[36mBuilding:[0m 255.9s
[1A[0J[36mBuilding:[0m 256.0s
[1A[0J[36mBuilding:[0m 256.1s
[1A[0J[36mBuilding:[0m 256.2s
[1A[0J[36mBuilding:[0m 256.3s
[1A[0J[36mBuilding:[0m 256.4s
[1A[0J[36mBuilding:[0m 256.6s
[1A[0J[36mBuilding:[0m 256.7s
[1A[0J[36mBuilding:[0m 256.8s
[1A[0J[36mBuilding:[0m 256.9s
[1A[0J[36mBuilding:[0m 257.0s
[1A[0J[36mBuilding:[0m 257.1s
[1A[0J[36mBuilding:[0m 257.2s
[1A[0J[36mBuilding:[0m 257.3s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 279.5s
[1A[0J[36mBuilding:[0m 279.6s
[1A[0J[36mBuilding:[0m 279.7s
[1A[0J[36mBuilding:[0m 279.8s
[1A[0J[36mBuilding:[0m 279.9s
[1A[0J[36mBuilding:[0m 280.0s
[1A[0J[36mBuilding:[0m 280.1s
[1A[0J[36mBuilding:[0m 280.2s
[1A[0J[36mBuilding:[0m 280.3s
[1A[0J[36mBuilding:[0m 280.4s
[1A[0J[36mBuilding:[0m 280.5s
[1A[0J[36mBuilding:[0m 280.6s
[1A[0J[36mBuilding:[0m 280.7s
[1A[0J[36mBuilding:[0m 280.9s
[1A[0J[36mBuilding:[0m 281.0s
[1A[0J[36mBuilding:[0m 281.1s
[1A[0J[36mBuilding:[0m 281.2s
[1A[0J[36mBuilding:[0m 281.3s
[1A[0J[36mBuilding:[0m 281.4s
[1A[0J[36mBuilding:[0m 281.5s
[1A[0J[36mBuilding:[0m 281.6s
[1A[0J[36mBuilding:[0m 281.7s
[1A[0J[36mBuilding:[0m 281.8s
[1A[0J[36mBuilding:[0m 281.9s
[1A[0J[36mBuilding:[0m 282.0s
[1A[0J[36mBuilding:[0m 282.1s
[1A[0J[36mBuilding:[0m 282.2s
[1A[0J[36mBuilding:[0m 282.3s
[1A[0J[36mBuilding:[0m 282.4s
[1A[0J[36mB

TimeoutError: 

In [None]:
# The male/female coefficients were given an aggressively tight spread, so the
# log loss should land close to the 0.6087682255527661 reference value.
log_loss(
    y_true=y_test,
    y_pred=y_pred,
)

In [None]:
# Mean coefficient vector for the first and second entry of fit["beta"], taken
# over axis 1 of each entry. Presumably entry 0 is the "M" level and entry 1
# the "W" level (Stan levels 1 and 2); TODO confirm the layout PyStan returns.
m_beta, w_beta = (hier_model.fit["beta"][level].mean(1) for level in (0, 1))

# One row per feature, ordered by how much the two coefficient vectors differ.
beta_df = pd.DataFrame(
    {
        "m_beta": m_beta,
        "w_beta": w_beta,
        "diff": m_beta - w_beta,
        "feat": feat_cols,
    }
)
beta_df.sort_values(by="diff")

# Trying some weird indexing

In [20]:
# Second attempt at the hierarchical model: gender enters as a 0/1 mask instead of an integer level index.
import stan

hier_code = """

data {
    int<lower=0> n;                     // number of data points in training data
    int<lower=0> n2;                    // number of data points in test data
    int<lower=1> d;                     // explanatory variable dimension
    int<lower=0,upper=1> y[n];          // response variable
    real<lower=0> beta_prior_std;       // prior scale on beta mean across groups
    real<lower=0> intra_group_std;      // prior scale on beta, std dev of group's beta around mean
    
    vector[n] is_m;      // 0 if woman, 1 if man
    vector[n2] is_m2;    // 0 if woman, 1 if man
    
    matrix[n, d] X;                     // explanatory variable
    vector[n] ml_logit;                   // logit of the opening money line

    matrix[n2, d] X2;                   // test data
    vector[n2] ml_logit2;                 // test data

}

parameters {
    vector[d] beta_m;
    vector[d] beta_w;
}

transformed parameters {
    vector[n] eta;
    vector[n2] eta2;
    eta = (
        ml_logit + 
        ((X * beta_m) .* is_m) + 
        ((X * beta_w) .* (1 - is_m))
    );      // linear predictor
    eta2 = (
        ml_logit2 + 
        ((X2 * beta_m) .* is_m2) + 
        ((X2 * beta_w) .* (1 - is_m2))
    );   // linear predictor for test data
}

model {
    beta_m ~ normal(0, beta_prior_std);
    beta_w ~ normal(beta_m, intra_group_std); // damn i hope this works

    y ~ bernoulli_logit(eta);
}

generated quantities {
    vector[n2] y_pred;
    
    y_pred = inv_logit(eta2);  // y values predicted for test data
}
"""

class HierarchicalSymmetricModel(SimpleSymmetricModel):
    """Per-gender coefficient model driven by the mask-based ``hier_code``.

    ``fit_predict`` sends each fight's gender to Stan as a 0/1 mask (``is_m``
    for train, ``is_m2`` for test) that switches between the two coefficient
    vectors. Sampling goes through the inherited ``_fit``.
    """

    def __init__(self, beta_prior_std=1.0, intra_group_std=0.1, num_chains=4, num_samples=100):
        """Record the prior scales and sampler settings.

        Args:
            beta_prior_std: sent to Stan as ``beta_prior_std`` (cast to float).
            intra_group_std: sent to Stan as ``intra_group_std`` (cast to float).
            num_chains: stored as ``self.num_chains``.
            num_samples: stored as ``self.num_samples``.
        """
        self.beta_prior_std = float(beta_prior_std)
        self.intra_group_std = float(intra_group_std)
        self.code = hier_code
        self.scale_ = None  # per-feature divisor, set by fit_predict
        self.fit = None  # most recent Stan fit
        self.num_chains = num_chains
        self.num_samples = num_samples

    def fit_predict(self, train_df, test_df, feat_cols):
        """Fit on ``train_df`` and return predictions for ``test_df``.

        Args:
            train_df: frame holding ``feat_cols``, ``targetWin``,
                ``p_fighter_implied`` and ``gender``.
            test_df: frame holding ``feat_cols``, ``p_fighter_implied`` and
                ``gender``. Labels are not read.
            feat_cols: names of the feature columns.

        Returns:
            ``fit["y_pred"].mean(1)``: one value per test row, averaged over
            axis 1 (presumably the posterior draws; confirm against PyStan).

        Raises:
            ValueError: if any ``gender`` value is neither ``"M"`` nor ``"W"``.
        """
        # NOTE(review): the divisor is the mean of squares, not its square
        # root, so columns are not brought to unit RMS. Kept as-is; confirm
        # this is intended.
        scale_ = (train_df[feat_cols] ** 2).mean(0)
        self.scale_ = scale_
        X_train = train_df[feat_cols] / scale_
        X_test = test_df[feat_cols] / scale_

        # 1 selects beta_m, 0 selects beta_w in the Stan program.
        is_m_codes = {"M": 1, "W": 0}
        is_m_train = train_df["gender"].map(is_m_codes)
        is_m_test = test_df["gender"].map(is_m_codes)
        # .map() yields NaN for unknown values; a NaN mask would poison the
        # linear predictor, so fail here with a readable message.
        if is_m_train.isnull().any() or is_m_test.isnull().any():
            raise ValueError(
                f"'gender' must be one of {sorted(is_m_codes)} for every row"
            )

        data = {
            "n": train_df.shape[0],
            "n2": test_df.shape[0],
            "d": len(feat_cols),
            "y": train_df["targetWin"].astype(int).values,
            "beta_prior_std": self.beta_prior_std,
            "intra_group_std": self.intra_group_std,
            "is_m": is_m_train.values,
            "is_m2": is_m_test.values,
            "X": X_train.values,
            "ml_logit": logit(train_df["p_fighter_implied"]).values,
            "X2": X_test.values,
            "ml_logit2": logit(test_df["p_fighter_implied"]).values,
        }
        fit = self._fit(data)
        return fit["y_pred"].mean(1)

# Fit the mask-based hierarchical model and score it on the test rows.
hier_model = HierarchicalSymmetricModel(beta_prior_std=1.0, intra_group_std=0.1, num_samples=500)
y_pred = hier_model.fit_predict(train_gender_df, test_gender_df, feat_cols)

# Score against the labels of the frame the predictions were made for
# (test_gender_df), not test_df or a `y_test` left over from an earlier cell.
acc = (y_pred.round() == test_gender_df["targetWin"]).mean()
print(f"accuracy: {acc}")
print(f"log loss: {log_loss(y_pred=y_pred, y_true=test_gender_df['targetWin'])}")

[36mBuilding:[0m 0.2s
[1A[0J[36mBuilding:[0m 0.3s
[1A[0J[36mBuilding:[0m 0.5s
[1A[0J[36mBuilding:[0m 0.6s
[1A[0J[36mBuilding:[0m 0.7s
[1A[0J[36mBuilding:[0m 0.8s
[1A[0J[36mBuilding:[0m 0.9s
[1A[0J[36mBuilding:[0m 1.0s
[1A[0J[36mBuilding:[0m 1.1s
[1A[0J[36mBuilding:[0m 1.2s
[1A[0J[36mBuilding:[0m 1.3s
[1A[0J[36mBuilding:[0m 1.4s
[1A[0J[36mBuilding:[0m 1.5s
[1A[0J[36mBuilding:[0m 1.6s
[1A[0J[36mBuilding:[0m 1.7s
[1A[0J[36mBuilding:[0m 1.8s
[1A[0J[36mBuilding:[0m 1.9s
[1A[0J[36mBuilding:[0m 2.0s
[1A[0J[36mBuilding:[0m 2.1s
[1A[0J[36mBuilding:[0m 2.2s
[1A[0J[36mBuilding:[0m 2.3s
[1A[0J[36mBuilding:[0m 2.4s
[1A[0J[36mBuilding:[0m 2.5s
[1A[0J[36mBuilding:[0m 2.6s
[1A[0J[36mBuilding:[0m 2.7s
[1A[0J[36mBuilding:[0m 2.8s
[1A[0J[36mBuilding:[0m 2.9s
[1A[0J[36mBuilding:[0m 3.1s
[1A[0J[36mBuilding:[0m 3.2s
[1A[0J[36mBuilding:[0m 3.3s
[1A[0J[36mBuilding:[0m 3.4s
[1A[0J[36mBui

[1A[0J[36mBuilding:[0m 26.4s
[1A[0J[36mBuilding:[0m 26.6s
[1A[0J[36mBuilding:[0m 26.7s
[1A[0J[36mBuilding:[0m 26.8s
[1A[0J[36mBuilding:[0m 26.9s
[1A[0J[36mBuilding:[0m 27.0s
[1A[0J[36mBuilding:[0m 27.1s
[1A[0J[36mBuilding:[0m 27.2s
[1A[0J[36mBuilding:[0m 27.3s
[1A[0J[36mBuilding:[0m 27.4s
[1A[0J[36mBuilding:[0m 27.5s
[1A[0J[36mBuilding:[0m 27.6s
[1A[0J[36mBuilding:[0m 27.7s
[1A[0J[36mBuilding:[0m 27.8s
[1A[0J[36mBuilding:[0m 27.9s
[1A[0J[36mBuilding:[0m 28.0s
[1A[0J[36mBuilding:[0m 28.1s
[1A[0J[36mBuilding:[0m 28.2s
[1A[0J[36mBuilding:[0m 28.3s
[1A[0J[36mBuilding:[0m 28.4s
[1A[0J[36mBuilding:[0m 28.5s
[1A[0J[36mBuilding:[0m 28.6s
[1A[0J[36mBuilding:[0m 28.7s
[1A[0J[36mBuilding:[0m 28.8s
[1A[0J[36mBuilding:[0m 28.9s
[1A[0J[36mBuilding:[0m 29.0s
[1A[0J[36mBuilding:[0m 29.1s
[1A[0J[36mBuilding:[0m 29.2s
[1A[0J[36mBuilding:[0m 29.3s
[1A[0J[36mBuilding:[0m 29.4s
[1A[0J[

[1A[0J[36mBuilding:[0m 52.4s
[1A[0J[36mBuilding:[0m 52.5s
[1A[0J[36mBuilding:[0m 52.6s
[1A[0J[36mBuilding:[0m 52.7s
[1A[0J[36mBuilding:[0m 52.8s
[1A[0J[36mBuilding:[0m 52.9s
[1A[0J[36mBuilding:[0m 53.0s
[1A[0J[36mBuilding:[0m 53.1s
[1A[0J[36mBuilding:[0m 53.2s
[1A[0J[36mBuilding:[0m 53.3s
[1A[0J[36mBuilding:[0m 53.4s
[1A[0J[36mBuilding:[0m 53.6s
[1A[0J[36mBuilding:[0m 53.7s
[1A[0J[36mBuilding:[0m 53.8s
[1A[0J[36mBuilding:[0m 53.9s
[1A[0J[36mBuilding:[0m 54.0s
[1A[0J[36mBuilding:[0m 54.1s
[1A[0J[36mBuilding:[0m 54.2s
[1A[0J[36mBuilding:[0m 54.3s
[1A[0J[36mBuilding:[0m 54.4s
[1A[0J[36mBuilding:[0m 54.5s
[1A[0J[36mBuilding:[0m 54.6s
[1A[0J[36mBuilding:[0m 54.7s
[1A[0J[36mBuilding:[0m 54.8s
[1A[0J[36mBuilding:[0m 54.9s
[1A[0J[36mBuilding:[0m 55.0s
[1A[0J[36mBuilding:[0m 55.1s
[1A[0J[36mBuilding:[0m 55.2s
[1A[0J[36mBuilding:[0m 55.3s
[1A[0J[36mBuilding:[0m 55.4s
[1A[0J[

[1A[0J[36mBuilding:[0m 78.3s
[1A[0J[36mBuilding:[0m 78.4s
[1A[0J[36mBuilding:[0m 78.5s
[1A[0J[36mBuilding:[0m 78.6s
[1A[0J[36mBuilding:[0m 78.7s
[1A[0J[36mBuilding:[0m 78.8s
[1A[0J[36mBuilding:[0m 78.9s
[1A[0J[36mBuilding:[0m 79.0s
[1A[0J[36mBuilding:[0m 79.1s
[1A[0J[36mBuilding:[0m 79.2s
[1A[0J[36mBuilding:[0m 79.3s
[1A[0J[36mBuilding:[0m 79.4s
[1A[0J[36mBuilding:[0m 79.5s
[1A[0J[36mBuilding:[0m 79.6s
[1A[0J[36mBuilding:[0m 79.7s
[1A[0J[36mBuilding:[0m 79.8s
[1A[0J[36mBuilding:[0m 79.9s
[1A[0J[36mBuilding:[0m 80.0s
[1A[0J[36mBuilding:[0m 80.1s
[1A[0J[36mBuilding:[0m 80.2s
[1A[0J[36mBuilding:[0m 80.3s
[1A[0J[36mBuilding:[0m 80.4s
[1A[0J[36mBuilding:[0m 80.5s
[1A[0J[36mBuilding:[0m 80.6s
[1A[0J[36mBuilding:[0m 80.7s
[1A[0J[36mBuilding:[0m 80.8s
[1A[0J[36mBuilding:[0m 80.9s
[1A[0J[36mBuilding:[0m 81.1s
[1A[0J[36mBuilding:[0m 81.2s
[1A[0J[36mBuilding:[0m 81.3s
[1A[0J[

[1A[0J[36mBuilding:[0m 103.9s
[1A[0J[36mBuilding:[0m 104.0s
[1A[0J[36mBuilding:[0m 104.1s
[1A[0J[36mBuilding:[0m 104.2s
[1A[0J[36mBuilding:[0m 104.3s
[1A[0J[36mBuilding:[0m 104.4s
[1A[0J[36mBuilding:[0m 104.5s
[1A[0J[36mBuilding:[0m 104.6s
[1A[0J[36mBuilding:[0m 104.7s
[1A[0J[36mBuilding:[0m 104.8s
[1A[0J[36mBuilding:[0m 104.9s
[1A[0J[36mBuilding:[0m 105.0s
[1A[0J[36mBuilding:[0m 105.1s
[1A[0J[36mBuilding:[0m 105.2s
[1A[0J[36mBuilding:[0m 105.3s
[1A[0J[36mBuilding:[0m 105.4s
[1A[0J[36mBuilding:[0m 105.5s
[1A[0J[36mBuilding:[0m 105.7s
[1A[0J[36mBuilding:[0m 105.8s
[1A[0J[36mBuilding:[0m 105.9s
[1A[0J[36mBuilding:[0m 106.0s
[1A[0J[36mBuilding:[0m 106.1s
[1A[0J[36mBuilding:[0m 106.2s
[1A[0J[36mBuilding:[0m 106.3s
[1A[0J[36mBuilding:[0m 106.4s
[1A[0J[36mBuilding:[0m 106.5s
[1A[0J[36mBuilding:[0m 106.6s
[1A[0J[36mBuilding:[0m 106.7s
[1A[0J[36mBuilding:[0m 106.8s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 129.0s
[1A[0J[36mBuilding:[0m 129.1s
[1A[0J[36mBuilding:[0m 129.2s
[1A[0J[36mBuilding:[0m 129.3s
[1A[0J[36mBuilding:[0m 129.4s
[1A[0J[36mBuilding:[0m 129.5s
[1A[0J[36mBuilding:[0m 129.6s
[1A[0J[36mBuilding:[0m 129.7s
[1A[0J[36mBuilding:[0m 129.8s
[1A[0J[36mBuilding:[0m 129.9s
[1A[0J[36mBuilding:[0m 130.0s
[1A[0J[36mBuilding:[0m 130.1s
[1A[0J[36mBuilding:[0m 130.2s
[1A[0J[36mBuilding:[0m 130.3s
[1A[0J[36mBuilding:[0m 130.4s
[1A[0J[36mBuilding:[0m 130.5s
[1A[0J[36mBuilding:[0m 130.6s
[1A[0J[36mBuilding:[0m 130.8s
[1A[0J[36mBuilding:[0m 130.9s
[1A[0J[36mBuilding:[0m 131.0s
[1A[0J[36mBuilding:[0m 131.1s
[1A[0J[36mBuilding:[0m 131.2s
[1A[0J[36mBuilding:[0m 131.3s
[1A[0J[36mBuilding:[0m 131.4s
[1A[0J[36mBuilding:[0m 131.5s
[1A[0J[36mBuilding:[0m 131.6s
[1A[0J[36mBuilding:[0m 131.7s
[1A[0J[36mBuilding:[0m 131.8s
[1A[0J[36mBuilding:[0m 131.9s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 154.0s
[1A[0J[36mBuilding:[0m 154.1s
[1A[0J[36mBuilding:[0m 154.2s
[1A[0J[36mBuilding:[0m 154.4s
[1A[0J[36mBuilding:[0m 154.5s
[1A[0J[36mBuilding:[0m 154.6s
[1A[0J[36mBuilding:[0m 154.7s
[1A[0J[36mBuilding:[0m 154.8s
[1A[0J[36mBuilding:[0m 154.9s
[1A[0J[36mBuilding:[0m 155.0s
[1A[0J[36mBuilding:[0m 155.1s
[1A[0J[36mBuilding:[0m 155.2s
[1A[0J[36mBuilding:[0m 155.3s
[1A[0J[36mBuilding:[0m 155.4s
[1A[0J[36mBuilding:[0m 155.5s
[1A[0J[36mBuilding:[0m 155.6s
[1A[0J[36mBuilding:[0m 155.7s
[1A[0J[36mBuilding:[0m 155.8s
[1A[0J[36mBuilding:[0m 155.9s
[1A[0J[36mBuilding:[0m 156.0s
[1A[0J[36mBuilding:[0m 156.1s
[1A[0J[36mBuilding:[0m 156.2s
[1A[0J[36mBuilding:[0m 156.3s
[1A[0J[36mBuilding:[0m 156.4s
[1A[0J[36mBuilding:[0m 156.5s
[1A[0J[36mBuilding:[0m 156.6s
[1A[0J[36mBuilding:[0m 156.7s
[1A[0J[36mBuilding:[0m 156.8s
[1A[0J[36mBuilding:[0m 156.9s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 178.9s
[1A[0J[36mBuilding:[0m 179.0s
[1A[0J[36mBuilding:[0m 179.1s
[1A[0J[36mBuilding:[0m 179.2s
[1A[0J[36mBuilding:[0m 179.3s
[1A[0J[36mBuilding:[0m 179.4s
[1A[0J[36mBuilding:[0m 179.5s
[1A[0J[36mBuilding:[0m 179.6s
[1A[0J[36mBuilding:[0m 179.7s
[1A[0J[36mBuilding:[0m 179.8s
[1A[0J[36mBuilding:[0m 179.9s
[1A[0J[36mBuilding:[0m 180.0s
[1A[0J[36mBuilding:[0m 180.1s
[1A[0J[36mBuilding:[0m 180.3s
[1A[0J[36mBuilding:[0m 180.4s
[1A[0J[36mBuilding:[0m 180.5s
[1A[0J[36mBuilding:[0m 180.6s
[1A[0J[36mBuilding:[0m 180.7s
[1A[0J[36mBuilding:[0m 180.8s
[1A[0J[36mBuilding:[0m 180.9s
[1A[0J[36mBuilding:[0m 181.0s
[1A[0J[36mBuilding:[0m 181.1s
[1A[0J[36mBuilding:[0m 181.2s
[1A[0J[36mBuilding:[0m 181.3s
[1A[0J[36mBuilding:[0m 181.4s
[1A[0J[36mBuilding:[0m 181.5s
[1A[0J[36mBuilding:[0m 181.6s
[1A[0J[36mBuilding:[0m 181.7s
[1A[0J[36mBuilding:[0m 181.8s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 204.1s
[1A[0J[36mBuilding:[0m 204.2s
[1A[0J[36mBuilding:[0m 204.3s
[1A[0J[36mBuilding:[0m 204.4s
[1A[0J[36mBuilding:[0m 204.5s
[1A[0J[36mBuilding:[0m 204.6s
[1A[0J[36mBuilding:[0m 204.7s
[1A[0J[36mBuilding:[0m 204.8s
[1A[0J[36mBuilding:[0m 204.9s
[1A[0J[36mBuilding:[0m 205.0s
[1A[0J[36mBuilding:[0m 205.1s
[1A[0J[36mBuilding:[0m 205.2s
[1A[0J[36mBuilding:[0m 205.3s
[1A[0J[36mBuilding:[0m 205.4s
[1A[0J[36mBuilding:[0m 205.5s
[1A[0J[36mBuilding:[0m 205.6s
[1A[0J[36mBuilding:[0m 205.7s
[1A[0J[36mBuilding:[0m 205.8s
[1A[0J[36mBuilding:[0m 205.9s
[1A[0J[36mBuilding:[0m 206.0s
[1A[0J[36mBuilding:[0m 206.1s
[1A[0J[36mBuilding:[0m 206.2s
[1A[0J[36mBuilding:[0m 206.3s
[1A[0J[36mBuilding:[0m 206.4s
[1A[0J[36mBuilding:[0m 206.5s
[1A[0J[36mBuilding:[0m 206.6s
[1A[0J[36mBuilding:[0m 206.7s
[1A[0J[36mBuilding:[0m 206.8s
[1A[0J[36mBuilding:[0m 207.0s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 229.1s
[1A[0J[36mBuilding:[0m 229.2s
[1A[0J[36mBuilding:[0m 229.3s
[1A[0J[36mBuilding:[0m 229.4s
[1A[0J[36mBuilding:[0m 229.5s
[1A[0J[36mBuilding:[0m 229.6s
[1A[0J[36mBuilding:[0m 229.7s
[1A[0J[36mBuilding:[0m 229.8s
[1A[0J[36mBuilding:[0m 229.9s
[1A[0J[36mBuilding:[0m 230.0s
[1A[0J[36mBuilding:[0m 230.1s
[1A[0J[36mBuilding:[0m 230.2s
[1A[0J[36mBuilding:[0m 230.4s
[1A[0J[36mBuilding:[0m 230.5s
[1A[0J[36mBuilding:[0m 230.6s
[1A[0J[36mBuilding:[0m 230.7s
[1A[0J[36mBuilding:[0m 230.8s
[1A[0J[36mBuilding:[0m 230.9s
[1A[0J[36mBuilding:[0m 231.0s
[1A[0J[36mBuilding:[0m 231.1s
[1A[0J[36mBuilding:[0m 231.2s
[1A[0J[36mBuilding:[0m 231.3s
[1A[0J[36mBuilding:[0m 231.4s
[1A[0J[36mBuilding:[0m 231.5s
[1A[0J[36mBuilding:[0m 231.6s
[1A[0J[36mBuilding:[0m 231.7s
[1A[0J[36mBuilding:[0m 231.8s
[1A[0J[36mBuilding:[0m 231.9s
[1A[0J[36mBuilding:[0m 232.0s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 254.2s
[1A[0J[36mBuilding:[0m 254.3s
[1A[0J[36mBuilding:[0m 254.4s
[1A[0J[36mBuilding:[0m 254.5s
[1A[0J[36mBuilding:[0m 254.6s
[1A[0J[36mBuilding:[0m 254.7s
[1A[0J[36mBuilding:[0m 254.8s
[1A[0J[36mBuilding:[0m 254.9s
[1A[0J[36mBuilding:[0m 255.0s
[1A[0J[36mBuilding:[0m 255.1s
[1A[0J[36mBuilding:[0m 255.2s
[1A[0J[36mBuilding:[0m 255.3s
[1A[0J[36mBuilding:[0m 255.4s
[1A[0J[36mBuilding:[0m 255.5s
[1A[0J[36mBuilding:[0m 255.6s
[1A[0J[36mBuilding:[0m 255.8s
[1A[0J[36mBuilding:[0m 255.9s
[1A[0J[36mBuilding:[0m 256.0s
[1A[0J[36mBuilding:[0m 256.1s
[1A[0J[36mBuilding:[0m 256.2s
[1A[0J[36mBuilding:[0m 256.3s
[1A[0J[36mBuilding:[0m 256.4s
[1A[0J[36mBuilding:[0m 256.5s
[1A[0J[36mBuilding:[0m 256.6s
[1A[0J[36mBuilding:[0m 256.7s
[1A[0J[36mBuilding:[0m 256.8s
[1A[0J[36mBuilding:[0m 256.9s
[1A[0J[36mBuilding:[0m 257.0s
[1A[0J[36mBuilding:[0m 257.1s
[1A[0J[36mB

[1A[0J[36mBuilding:[0m 279.2s
[1A[0J[36mBuilding:[0m 279.3s
[1A[0J[36mBuilding:[0m 279.4s
[1A[0J[36mBuilding:[0m 279.5s
[1A[0J[36mBuilding:[0m 279.6s
[1A[0J[36mBuilding:[0m 279.8s
[1A[0J[36mBuilding:[0m 279.9s
[1A[0J[36mBuilding:[0m 280.0s
[1A[0J[36mBuilding:[0m 280.1s
[1A[0J[36mBuilding:[0m 280.2s
[1A[0J[36mBuilding:[0m 280.3s
[1A[0J[36mBuilding:[0m 280.4s
[1A[0J[36mBuilding:[0m 280.5s
[1A[0J[36mBuilding:[0m 280.6s
[1A[0J[36mBuilding:[0m 280.7s
[1A[0J[36mBuilding:[0m 280.8s
[1A[0J[36mBuilding:[0m 280.9s
[1A[0J[36mBuilding:[0m 281.0s
[1A[0J[36mBuilding:[0m 281.1s
[1A[0J[36mBuilding:[0m 281.2s
[1A[0J[36mBuilding:[0m 281.3s
[1A[0J[36mBuilding:[0m 281.4s
[1A[0J[36mBuilding:[0m 281.5s
[1A[0J[36mBuilding:[0m 281.6s
[1A[0J[36mBuilding:[0m 281.7s
[1A[0J[36mBuilding:[0m 281.8s
[1A[0J[36mBuilding:[0m 282.0s
[1A[0J[36mBuilding:[0m 282.1s
[1A[0J[36mBuilding:[0m 282.2s
[1A[0J[36mB

TimeoutError: 

In [None]:
class HierPcaSymmetricModel(HierarchicalSymmetricModel):
    """Variant of ``HierarchicalSymmetricModel`` that is fit on PCA-reduced features.

    The feature columns are rescaled, projected onto ``n_pca`` whitened
    principal components, and handed to ``self._fit`` together with the
    implied-probability logit and a gender indicator.

    NOTE(review): ``HierarchicalSymmetricModel.__init__`` is not called here;
    every attribute is set directly. Confirm the parent does not need any
    additional state.
    """

    def __init__(self, n_pca=8, beta_prior_std=1.0, intra_group_std=0.1, num_chains=4, num_samples=100):
        """Store hyper-parameters and create the (unfitted) PCA transformer.

        Args:
            n_pca: Number of principal components kept; also passed to the
                model as the feature dimension ``d``.
            beta_prior_std: Passed to the model as ``beta_prior_std`` (cast to float).
            intra_group_std: Passed to the model as ``intra_group_std`` (cast to float).
            num_chains: Stored on the instance; presumably read by ``_fit`` -- confirm.
            num_samples: Stored on the instance; presumably read by ``_fit`` -- confirm.
        """
        self.beta_prior_std = float(beta_prior_std)
        self.intra_group_std = float(intra_group_std)
        # Model source shared with the parent class (defined elsewhere in the notebook).
        self.code = hier_code
        self.scale_ = None
        self.n_pca = n_pca
        self.pca = PCA(n_components=n_pca, whiten=True)
        self.fit = None
        self.num_chains = num_chains
        self.num_samples = num_samples

    def fit_predict(self, train_df, test_df, feat_cols):
        """Fit on ``train_df`` and return predictions for ``test_df``.

        Args:
            train_df: DataFrame containing ``feat_cols`` plus the columns
                ``"targetWin"``, ``"p_fighter_implied"`` and ``"gender"``.
            test_df: DataFrame containing ``feat_cols`` plus
                ``"p_fighter_implied"`` and ``"gender"``. The target column is
                not needed for prediction and is no longer read here.
            feat_cols: Names of the feature columns fed to the PCA.

        Returns:
            ``fit["y_pred"].mean(1)`` -- presumably the per-test-row average
            over posterior draws (confirm the axis order of ``y_pred``).
        """
        # Per-feature scale estimated on the training data only and reused for
        # the test data.
        # NOTE(review): this is the mean square, not its square root -- confirm
        # that dividing by the squared scale is intended. A feature that is all
        # zeros in training would yield a zero divisor.
        scale_ = (train_df[feat_cols] ** 2).mean(0)
        self.scale_ = scale_
        X_train = train_df[feat_cols] / scale_
        X_test = test_df[feat_cols] / scale_

        # PCA directions are learned from the training rows and applied to both sets.
        X_pca_train = self.pca.fit_transform(X_train)
        X_pca_test = self.pca.transform(X_test)

        y_train = train_df["targetWin"]

        # Implied probabilities are moved to the logit scale before being passed on.
        ml_train = logit(train_df["p_fighter_implied"])
        ml_test = logit(test_df["p_fighter_implied"])

        # Binary group indicator: 1 for "M", 0 for "W". Any other value maps to NaN.
        gender_to_flag = {"M": 1, "W": 0}
        is_m_train = train_df["gender"].map(gender_to_flag)
        is_m_test = test_df["gender"].map(gender_to_flag)

        # Keys ending in "2" carry the test-set counterpart of the training input.
        data = {
            "n": train_df.shape[0],
            "n2": test_df.shape[0],
            "d": self.n_pca,
            "y": y_train.astype(int).values,
            "beta_prior_std": self.beta_prior_std,
            "intra_group_std": self.intra_group_std,
            "is_m": is_m_train.values,
            "is_m2": is_m_test.values,
            "X": X_pca_train,
            "ml_logit": ml_train.values,
            "X2": X_pca_test,
            "ml_logit2": ml_test.values,
        }
        fit = self._fit(data)
        return fit["y_pred"].mean(1)

# Fit the PCA variant on the gender-annotated training frame and predict the
# gender-annotated test frame.
hier_model = HierPcaSymmetricModel(n_pca=16, beta_prior_std=1.0, intra_group_std=0.1, num_samples=200)
y_pred = hier_model.fit_predict(train_gender_df, test_gender_df, feat_cols)

# Score against the labels of the frame that was actually predicted on, rather
# than `test_df` / a `y_test` left over from an earlier cell, so the metrics
# stay aligned with `y_pred` even if the two test frames differ in rows or order.
y_true_gender = test_gender_df["targetWin"]
acc = (y_pred.round() == y_true_gender.values).mean()
print(f"accuracy: {acc}")
print(f"log loss: {log_loss(y_pred=y_pred, y_true=y_true_gender)}")