In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm 

feat_ml_df = pd.read_pickle("data/deleteme.pkl")
feat_ml_df.head()

Unnamed: 0,FighterID,OpponentID,oldFighterEloAD,oldOpponentEloAD,predTargetAD,targetAD,Date,newFighterEloAD,newOpponentEloAD,oldEloDiffAD,...,FighterOpen,OpponentOpen,FighterCloseLeft,FighterCloseRight,OpponentCloseLeft,OpponentCloseRight,p_fighter,p_opponent,p_fighter_midpoint,p_fighter_implied
0,2335629/bj-penn,2335885/jens-pulver,0.842355,0.364097,0.478257,2.645751,2007-06-23,1.275853,-0.069401,0.478257,...,-490.0,340.0,-357.0,-330.0,225.0,280.0,0.830508,0.227273,0.801618,0.785142
1,2335676/joe-lauzon,2354360/brandon-melendez,0.091024,0.0,0.091024,2.0,2007-06-23,0.472819,-0.381795,0.091024,...,-430.0,330.0,-700.0,-475.0,355.0,425.0,0.811321,0.232558,0.789381,0.777217
2,2335447/anderson-silva,2335475/nate-marquardt,0.227196,0.736307,-0.509111,-1.0,2007-07-07,0.129018,0.834485,-0.509111,...,-170.0,150.0,-145.0,-145.0,125.0,125.0,0.62963,0.4,0.614815,0.611511
3,2335302/heath-herring,2335521/antonio-rodrigo-nogueira,0.10641,0.0,0.10641,-2.645751,2007-07-07,-0.444022,0.550432,0.10641,...,470.0,-810.0,500.0,500.0,-700.0,-700.0,0.175439,0.89011,0.142664,0.164646
4,2335694/frankie-edgar,2335717/mark-bocek,0.0,0.0,0.0,1.414214,2007-07-07,0.282843,-0.282843,0.0,...,-260.0,180.0,-230.0,-230.0,190.0,190.0,0.722222,0.357143,0.68254,0.669118


In [17]:
import nest_asyncio
nest_asyncio.apply()

In [18]:
# okay uhh
import stan

code = """

data {
    int<lower=0> n;                     // number of data points in training data
    int<lower=0> n2;                    // number of data points in test data
    int<lower=1> d;                     // explanatory variable dimension
    int<lower=0,upper=1> y[n];          // response variable
    real<lower=0> beta_prior_std;       // prior scale on beta

    matrix[n, d] X;                     // explanatory variable
    vector[n] ml_logit;                   // logit of the opening money line

    matrix[n2, d] X2;                   // test data
    vector[n2] ml_logit2;                 // test data

}

parameters {
    vector[d] beta;
}

transformed parameters {
    vector[n] eta;
    vector[n2] eta2;
    eta = ml_logit + (X * beta);      // linear predictor
    eta2 = ml_logit2 + (X2 * beta);   // linear predictor for test data
}

model {
    for(i in 1:d){
        beta[i] ~ normal(0, beta_prior_std);
        //beta[i] ~ cauchy(0, beta_prior_std); //prior for slopes following gelman 2008
    }

    // observation model
    y ~ bernoulli_logit(eta);
}

generated quantities {
    vector[n2] y_pred;
    y_pred = inv_logit(eta2);  // y values predicted for test data
}
"""

In [56]:
feat_cols = [
    "oldEloDiffordinal_fighter_result", 
    "oldEloDiffsubmission_fighter_result",
    "oldEloDifftko_ko_fighter_result",
    "oldEloDiffdecision_fighter_result",
#     "oldEloDifffinish_fighter_result",
#     "oldEloDifffighter_result_time_left",
    
#     'oldEloDiffAD', 
    'oldEloDiffADTB', 
    'oldEloDiffADTM', 
    'oldEloDiffKD',
    'oldEloDiffRV', 
    'oldEloDiffSCBL', 
    'oldEloDiffSCHL', 
    'oldEloDiffSGBL',
    'oldEloDiffSGHL', 
    'oldEloDiffSM', 'oldEloDiffSSL', 'oldEloDiffTDL',
    'oldEloDiffTDS', 
    'oldEloDiffTD_fails', 
    'oldEloDiffTSL',
    'oldEloDiffclinch_strikes_landed', 
    'oldEloDiffdistance_strikes_landed',
    'oldEloDiffground_strikes_landed', 
    'oldEloDiffstanding_strikes',
    
    'oldEloDiffWin',
    
    "t_since_last_fight_log_diff", 
#     "t_since_last_fight_diff",
    "total_fights_sqrt_diff", 
    "total_ufc_fights_diff",
    
    "age_diff", "reach_diff", 
#     "weight_diff", 
    "log_weight_diff",
    "height_diff",
]

# max_train_dt = pd.to_datetime("2021-07-01")

# train_df = feat_ml_df.loc[feat_ml_df["Date"] <= max_train_dt].dropna(subset=[*feat_cols, "targetWin"])
# test_df = feat_ml_df.loc[feat_ml_df["Date"] > max_train_dt].dropna(subset=[*feat_cols, "targetWin"])

In [178]:
from sklearn.decomposition import PCA

def logit(x):
    return np.log(x) - np.log(1-x)

class PcaSymmetricModel(object):
    
    def __init__(self, feat_cols, beta_prior_std=0.1, n_pca=8, num_chains=4, num_samples=1000):
        self.feat_cols = feat_cols
        self.beta_prior_std = float(beta_prior_std)
        self.n_pca = n_pca
        self.code = code
        self.scale_ = None
        self.pca = PCA(n_components=n_pca, whiten=True)
        self.fit = None
        self.num_chains = num_chains
        self.num_samples = num_samples
        
    def _fit(self, data):
        posterior = stan.build(self.code, data=data, random_seed=1)
        fit = posterior.sample(num_chains=self.num_chains, num_samples=self.num_samples)
        self.fit = fit
        return fit
        
    def fit_predict(self, train_df, test_df, feat_cols=None):
        if not feat_cols:
            feat_cols = self.feat_cols
        scale_ = (train_df[feat_cols]**2).mean(0)
        self.scale_ = scale_
        X_train = train_df[feat_cols] / scale_
        X_test = test_df[feat_cols] / scale_
        
        # pca happens here
        X_pca_train = self.pca.fit_transform(X_train)
        X_pca_test = self.pca.transform(X_test)

        y_train = train_df["targetWin"]
        y_test = test_df["targetWin"]

        ml_train = logit(train_df["p_fighter_implied"])
        ml_test = logit(test_df["p_fighter_implied"])
        
        data = {
            "n": train_df.shape[0],
            "n2": test_df.shape[0],
            "d": X_pca_train.shape[1],
            "y": y_train.astype(int).values,
            "beta_prior_std": self.beta_prior_std,
            "X": X_pca_train,
            "ml_logit": ml_train.values,
            "X2": X_pca_test,
            "ml_logit2": ml_test.values,
        }

        fit = self._fit(data)
        return fit["y_pred"].mean(1)

pca_model = PcaSymmetricModel(feat_cols=feat_cols, n_pca=8, beta_prior_std=2.0, num_samples=1000)

In [179]:
from sklearn.metrics import log_loss, accuracy_score

class TimeSeriesCrossVal(object):
    # Just regular time series cross validation,
    # but ensures dates don't end up in multiple folds

    def __init__(self, n_splits=4):
        self.n_splits = n_splits
        self.fold_pred_df = None

    def get_folds(self, df):
        df = df.sort_values("Date")
        dates = sorted(df["Date"].unique())
        n_dates_per_fold = len(dates) // (self.n_splits + 1)
        for i in range(self.n_splits + 1):
            start = i * n_dates_per_fold
            stop = min(start + n_dates_per_fold, len(dates)-1)
            min_date = dates[start]
            max_date = dates[stop]
            inds = (df["Date"] >= min_date) & (df["Date"] < max_date)
            yield df.loc[inds]
            
    def get_cross_val_preds(self, model, df):
        train_df = pd.DataFrame()
        fold_pred_df_list = []        
        for i, test_df in enumerate(self.get_folds(df)):
            if len(train_df) > 0:
                print("training on date range:", 
                      train_df["Date"].dt.date.min(), 
                      train_df["Date"].dt.date.max())
                y_pred = model.fit_predict(train_df, test_df)
                y_pred_df = test_df.assign(
                    y_pred=y_pred,
                    test_fold=i,
                )
                fold_pred_df_list.append(y_pred_df)
            train_df = pd.concat([train_df, test_df])
        self.fold_pred_df = pd.concat(fold_pred_df_list)
        return self.fold_pred_df
    
    def score_preds(self, score_fn_dict=None):
        if score_fn_dict is None:
            score_fn_dict = dict()
        score_fn_dict.update({
            "log_loss": lambda fold_df: log_loss(y_pred=fold_df["y_pred"], 
                                                 y_true=fold_df["targetWin"]),
            "accuracy_score": lambda fold_df: accuracy_score(y_pred=fold_df["y_pred"].round(), 
                                                             y_true=fold_df["targetWin"]),
            "ml_log_loss": lambda fold_df: log_loss(y_pred=fold_df["p_fighter_implied"], 
                                                    y_true=fold_df["targetWin"]),
        })
        metrics_df_list = []
        for i, fold_df in self.fold_pred_df.groupby("test_fold"):
            if len(train_df) > 0:
                curr_metrics = {
                    "test_fold": i,
                    "min_test_date": fold_df["Date"].min(),
                    "max_test_date": fold_df["Date"].max(),
                    "n_test_days": fold_df["Date"].nunique(),
                    "n_test_fights": len(fold_df),
                }
                for score_nm, score_fn in score_fn_dict.items():
                    curr_metrics[score_nm] = score_fn(fold_df)
                metrics_df_list.append(curr_metrics)
        self.metrics_df = pd.DataFrame(metrics_df_list)
        return self.metrics_df
    
temp_df = feat_ml_df.dropna(subset=[*feat_cols, "targetWin"])
tscv = TimeSeriesCrossVal()
fold_pred_df = tscv.get_cross_val_preds(pca_model, temp_df)
fold_pred_df.head()

training on date range: 2007-06-23 2012-04-14
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:  25% (2000/8000)
Sampling:  50% (4000/8000)
Sampling:  75% (6000/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 5.5e-05 seconds
  1000 transitions using 10 leapfrog steps per transition would take 0.55 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 4.6e-05 seconds
  1000 transitions using 10 leapfrog steps per transition would take 0.46 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 4.9e-05 seconds
  1000 transitions using 10 leapfrog steps per transition would take 0.49 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation 

training on date range: 2007-06-23 2015-04-04
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:  11% (900/8000)
Sampling:  36% (2900/8000)
Sampling:  61% (4900/8000)
Sampling:  86% (6900/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 7.4e-05 seconds
  1000 transitions using 10 leapfrog steps per transition would take 0.74 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 8.6e-05 seconds
  1000 transitions using 10 leapfrog steps per transition would take 0.86 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 8.9e-05 seconds
  1000 transitions using 10 leapfrog steps per transition would take 0.89 seconds.
  Adjust your expectations according

training on date range: 2007-06-23 2017-11-18
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:   8% (600/8000)
Sampling:  15% (1200/8000)
Sampling:  22% (1800/8000)
Sampling:  48% (3800/8000)
Sampling:  65% (5200/8000)
Sampling:  82% (6600/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000135 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.35 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 8.3e-05 seconds
  1000 transitions using 10 leapfrog steps per transition would take 0.83 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.000163 seconds
  1000 transitions using 10 leapfrog steps per transition would

training on date range: 2007-06-23 2020-02-15
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:   5% (400/8000)
Sampling:  10% (800/8000)
Sampling:  15% (1200/8000)
Sampling:  20% (1600/8000)
Sampling:  40% (3200/8000)
Sampling:  60% (4800/8000)
Sampling:  80% (6400/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000721 seconds
  1000 transitions using 10 leapfrog steps per transition would take 7.21 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.000114 seconds
  1000 transitions using 10 leapfrog steps per transition would take 1.14 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.000112 seconds
  1000 transitions using 10 leapfrog

Unnamed: 0,FighterID,OpponentID,oldFighterEloAD,oldOpponentEloAD,predTargetAD,targetAD,Date,newFighterEloAD,newOpponentEloAD,oldEloDiffAD,...,FighterCloseLeft,FighterCloseRight,OpponentCloseLeft,OpponentCloseRight,p_fighter,p_opponent,p_fighter_midpoint,p_fighter_implied,y_pred,test_fold
533,2473720/ben-rothwell,2500160/brendan-schaub,0.095559,0.234393,-0.138835,0.0,2012-04-21,0.123326,0.206627,-0.138835,...,195.0,195.0,-215.0,-215.0,0.285714,0.777778,0.253968,0.268657,0.201846,1
534,2500461/keith-wisniewski,2500664/chris-clements,-0.243861,0.0,-0.243861,2.236068,2012-04-21,0.252125,-0.495986,-0.243861,...,135.0,135.0,-145.0,-145.0,0.357143,0.722222,0.31746,0.330882,0.288975,1
535,2501722/mark-hominick,2502027/eddie-yagin,0.102589,0.0,0.102589,0.0,2012-04-21,0.082071,0.020518,0.102589,...,-480.0,-480.0,423.0,423.0,0.777778,0.285714,0.746032,0.731343,0.748479,1
536,2504648/travis-browne,2505262/chad-griggs,0.502405,0.0,0.502405,1.414214,2012-04-21,0.684766,-0.182362,0.502405,...,-400.0,-275.0,215.0,270.0,0.607843,0.444444,0.581699,0.57764,0.608794,1
537,2335666/matt-brown,2615077/stephen-thompson,-0.218244,0.0,-0.218244,2.236068,2012-04-21,0.272618,-0.490862,-0.218244,...,200.0,200.0,-220.0,-220.0,0.333333,0.736842,0.298246,0.311475,0.326831,1


In [180]:
tscv.score_preds({
    "log_loss_diff": lambda fold_df: (
        log_loss(y_pred=fold_df["y_pred"], y_true=fold_df["targetWin"]) - 
        log_loss(y_pred=fold_df["p_fighter_implied"], y_true=fold_df["targetWin"])
    )
})

Unnamed: 0,test_fold,min_test_date,max_test_date,n_test_days,n_test_fights,log_loss_diff,log_loss,accuracy_score,ml_log_loss
0,1,2012-04-21,2015-04-04,108,478,0.013052,0.626233,0.633891,0.613181
1,2,2015-04-11,2017-11-18,108,557,-0.002821,0.658351,0.59605,0.661171
2,3,2017-11-25,2020-02-15,108,750,-0.002269,0.6398,0.626667,0.642069
3,4,2020-02-22,2022-03-19,107,891,-0.004251,0.624712,0.638608,0.628963


In [127]:
print(pca_model.fit)

<stan.Fit>
Parameters:
    beta: (8,)
    eta: (2318,)
    eta2: (891,)
    y_pred: (891,)
Draws: 4000


In [None]:
# I should probably try checking mcmc diagnostics one of these days

# Hey, how does the hierarchical model compare now?

In [83]:
gender_df = pd.read_csv("data/fighter_genders.csv")
feat_ml_df2 = feat_ml_df.merge(gender_df, on="FighterID", how="left")
feat_ml_df2["gender"].isnull().any()

False

In [173]:
# okay uhh
import stan

hier_code = """

data {
    int<lower=0> n;                     // number of data points in training data
    int<lower=0> n2;                    // number of data points in test data
    int<lower=1> d;                     // explanatory variable dimension
    int<lower=0,upper=1> y[n];          // response variable
    real<lower=0> beta_prior_std;       // prior scale on beta mean across groups
    real<lower=0> intra_group_std;      // prior scale on beta, std dev of group's beta around mean
    
    vector[n] is_m;      // 0 if woman, 1 if man
    vector[n2] is_m2;    // 0 if woman, 1 if man
    
    matrix[n, d] X;                     // explanatory variable
    vector[n] ml_logit;                   // logit of the opening money line

    matrix[n2, d] X2;                   // test data
    vector[n2] ml_logit2;                 // test data

}

parameters {
    vector[d] beta_m;
    vector[d] beta_w;
}

transformed parameters {
    vector[n] eta;
    vector[n2] eta2;
    eta = (
        ml_logit + 
        ((X * beta_m) .* is_m) + 
        ((X * beta_w) .* (1 - is_m))
    );      // linear predictor
    eta2 = (
        ml_logit2 + 
        ((X2 * beta_m) .* is_m2) + 
        ((X2 * beta_w) .* (1 - is_m2))
    );   // linear predictor for test data
}

model {
    beta_m ~ normal(0, beta_prior_std);
    beta_w ~ normal(beta_m, intra_group_std); // damn i hope this works

    y ~ bernoulli_logit(eta);
}

generated quantities {
    vector[n2] y_pred;
    
    y_pred = inv_logit(eta2);  // y values predicted for test data
}
"""

class HierPcaSymmetricModel(HierarchicalSymmetricModel):
    
    def __init__(self, feat_cols, n_pca=8, beta_prior_std=1.0, intra_group_std=0.1, 
                 num_chains=4, num_samples=100):
        self.feat_cols = feat_cols
        self.beta_prior_std = float(beta_prior_std)
        self.intra_group_std = float(intra_group_std)
        self.code = hier_code
        self.scale_ = None
        self.n_pca = n_pca
        self.pca = PCA(n_components=n_pca, whiten=True)
        self.fit = None
        self.num_chains = num_chains
        self.num_samples = num_samples
        
    def fit_predict(self, train_df, test_df, feat_cols=None):
        if not feat_cols:
            feat_cols = self.feat_cols
        scale_ = (train_df[feat_cols]**2).mean(0)
        self.scale_ = scale_
        X_train = train_df[feat_cols] / scale_
        X_test = test_df[feat_cols] / scale_
        
        X_pca_train = self.pca.fit_transform(X_train)
        X_pca_test = self.pca.transform(X_test)

        y_train = train_df["targetWin"]
        y_test = test_df["targetWin"]

        ml_train = logit(train_df["p_fighter_implied"])
        ml_test = logit(test_df["p_fighter_implied"])
        
        is_m_train = train_df["gender"].map({"M":1, "W":0})
        is_m_test = test_df["gender"].map({"M":1, "W":0})
        
        data = {
            "n": train_df.shape[0],
            "n2": test_df.shape[0],
            "d": self.n_pca,
            "y": y_train.astype(int).values,
            "beta_prior_std": self.beta_prior_std,
            "intra_group_std": self.intra_group_std,
            "is_m": is_m_train.values,
            "is_m2": is_m_test.values,
            "X": X_pca_train,
            "ml_logit": ml_train.values,
            "X2": X_pca_test,
            "ml_logit2": ml_test.values,
        }
        fit = self._fit(data)
        return fit["y_pred"].mean(1)

hier_model = HierPcaSymmetricModel(feat_cols, n_pca=8, beta_prior_std=2.0, intra_group_std=0.5, num_samples=1000)

In [174]:
temp_df = feat_ml_df2.dropna(subset=[*feat_cols, "targetWin"])
gender_tscv = TimeSeriesCrossVal()
fold_pred_df = gender_tscv.get_cross_val_preds(hier_model, temp_df)

training on date range: 2007-06-23 2012-04-14
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:   1% (100/8000)
Sampling:   2% (200/8000)
Sampling:   4% (300/8000)
Sampling:   6% (500/8000)
Sampling:  15% (1200/8000)
Sampling:  24% (1900/8000)
Sampling:  48% (3800/8000)
Sampling:  70% (5600/8000)
Sampling:  85% (6800/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.001545 seconds
  1000 transitions using 10 leapfrog steps per transition would take 15.45 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.002869 seconds
  1000 transitions using 10 leapfrog steps per transition would take 28.69 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took

training on date range: 2007-06-23 2015-04-04
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:   1% (100/8000)
Sampling:   2% (200/8000)
Sampling:   4% (300/8000)
Sampling:   5% (400/8000)
Sampling:  11% (900/8000)
Sampling:  18% (1400/8000)
Sampling:  24% (1900/8000)
Sampling:  29% (2300/8000)
Sampling:  46% (3700/8000)
Sampling:  64% (5100/8000)
Sampling:  81% (6500/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000347 seconds
  1000 transitions using 10 leapfrog steps per transition would take 3.47 seconds.
  Adjust your expectations accordingly!
  Gradient evaluation took 0.00207 seconds
  1000 transitions using 10 leapfrog steps per transition would take 20.7 seconds.
  Adjust your ex

training on date range: 2007-06-23 2017-11-18
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:   0% (1/8000)
Sampling:   0% (2/8000)
Sampling:   0% (3/8000)
Sampling:   0% (4/8000)
Sampling:   3% (203/8000)
Sampling:   5% (402/8000)
Sampling:   9% (701/8000)
Sampling:  11% (900/8000)
Sampling:  15% (1200/8000)
Sampling:  19% (1500/8000)
Sampling:  21% (1700/8000)
Sampling:  24% (1900/8000)
Sampling:  26% (2100/8000)
Sampling:  29% (2300/8000)
Sampling:  31% (2500/8000)
Sampling:  35% (2800/8000)
Sampling:  51% (4100/8000)
Sampling:  68% (5400/8000)
Sampling:  84% (6700/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampling:
  Gradient evaluation took 0.000804 seconds
  1000 transitions using 10 leapfrog steps per transition wo

training on date range: 2007-06-23 2020-02-15
Building...



Building: found in cache, done.Messages from stanc:
    of arrays by placing brackets after a variable name is deprecated and
    will be removed in Stan 2.32.0. Instead use the array keyword before the
    type. This can be changed automatically using the auto-format flag to
    stanc
Sampling:   0%
Sampling:   0% (1/8000)
Sampling:   0% (2/8000)
Sampling:   0% (3/8000)
Sampling:   0% (4/8000)
Sampling:   3% (203/8000)
Sampling:   5% (402/8000)
Sampling:   8% (601/8000)
Sampling:  10% (800/8000)
Sampling:  12% (1000/8000)
Sampling:  15% (1200/8000)
Sampling:  18% (1400/8000)
Sampling:  20% (1600/8000)
Sampling:  22% (1800/8000)
Sampling:  25% (2000/8000)
Sampling:  28% (2200/8000)
Sampling:  30% (2400/8000)
Sampling:  32% (2600/8000)
Sampling:  35% (2800/8000)
Sampling:  38% (3000/8000)
Sampling:  40% (3200/8000)
Sampling:  55% (4400/8000)
Sampling:  70% (5600/8000)
Sampling:  85% (6800/8000)
Sampling: 100% (8000/8000)
Sampling: 100% (8000/8000), done.
Messages received during sampli

In [176]:
gender_tscv.score_preds({
    "log_loss_diff": lambda fold_df: (
        log_loss(y_pred=fold_df["y_pred"], y_true=fold_df["targetWin"]) - 
        log_loss(y_pred=fold_df["p_fighter_implied"], y_true=fold_df["targetWin"])
    )
})

Unnamed: 0,test_fold,min_test_date,max_test_date,n_test_days,n_test_fights,log_loss_diff,log_loss,accuracy_score,ml_log_loss
0,1,2012-04-21,2015-04-04,108,478,0.009552,0.622733,0.640167,0.613181
1,2,2015-04-11,2017-11-18,108,557,0.010263,0.671434,0.572711,0.661171
2,3,2017-11-25,2020-02-15,108,750,-0.000481,0.641589,0.633333,0.642069
3,4,2020-02-22,2022-03-19,107,891,-0.003468,0.625495,0.635241,0.628963


In [177]:
# okay nice, so gender_tscv has a slightly better log loss
(tscv.metrics_df["log_loss"] - gender_tscv.metrics_df["log_loss"]).mean()

-0.003032315369562971

In [138]:
# is the numer is bigger than denom? 
(tscv.metrics_df["log_loss"] / gender_tscv.metrics_df["log_loss"]).mean()

0.9955830371794216

In [139]:
(hier_model.fit["beta_m"] - hier_model.fit["beta_w"]).mean(1).std()

0.10460956407766873

In [145]:
y_pred = 1/(1 + np.exp(-hier_model.fit["eta2"])).mean(1)
y_pred.shape

(891,)

In [149]:
y_true = gender_tscv.fold_pred_df.query("test_fold == 4")["targetWin"]
log_loss(y_pred=y_pred, y_true=y_true)
# okay, we got an improvement this way. maybe this is good...

0.6248980309288936

In [170]:
def naive_returns(fold_df):
    # payout in addition to wager
    f_payout = (1/fold_df["p_fighter"])
    o_payout = (1/fold_df["p_opponent"])
    # expected return is positive
    f_bet = fold_df["y_pred"] > fold_df["p_fighter"]
    o_bet = (1 - fold_df["y_pred"]) > fold_df["p_opponent"]
    f_won = test_df["targetWin"] == 1
    o_won = test_df["targetWin"] == 0
    
    f_gains = (f_bet * f_won * (f_payout - 1)) # gains over the initial wager
    o_gains = (o_bet * o_won * (o_payout - 1)) # gains over the initial wager
    f_losses = (-1 * f_bet * o_won)
    o_losses = (-1 * o_bet * f_won)
    return f_gains.sum() + o_gains.sum() + f_losses.sum() + o_losses.sum()

def eval_kelly(fold_df):
    y_pred = fold_df["y_pred"]
    # b is % of wager gained on a win (not counting original wager)
    b_fighter = (1/fold_df["p_fighter"]) - 1
    b_opponent = (1/fold_df["p_opponent"]) - 1
    
    kelly_bet_fighter = y_pred + ((y_pred - 1) / b_fighter)
    kelly_bet_opponent = (1 - y_pred) + ((1 - y_pred - 1) / b_opponent)
    kelly_bet_fighter = np.maximum(0, kelly_bet_fighter)
    kelly_bet_opponent = np.maximum(0, kelly_bet_opponent)
    
    f_won = fold_df["targetWin"] == 1
    o_won = fold_df["targetWin"] == 0
    
    fighter_return = (kelly_bet_fighter * b_fighter * f_won) - (kelly_bet_fighter * o_won)
    opponent_return = (kelly_bet_opponent * b_opponent * o_won) - (kelly_bet_opponent * f_won)
    
    total_returns = 1 + fighter_return + opponent_return
    return np.prod(total_returns)    
    
gender_tscv.score_preds({
    "naive_returns": naive_returns,
    "kelly_growth": eval_kelly,
})

Unnamed: 0,test_fold,min_test_date,max_test_date,n_test_days,n_test_fights,naive_returns,kelly_growth,log_loss,accuracy_score,ml_log_loss
0,1,2012-04-21,2015-04-04,108,478,0.0,0.010767,0.62277,0.642259,0.613181
1,2,2015-04-11,2017-11-18,108,557,0.0,0.005714,0.671395,0.576302,0.661171
2,3,2017-11-25,2020-02-15,108,750,0.0,0.352076,0.641545,0.633333,0.642069
3,4,2020-02-22,2022-03-19,107,891,12.615835,0.780628,0.625406,0.634119,0.628963
