In [5]:
# Optional one-time environment setup: uncomment whichever installs are missing
# from the kernel's environment, run the cell once, then restart the kernel.
# NOTE(review): only tensorflow is version-pinned here — consider pinning the
# other packages too so a fresh environment reproduces the same results.
#!pip install keras_tuner
#!pip install --upgrade tensorflow-lattice
#!pip install tensorflow==2.8.0
#!pip install lightgbm
#!pip install xgboost
#!pip show tensorflow
# Record the interpreter version next to the results.
!python --version

Python 3.8.12


In [6]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import make_scorer, brier_score_loss, log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb
from lightgbm import early_stopping
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import tensorflow as tf
import tensorflow_lattice as tfl
import keras_tuner as kt
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

import xgboost as xgb

In [7]:
# Confirm which TensorFlow build the kernel actually imported.
# NOTE(review): the traceback saved in the TF Lattice training cell mentions
# `keras.ops`, which suggests that cell last ran against a newer Keras than the
# version printed here — confirm the environment before re-running.
print(tf.__version__)

2.8.0


In [2]:
###############################################################################
# 1. Data Loading and Preprocessing
###############################################################################

def load_model_data(dataset_path):
    """
    Read the matchup CSV and separate it into features and label.

    Every column other than the identifier/label columns
    (Season, LowerTeamID, HigherTeamID, Target, GameID, ID) is treated as a
    model feature; identifier columns that are absent from the file are
    simply ignored. The CSV must contain a 'Target' column.

    Returns
    -------
    (X, y, df)
        X  : copy of the feature columns, in file order
        y  : copy of the 'Target' column
        df : the complete DataFrame as read from disk
    """
    non_feature_cols = ['Season', 'LowerTeamID', 'HigherTeamID', 'Target', 'GameID', 'ID']

    df = pd.read_csv(dataset_path)

    # Boolean mask over the columns keeps the original column order.
    is_feature = ~df.columns.isin(non_feature_cols)
    X = df.loc[:, is_feature].copy()
    y = df.loc[:, 'Target'].copy()
    return X, y, df

In [3]:
###############################################################################
# 2. Metrics & Helpers
###############################################################################

def brier_score(y_true, y_prob):
    """ Mean squared gap between the outcomes and the predicted probabilities (lower is better). """
    score = brier_score_loss(y_true, y_prob)
    return score

def log_loss_metric(y_true, y_prob):
    """ Log loss of the predicted probabilities, delegated to scikit-learn's log_loss. """
    loss = log_loss(y_true, y_prob)
    return loss

def accuracy_metric(y_true, y_pred_binary):
    """ Accuracy of already-thresholded (binary) predictions against y_true. """
    accuracy = accuracy_score(y_true, y_pred_binary)
    return accuracy

In [54]:
###############################################################################
# 3. XGB (Optimizing for Brier Score)
###############################################################################

def brier_scorer(estimator, X, y):
    """
    Scorer-signature wrapper (estimator, X, y) around the Brier loss.

    The loss is negated on return because scikit-learn model selection
    treats larger scores as better.
    """
    positive_class_prob = estimator.predict_proba(X)[:, 1]
    loss = brier_score_loss(y, positive_class_prob)
    return -loss

def _xgb_monotone_constraints(columns):
    """Build XGBoost's monotone-constraint string (e.g. "(1,0,-1)") for `columns`, in order."""
    signs = []
    for col in columns:
        if col in ('window_TO_avg_diff', 'window_PF_avg_diff'):
            signs.append(-1)   # prediction must not increase with this feature
        elif col == 'window_clutch_count_diff':
            signs.append(0)    # explicitly left unconstrained
        elif col.endswith("_diff"):
            signs.append(1)    # every other "_diff" feature: non-decreasing
        else:
            signs.append(0)
    return "(" + ",".join(str(s) for s in signs) + ")"


def train_xgb_model(X, y, use_monotone_constraints=False):
    """
    Tune an XGBClassifier with GridSearchCV, selecting hyperparameters by Brier score.

    The data is split 80/20 (random_state=42). A 3-fold grid search runs on
    the 80% part using scikit-learn's built-in 'neg_brier_score' scorer
    (the negated Brier loss, so that "higher is better" holds). The winning
    configuration, refit by GridSearchCV on that 80% part, is then evaluated
    on the 20% hold-out and its Brier score, log loss and accuracy are printed.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix.
    y : array-like
        Binary target.
    use_monotone_constraints : bool, default False
        When True, the model is constrained per feature name: -1 for
        'window_TO_avg_diff' / 'window_PF_avg_diff', 0 for
        'window_clutch_count_diff', +1 for any other column ending in
        "_diff", and 0 for everything else. The default (False) trains an
        unconstrained model.

    Returns
    -------
    xgb.XGBClassifier
        The best estimator found by the grid search.
    """
    # 80/20 split: the grid search only ever sees the training part.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    param_grid = {
        'n_estimators': [100, 200, 300, 500],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5, 10],
        'min_child_weight': [1, 5, 6],
    }

    model_kwargs = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'random_state': 42,
        'tree_method': 'hist',
    }
    if use_monotone_constraints:
        model_kwargs['monotone_constraints'] = _xgb_monotone_constraints(X_train.columns)
    model = xgb.XGBClassifier(**model_kwargs)

    grid = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        # Select by (negated) Brier score, as this section promises.
        scoring='neg_brier_score',
        cv=3,
        n_jobs=-1,
        verbose=1,
        error_score='raise'
    )
    grid.fit(X_train, y_train)

    best_model = grid.best_estimator_

    # Evaluate on the hold-out part
    val_probs = best_model.predict_proba(X_val)[:, 1]
    val_preds = (val_probs > 0.5).astype(int)
    val_brier = brier_score(y_val, val_probs)
    val_logloss = log_loss_metric(y_val, val_probs)
    val_acc = accuracy_metric(y_val, val_preds)

    print("Best XGB hyperparams:", grid.best_params_)
    print(f"XGB val Brier: {val_brier:.4f}, LogLoss: {val_logloss:.4f}, Accuracy: {val_acc:.4f}")

    return best_model

In [5]:
###############################################################################
# 4. Logistic Regression (scikit-learn) - Brier Score
###############################################################################

def train_logistic_regression(X, y):
    """
    Tune a StandardScaler + LogisticRegression pipeline, selecting by Brier score.

    The data is split 80/20 (random_state=42). A 3-fold grid search over C,
    solver and max_iter runs on the 80% part using scikit-learn's built-in
    'neg_brier_score' scorer (the negated Brier loss, so that "higher is
    better" holds). The winning pipeline, refit by GridSearchCV on that 80%
    part, is then evaluated on the 20% hold-out and its Brier score, log loss
    and accuracy are printed.

    Parameters
    ----------
    X : pd.DataFrame or array-like
        Feature matrix.
    y : array-like
        Binary target.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The best pipeline (steps 'scaler' and 'logreg') found by the search.
    """
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scaling lives inside the pipeline so each CV fold is scaled using only
    # its own training portion.
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('logreg', LogisticRegression(random_state=42))
    ])

    param_grid = {
        'logreg__C': [0.01, 0.1, 1, 10],
        'logreg__solver': ['lbfgs', 'liblinear'],
        'logreg__max_iter': [100, 300, 500, 1000]
    }

    grid = GridSearchCV(
        estimator=pipe,
        param_grid=param_grid,
        # Select by (negated) Brier score, as this section promises.
        scoring='neg_brier_score',
        cv=3,
        n_jobs=-1,
        verbose=1
    )
    grid.fit(X_train, y_train)

    best_model = grid.best_estimator_
    val_probs = best_model.predict_proba(X_val)[:, 1]
    val_preds = (val_probs > 0.5).astype(int)
    val_brier = brier_score(y_val, val_probs)
    val_ll = log_loss_metric(y_val, val_probs)
    val_acc = accuracy_metric(y_val, val_preds)

    print("Best LogisticRegression hyperparams:", grid.best_params_)
    print(f"LogReg val Brier: {val_brier:.4f}, LogLoss: {val_ll:.4f}, Accuracy: {val_acc:.4f}")
    return best_model

In [6]:
###############################################################################
# 5. TensorFlow Lattice (Older PWLCalibration API, custom monotonicities)
###############################################################################

# We'll define a custom Brier metric in TF
def brier_score_tf(y_true, y_pred):
    """Keras-style metric: mean squared difference between the labels (cast to float32) and y_pred."""
    labels = tf.cast(y_true, tf.float32)
    squared_error = tf.square(labels - y_pred)
    return tf.reduce_mean(squared_error)

def build_tf_lattice_custom_monotonic_model_legacy_lattice_sizes(hp, feature_names, X_train):
    """
    Build and compile a calibrated-lattice binary classifier for Keras Tuner.

    Older TF Lattice code requiring 'lattice_sizes' in Lattice(...).
    Architecture:
      - one PWLCalibration per feature, with an explicit 'input_keypoints'
        array spread evenly over that feature's range in X_train and an
        output range of [0, 1];
      - the calibrator outputs concatenated into a single Lattice layer with
        2 vertices per dimension;
      - a sigmoid on the lattice output to obtain a probability.

    Monotonicity: each feature is 'increasing' unless it is listed in
    `custom_monotonic_map` as 'decreasing' or 'none'. The direction is
    enforced by the calibrator. The Lattice layer itself only supports
    "none" (0) or "increasing" (1), so every constrained feature — increasing
    OR decreasing — needs lattice monotonicity 1: a decreasing calibrator
    followed by an increasing lattice dimension is decreasing end to end,
    whereas an unconstrained lattice dimension would void the guarantee.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Supplies 'num_keypoints' (5, 10 or 15) and 'learning_rate'
        (1e-4 .. 1e-2, log-sampled).
    feature_names : list of str
        One model input is created per name, in this order.
    X_train : pd.DataFrame
        Used only to read each feature's min/max for the keypoints.

    Returns
    -------
    tf.keras.Model
        Compiled with Adam, binary cross-entropy loss and an accuracy metric.
    """

    # Per-feature overrides of the default 'increasing' constraint.
    custom_monotonic_map = {
        'window_TO_avg_diff': 'decreasing',
        'window_PF_avg_diff': 'decreasing',
        'window_clutch_count_diff': 'none'
    }

    num_keypoints = hp.Int('num_keypoints', min_value=5, max_value=15, step=5, default=10)
    lr = hp.Float('learning_rate', 1e-4, 1e-2, sampling='log', default=1e-3)

    inputs = {}
    calibrators = []
    lattice_monotonicities = []
    for feat in feature_names:
        inputs[feat] = tf.keras.Input(shape=(1,), name=feat)

        # monotonic direction for PWL
        this_monotonic = custom_monotonic_map.get(feat, 'increasing')

        # build input_keypoints array
        f_min = float(X_train[feat].min())
        f_max = float(X_train[feat].max())
        if f_max <= f_min:
            # Constant feature: linspace would yield identical keypoints,
            # while PWLCalibration expects them to be strictly increasing.
            f_max = f_min + 1.0
        keypoints = np.linspace(f_min, f_max, num_keypoints)

        # PWLCalibration older signature
        c = tfl.layers.PWLCalibration(
            input_keypoints=keypoints,
            units=1,
            output_min=0.0,
            output_max=1.0,
            clamp_min=False,
            clamp_max=False,
            monotonicity=this_monotonic
        )(inputs[feat])
        calibrators.append(c)

        # Lattice side of the constraint: 1 (increasing in the calibrated
        # value) whenever the feature is constrained in either direction,
        # 0 only when it is unconstrained.
        lattice_monotonicities.append(0 if this_monotonic == 'none' else 1)

    # Concatenate calibrator outputs
    concat_calibrators = tf.keras.layers.Concatenate()(calibrators)

    # Older Lattice requires 'lattice_sizes'.
    # 2 vertices per dimension => 2^n lattice parameters in total, so this only
    # scales to a modest number of features; [3]*n_dims gives more resolution.
    n_dims = len(feature_names)
    lattice_out = tfl.layers.Lattice(
        lattice_sizes=[2]*n_dims,
        monotonicities=lattice_monotonicities,
        units=1  # for binary classification
    )(concat_calibrators)

    # Final output => Sigmoid for probability
    outputs = tf.keras.layers.Activation('sigmoid')(lattice_out)
    model = tf.keras.Model(inputs=list(inputs.values()), outputs=outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

def train_tf_lattice_model_legacy_sizes(X, y):
    """
    Tune and train the calibrated-lattice model, selecting by validation Brier score.

    The data is split 80/20 (random_state=42). A Keras Tuner RandomSearch
    (5 trials, up to 50 epochs each, batch size 128) minimizes
    'val_brier_score_tf' on the 20% part. A fresh model is then built from
    the best hyperparameters, trained the same way, evaluated on the 20% part,
    and its Brier score, log loss and accuracy are printed.

    Early stopping (patience 5 on 'val_brier_score_tf') uses
    restore_best_weights=True so that, when it triggers, the model is rolled
    back to its best epoch instead of keeping the weights from `patience`
    epochs later.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix; every column becomes one named model input.
    y : array-like
        Binary target.

    Returns
    -------
    tf.keras.Model
        The retrained best model.
    """
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    feature_names = X.columns.tolist()

    # The functional model has one named input per feature, so feed it
    # {feature_name: 1-D array}; labels are passed as plain NumPy arrays too.
    train_dict = {col: X_train[col].values for col in feature_names}
    val_dict = {col: X_val[col].values for col in feature_names}
    y_train_arr = np.asarray(y_train)
    y_val_arr = np.asarray(y_val)

    def model_builder(hp):
        model = build_tf_lattice_custom_monotonic_model_legacy_lattice_sizes(
            hp, feature_names, X_train
        )
        # Recompile (same optimizer and loss) to attach the log-loss and
        # Brier metrics; the function metric is logged under its own name,
        # which is what the tuner objective and early stopping monitor.
        model.compile(
            optimizer=model.optimizer,
            loss='binary_crossentropy',
            metrics=[
                'accuracy',
                tf.keras.metrics.BinaryCrossentropy(name='log_loss'),
                brier_score_tf
            ]
        )
        return model

    def make_early_stopping():
        return tf.keras.callbacks.EarlyStopping(
            monitor='val_brier_score_tf',
            patience=5,
            mode='min',
            restore_best_weights=True
        )

    tuner = kt.RandomSearch(
        model_builder,
        objective=kt.Objective('val_brier_score_tf', direction='min'),
        max_trials=5,
        executions_per_trial=1,
        project_name='tf_lattice_sizes',
        overwrite=True
    )

    tuner.search(
        train_dict, y_train_arr,
        validation_data=(val_dict, y_val_arr),
        epochs=50,
        batch_size=128,
        callbacks=[make_early_stopping()],
        verbose=1
    )

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_model = tuner.hypermodel.build(best_hps)
    best_model.fit(
        train_dict, y_train_arr,
        validation_data=(val_dict, y_val_arr),
        epochs=50,
        batch_size=128,
        # Fresh callback so no state carries over from the search.
        callbacks=[make_early_stopping()],
        verbose=0
    )

    # Evaluate final
    res = best_model.evaluate(val_dict, y_val_arr, verbose=0)
    names = best_model.metrics_names
    res_dict = dict(zip(names, res))
    print("Best TF Lattice hyperparams:", best_hps.values)
    print(f"Brier: {res_dict['brier_score_tf']:.4f}, log_loss: {res_dict['log_loss']:.4f}, acc: {res_dict['accuracy']:.4f}")

    return best_model

In [7]:
###############################################################################
# 6. Ensemble
###############################################################################

def ensemble_predict_proba(X, model_xgb, model_tf, model_lr):
    """
    Average the positive-class probabilities of the three trained models.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix.
    model_xgb : XGBClassifier
        Trained XGB model (queried through predict_proba).
    model_tf : tf.keras.Model
        Trained TensorFlow Lattice model (queried through predict).
    model_lr : sklearn.linear_model.LogisticRegression or Pipeline
        Trained Logistic Regression or Pipeline (queried through predict_proba).

    Returns
    -------
    np.ndarray
        Unweighted mean of the three per-row probabilities.
    """
    # The Keras model is fed one named array per feature column.
    lattice_inputs = {}
    for name in X.columns:
        lattice_inputs[name] = X[name].values

    member_probs = [
        model_xgb.predict_proba(X)[:, 1],            # column 1 = positive class
        model_tf.predict(lattice_inputs).flatten(),  # flatten the model output to 1-D
        model_lr.predict_proba(X)[:, 1],
    ]

    # Equal-weight average, summed in the order XGB, TF Lattice, LogReg.
    return (member_probs[0] + member_probs[1] + member_probs[2]) / 3.0

def predict_submission_ensemble(
    dataset_path,
    model_xgb,
    model_tf,
    model_lr,
    out_filename='submission_ensemble.csv'
):
    """
    Creates a submission file for the ensemble by averaging predicted probabilities
    from the three models: XGB, TF Lattice, and LogisticRegression.

    The CSV at dataset_path must have the columns:
    [Season, LowerTeamID, HigherTeamID, (optional Target), plus feature columns].

    Output: a CSV with columns [ID, Pred], where ID = "YYYY_LLLL_HHHH"
    (each part cast to int and zero-padded to at least 4 characters).

    Parameters
    ----------
    dataset_path : str
        File path to your test dataset (CSV).
    model_xgb : XGBClassifier
        Trained XGB model.
    model_tf : tf.keras.Model
        Trained TF Lattice model.
    model_lr : sklearn.linear_model.LogisticRegression or Pipeline
        Trained LogisticRegression (or pipeline with StandardScaler).
    out_filename : str, optional
        Filename for the output CSV, by default 'submission_ensemble.csv'

    Returns
    -------
    pd.DataFrame
        A DataFrame containing [ID, Pred] for the ensemble submission.
    """
    # Load data
    df = pd.read_csv(dataset_path)

    # Create the ID with vectorized string operations; unlike a row-wise
    # apply, this also works when the frame has no rows.
    id_parts = [
        df[col].astype(int).astype(str).str.zfill(4)
        for col in ('Season', 'LowerTeamID', 'HigherTeamID')
    ]
    df['ID'] = id_parts[0] + '_' + id_parts[1] + '_' + id_parts[2]

    # Build test feature matrix: everything except identifiers and the label
    feature_cols = [
        c for c in df.columns
        if c not in ['Season', 'LowerTeamID', 'HigherTeamID', 'Target', 'GameID', 'ID']
    ]
    X_test = df[feature_cols].copy()

    # Ensemble predictions
    preds = ensemble_predict_proba(X_test, model_xgb, model_tf, model_lr)

    # Build submission dataframe
    submission = pd.DataFrame({'ID': df['ID'], 'Pred': preds})
    submission.to_csv(out_filename, index=False)
    print(f"Ensemble submission saved to {out_filename}")
    return submission

In [8]:
###############################################################################
# 7. Predict Submission for Single Model
###############################################################################

def predict_submission(
    model,
    dataset_path,
    model_type='xgb',
    out_filename='submission.csv'
):
    """
    Generates a submission CSV for a single model of type 'xgb', 'tf', or 'lr'.

    The CSV at dataset_path must have the columns:
    [Season, LowerTeamID, HigherTeamID, (optional Target), plus feature columns].

    Output: CSV with columns [ID, Pred], where ID = "YYYY_LLLL_HHHH"
    (each part cast to int and zero-padded to at least 4 characters).

    Parameters
    ----------
    model : 
        - XGBClassifier if model_type='xgb'
        - tf.keras.Model if model_type='tf'
        - LogisticRegression (or Pipeline) if model_type='lr'
    dataset_path : str
        File path to your test dataset (CSV).
    model_type : str, optional
        One of ['xgb', 'tf', 'lr'].
    out_filename : str, optional
        Filename for the output CSV, by default 'submission.csv'
    
    Returns
    -------
    pd.DataFrame
        A DataFrame containing [ID, Pred] for the single-model submission.

    Raises
    ------
    ValueError
        If model_type is not one of 'xgb', 'tf', 'lr'. This is checked before
        the dataset is read, so a wrong type fails immediately.
    """
    # Fail fast on an unsupported model type, before any file I/O.
    if model_type not in ('xgb', 'tf', 'lr'):
        raise ValueError("model_type must be 'xgb', 'tf', or 'lr'.")

    # Load data
    df = pd.read_csv(dataset_path)

    # Create the ID with vectorized string operations; unlike a row-wise
    # apply, this also works when the frame has no rows.
    id_parts = [
        df[col].astype(int).astype(str).str.zfill(4)
        for col in ('Season', 'LowerTeamID', 'HigherTeamID')
    ]
    df['ID'] = id_parts[0] + '_' + id_parts[1] + '_' + id_parts[2]

    # Build test feature matrix: everything except identifiers and the label
    feature_cols = [
        c for c in df.columns
        if c not in ['Season', 'LowerTeamID', 'HigherTeamID', 'Target', 'GameID', 'ID']
    ]
    X_test = df[feature_cols].copy()

    # Predict probability from the correct model
    if model_type == 'tf':
        # The Keras model is fed one named array per feature column.
        tf_inputs = {col: X_test[col].values for col in X_test.columns}
        probs = model.predict(tf_inputs).flatten()
    else:
        # 'xgb' and 'lr' both expose the scikit-learn predict_proba API;
        # column 1 is the positive class.
        probs = model.predict_proba(X_test)[:, 1]

    # Build submission
    submission = pd.DataFrame({'ID': df['ID'], 'Pred': probs})
    submission.to_csv(out_filename, index=False)
    print(f"{model_type.upper()} submission saved to {out_filename}")
    return submission

In [9]:
# Path to the CSV that load_model_data reads for training; the submission cell
# at the end reuses the same file. Change the dataset path if needed:
dataset_path = "7_game_window_dataset.csv"

In [10]:
# X: feature columns, y: the 'Target' column, df: the full frame as read from disk.
X, y, df = load_model_data(dataset_path)

In [11]:
# Class balance check: share of rows for each Target value.
y.value_counts(normalize=True)

Target
0    0.512318
1    0.487682
Name: proportion, dtype: float64

In [55]:
############################################################################
# Train XGB
############################################################################
print("\n==== Training XGB (Optimizing Brier) ====")
# NOTE: `lgb_model` is a legacy variable name — it holds the XGBClassifier
# returned by train_xgb_model, not a LightGBM model.
lgb_model = train_xgb_model(X, y)


==== Training XGB (Optimizing Brier) ====
Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best XGB hyperparams: {'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 200}
XGB val Brier: 0.2151, LogLoss: 0.6183, Accuracy: 0.6530


In [56]:
############################################################################
# Train TF Lattice
############################################################################
print("\n=== Training TF Lattice (Older PWLCalibration) ===")
# Runs the Keras Tuner search and returns the retrained best Keras model.
tf_lattice_model = train_tf_lattice_model_legacy_sizes(X, y)


=== Training TF Lattice (Older PWLCalibration) ===


ValueError: Exception encountered when calling layer 'pwl_calibration' (type PWLCalibration).

A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.ops`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


Call arguments received by layer 'pwl_calibration' (type PWLCalibration):
  • inputs=<KerasTensor shape=(None, 1), dtype=float32, sparse=False, ragged=False, name=window_score_avg_diff>

In [12]:
############################################################################
# Train Logistic Regression
############################################################################
print("\n==== Training Logistic Regression (Optimizing Brier) ====")
# NOTE: `pt_model` is a legacy variable name — it holds the scikit-learn
# Pipeline (scaler + logistic regression) returned by train_logistic_regression.
pt_model = train_logistic_regression(X, y)


==== Training Logistic Regression (Optimizing Brier) ====
Fitting 3 folds for each of 32 candidates, totalling 96 fits
Best LogisticRegression hyperparams: {'logreg__C': 1, 'logreg__max_iter': 100, 'logreg__solver': 'lbfgs'}
LogReg val Brier: 0.2138, LogLoss: 0.6153, Accuracy: 0.6573


In [None]:
############################################################################
# Generate Submission
############################################################################
# Typically you'd have a separate "test" or future dataset for 2025 predictions
# but here we'll just reuse the same dataset for demonstration.
print("\n==== Generating Submissions ====")
# predict_submission only accepts model_type 'xgb', 'tf' or 'lr' and has no
# device argument. `lgb_model` and `pt_model` are legacy variable names that
# hold the XGBoost classifier and the logistic-regression pipeline; the output
# filenames are kept as they were.
# Requires all three training cells above to have completed successfully.
predict_submission(lgb_model, dataset_path, model_type='xgb', out_filename='submission_lgb.csv')
predict_submission(tf_lattice_model, dataset_path, model_type='tf', out_filename='submission_tf.csv')
predict_submission(pt_model, dataset_path, model_type='lr', out_filename='submission_pt.csv')