In [None]:
import h2o
import mlflow
import mlflow.h2o
import numpy as np
import pandas as pd
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType
from scipy.stats import poisson
from sklearn.metrics import classification_report

In [None]:
# Low-level MLflow tracking client, bound to the currently configured tracking URI.
# NOTE(review): `client` is not referenced by any other cell visible here — confirm it is still needed.
client = MlflowClient()

In [None]:
# Connect to an H2O cluster (h2o.init starts a local one if none is reachable).
# This must run before the H2OFrame conversions and H2O model scoring further down.
h2o.init()

In [None]:
# Template for a model directory inside a local `mlruns/` store.
# Placeholders, in order: experiment_id, run_id.
# The path is relative, so it resolves against the notebook's working directory.
model_pth = "mlruns/{}/{}/artifacts/model/"

#### Goals Probability

In [None]:
# Select the run of the "Probability Matrix" experiment with the lowest AIC.
experiment_name = "Probability Matrix"

# set_experiment also makes this the active experiment for the session.
experiment = mlflow.set_experiment(experiment_name)
experiment_id = experiment.experiment_id

runs = mlflow.search_runs(
    # search_runs documents this argument as a list of experiment IDs.
    experiment_ids=[experiment_id],
    # NOTE(review): ViewType.ALL also returns deleted runs — confirm that is intended.
    run_view_type=ViewType.ALL,
)
if runs.empty:
    # Fail with a clear message instead of an opaque KeyError/IndexError below.
    raise RuntimeError(f"No runs found for experiment {experiment_name!r}")

# Ascending sort puts the smallest metric first; pandas places NaN metrics last.
run_id = runs.sort_values(by="metrics.aic").iloc[0]["run_id"]

In [None]:
# Load through the run URI so MLflow resolves the artifact location from the
# tracking store that produced `run_id`, rather than assuming a local
# `mlruns/<experiment>/<run>/artifacts` layout relative to the working directory.
glm = mlflow.statsmodels.load_model(f"runs:/{run_id}/model")

In [None]:
# Columns kept from the cleansed match data.
col = ["home", "away", "hg", "ag"]
# Maps each side to the other side.
opponent = {"away": "home", "home": "away"}

# Read the cleansed data and keep only the 2024 season, restricted to `col`.
j1 = (
    pd.read_parquet("../data/cleansed/cleansed_data.parquet")
    .query("season == 2024")[col]
    .copy()
)

In [None]:
def predict_xg(model, home, away, max_goals=6):
    """Build the score-line probability matrix for a single fixture.

    The model is queried twice for an expected-goals rate: once for the home
    side (``stadium=1``) and once for the away side (``stadium=0``). Each rate
    parameterises a Poisson distribution over ``0..max_goals`` goals, and the
    two distributions are combined with an outer product (i.e. the two goal
    counts are treated as independent).

    Parameters
    ----------
    model
        Object whose ``predict`` accepts a one-row ``DataFrame`` with the
        columns ``team``, ``opponent`` and ``stadium`` and returns a
        one-element array-like (pandas Series, NumPy array, ...).
    home, away
        Team identifiers, passed to the model unchanged.
    max_goals : int, default 6
        Largest goal count considered per side. Probability mass above it is
        dropped, so the matrix sums to slightly less than 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_goals + 1, max_goals + 1)``; entry ``[i, j]`` is
        the probability that the home side scores ``i`` and the away side ``j``.
    """

    def _expected_goals(team, opponent, stadium):
        # One-row frame in the layout the model's predict() is called with.
        fixture = pd.DataFrame(
            data={"team": team, "opponent": opponent, "stadium": stadium},
            index=[0],
        )
        # np.asarray accepts Series and ndarray results alike.
        return np.asarray(model.predict(fixture)).ravel()[0]

    xg_h = _expected_goals(home, away, stadium=1)
    xg_a = _expected_goals(away, home, stadium=0)

    # Evaluate each PMF over all goal counts in one vectorised call.
    goals = np.arange(max_goals + 1)
    return np.outer(poisson.pmf(goals, xg_h), poisson.pmf(goals, xg_a))

In [None]:
# Score-line probability matrix for one example fixture; team IDs are passed as strings.
# Despite its name, `xg` holds the matrix returned by predict_xg, not a single expected-goals value.
xg = predict_xg(glm, home="20", away="24")
xg

#### Handicap Results

In [None]:
# Select the run of the "Handicap Prediction" experiment with the lowest log loss.
experiment_name = "Handicap Prediction"

# set_experiment also makes this the active experiment for the session.
experiment = mlflow.set_experiment(experiment_name)
experiment_id = experiment.experiment_id

runs = mlflow.search_runs(
    # search_runs documents this argument as a list of experiment IDs.
    experiment_ids=[experiment_id],
    # NOTE(review): ViewType.ALL also returns deleted runs — confirm that is intended.
    run_view_type=ViewType.ALL,
)
if runs.empty:
    # Fail with a clear message instead of an opaque KeyError/IndexError below.
    raise RuntimeError(f"No runs found for experiment {experiment_name!r}")

# Ascending sort puts the smallest metric first; pandas places NaN metrics last.
run_id = runs.sort_values(by="metrics.logloss").iloc[0]["run_id"]

In [None]:
# Load through the run URI so MLflow resolves the artifact location from the
# tracking store that produced `run_id`, rather than assuming a local
# `mlruns/<experiment>/<run>/artifacts` layout relative to the working directory.
clf = mlflow.h2o.load_model(f"runs:/{run_id}/model")

In [None]:
# Select the run of the "Bet Decision" experiment with the lowest log loss.
experiment_name = "Bet Decision"

# set_experiment also makes this the active experiment for the session.
experiment = mlflow.set_experiment(experiment_name)
experiment_id = experiment.experiment_id

runs = mlflow.search_runs(
    # search_runs documents this argument as a list of experiment IDs.
    experiment_ids=[experiment_id],
    # NOTE(review): ViewType.ALL also returns deleted runs — confirm that is intended.
    run_view_type=ViewType.ALL,
)
if runs.empty:
    # Fail with a clear message instead of an opaque KeyError/IndexError below.
    raise RuntimeError(f"No runs found for experiment {experiment_name!r}")

# Ascending sort puts the smallest metric first; pandas places NaN metrics last.
run_id = runs.sort_values(by="metrics.logloss").iloc[0]["run_id"]

In [None]:
# Load through the run URI so MLflow resolves the artifact location from the
# tracking store that produced `run_id`, rather than assuming a local
# `mlruns/<experiment>/<run>/artifacts` layout relative to the working directory.
decider = mlflow.h2o.load_model(f"runs:/{run_id}/model")

In [None]:
# Featured match data; this rebinds `j1`, replacing the cleansed frame loaded earlier.
j1 = pd.read_parquet("../data/featured/j1_league.parquet")

# Evaluation frame: 2024 rows only, with the now-constant season column removed.
season_2024 = j1.query("season == 2024")
test = season_2024.drop(columns=["season"])

In [None]:
def recommend_bet(clf, decider, data):
    """Recommend which side to back for one match, or to skip the bet.

    Two models are applied in sequence: ``clf`` predicts the outcome and its
    class probabilities, then ``decider`` judges from the predicted label and
    its probability whether the bet should be placed.

    Parameters
    ----------
    clf
        Model whose ``predict`` takes an ``H2OFrame`` and returns a frame with
        the columns ``predict``, ``A`` and ``H``.
    decider
        Model whose ``predict`` takes an ``H2OFrame`` with the columns
        ``predict`` and ``probability`` and returns a frame with a ``predict``
        column; a value equal to 1 means "place the bet".
    data : dict
        Scalar feature values for a single match, keyed by column name.

    Returns
    -------
    str
        ``"Home Team"``, ``"Away Team"`` or ``"No Bet"``.
    """
    features = pd.DataFrame(data, index=[0])

    # Stage 1: outcome prediction; keep the larger of the two class probabilities.
    outcome = clf.predict(h2o.H2OFrame(features)).as_data_frame()
    outcome["probability"] = outcome[["A", "H"]].max(axis=1)

    # Stage 2: the decider only sees the predicted label and that probability.
    decider_input = h2o.H2OFrame(outcome[["predict", "probability"]])
    decision = decider.predict(decider_input).as_data_frame()["predict"].iloc[0]

    # Explicit `== 1` matches the `decision == 1` filter used for the batch
    # evaluation and keeps a NaN decision from being read as "bet".
    if not decision == 1:
        return "No Bet"

    return "Home Team" if outcome["predict"].iloc[0] == "H" else "Away Team"

In [None]:
# Recommendation for the last row of the 2024 evaluation frame.
# NOTE(review): the row dict carries every column of `test` — confirm the classifier only uses its feature columns.
last_row = test.iloc[-1].to_dict()
recommend_bet(clf, decider, last_row)

In [None]:
# Score the whole 2024 evaluation frame with the outcome classifier.
test_h2o = h2o.H2OFrame(test)
prediction = clf.predict(test_h2o)

# Back in pandas: add the larger class probability and the recorded result.
# `res` is re-indexed from 0 so it aligns with the prediction rows.
test_pred = prediction.as_data_frame().assign(
    probability=lambda scored: scored[["A", "H"]].max(axis=1),
    actual=test["res"].reset_index(drop=True),
)

In [None]:
# The decider is scored on the predicted label and its probability only.
decider_inputs = test_pred.loc[:, ["predict", "probability"]]
dscn_h2o = h2o.H2OFrame(decider_inputs)
dscn_pred = decider.predict(dscn_h2o)

In [None]:
# Attach the decider's verdict to the per-match prediction table.
decisions = dscn_pred.as_data_frame()
test_pred["decision"] = decisions["predict"]

In [None]:
# Keep only the matches the decider flagged (decision == 1), then split
# them into ground truth and predicted labels for the report.
pred = test_pred[test_pred["decision"] == 1]
y_true = pred["actual"]
y_hat = pred["predict"]

In [None]:
# classification_report returns a multi-line string; print it so the line breaks render.
report = classification_report(y_true, y_hat)
print(report)