In [4]:
import pandas as pd
import numpy as np

# =========================================================
# 1. DATA LOADING
# =========================================================

def load_data(base=None):
    """Load the xG and xA event tables and make match labels date-unique.

    Each table's ``label`` column is rewritten as ``"<label> <date>"`` so
    that two fixtures between the same teams on different dates get
    distinct labels.

    Args:
        base: Directory that contains the two CSV files. A trailing slash is
            optional. Defaults to the original hard-coded project folder.

    Returns:
        Tuple ``(df_xg, df_xa)`` of DataFrames read from
        ``xg_all DNK_1_Division_2025_2026.csv`` and
        ``xA_all DNK_1_Division_2025_2026.csv``.
    """
    import os

    if base is None:
        base = "C:/Users/Seamus-admin/Documents/GitHub/AC-Horsens-First-Team/DNK_1_Division_2025_2026/"

    def _read_with_dated_label(filename):
        # Read one CSV and append the match date to its label column.
        frame = pd.read_csv(os.path.join(base, filename))
        frame["label"] = frame["label"] + " " + frame["date"]
        return frame

    df_xg = _read_with_dated_label("xg_all DNK_1_Division_2025_2026.csv")
    df_xa = _read_with_dated_label("xA_all DNK_1_Division_2025_2026.csv")

    return df_xg, df_xa

def filter_before_date(df, cutoff_date):
    """Return the rows of ``df`` whose ``date`` is strictly before the cutoff.

    Args:
        df: DataFrame with a ``date`` column holding datetimes or
            date-like strings.
        cutoff_date: Anything ``pd.to_datetime`` accepts (e.g. "2025-11-30").

    Returns:
        The filtered DataFrame. The ``date`` column keeps its original
        dtype; it is only coerced for the comparison.
    """
    cutoff = pd.to_datetime(cutoff_date)
    # Coerce so that string dates (as read from CSV) compare against the
    # Timestamp instead of raising TypeError.
    dates = pd.to_datetime(df["date"])
    return df[dates < cutoff]

# =========================================================
# 2. MATCH SIMULATION & EXPECTED POINTS (xG / xA)
# =========================================================

def simulate_goals(values, num_simulations=10000, rng=None):
    """Simulate goal totals by treating each value as a scoring probability.

    Every entry of ``values`` is an independent chance that becomes a goal
    when a uniform draw in [0, 1) falls below it.

    Args:
        values: Array-like of per-chance probabilities (list, Series or
            ndarray); it is flattened before use.
        num_simulations: Number of simulated matches.
        rng: Optional ``numpy.random.Generator`` for reproducible draws.
            When omitted, the global ``np.random`` state is used, as before.

    Returns:
        Integer array of length ``num_simulations`` holding the goals
        scored in each simulation.
    """
    # One row per chance so it broadcasts against the draw matrix.
    probs = np.asarray(values, dtype=float).reshape(-1, 1)
    shape = (probs.shape[0], num_simulations)
    draws = np.random.random(shape) if rng is None else rng.random(shape)
    return (draws < probs).sum(axis=0)

def simulate_match(values_a, values_b, num_simulations=10000):
    """Simulate a match between two sets of chances.

    Goals for each side are drawn with ``simulate_goals`` and the outcomes
    are tallied across all simulations.

    Returns:
        Tuple ``(points_a, points_b, win_prob_a, draw_prob, win_prob_b)``
        where points are the average of 3 per win and 1 per draw.
    """
    scored_a = simulate_goals(values_a, num_simulations)
    scored_b = simulate_goals(values_b, num_simulations)

    # Outcome counts across all simulated matches.
    a_ahead = (scored_a > scored_b).sum()
    level = (scored_a == scored_b).sum()
    b_ahead = (scored_a < scored_b).sum()

    return (
        (a_ahead * 3 + level) / num_simulations,
        (b_ahead * 3 + level) / num_simulations,
        a_ahead / num_simulations,
        level / num_simulations,
        b_ahead / num_simulations,
    )

def calculate_expected_points(df, value_column):
    """Simulate every match in ``df`` and return per-team expectations.

    Rows are grouped by ``label``. A group is simulated only when it
    contains exactly two distinct ``team_name`` values; other groups are
    skipped.

    Args:
        df: Event-level DataFrame with ``label``, ``team_name``, ``date``
            and ``value_column`` columns.
        value_column: Name of the column holding the per-event values that
            are passed to ``simulate_match`` as scoring probabilities.

    Returns:
        DataFrame with two rows per simulated match (one per team) and the
        columns ``label``, ``date``, ``team_name``, ``expected_points``,
        ``win_probability``, ``draw_probability``, ``loss_probability``.
        The columns are present even when no match could be simulated, so
        downstream merges on them do not fail.
    """
    columns = [
        "label", "date", "team_name",
        "expected_points", "win_probability",
        "draw_probability", "loss_probability",
    ]
    records = []

    for label, match_df in df.groupby("label"):
        teams = match_df["team_name"].unique()
        if len(teams) != 2:
            continue

        team_a, team_b = teams
        vals_a = match_df.loc[match_df["team_name"] == team_a, value_column].to_numpy()
        vals_b = match_df.loc[match_df["team_name"] == team_b, value_column].to_numpy()

        # Can be empty when a team name is NaN (NaN never equals itself).
        if len(vals_a) == 0 or len(vals_b) == 0:
            continue

        pts_a, pts_b, win_a, draw, win_b = simulate_match(vals_a, vals_b)
        date = match_df["date"].iloc[0]

        # One record per side; the opponent's win is this side's loss.
        for team, points, win, loss in (
            (team_a, pts_a, win_a, win_b),
            (team_b, pts_b, win_b, win_a),
        ):
            records.append({
                "label": label,
                "date": date,
                "team_name": team,
                "expected_points": points,
                "win_probability": win,
                "draw_probability": draw,
                "loss_probability": loss,
            })

    return pd.DataFrame(records, columns=columns)

def process_data(df_xg, df_xa, xg_col="321", xa_col="318.0"):
    """Combine xG-based and xA-based match simulations into one table.

    Both inputs are run through ``calculate_expected_points`` and
    inner-joined on ``label``, ``date`` and ``team_name``, so a match that
    is missing from either source is dropped. Each metric is the plain
    average of its xG and xA variants.

    Args:
        df_xg: Event-level table used for the xG simulation.
        df_xa: Event-level table used for the xA simulation.
        xg_col: Column in ``df_xg`` holding the per-event values.
        xa_col: Column in ``df_xa`` holding the per-event values.

    Returns:
        DataFrame with columns ``team_name``, ``label``, ``date`` (as
        datetime), ``expected_points``, ``win_probability``,
        ``draw_probability`` and ``loss_probability``.
    """
    metrics = [
        "expected_points", "win_probability",
        "draw_probability", "loss_probability",
    ]
    output_columns = ["team_name", "label", "date", *metrics]

    expected_xg = calculate_expected_points(df_xg, xg_col)
    expected_xa = calculate_expected_points(df_xa, xa_col)

    # Nothing to merge: return an empty, correctly-shaped table rather than
    # failing on missing merge keys.
    if expected_xg.empty or expected_xa.empty:
        empty = pd.DataFrame(columns=output_columns)
        empty["date"] = pd.to_datetime(empty["date"])
        return empty

    merged = expected_xg.merge(
        expected_xa,
        on=["label", "date", "team_name"],
        suffixes=("_xg", "_xa"),
    )

    for metric in metrics:
        merged[metric] = (merged[f"{metric}_xg"] + merged[f"{metric}_xa"]) / 2

    # Explicit copy so the date assignment below writes to an independent
    # frame and cannot trigger SettingWithCopyWarning.
    result = merged[output_columns].copy()
    result["date"] = pd.to_datetime(result["date"])
    return result

# =========================================================
# 3. HOME / AWAY EXTRACTION
# =========================================================

def extract_home_away(df):
    """Add ``home_team``, ``away_team`` and ``is_home`` columns from ``label``.

    Labels are expected to look like ``"Esbjerg vs Hobro 2025-10-16"``:
    the text before the first ``" vs "`` is the home team, and the text
    after it, minus the trailing space-separated token, is the away team.

    Args:
        df: DataFrame with string ``label`` and ``team_name`` columns.

    Returns:
        A copy of ``df`` with the three extra columns; the input is left
        untouched. Works on an empty frame as well.

    Raises:
        IndexError: If a label does not contain ``" vs "``.
    """
    out = df.copy()

    home_names = []
    away_names = []
    for text in out["label"]:
        sides = text.split(" vs ")
        home_names.append(sides[0])
        # Drop the trailing date token from the away side.
        away_names.append(sides[1].rsplit(" ", 1)[0])

    out["home_team"] = home_names
    out["away_team"] = away_names
    # Column comparison instead of a row-wise apply: faster, and still
    # yields a boolean column when the frame has no rows.
    out["is_home"] = out["team_name"] == out["home_team"]
    return out

# =========================================================
# 4. EXPONENTIAL DECAY FORM
# =========================================================

def exp_decay_weights(n, decay=0.85):
    """Return the float array ``[decay**0, decay**1, ..., decay**(n-1)]``.

    The weights are not normalised; an ``n`` of zero gives an empty array.
    """
    powers = (decay ** step for step in range(n))
    return np.fromiter(powers, dtype=float)

def compute_form(df, team_name, n=None, home_only=False, away_only=False, decay=True):
    """Summarise a team's recent win/draw/loss probabilities.

    The team's rows are ordered newest first, optionally restricted to
    home or away matches and to the ``n`` most recent, then averaged.

    Args:
        df: Match table with ``team_name``, ``date``, ``win_probability``,
            ``draw_probability`` and ``loss_probability`` columns
            (``is_home`` is also needed when a venue filter is set).
        team_name: Team to summarise.
        n: Keep only the ``n`` most recent matches; ``None`` keeps all.
        home_only: Keep only rows where ``is_home`` is True.
        away_only: Keep only rows where ``is_home`` is False.
        decay: If true, weight matches with ``exp_decay_weights`` (newest
            match weighted highest); otherwise use a plain mean.

    Returns:
        Dict with keys ``win``, ``draw`` and ``loss`` that sum to 1. All
        three are 1/3 when no rows remain or the total is not positive.
    """
    neutral = {"win": 1/3, "draw": 1/3, "loss": 1/3}
    source_columns = {
        "win": "win_probability",
        "draw": "draw_probability",
        "loss": "loss_probability",
    }

    recent = df.loc[df["team_name"] == team_name].sort_values("date", ascending=False)

    if home_only:
        recent = recent.loc[recent["is_home"].eq(True)]
    if away_only:
        recent = recent.loc[recent["is_home"].eq(False)]
    if n is not None:
        recent = recent.head(n)

    if recent.empty:
        return neutral

    if decay:
        # Normalised weights; index 0 is the most recent match.
        weights = exp_decay_weights(len(recent))
        weights = weights / weights.sum()
        scores = {
            outcome: np.sum(recent[column].values * weights)
            for outcome, column in source_columns.items()
        }
    else:
        scores = {
            outcome: recent[column].mean()
            for outcome, column in source_columns.items()
        }

    total = scores["win"] + scores["draw"] + scores["loss"]
    if total <= 0:
        return neutral

    return {outcome: value / total for outcome, value in scores.items()}

# =========================================================
# 5. ELO SYSTEM
# =========================================================

def elo_expected(elo_a, elo_b):
    """Return side A's expected score (0 to 1) against side B.

    Uses the logistic Elo curve on a 400-point scale: equal ratings give
    0.5 and a 400-point lead gives 10/11.
    """
    rating_gap = (elo_b - elo_a) / 400
    return 1 / (1 + 10 ** rating_gap)

def compute_elos(df, k=200, home_advantage=0.0):
    """Compute Elo ratings from simulated match probabilities.

    Matches are processed in date order. The "result" fed to the Elo
    update is the home side's ``win_probability + 0.5 * draw_probability``
    rather than an actual scoreline, and the update is zero-sum.

    Args:
        df: Match table with ``date``, ``label``, ``team_name``,
            ``home_team``, ``away_team``, ``win_probability`` and
            ``draw_probability`` columns (two rows per match).
        k: Elo K-factor scaling each rating change.
        home_advantage: Rating points added to the home side when computing
            its expected score. The default of 0.0 applies no home
            advantage, which matches the previous behaviour.

    Returns:
        Dict mapping each team in ``team_name`` to its final rating; every
        team starts at 1500.0.

    Notes:
        A match is skipped when it does not have exactly two teams, or when
        the home/away names parsed from the label do not match the two
        ``team_name`` values (previously this raised IndexError/KeyError).
    """
    df = df.sort_values("date")
    elos = {team: 1500.0 for team in df["team_name"].unique()}

    # sort=False keeps groups in order of first appearance, i.e. date order.
    for _, match_df in df.groupby("label", sort=False):
        match_teams = match_df["team_name"].unique()
        if len(match_teams) != 2:
            continue

        home_team = match_df["home_team"].iloc[0]
        away_team = match_df["away_team"].iloc[0]
        if {home_team, away_team} != set(match_teams):
            continue

        row_home = match_df.loc[match_df["team_name"] == home_team].iloc[0]

        # "Result" from probabilities (0 to 1), seen from the home side.
        res_home = row_home["win_probability"] + 0.5 * row_home["draw_probability"]

        exp_home = elo_expected(elos[home_team] + home_advantage, elos[away_team])
        change = k * (res_home - exp_home)

        elos[home_team] += change
        elos[away_team] -= change

    return elos

# =========================================================
# 6. WEIGHTED FORM COMBINATION (HOME / AWAY AWARE)
# =========================================================

def compute_weighted_form(team, df, is_home_match):
    """Blend six ``compute_form`` snapshots into one win/draw/loss estimate.

    Snapshots and weights, in order:
        last 3 overall (0.25), last 5 overall (0.20),
        last 3 at this venue (0.20), last 5 at this venue (0.15),
        season at this venue (0.10), season overall (0.10).

    "This venue" means home matches when ``is_home_match`` is true and
    away matches otherwise.

    Returns:
        Dict with keys ``win``, ``draw`` and ``loss`` normalised to sum to
        1, or 1/3 each when the blended total is not positive.
    """
    # Venue filter forwarded to compute_form for the home/away buckets.
    venue = {"home_only": True} if is_home_match else {"away_only": True}

    weighted_forms = [
        (0.25, compute_form(df, team, n=3)),
        (0.20, compute_form(df, team, n=5)),
        (0.20, compute_form(df, team, n=3, **venue)),
        (0.15, compute_form(df, team, n=5, **venue)),
        (0.10, compute_form(df, team, n=None, **venue)),
        (0.10, compute_form(df, team, n=None)),
    ]

    blended = {}
    for outcome in ("win", "draw", "loss"):
        terms = [weight * form[outcome] for weight, form in weighted_forms]
        running = terms[0]
        for term in terms[1:]:
            running = running + term
        blended[outcome] = running

    total = blended["win"] + blended["draw"] + blended["loss"]
    if total <= 0:
        return {"win": 1/3, "draw": 1/3, "loss": 1/3}

    return {outcome: value / total for outcome, value in blended.items()}

# =========================================================
# 7. FINAL MATCH PREDICTION
# =========================================================

def predict_match(team1, team2, df, elos, team1_is_home=True):
    """Predict an upcoming match by blending Elo and weighted form.

    Args:
        team1: First team's name.
        team2: Second team's name.
        df: Historical match table passed to ``compute_weighted_form``.
        elos: Dict of team ratings; teams not present default to 1500.0.
        team1_is_home: Pass True if team1 is the home team in the upcoming
            match.

    Returns:
        Dict with both team names, the home flag, normalised win/draw
        probabilities, expected points per team (3 * win + draw) and the
        raw Elo ratings that were used.
    """
    form1 = compute_weighted_form(team1, df, is_home_match=team1_is_home)
    form2 = compute_weighted_form(team2, df, is_home_match=not team1_is_home)

    elo1 = elos.get(team1, 1500.0)
    elo2 = elos.get(team2, 1500.0)

    # Elo expected scores for each side; no venue adjustment is applied.
    elo_share1 = elo_expected(elo1, elo2)
    elo_share2 = 1 - elo_share1

    # Blend weights: 60% Elo, 40% form.
    w_elo, w_form = 0.6, 0.4

    win1 = w_elo * elo_share1 + w_form * form1["win"]
    win2 = w_elo * elo_share2 + w_form * form2["win"]
    # The draw estimate comes from form only (mean of both sides).
    draw = (form1["draw"] + form2["draw"]) / 2

    total = win1 + win2 + draw
    if total > 0:
        win1, win2, draw = win1 / total, win2 / total, draw / total
    else:
        win1 = win2 = draw = 1/3

    return {
        "team1": team1,
        "team2": team2,
        "team1_is_home": team1_is_home,
        "team1_win_prob": win1,
        "draw_prob": draw,
        "team2_win_prob": win2,
        "expected_points_team1": 3 * win1 + draw,
        "expected_points_team2": 3 * win2 + draw,
        "elo_team1": elo1,
        "elo_team2": elo2,
    }

# =========================================================
# 8. MAIN EXECUTION EXAMPLE
# =========================================================

if __name__ == "__main__":
    # Build the per-team, per-match probability table from the raw xG/xA
    # event files, then tag each row with its home/away side.
    df_xg, df_xa = load_data()
    merged_df = process_data(df_xg, df_xa, xg_col="321", xa_col="318.0")
    merged_df = extract_home_away(merged_df)

    # -----------------------------------------
    # Choose the cutoff date: only matches strictly before it are used,
    # so the prediction never sees the match being predicted.
    # -----------------------------------------
    cutoff_date = "2025-11-30"   # <-- match day
    merged_before = filter_before_date(merged_df, cutoff_date)
    # -----------------------------------------

    # Ratings are computed from the pre-cutoff matches only.
    elos = compute_elos(merged_before)

    # Elo table, highest rating first.
    df_elos = (
        pd.DataFrame(list(elos.items()), columns=["team", "elo"])
        .sort_values("elo", ascending=False)
        .reset_index(drop=True)
    )

    # Rank is the 1-based position after sorting.
    df_elos["rank"] = df_elos.index + 1
    df_elos = df_elos[["rank", "team", "elo"]]
    df_elos = df_elos.round(2)
    print(df_elos.to_string(index=False))

    # Prediction using ONLY data before the match
    team1 = "Horsens"
    team2 = "AaB"
    team1_is_home = True

    result = predict_match(team1, team2, merged_before, elos, team1_is_home=team1_is_home)

    # Human-readable summary of the prediction.
    print(f"Match: {result['team1']} ({'Home' if result['team1_is_home'] else 'Away'}) vs {result['team2']}")
    print(f"{result['team1']} win probability: {result['team1_win_prob']:.1%}")
    print(f"Draw probability: {result['draw_prob']:.1%}")
    print(f"{result['team2']} win probability: {result['team2_win_prob']:.1%}")
    print(f"ELO {result['team1']}: {result['elo_team1']:.1f}")
    print(f"ELO {result['team2']}: {result['elo_team2']:.1f}")
    print(f"{team1} expected points: {result['expected_points_team1']:.2f}")
    print(f"{team2} expected points: {result['expected_points_team2']:.2f}")


 rank          team     elo
    1        Lyngby 1623.14
    2       Esbjerg 1549.34
    3       Kolding 1541.40
    4      Hvidovre 1532.26
    5      Hillerød 1510.33
    6 Aarhus Fremad 1507.04
    7           AaB 1502.60
    8    Middelfart 1478.23
    9       HB Køge 1464.47
   10       Horsens 1458.84
   11          B 93 1428.07
   12         Hobro 1404.27
Match: Horsens (Home) vs AaB
Horsens win probability: 34.8%
Draw probability: 23.1%
AaB win probability: 42.1%
ELO Horsens: 1458.8
ELO AaB: 1502.6
Horsens expected points: 1.28
AaB expected points: 1.49
