In [6]:
import pandas as pd
import matplotlib.pyplot as plt
# Load the historical table that the analysis cells below operate on.
HISTORICAL_DATA_PATH = 'data/historical/historical_data.csv'
df = pd.read_csv(HISTORICAL_DATA_PATH)

In [11]:
# Keep only rows that have a result and all three odds columns populated
required_cols = ["FTR", "B365H", "B365D", "B365A"]
df = df.dropna(subset=required_cols)

# The market favorite is the odds column holding the smallest value in the row
# (idxmin reports the first such column when several are tied)
odds_only = df[required_cols[1:]]
df["MarketFavorite"] = odds_only.idxmin(axis=1)

# Determine if the favorite won
def favorite_result(row):
    """Return "Correct" when the favorite's outcome matches the result, else "Wrong".

    `row` is indexed by key: "MarketFavorite" holds the name of the favorite's
    odds column and "FTR" holds the result code it is checked against.
    """
    # Odds column -> result code that counts as a hit for that favorite
    outcome_for = (("B365H", "H"), ("B365D", "D"), ("B365A", "A"))
    for odds_col, outcome in outcome_for:
        if row["MarketFavorite"] == odds_col and row["FTR"] == outcome:
            return "Correct"
    return "Wrong"

# Label every row by whether the favorite's outcome occurred
df["FavoriteResult"] = df.apply(favorite_result, axis=1)

# The favorite's odds are the smallest of the three odds in the row
odds_columns = ["B365H", "B365D", "B365A"]
df["FavOdd"] = df[odds_columns].min(axis=1)

# Assign each favorite's odds to a coarse range
bucket_edges = [0.99, 1.5, 2.0, 3.0, 5.0, 10.0, 20.0]
df["OddBucket"] = pd.cut(df["FavOdd"], bins=bucket_edges, include_lowest=True)

# Group and summarize: one row per odds bucket, one column per outcome label.
# The reindex guarantees that both outcome columns exist even when the data
# contains only one of the two labels, so the "Correct" lookup below cannot
# fail with a KeyError.
bucket_stats = (
    df.groupby("OddBucket", observed=True)["FavoriteResult"]
    .value_counts()
    .unstack(fill_value=0)
    .reindex(columns=["Correct", "Wrong"], fill_value=0)
)

# Add the two outcome columns explicitly instead of summing every column present
bucket_stats["Total"] = bucket_stats["Correct"] + bucket_stats["Wrong"]
bucket_stats["Accuracy %"] = (bucket_stats["Correct"] / bucket_stats["Total"]) * 100

# Final output
report_cols = ["Correct", "Wrong", "Total", "Accuracy %"]

print("📊 Overall Accuracy of Market Favorite:")
overall = df["FavoriteResult"].eq("Correct").mean() * 100
print(f"   {overall:.2f}%\n")

print("📈 Market Favorite Accuracy by Odds Bucket:\n")
print(bucket_stats[report_cols].round(2))

# Highlight top and bottom buckets (the bottom one only among buckets with rows)
ranked_desc = bucket_stats.sort_values("Accuracy %", ascending=False)
best = ranked_desc.head(1)
populated = bucket_stats[bucket_stats["Total"] > 0]
worst = populated.sort_values("Accuracy %", ascending=True).head(1)

print("\n✅ Best Performing Bucket:")
print(best[report_cols].round(2))

print("\n❌ Worst Performing Bucket:")
print(worst[report_cols].round(2))

# Show high-odds favorites that won.
# NOTE(review): FavOdd is the row-wise minimum of B365H/B365D/B365A, so
# FavOdd > 3.0 requires all three odds to exceed 3.0. Assuming these are
# decimal odds, that means 1/H + 1/D + 1/A < 1, so this selection is expected
# to be empty. Lower HIGH_ODDS_THRESHOLD if long-priced favorites are the
# real target.
HIGH_ODDS_THRESHOLD = 3.0
is_high_odds = df["FavOdd"] > HIGH_ODDS_THRESHOLD
is_win = df["FavoriteResult"] == "Correct"
high_odds_wins = df[is_high_odds & is_win]

print(f"\n📌 Matches Where Favorite Had Odds > {HIGH_ODDS_THRESHOLD} but Still Won:")
if high_odds_wins.empty:
    # Say so explicitly instead of printing an "Empty DataFrame" repr
    print("   (no matches satisfy this filter)")
else:
    print(high_odds_wins[["Date", "HomeTeam", "AwayTeam", "FTR", "MarketFavorite", "FavOdd"]].head(10))


📊 Overall Accuracy of Market Favorite:
   53.76%

📈 Market Favorite Accuracy by Odds Bucket:

FavoriteResult  Correct  Wrong  Total  Accuracy %
OddBucket                                        
(0.989, 1.5]       2396    815   3211       74.62
(1.5, 2.0]         2669   2244   4913       54.33
(2.0, 3.0]         2294   3271   5565       41.22

✅ Best Performing Bucket:
FavoriteResult  Correct  Wrong  Total  Accuracy %
OddBucket                                        
(0.989, 1.5]       2396    815   3211       74.62

❌ Worst Performing Bucket:
FavoriteResult  Correct  Wrong  Total  Accuracy %
OddBucket                                        
(2.0, 3.0]         2294   3271   5565       41.22

📌 Matches Where Favorite Had Odds > 3.0 but Still Won:
Empty DataFrame
Columns: [Date, HomeTeam, AwayTeam, FTR, MarketFavorite, FavOdd]
Index: []


In [12]:
import numpy as np

# Keep only rows that have a result and all three odds columns populated
odds_cols = ["B365H", "B365D", "B365A"]
df = df.dropna(subset=["FTR"] + odds_cols)

# Favorite = odds column with the smallest value; FavOdd = that smallest value
row_odds = df[odds_cols]
df["MarketFavorite"] = row_odds.idxmin(axis=1)
df["FavOdd"] = row_odds.min(axis=1)

# Whether favorite won
def favorite_result(row):
    """Classify a row as "Correct" if the favorite's outcome happened, otherwise "Wrong".

    Reads row["MarketFavorite"] (name of the favorite's odds column) and
    row["FTR"] (result code).
    """
    favorite = row["MarketFavorite"]
    if favorite == "B365H":
        hit = row["FTR"] == "H"
    elif favorite == "B365D":
        hit = row["FTR"] == "D"
    elif favorite == "B365A":
        hit = row["FTR"] == "A"
    else:
        # Unknown favorite label: never counted as a hit
        hit = False
    return "Correct" if hit else "Wrong"

df["FavoriteResult"] = df.apply(favorite_result, axis=1)

# Simulate flat betting outcome: FavOdd - 1 units won if correct, 1 unit lost if wrong
df["Profit"] = np.where(
    df["FavoriteResult"] == "Correct", df["FavOdd"] - 1, -1
)

# Create fine-grained buckets of width 0.1: (0.99, 1.09], (1.09, 1.19], ..., (4.99, 5.09].
# np.arange with a float step produces edges that can sit a hair above or below
# the two-decimal value shown in the bucket label. Rounding pins every edge to
# that value, so an odd equal to an edge always lands in the bucket whose label
# says it is included.
bins = np.round(np.arange(0.99, 5.1, 0.1), 2)
df["OddBucket"] = pd.cut(df["FavOdd"], bins=bins)

# Group by bucket
bucket_summary = df.groupby("OddBucket", observed=True).agg(
    TotalBets=("Profit", "count"),
    Wins=("FavoriteResult", lambda x: (x == "Correct").sum()),
    Losses=("FavoriteResult", lambda x: (x == "Wrong").sum()),
    TotalProfit=("Profit", "sum"),
)

# Both rates are percentages of the number of bets in the bucket
bucket_summary["WinRate %"] = (bucket_summary["Wins"] / bucket_summary["TotalBets"]) * 100
bucket_summary["ROI %"] = (bucket_summary["TotalProfit"] / bucket_summary["TotalBets"]) * 100

# Filter to buckets with at least 30 bets to avoid tiny sample sizes
bucket_summary = bucket_summary[bucket_summary["TotalBets"] >= 30]

# Display every remaining bucket
print("📊 Flat Betting ROI by Favorite Odds Bucket:\n")
print(bucket_summary[["TotalBets", "Wins", "Losses", "WinRate %", "TotalProfit", "ROI %"]].round(2))

# Highlight profitable zones (selected on the unrounded profit)
profitable = bucket_summary[bucket_summary["TotalProfit"] > 0]
if not profitable.empty:
    print("\n✅ Profitable Odds Buckets (flat betting):\n")
    print(profitable[["TotalBets", "TotalProfit", "ROI %"]].sort_values("ROI %", ascending=False).round(2))
else:
    print("\n❌ No profitable buckets found — try adjusting your ranges or using a filtered strategy.")


📊 Flat Betting ROI by Favorite Odds Bucket:

              TotalBets  Wins  Losses  WinRate %  TotalProfit  ROI %
OddBucket                                                           
(0.99, 1.09]         74    66       8      89.19        -3.15  -4.26
(1.09, 1.19]        445   367      78      82.47       -23.43  -5.27
(1.19, 1.29]        641   497     144      77.54       -23.18  -3.62
(1.29, 1.39]        825   603     222      73.09       -18.35  -2.22
(1.39, 1.49]        859   615     244      71.59        18.18   2.12
(1.49, 1.59]        979   623     356      63.64       -26.37  -2.69
(1.59, 1.69]       1024   593     431      57.91       -55.55  -5.42
(1.69, 1.79]        983   549     434      55.85       -35.61  -3.62
(1.79, 1.89]        929   505     424      54.36        -7.08  -0.76
(1.89, 1.99]        868   429     439      49.42       -42.69  -4.92
(1.99, 2.09]       1001   444     557      44.36      -102.10 -10.20
(2.09, 2.19]        908   414     494      45.59       -29

In [14]:

import numpy as np

# Discard rows lacking the result or any of the three odds
needed = ["FTR", "B365H", "B365D", "B365A"]
df = df.dropna(subset=needed)

# Market favorite (column name of the lowest odds) and the odds themselves
three_way = df[["B365H", "B365D", "B365A"]]
df["MarketFavorite"] = three_way.idxmin(axis=1)
df["FavOdd"] = three_way.min(axis=1)

# Determine if favorite won
def favorite_result(row):
    """Tell whether the favorite's outcome occurred: "Correct" if so, "Wrong" if not.

    Uses row["MarketFavorite"] (favorite's odds column name) and row["FTR"]
    (result code).
    """
    result_code_for = {"B365H": "H", "B365D": "D", "B365A": "A"}
    favorite = row["MarketFavorite"]
    # A favorite label outside the table can never be a hit
    if favorite not in result_code_for:
        return "Wrong"
    if row["FTR"] == result_code_for[favorite]:
        return "Correct"
    return "Wrong"

df["FavoriteResult"] = df.apply(favorite_result, axis=1)

# Simulate profit/loss per 1-unit bet: FavOdd - 1 on a win, -1 on a loss
df["Profit"] = np.where(df["FavoriteResult"] == "Correct", df["FavOdd"] - 1, -1)

# Create fine-grained odds buckets of width 0.1: (0.99, 1.09], ..., (3.39, 3.49].
# np.arange with a float step yields slightly inexact edges; rounding pins each
# edge to the two-decimal value shown in its label, so an odd equal to an edge
# is always placed in the bucket whose label includes it.
bins = np.round(np.arange(0.99, 3.5, 0.1), 2)
df["OddBucket"] = pd.cut(df["FavOdd"], bins=bins)

# Group stats by bucket
bucket_stats = df.groupby("OddBucket", observed=True).agg(
    TotalBets=("Profit", "count"),
    Wins=("FavoriteResult", lambda x: (x == "Correct").sum()),
    Losses=("FavoriteResult", lambda x: (x == "Wrong").sum()),
    TotalProfit=("Profit", "sum"),
)

# Additional metrics
bucket_stats["WinRate %"] = (bucket_stats["Wins"] / bucket_stats["TotalBets"]) * 100
bucket_stats["ROI %"] = (bucket_stats["TotalProfit"] / bucket_stats["TotalBets"]) * 100
bucket_stats["EV per bet"] = bucket_stats["TotalProfit"] / bucket_stats["TotalBets"]
# Running total over ALL buckets in odds order. It is computed before the
# sample-size filter, so it also includes profit from buckets dropped below.
bucket_stats["CumulativeProfit"] = bucket_stats["TotalProfit"].cumsum()

# Filter out tiny sample sizes
bucket_stats = bucket_stats[bucket_stats["TotalBets"] >= 50]

# Pick profitable buckets (ROI > 2%) on the exact values. Rounding first would
# turn an ROI such as 2.004 into 2.0 and wrongly exclude it.
profitable = bucket_stats[(bucket_stats["ROI %"] > 2)].round(2)

# Round for cleaner display
bucket_stats = bucket_stats.round(2)

# Display full result
print("\n📊 Full ROI Analysis by Favorite Odds Bucket:\n")
print(bucket_stats[["TotalBets", "Wins", "Losses", "WinRate %", "EV per bet", "TotalProfit", "ROI %", "CumulativeProfit"]])

if not profitable.empty:
    print("\n✅ High-Confidence Profitable Buckets (ROI > 2% and >= 50 bets):\n")
    print(profitable[["TotalBets", "WinRate %", "TotalProfit", "ROI %", "EV per bet"]].sort_values("ROI %", ascending=False))
else:
    print("\n❌ No profitable odds buckets with ROI > 2% and enough samples.")



📊 Full ROI Analysis by Favorite Odds Bucket:

              TotalBets  Wins  Losses  WinRate %  EV per bet  TotalProfit  \
OddBucket                                                                   
(0.99, 1.09]         74    66       8      89.19       -0.04        -3.15   
(1.09, 1.19]        445   367      78      82.47       -0.05       -23.43   
(1.19, 1.29]        641   497     144      77.54       -0.04       -23.18   
(1.29, 1.39]        825   603     222      73.09       -0.02       -18.35   
(1.39, 1.49]        859   615     244      71.59        0.02        18.18   
(1.49, 1.59]        979   623     356      63.64       -0.03       -26.37   
(1.59, 1.69]       1024   593     431      57.91       -0.05       -55.55   
(1.69, 1.79]        983   549     434      55.85       -0.04       -35.61   
(1.79, 1.89]        929   505     424      54.36       -0.01        -7.08   
(1.89, 1.99]        868   429     439      49.42       -0.05       -42.69   
(1.99, 2.09]       1001   444

In [15]:
import numpy as np
import pandas as pd

# Remove rows where the result or any of the three odds is missing
price_cols = ["B365H", "B365D", "B365A"]
df = df.dropna(subset=["FTR", *price_cols])

# Lowest-odds column per row is the market favorite; FavOdd is that lowest value
prices = df[price_cols]
df["MarketFavorite"] = prices.idxmin(axis=1)
df["FavOdd"] = prices.min(axis=1)

# Mark favorite type: Home or Away (ignore Draw favorites for this split)
def fav_type(row):
    """Translate row["MarketFavorite"] into "Home", "Away", or "Other".

    "B365H" maps to "Home" and "B365A" to "Away"; every other value
    (including the draw column) is reported as "Other".
    """
    side_by_column = {"B365H": "Home", "B365A": "Away"}
    return side_by_column.get(row["MarketFavorite"], "Other")

df["FavType"] = df.apply(fav_type, axis=1)

# Filter only Home or Away favorites.
# .copy() detaches the filtered frame from the frame it was sliced from, so the
# column assignments that follow in this cell write to an independent frame
# rather than to a slice (the situation pandas flags with SettingWithCopyWarning).
# NOTE: this rebinds `df`, so rows with any other favorite type are gone for
# every cell executed after this one.
df = df[df["FavType"].isin(["Home", "Away"])].copy()

# Determine if favorite won
def favorite_result(row):
    """Return "Correct" if a home/away favorite's outcome occurred, else "Wrong".

    Only "B365H" (hit on FTR "H") and "B365A" (hit on FTR "A") can be correct;
    any other favorite label always yields "Wrong".
    """
    winning_code = {"B365H": "H", "B365A": "A"}.get(row["MarketFavorite"])
    if winning_code is None:
        return "Wrong"
    return "Correct" if row["FTR"] == winning_code else "Wrong"

df["FavoriteResult"] = df.apply(favorite_result, axis=1)

# Simulate profit/loss per 1-unit bet: FavOdd - 1 on a win, -1 on a loss
df["Profit"] = np.where(df["FavoriteResult"] == "Correct", df["FavOdd"] - 1, -1)

# Create fine-grained odds buckets of width 0.1: (0.99, 1.09], ..., (3.39, 3.49].
# np.arange with a float step yields slightly inexact edges; rounding pins each
# edge to the two-decimal value shown in its label, so an odd equal to an edge
# is always placed in the bucket whose label includes it.
bins = np.round(np.arange(0.99, 3.5, 0.1), 2)
df["OddBucket"] = pd.cut(df["FavOdd"], bins=bins)

# Group stats by bucket and FavType
bucket_stats = df.groupby(["FavType", "OddBucket"], observed=True).agg(
    TotalBets=("Profit", "count"),
    Wins=("FavoriteResult", lambda x: (x == "Correct").sum()),
    Losses=("FavoriteResult", lambda x: (x == "Wrong").sum()),
    TotalProfit=("Profit", "sum"),
)

# Add additional metrics
bucket_stats["WinRate %"] = (bucket_stats["Wins"] / bucket_stats["TotalBets"]) * 100
bucket_stats["ROI %"] = (bucket_stats["TotalProfit"] / bucket_stats["TotalBets"]) * 100
bucket_stats["EV per bet"] = bucket_stats["TotalProfit"] / bucket_stats["TotalBets"]

# Reset index for easier filtering (FavType and OddBucket become columns)
bucket_stats = bucket_stats.reset_index()

# Filter out small sample sizes
bucket_stats = bucket_stats[bucket_stats["TotalBets"] >= 50]

# Pick profitable buckets (ROI > 2%) on the exact values. Rounding first would
# turn an ROI such as 2.004 into 2.0 and wrongly exclude it.
profitable = bucket_stats[bucket_stats["ROI %"] > 2].round(2)

# Round for display
bucket_stats = bucket_stats.round(2)

# === Print Home Favorites ===
print("\n🏠 ROI Analysis for Home Favorites:\n")
home_stats = bucket_stats[bucket_stats["FavType"] == "Home"]
print(home_stats[["OddBucket", "TotalBets", "Wins", "Losses", "WinRate %", "ROI %", "EV per bet"]])

# === Print Away Favorites ===
print("\n🚗 ROI Analysis for Away Favorites:\n")
away_stats = bucket_stats[bucket_stats["FavType"] == "Away"]
print(away_stats[["OddBucket", "TotalBets", "Wins", "Losses", "WinRate %", "ROI %", "EV per bet"]])

# === Profitable Buckets Only ===
if not profitable.empty:
    print("\n✅ Profitable Buckets (ROI > 2%):\n")
    print(profitable[["FavType", "OddBucket", "TotalBets", "WinRate %", "ROI %", "EV per bet"]].sort_values("ROI %", ascending=False))
else:
    print("\n❌ No profitable odds buckets found with ROI > 2%.")



🏠 ROI Analysis for Home Favorites:

       OddBucket  TotalBets  Wins  Losses  WinRate %  ROI %  EV per bet
19  (0.99, 1.09]         73    65       8      89.04  -4.41       -0.04
20  (1.09, 1.19]        385   321      64      83.38  -4.41       -0.04
21  (1.19, 1.29]        512   390     122      76.17  -5.35       -0.05
22  (1.29, 1.39]        593   431     162      72.68  -2.68       -0.03
23  (1.39, 1.49]        611   440     171      72.01   2.65        0.03
24  (1.49, 1.59]        651   407     244      62.52  -4.48       -0.04
25  (1.59, 1.69]        689   394     295      57.18  -6.67       -0.07
26  (1.69, 1.79]        637   366     271      57.46  -0.87       -0.01
27  (1.79, 1.89]        603   314     289      52.07  -5.02       -0.05
28  (1.89, 1.99]        565   283     282      50.09  -3.66       -0.04
29  (1.99, 2.09]        687   292     395      42.50 -13.99       -0.14
30  (2.09, 2.19]        577   259     318      44.89  -4.71       -0.05
31  (2.19, 2.29]        617