In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from typing import List, Tuple, Dict, Any, Optional

In [None]:
# Load the two CSV inputs (paths are relative to the notebook's working directory).
# df_probabilities: bin table read by get_probabilities() through its
#   BIN_LEFT / BIN_RIGHT / H_WINS / DRAWS / A_WINS columns.
# df_games: per-match table; later cells read its bookmaker odds, RATING_DIFF,
#   FTR, HomeTeam and AwayTeam columns.
df_probabilities = pd.read_csv('./Datasets/pi_rating_probabilities.csv')
df_games = pd.read_csv('./Datasets/premier/full_df.csv')

In [None]:
# Preview both inputs. A notebook cell only renders its LAST expression, so the
# first preview has to be shown explicitly or it is silently discarded.
# `display` is provided by the IPython/Jupyter kernel without an import.
display(df_probabilities.head())
df_games.describe()

In [None]:
# Bookmaker prefix used to build the column names below.
BOOKIE = 'B365'

# Odds columns for home win / draw / away win.
ODDS_BOOKIE_H, ODDS_BOOKIE_D, ODDS_BOOKIE_A = f"{BOOKIE}H", f"{BOOKIE}D", f"{BOOKIE}A"

# Columns that will hold the bookmaker's implied probabilities for the same outcomes.
P_BOOKIE_H, P_BOOKIE_D, P_BOOKIE_A = f"P_{BOOKIE}_H", f"P_{BOOKIE}_D", f"P_{BOOKIE}_A"

In [None]:
def get_probabilities(rating_diff: float, probabilities: Optional[pd.DataFrame] = None) -> Tuple[float, float, float]:
  """Look up the (home win, draw, away win) probabilities for a rating difference.

  The lookup table has one row per bin with columns BIN_LEFT, BIN_RIGHT, H_WINS,
  DRAWS and A_WINS. Bins are treated as (BIN_LEFT, BIN_RIGHT]; the smallest
  BIN_LEFT is additionally treated as inclusive so that a value sitting exactly
  on the table's lower edge still resolves to the first bin.

  Args:
    rating_diff: Rating difference to look up.
    probabilities: Lookup table to use. Defaults to the notebook-level
      ``df_probabilities`` frame.

  Returns:
    ``(H_WINS, DRAWS, A_WINS)`` of the first matching bin; ``(0, 0, 1)`` when
    ``rating_diff`` is below the smallest BIN_LEFT and ``(1, 0, 0)`` when it is
    above the largest BIN_RIGHT.

  Raises:
    ValueError: if ``rating_diff`` is NaN or no bin covers it (e.g. a gap
      between bins). Previously these cases fell through and returned ``None``,
      which only failed later when the caller unpacked the result.
  """
  table = df_probabilities if probabilities is None else probabilities

  if pd.isna(rating_diff):
    raise ValueError("rating_diff is NaN; cannot look up outcome probabilities")

  min_rating_diff = table["BIN_LEFT"].min()
  max_rating_diff = table["BIN_RIGHT"].max()

  if rating_diff < min_rating_diff:
    return (0, 0, 1)
  if rating_diff > max_rating_diff:
    return (1, 0, 0)

  # Vectorised version of the row-by-row scan: left-open, right-closed bins.
  in_bin = (table["BIN_LEFT"] < rating_diff) & (rating_diff <= table["BIN_RIGHT"])
  if rating_diff == min_rating_diff:
    # The strict '<' above would exclude the table's own lower edge.
    in_bin = in_bin | (table["BIN_LEFT"] == min_rating_diff)

  matches = table[in_bin]
  if matches.empty:
    raise ValueError(f"rating_diff {rating_diff} is not covered by any probability bin")

  # First match in table order, as the original scan returned.
  row = matches.iloc[0]
  return (row["H_WINS"], row["DRAWS"], row["A_WINS"])

# Quick check: show the probability triple looked up for a rating difference of 0.
get_probabilities(0)

In [None]:
# Convert the bookmaker's decimal odds into implied probabilities that sum to 1:
# take the inverse of each outcome's odds and divide by the sum of the three
# inverses for that game.
# Done column-wise instead of row by row: it avoids rebinding the builtin `sum`
# for the rest of the kernel session and the slow per-cell `.loc` writes.
inverse_odds_H = 1 / df_games[ODDS_BOOKIE_H]
inverse_odds_D = 1 / df_games[ODDS_BOOKIE_D]
inverse_odds_A = 1 / df_games[ODDS_BOOKIE_A]

# Same left-to-right addition order as the original per-row expression; a
# missing odd in any of the three columns yields NaN for the whole game.
implied_total = inverse_odds_H + inverse_odds_D + inverse_odds_A

df_games[P_BOOKIE_H] = inverse_odds_H / implied_total
df_games[P_BOOKIE_D] = inverse_odds_D / implied_total
df_games[P_BOOKIE_A] = inverse_odds_A / implied_total

df_games.tail(10)

In [None]:
# Back-test: for each of the most recent games, look up the model probabilities
# from the rating difference, size a bet on the outcome with the highest expected
# value against the bookmaker odds, and track the bankroll after each settled bet.
INITIAL_BANKROLL = 100
BANKROLL_HISTORY = [INITIAL_BANKROLL]

EV_THRESHOLD = 0
BACKTEST_GAMES = 3000  # number of most recent rows of df_games to simulate

# Reset the result columns on every run of this cell. EV / PROB start as floats
# so that assigning float values later does not hit an integer column.
df_games["BET"] = ""
df_games["EV"] = 0.0
df_games["PROB"] = 0.0

ODDS_COLUMN_BY_OUTCOME = {"H": ODDS_BOOKIE_H, "D": ODDS_BOOKIE_D, "A": ODDS_BOOKIE_A}

for index, row in df_games.tail(BACKTEST_GAMES).iterrows():
  rating_diff = row["RATING_DIFF"]
  if pd.isna(rating_diff):
    continue  # no rating difference, so no model probabilities to bet on

  model_probabilities = get_probabilities(rating_diff)
  if model_probabilities is None:
    continue  # rating difference not covered by the probability table

  # Pick the outcome with the highest expected value among those with a
  # positive edge. Ties keep the first outcome in H, D, A order, so a bet is
  # always fully defined (previously a tie left the stake stale or undefined).
  best = None  # (ev, outcome, probability, stake, net_odds)
  for outcome, probability in zip("HDA", model_probabilities):
    net_odds = row[ODDS_COLUMN_BY_OUTCOME[outcome]] - 1  # profit per unit staked on a win
    if not net_odds > 0:
      continue  # missing or degenerate odds

    # Expected profit per unit staked.
    edge = probability * net_odds - (1 - probability)
    # Kelly fraction f* = (b*p - q) / b.
    stake = edge / net_odds
    if not stake > 0:
      # A negative edge must never be bet: stake * edge would still come out
      # positive (product of two negatives) and look like an attractive bet.
      continue

    ev = stake * edge  # expected profit of this bet at the chosen stake
    if best is None or ev > best[0]:
      best = (ev, outcome, probability, stake, net_odds)

  if best is None or not best[0] > EV_THRESHOLD:
    continue

  ev, outcome, probability, stake, net_odds = best
  df_games.loc[index, "BET"] = outcome
  df_games.loc[index, "EV"] = ev
  df_games.loc[index, "PROB"] = probability

  # Settle the bet against the full-time result.
  # NOTE(review): the stake is applied in absolute bankroll units, as before,
  # rather than as a fraction of the current bankroll - confirm this is intended.
  if row["FTR"] == outcome:
    BANKROLL_HISTORY.append(BANKROLL_HISTORY[-1] + stake * net_odds)
  else:
    BANKROLL_HISTORY.append(BANKROLL_HISTORY[-1] - stake)

In [None]:
# Line plot of the bankroll after every settled bet (first point = starting bankroll).
plt.rcParams["figure.figsize"] = (12, 8)
plt.gca().set(title="Bankroll History", xlabel="Games", ylabel="Bankroll")
plt.grid(True)
plt.plot(BANKROLL_HISTORY)

In [None]:
# Distribution of the model probability (PROB) over the bets that matched the result.
plt.rcParams["figure.figsize"] = (12, 8)
plt.gca().set(
  title="Histogram of probabilities of bets won",
  xlabel="Probabilities",
  ylabel="Count",
)
plt.grid(True)
plt.hist(df_games.loc[df_games["BET"].eq(df_games["FTR"]), "PROB"])



In [None]:
# Last 20 records where the bet matched the full-time result
COLUMNS = [
  "HomeTeam", "AwayTeam",
  "BET", "FTR", "EV", "PROB",
  ODDS_BOOKIE_H, ODDS_BOOKIE_D, ODDS_BOOKIE_A,
]
df_games.loc[df_games["BET"] == df_games["FTR"], COLUMNS].tail(20)

In [None]:
# Bar chart of bets made ("" = no bet placed)

plt.rcParams["figure.figsize"] = (12, 8)
plt.grid(True)
plt.title("Bar chart of bets made")
plt.xlabel("Bets")
plt.ylabel("Count")

# value_counts() orders its result by frequency, not by label, so the counts
# must be aligned to the label order explicitly; otherwise bars are paired with
# the wrong labels. fill_value=0 also covers labels that never occur, which
# previously made the two sequences differ in length.
bet_labels = ["H", "D", "A", ""]
bet_counts = df_games["BET"].value_counts().reindex(bet_labels, fill_value=0)
plt.bar(bet_labels, bet_counts)

In [None]:
# Histogram of EVs in bets lost (ignore bets where BET = "")
plt.rcParams["figure.figsize"] = (12, 8)
plt.grid(True)
plt.title("Histogram of EVs in bets lost (ignore bets where BET = '')")
plt.xlabel("EV")
plt.ylabel("Count")

# Each comparison needs its own parentheses: `&` binds tighter than `!=`, so
# the unparenthesised form evaluated `"" & df_games["BET"]` first and raised.
bet_placed = df_games["BET"] != ""
bet_lost = df_games["BET"] != df_games["FTR"]
plt.hist(df_games.loc[bet_placed & bet_lost, "EV"])