In [51]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from joblib import dump,load

mpl.rcParams['axes.grid'] = "True"
mpl.rcParams['axes.spines.top'] = "False"
mpl.rcParams['axes.spines.right'] = "False"
mpl.rcParams['xtick.labelsize'] = 20
mpl.rcParams['ytick.labelsize'] = 20

df_mls_matches = pd.read_csv("../../data/MLS_Matches_1823_clean.csv")
df_mls_matches = df_mls_matches[df_mls_matches['Season'] == 2022]
df_mls_matches.replace('Montreal Impact', 'CF MontrÃ©al',  inplace=True)
df_mls_matches.replace('CF Montréal', 'CF MontrÃ©al',  inplace=True)
df_mls_matches.replace('Dynamo FC', 'Dynamo',  inplace=True)
df_mls_matches = df_mls_matches.sort_values(by="Date")

all_teams = df_mls_matches["Team"].unique()

df_mls_odds = pd.read_csv("../../data/MLS_Matches_odds_clean.csv")
df_mls_odds = df_mls_odds[df_mls_odds['Season'] == 2022]

df_mls_odds = df_mls_odds.sort_values(by='Date')

df_mls_odds.tail(28)

Unnamed: 0.1,Unnamed: 0,Season,Date,Home Team,Away Team,Full Time Result,Industry Average Away Team Win Odds (American),Industry Average Draw Odds (American),Industry Average Home Team Win Odds (American)
413,413,2022,2022-10-06,Charlotte,Columbus Crew,D,155,222,177
368,368,2022,2022-10-09,Philadelphia,Toronto FC,H,679,453,-294
367,367,2022,2022-10-09,Orlando City,Columbus Crew,H,242,241,111
370,370,2022,2022-10-09,FC Dallas,Sporting KC,H,276,277,-112
372,372,2022,2022-10-09,Los Angeles FC,Nashville,A,331,327,-147
373,373,2022,2022-10-09,Minnesota Utd,Vancouver,H,309,315,-135
374,374,2022,2022-10-09,Real Salt Lake,Portland Timbers,H,358,323,-154
366,366,2022,2022-10-09,NY Red Bulls,Charlotte,H,436,291,-161
365,365,2022,2022-10-09,Inter Miami,CF MontrÃ©al,A,156,258,156
364,364,2022,2022-10-09,D.C. United,FC Cincinnati,A,-149,310,354


In [378]:
def calculate_winnings(odds, bet_amount=1):
    if np.isnan(odds):
        return 0
    elif (odds > 0):
        # Positive odds
        return (odds / 100) * bet_amount
    else:
        # Negative odds
        return (100 / abs(odds)) * bet_amount

In [379]:
# calculate_winnings(-114)

In [380]:
def split_data_odds(fixture):
    matches = fixture*14
    df1 = df_mls_odds.iloc[:matches, :]
    df2 = df_mls_odds.iloc[matches:matches+14, :]
    return df1, df2


In [381]:
def split_data_matches(fixture):
    matches = fixture*28
    df1 = df_mls_matches.iloc[:matches, :]
    df2 = df_mls_matches.iloc[matches:matches+28, :]
    return df1, df2



In [382]:
def calc_feats_match(df, team):
    team_stats = dict()
    df_team = df[(df["Team"]==team)]
    team_shots = df_team.apply(lambda x: x["Sh.2"], axis=1).sum()
    team_shots_ot = df_team.apply(lambda x: x["SoT"], axis=1).sum()
    team_shots_sca = df_team.apply(lambda x: x["SCA"], axis=1).sum()
    team_shots_gca = df_team.apply(lambda x: x["GCA"], axis=1).sum()
    team_shots_npxG = df_team.apply(lambda x: x["npxG"], axis=1).sum()
    team_shots_xA = df_team.apply(lambda x: x["xA"], axis=1).sum()
    team_points = df_team.apply(lambda x: 3 if (x["Result"].startswith("W")) else (1 if x["Result"].startswith("D") else 0), axis=1).sum()

    team_stats["team"] = team
    team_stats["shots"] = team_shots
    team_stats["shots_ot"] = team_shots_ot
    team_stats["sca"] = team_shots_sca
    team_stats["gca"] = team_shots_gca
    team_stats["npxG"] = team_shots_npxG
    team_stats["xA"] = team_shots_xA
    team_stats["points"] = team_points
    return team_stats



In [383]:
def calc_points(df, team, team_stats):
    df_team = df[(df["Home Team"]==team) | (df["Away Team"]==team)]
    team_points = df_team.apply(lambda x: 3 if ((x["Home Team"] == team and x["Full Time Result"].startswith("H")) or (x["Away Team"] == team and x["Full Time Result"].startswith("A"))) else (1 if x["Full Time Result"] == "D" else 0), axis=1).sum() 
    team_stats["points_2"] = team_points
    return team_stats

In [423]:
def get_match_day_results(match_day, ht, at, dt, hc, ac, dc):
    df_mls_fix , df_mls_match = split_data_matches(match_day)
    df_mls_odds_fix , df_mls_odds = split_data_odds(match_day)

    all_teams_stats = []

    for team in all_teams:
        team_stats = calc_feats_match(df_mls_fix, team)
        team_stats = calc_points(df_mls_odds_fix, team, team_stats)
        all_teams_stats.append(team_stats)

    df_all_stats = pd.DataFrame(all_teams_stats)

    from joblib import dump,load

    features = df_all_stats[["npxG","sca","gca","xA", "shots", "shots_ot"]]

    model = load('../../data/mls_model.joblib')

    model.predict(features)

    df_all_stats["exp_points"] = model.predict(features)
    df_all_stats["exp_points_diff"] = (df_all_stats["exp_points"] - df_all_stats["points"])

    df_mls_odds[["Home Team", "Away Team", "Industry Average Away Team Win Odds (American)", "Industry Average Draw Odds (American)", "Industry Average Home Team Win Odds (American)"]]

    predictions = []

    for ind, row in df_mls_odds.iterrows():
        home_team = row["Home Team"]
        away_team = row["Away Team"]
        home_exp_points = df_all_stats[ df_all_stats["team"]==home_team]["exp_points" ].values[0]
        away_exp_points = df_all_stats[ df_all_stats["team"]==away_team]["exp_points" ].values[0]

        exp_points_diff_teams = (home_exp_points - away_exp_points)
        if row["Full Time Result"].startswith("H"):
            ht.append(exp_points_diff_teams)
            hc.append(1)
        elif row["Full Time Result"].startswith("A"):
            at.append(exp_points_diff_teams)
            ac.append(1)
        else:
            dt.append(exp_points_diff_teams)
            dc.append(1)

        if exp_points_diff_teams > 5:
            result = "H"
            odds = row["Industry Average Home Team Win Odds (American)"]
        elif exp_points_diff_teams < -3:
            result = "A"
            odds = row["Industry Average Away Team Win Odds (American)"]
            # result = None
            # odds = None
        else:
            result = None
            odds = None
        # else:
        #     result = "D"
        #     odds = row["Industry Average Draw Odds (American)"]

        pred = {"Home Team": home_team, "Away Team": away_team, "Prediction": result, "Odds": odds, "EXP": exp_points_diff_teams}
        predictions.append(pred)

    df_slip = pd.DataFrame(predictions)

    df_slip = df_mls_odds.merge(df_slip, on=["Home Team", "Away Team"])[["Home Team", "Away Team", "Prediction", "Odds", "Full Time Result", "EXP"]]

    vectorized_calculate_winnings = np.vectorize(calculate_winnings)

    conditions = [
    (df_slip['Prediction'] == df_slip['Full Time Result']),  # Prediction matches result
    (df_slip['Prediction'].isna())  # No prediction
    ]

    # Choices corresponding to each condition
    choices = [
    vectorized_calculate_winnings(df_slip['Odds'], 1),  # Calculate winnings
    0  # Set to 0 for no prediction
    ]

    df_slip['Win'] = np.select(conditions, choices, default=-1)

    # df_slip["Win"] = np.where( df_slip["Prediction"]==df_slip["Full Time Result"], calculate_winnings(df_slip["Odds"], 1), -1)
    return {"sum": df_slip["Win"].sum(), "ht": ht, "hc": hc, "at": at, "ac": ac, "dt": dt, "dc": dc}

In [424]:
summ = 0
lower_bound = 10
upper_bound = 34

bound_dif = upper_bound - lower_bound

average = (bound_dif * 28) if bound_dif > 0 else 28

home_total = []
away_total = []
draw_total = []

home_count = []
away_count = []
draw_count = []

test = []
for x in range(lower_bound, upper_bound):
    result = get_match_day_results(x, home_total, away_total, draw_total, home_count, away_count, draw_count)
    summ += result["sum"]

print(np.median(home_total), np.median(away_total), np.median(draw_total))
print(summ, np.sum(home_total) / len(home_count), np.sum(away_total) / len(away_count), np.sum(draw_total) / len(draw_count))

0.8405210231032356 -2.462767579899346 0.9655945704198414
6.968455679263236 0.8058899016805292 -2.799492147088048 1.3069772150686696


In [386]:
df_mls_fix4 , df_mls_match10 = split_data_matches(33)
df_mls_odds_fix4 , df_mls_odds_10 = split_data_odds(33)

print(df_mls_fix4)

all_teams_stats = []

for team in all_teams:
    team_stats = calc_feats_match(df_mls_fix4, team)
    team_stats = calc_points(df_mls_odds_fix4, team, team_stats)
    all_teams_stats.append(team_stats)

df_all_stats = pd.DataFrame(all_teams_stats)


      Unnamed: 0             Team               Opp  Season  npxG  SCA  GCA  \
3222        3222    Minnesota Utd      Philadelphia    2022   1.1   25    2   
3226        3226  Colorado Rapids    Los Angeles FC    2022   0.4   12    0   
3225        3225      D.C. United         Charlotte    2022   0.8   17    3   
3224        3224     Philadelphia     Minnesota Utd    2022   1.1   36    2   
3223        3223      New England  Portland Timbers    2022   1.2   25    3   
...          ...              ...               ...     ...   ...  ...  ...   
3523        3523        FC Dallas       Sporting KC    2022   1.3   30    4   
3521        3521            NYCFC       Atlanta Utd    2022   1.4   25    3   
3520        3520         San Jose           Seattle    2022   1.5   22    3   
3519        3519   Los Angeles FC         Nashville    2022   3.7   55    0   
3518        3518      Atlanta Utd             NYCFC    2022   1.5   20    2   

      GF  Sh.2   xA  SoT Result        Date  
3222 

In [387]:


features = df_all_stats[["npxG","sca","gca","xA", "shots", "shots_ot"]]
features
model = load('../../data/mls_model.joblib')

model.predict(features)

array([45.33441227, 44.52242544, 42.27886866, 59.30257774, 48.97962201,
       50.64091691, 41.36344468, 56.75549277, 66.33662235, 45.74038438,
       56.94536668, 40.81253512, 41.51623291, 56.60033823, 42.32255835,
       54.47476822, 39.99883381, 41.80683801, 40.68801929, 42.0572002 ,
       47.84374498, 56.05487073, 48.29649956, 38.24448169, 59.56659911,
       36.54703712, 39.25468695, 46.31276603])

In [388]:
df_all_stats["exp_points"] = model.predict(features)
df_all_stats["exp_points_diff"] = (df_all_stats["exp_points"] - df_all_stats["points"])
df_all_stats

Unnamed: 0,team,shots,shots_ot,sca,gca,npxG,xA,points,points_2,exp_points,exp_points_diff
0,Minnesota Utd,375,120,652,75,37.4,29.7,42,45,45.334412,3.334412
1,Colorado Rapids,456,145,828,80,47.7,35.9,43,42,44.522425,1.522425
2,D.C. United,357,108,617,67,34.3,28.7,27,27,42.278869,15.278869
3,Philadelphia,401,152,705,108,47.7,36.5,61,64,59.302578,-1.697422
4,New England,431,145,791,87,40.5,36.7,42,41,48.979622,6.979622
5,NY Red Bulls,440,141,731,78,37.1,26.6,50,50,50.640917,0.640917
6,Vancouver,386,122,693,69,34.2,31.6,40,43,41.363445,1.363445
7,Los Angeles FC,489,174,840,101,53.2,35.0,64,67,56.755493,-7.244507
8,Austin,446,141,764,108,49.9,41.8,56,55,66.336622,10.336622
9,Toronto FC,364,133,651,82,36.0,32.4,34,34,45.740384,11.740384


In [389]:
df_mls_odds_10[["Home Team", "Away Team", "Industry Average Away Team Win Odds (American)", "Industry Average Draw Odds (American)", "Industry Average Home Team Win Odds (American)"]]

predictions = []
for ind, row in df_mls_odds_10.iterrows():
    home_team = row["Home Team"]
    away_team = row["Away Team"]
    home_exp_points = df_all_stats[ df_all_stats["team"]==home_team]["exp_points" ].values[0]
    away_exp_points = df_all_stats[ df_all_stats["team"]==away_team]["exp_points" ].values[0]

    exp_points_diff_teams = (home_exp_points - away_exp_points)
    print(exp_points_diff_teams)

    if exp_points_diff_teams > 2:
        result = "H"
        odds = row["Industry Average Home Team Win Odds (American)"]
    elif abs(exp_points_diff_teams) < 2:
        result = "D"
        odds = row["Industry Average Draw Odds (American)"]
    else:
        result = "A"
        odds = row["Industry Average Away Team Win Odds (American)"]

    pred = {"Home Team": home_team, "Away Team": away_team, "Prediction": result, "Odds": odds}
    predictions.append(pred)

df_slip = pd.DataFrame(predictions)

df_slip

13.562193366074368
-5.624746741791235
4.969195789018791
8.458993205946832
3.9709675829447164
-17.34565128073568
10.642083101611043
-17.244040761688957
-14.666498025825987
-8.167086892157378
21.81419690310154
-17.810389037082864
-12.667930214442684
5.7865447788799145


Unnamed: 0,Home Team,Away Team,Prediction,Odds
0,Philadelphia,Toronto FC,H,-294
1,Orlando City,Columbus Crew,A,242
2,FC Dallas,Sporting KC,H,-112
3,Los Angeles FC,Nashville,H,-147
4,Minnesota Utd,Vancouver,H,-135
5,Real Salt Lake,Portland Timbers,A,358
6,NY Red Bulls,Charlotte,H,-161
7,Inter Miami,CF MontrÃ©al,A,156
8,D.C. United,FC Cincinnati,A,-149
9,Chicago Fire,New England,A,178


In [390]:
df_slip = df_mls_odds_10.merge(df_slip, on=["Home Team", "Away Team"])[["Home Team", "Away Team", "Prediction", "Odds", "Full Time Result"]]

df_slip

Unnamed: 0,Home Team,Away Team,Prediction,Odds,Full Time Result
0,Philadelphia,Toronto FC,H,-294,H
1,Orlando City,Columbus Crew,A,242,H
2,FC Dallas,Sporting KC,H,-112,H
3,Los Angeles FC,Nashville,H,-147,A
4,Minnesota Utd,Vancouver,H,-135,H
5,Real Salt Lake,Portland Timbers,A,358,H
6,NY Red Bulls,Charlotte,H,-161,H
7,Inter Miami,CF MontrÃ©al,A,156,A
8,D.C. United,FC Cincinnati,A,-149,A
9,Chicago Fire,New England,A,178,D


In [391]:
df_slip["Win"] = np.where( df_slip["Prediction"]==df_slip["Full Time Result"], calculate_winnings(df_slip["Odds"], 1), -1)

df_slip

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
sumWinnings = df_slip["Win"].sum()

sumWinnings


-15.000000000000002