In [192]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl

# Global plot styling. Real booleans are used instead of the strings
# "True"/"False"; matplotlib validates both to the same setting.
mpl.rcParams['axes.grid'] = True
mpl.rcParams['axes.spines.top'] = False
mpl.rcParams['axes.spines.right'] = False
mpl.rcParams['xtick.labelsize'] = 20
mpl.rcParams['ytick.labelsize'] = 20


# Season-level table: normalise the team name, then keep one season.
df_mls = pd.read_csv("../../data/MLS_1823_clean.csv")
df_mls = df_mls.replace('Montreal Impact', 'CF MontrÃ©al')
# NOTE(review): this keeps Season 2023 while the match and odds tables below
# keep Season 2022 -- confirm the mismatch is intentional.
df_mls = df_mls[df_mls['Season'] == 2023]

# Match rows for 2022, with team names normalised and sorted by date.
# The replacements are chained (no inplace=True) because in-place edits on a
# boolean-filtered slice can raise SettingWithCopyWarning and make the cell
# non-idempotent. The replacement target is the mis-encoded spelling,
# presumably to match the spelling used in the odds file -- TODO confirm.
df_mls_matches = pd.read_csv("../../data/MLS_Matches_1823_clean.csv")
df_mls_matches = (
    df_mls_matches[df_mls_matches['Season'] == 2022]
    .replace('Montreal Impact', 'CF MontrÃ©al')
    .replace('CF Montréal', 'CF MontrÃ©al')
    .replace('Dynamo FC', 'Dynamo')
    .sort_values(by="Date")
)

# Every distinct value of the "Team" column in the 2022 match rows.
all_teams = df_mls_matches["Team"].unique()

# Betting odds for 2022, sorted by date.
df_mls_odds = pd.read_csv("../../data/MLS_Matches_odds_clean.csv")
df_mls_odds = df_mls_odds[df_mls_odds['Season'] == 2022].sort_values(by='Date')

df_mls_odds.tail(28)

Unnamed: 0.1,Unnamed: 0,Season,Date,Home Team,Away Team,Full Time Result,Industry Average Away Team Win Odds (American),Industry Average Draw Odds (American),Industry Average Home Team Win Odds (American)
413,413,2022,2022-10-06,Charlotte,Columbus Crew,D,155,222,177
368,368,2022,2022-10-09,Philadelphia,Toronto FC,H,679,453,-294
367,367,2022,2022-10-09,Orlando City,Columbus Crew,H,242,241,111
370,370,2022,2022-10-09,FC Dallas,Sporting KC,H,276,277,-112
372,372,2022,2022-10-09,Los Angeles FC,Nashville,A,331,327,-147
373,373,2022,2022-10-09,Minnesota Utd,Vancouver,H,309,315,-135
374,374,2022,2022-10-09,Real Salt Lake,Portland Timbers,H,358,323,-154
366,366,2022,2022-10-09,NY Red Bulls,Charlotte,H,436,291,-161
365,365,2022,2022-10-09,Inter Miami,CF MontrÃ©al,A,156,258,156
364,364,2022,2022-10-09,D.C. United,FC Cincinnati,A,-149,310,354


In [193]:
def split_data_odds(fixture, matches_per_fixture=14, df=None):
    """Split the odds table into a history part and the next block of rows.

    Parameters
    ----------
    fixture : int
        Number of fixture blocks to treat as history. Must be >= 0.
    matches_per_fixture : int, default 14
        Number of odds rows that make up one fixture block.
    df : pandas.DataFrame, optional
        Frame to split positionally. Defaults to the notebook-level
        ``df_mls_odds``.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        The first ``fixture * matches_per_fixture`` rows, and the following
        ``matches_per_fixture`` rows (fewer if the frame runs out).

    Raises
    ------
    ValueError
        If ``fixture`` is negative; a negative cut would slice from the end
        of the frame and return a meaningless split.
    """
    if fixture < 0:
        raise ValueError("fixture must be non-negative")
    if df is None:
        df = df_mls_odds

    cut = fixture * matches_per_fixture
    history = df.iloc[:cut, :]
    upcoming = df.iloc[cut:cut + matches_per_fixture, :]
    return history, upcoming


In [194]:
def split_data_matches(fixture, rows_per_fixture=28, df=None):
    """Split the match table into a history part and the next block of rows.

    Parameters
    ----------
    fixture : int
        Number of fixture blocks to treat as history. Must be >= 0.
    rows_per_fixture : int, default 28
        Number of match rows that make up one fixture block.
    df : pandas.DataFrame, optional
        Frame to split positionally. Defaults to the notebook-level
        ``df_mls_matches``.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        The first ``fixture * rows_per_fixture`` rows, and the following
        ``rows_per_fixture`` rows (fewer if the frame runs out).

    Raises
    ------
    ValueError
        If ``fixture`` is negative; a negative cut would slice from the end
        of the frame and return a meaningless split.
    """
    if fixture < 0:
        raise ValueError("fixture must be non-negative")
    if df is None:
        df = df_mls_matches

    cut = fixture * rows_per_fixture
    history = df.iloc[:cut, :]
    upcoming = df.iloc[cut:cut + rows_per_fixture, :]
    return history, upcoming



In [195]:
def calc_feats_match(df, team):
    """Aggregate one team's match statistics and points from match rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Match rows with columns "Team", "Sh.2", "SoT", "SCA", "GCA", "npxG",
        "xA" and "Result".
    team : str
        Value of the "Team" column to aggregate.

    Returns
    -------
    dict
        Keys "team", "shots", "shots_ot", "sca", "gca", "npxG", "xA" and
        "points". Each stat is the column sum over the team's rows. Points
        are 3 for every "Result" starting with "W", 1 for every one starting
        with "D", and 0 otherwise.

    Notes
    -----
    Columns are summed directly rather than through a row-wise ``apply``.
    On an empty team slice the row-wise form returns an empty DataFrame, so
    ``.sum()`` produced a Series instead of a number. A team with no rows now
    gets zeros, and a missing "Result" counts as 0 points instead of raising.
    """
    df_team = df[df["Team"] == team]

    # Missing results become "" so they match neither "W" nor "D".
    results = df_team["Result"].fillna("").astype(str)
    wins = results.str.startswith("W").sum()
    draws = results.str.startswith("D").sum()

    return {
        "team": team,
        "shots": df_team["Sh.2"].sum(),
        "shots_ot": df_team["SoT"].sum(),
        "sca": df_team["SCA"].sum(),
        "gca": df_team["GCA"].sum(),
        "npxG": df_team["npxG"].sum(),
        "xA": df_team["xA"].sum(),
        "points": int(3 * wins + draws),
    }



In [196]:
def calc_points(df, team, team_stats):
    """Add a team's league points, computed from the odds table, to its stats.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with columns "Home Team", "Away Team" and "Full Time Result".
    team : str
        Team whose points are computed.
    team_stats : dict
        Updated in place with the key "points_2".

    Returns
    -------
    dict
        The same ``team_stats`` object that was passed in.

    Notes
    -----
    A row scores 3 when the team is at home and the result starts with "H",
    or is away and the result starts with "A". It scores 1 when the result
    equals "D", and 0 otherwise. The computation is vectorized. The row-wise
    ``apply`` returned an empty DataFrame for a team with no rows, which made
    ``points_2`` a Series. Such a team now gets 0, and a missing result counts
    as 0 instead of raising.
    """
    df_team = df[(df["Home Team"] == team) | (df["Away Team"] == team)]

    # Missing results become "" so they match none of "H", "A", "D".
    result = df_team["Full Time Result"].fillna("").astype(str)
    home_win = (df_team["Home Team"] == team) & result.str.startswith("H")
    away_win = (df_team["Away Team"] == team) & result.str.startswith("A")
    won = home_win | away_win
    drawn = ~won & (result == "D")

    team_stats["points_2"] = int(3 * won.sum() + drawn.sum())
    return team_stats

In [245]:
# Split both tables at the same fixture count: the first element of each pair
# is the history used for features, the second is the next block of rows.
df_mls_fix4, df_mls_match10 = split_data_matches(5)
df_mls_odds_fix4, df_mls_odds_10 = split_data_odds(5)

# One stats dict per team: match-based features first, then the odds-based
# points ("points_2") added on top of the same dict.
# NOTE(review): "points" and "points_2" come from two separate positional
# slices; presumably they can differ when the slices do not cover the same
# games -- confirm.
all_teams_stats = []
for team in all_teams:
    team_stats = calc_points(
        df_mls_odds_fix4,
        team,
        calc_feats_match(df_mls_fix4, team),
    )
    all_teams_stats.append(team_stats)

df_all_stats = pd.DataFrame(all_teams_stats)


In [246]:
from joblib import dump, load

# Plain-text dump of the per-team aggregates.
print(df_all_stats)

# Feature columns, in the order they are handed to the model.
# NOTE(review): presumably this must match the column order used when the
# model was fitted -- confirm.
features = df_all_stats.loc[:, ["npxG", "sca", "gca", "xA", "shots", "shots_ot"]]

# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code. Only load model files from a trusted source.
model = load('../../data/mls_model.joblib')

model.predict(features)

                team  shots  shots_ot  sca  gca  npxG   xA  points  points_2
0      Minnesota Utd     50        15   86    7   5.0  3.7       5         8
1    Colorado Rapids     49        17   83   10   7.3  4.7       8         7
2        D.C. United     56        16   98    6   4.6  3.5       6         6
3       Philadelphia     60        23  101   11   7.2  5.4      13        13
4        New England     66        23  120   10   5.4  5.2       4         4
5       NY Red Bulls     73        32  123   14   9.0  6.4      10        10
6          Vancouver     53         7   90    5   4.6  3.4       4         4
7     Los Angeles FC     74        27  120   16   8.7  4.8      13        13
8             Austin     72        22  123   18   8.8  8.0       8         8
9         Toronto FC     39        20   68   11   5.4  5.2       7         7
10     FC Cincinnati     83        28  149   14  10.4  7.9       6         6
11      Chicago Fire     47        13   81   10   4.4  3.4       8         9

array([ 4.33345053,  5.28500609,  3.93327547,  5.7913338 ,  5.47978219,
        6.54867438,  5.04401669,  9.22832922, 11.14492837,  5.02530962,
        7.14867121,  6.59223754,  6.06925016,  6.71756557,  3.95928523,
        7.28846562,  5.86987681,  4.9176597 ,  6.74094422,  4.1669554 ,
        8.75353849,  4.64427523,  4.22314817,  6.03660233,  9.02654783,
        3.22093224,  7.41120037,  6.2790443 ])

In [247]:
# Model output per team, stored next to the points actually earned.
df_all_stats["exp_points"] = model.predict(features)

# Positive values: the model expects more points than the team has.
df_all_stats["exp_points_diff"] = df_all_stats["exp_points"].sub(df_all_stats["points"])

df_all_stats

Unnamed: 0,team,shots,shots_ot,sca,gca,npxG,xA,points,points_2,exp_points,exp_points_diff
0,Minnesota Utd,50,15,86,7,5.0,3.7,5,8,4.333451,-0.666549
1,Colorado Rapids,49,17,83,10,7.3,4.7,8,7,5.285006,-2.714994
2,D.C. United,56,16,98,6,4.6,3.5,6,6,3.933275,-2.066725
3,Philadelphia,60,23,101,11,7.2,5.4,13,13,5.791334,-7.208666
4,New England,66,23,120,10,5.4,5.2,4,4,5.479782,1.479782
5,NY Red Bulls,73,32,123,14,9.0,6.4,10,10,6.548674,-3.451326
6,Vancouver,53,7,90,5,4.6,3.4,4,4,5.044017,1.044017
7,Los Angeles FC,74,27,120,16,8.7,4.8,13,13,9.228329,-3.771671
8,Austin,72,22,123,18,8.8,8.0,8,8,11.144928,3.144928
9,Toronto FC,39,20,68,11,5.4,5.2,7,7,5.02531,-1.97469


In [248]:
# Minimum gap in expected points (home minus away) needed to call a winner.
# Smaller gaps are called a draw.
DRAW_MARGIN = 1.5

# team -> expected points, built once instead of filtering the frame twice
# for every fixture. An unknown team raises KeyError naming the team.
exp_points_by_team = dict(zip(df_all_stats["team"], df_all_stats["exp_points"]))

predictions = []
for _, row in df_mls_odds_10.iterrows():
    home_team = row["Home Team"]
    away_team = row["Away Team"]

    exp_points_diff_teams = exp_points_by_team[home_team] - exp_points_by_team[away_team]
    print(exp_points_diff_teams)

    # The boundaries are inclusive on both sides. Previously a gap of exactly
    # +DRAW_MARGIN matched neither "> 1.5" nor "abs(...) < 1.5" and fell
    # through to the away-win branch.
    if exp_points_diff_teams >= DRAW_MARGIN:
        result = "H"
        odds = row["Industry Average Home Team Win Odds (American)"]
    elif exp_points_diff_teams <= -DRAW_MARGIN:
        result = "A"
        odds = row["Industry Average Away Team Win Odds (American)"]
    else:
        result = "D"
        odds = row["Industry Average Draw Odds (American)"]

    predictions.append(
        {"Home Team": home_team, "Away Team": away_team, "Prediction": result, "Odds": odds}
    )

df_slip = pd.DataFrame(predictions)

df_slip

-2.0773170948056707
-2.1261942768091613
2.0732903484294125
0.14870668125332553
-1.5204969581341174
-2.4778734450089894
-0.4877104979004221
-4.584053997845275
-1.6735488845841955
-1.0022159341228676
-1.251863292763601
0.784244066731266
2.385890746667065
-2.883661679378264


Unnamed: 0,Home Team,Away Team,Prediction,Odds
0,Inter Miami,Dynamo,A,216
1,Colorado Rapids,Real Salt Lake,A,474
2,Portland Timbers,LA Galaxy,H,118
3,Orlando City,Chicago Fire,D,244
4,Inter Miami,New England,A,128
5,NY Red Bulls,CF MontrÃ©al,A,452
6,Philadelphia,Columbus Crew,D,224
7,LA Galaxy,Los Angeles FC,A,158
8,Vancouver,Portland Timbers,A,153
9,Sporting KC,Nashville,D,198


In [249]:
# Attach the actual full-time result to each predicted fixture.
# Only the needed columns are taken from each side, so re-running this cell
# does not produce "Full Time Result_x"/"_y" columns and a KeyError.
# validate="one_to_one" raises if a home/away pairing occurs more than once
# on either side, instead of silently multiplying rows.
slip_keys = ["Home Team", "Away Team"]
df_slip = df_mls_odds_10[slip_keys + ["Full Time Result"]].merge(
    df_slip[slip_keys + ["Prediction", "Odds"]],
    on=slip_keys,
    validate="one_to_one",
)[slip_keys + ["Prediction", "Odds", "Full Time Result"]]

df_slip

Unnamed: 0,Home Team,Away Team,Prediction,Odds,Full Time Result
0,Inter Miami,Dynamo,A,216,A
1,Colorado Rapids,Real Salt Lake,A,474,D
2,Portland Timbers,LA Galaxy,H,118,A
3,Orlando City,Chicago Fire,D,244,H
4,Inter Miami,New England,A,128,H
5,NY Red Bulls,CF MontrÃ©al,A,452,A
6,Philadelphia,Columbus Crew,D,224,H
7,LA Galaxy,Los Angeles FC,A,158,H
8,Vancouver,Portland Timbers,A,153,A
9,Sporting KC,Nashville,D,198,A


In [250]:
def calculate_winnings(odds, bet_amount=1):
    """Profit of a winning bet placed at American odds.

    Parameters
    ----------
    odds : number, pandas.Series, or array-like
        American odds. Positive odds pay ``odds / 100`` per unit staked;
        negative odds pay ``100 / abs(odds)`` per unit staked.
    bet_amount : number, default 1
        Stake per bet.

    Returns
    -------
    number, pandas.Series, or numpy.ndarray
        Profit for each entry. A Series input returns a Series with the same
        index and name; a scalar input returns a scalar.

    Notes
    -----
    The formula is chosen per element. The previous ``(odds > 0).any()`` test
    applied the positive-odds formula to every entry as soon as one entry was
    positive, so negative odds in a mixed Series produced negative winnings.
    It also raised AttributeError for a plain Python number. Odds of 0 are
    not valid American odds: a plain Python 0 raises ZeroDivisionError, while
    a zero inside a Series or array yields ``inf``.
    """
    if np.ndim(odds) == 0:
        if odds > 0:
            return (odds / 100) * bet_amount
        return (100 / abs(odds)) * bet_amount

    values = np.asarray(odds, dtype=float)
    # np.where evaluates both formulas for every element, so silence the
    # divide warnings raised for entries whose branch is not selected.
    with np.errstate(divide="ignore", invalid="ignore"):
        winnings = np.where(values > 0, values / 100, 100 / np.abs(values)) * bet_amount

    if isinstance(odds, pd.Series):
        return pd.Series(winnings, index=odds.index, name=odds.name)
    return winnings

In [251]:
# Profit per bet: the payout at the quoted odds when the prediction matches
# the full-time result, otherwise the 1-unit stake is lost.
df_slip["Win"] = np.where(
    df_slip["Prediction"].eq(df_slip["Full Time Result"]),
    calculate_winnings(df_slip["Odds"], 1),
    -1,
)

df_slip

<class 'pandas.core.series.Series'>


Unnamed: 0,Home Team,Away Team,Prediction,Odds,Full Time Result,Win
0,Inter Miami,Dynamo,A,216,A,2.16
1,Colorado Rapids,Real Salt Lake,A,474,D,-1.0
2,Portland Timbers,LA Galaxy,H,118,A,-1.0
3,Orlando City,Chicago Fire,D,244,H,-1.0
4,Inter Miami,New England,A,128,H,-1.0
5,NY Red Bulls,CF MontrÃ©al,A,452,A,4.52
6,Philadelphia,Columbus Crew,D,224,H,-1.0
7,LA Galaxy,Los Angeles FC,A,158,H,-1.0
8,Vancouver,Portland Timbers,A,153,A,1.53
9,Sporting KC,Nashville,D,198,A,-1.0


In [252]:
# Net result of the slip: sum of the per-bet "Win" column (a lost bet is -1).
sumWinnings = df_slip["Win"].sum()

sumWinnings


-2.79