In [1109]:
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime
from geopy.distance import geodesic
ROOT = '/home/robert/.config/JetBrains/DataSpell2021.3/projects/MLS'
INITIAL_ELO = 1400
K = 20
HOME_ADV = 90

In [1110]:
AllYears = pd.read_csv(f'{ROOT}/Results/AllYears.csv', index_col=0)
Schedule = pd.read_csv(f'{ROOT}/Results/2022Schedule.csv')
AllYears.drop(['Venue'], axis=1, inplace=True)
AllYears = AllYears.sort_values(by=['Date'])
AllYears['EloDiff'] = np.nan

In [1111]:
teams = {'Atlanta Utd': ['ATL', 'Atlanta'],
         'Austin FC': ['ATX', 'Austin'],
         'CF Montréal': ['MTL', 'Montreal'],
         'CF Montreal': ['MTL', 'Montreal'],
         'Charlotte FC': ['CLT', 'Charlotte'],
         'Chicago Fire': ['CHI', 'Chicago'],
         'Chivas USA': ['CHV', 'Los Angeles'],
         'Colorado Rapids': ['COL', 'Denver'],
         'Columbus Crew': ['CLB', 'Columbus'],
         'D.C. United': ['DC', 'Washington DC'],
         'Dallas Burn': ['DAL', 'Dallas'],
         'FC Cincinnati': ['CIN', 'Cincinnati'],
         'FC Dallas': ['DAL', 'Dallas'],
         'Houston Dynamo': ['HOU', 'Houston'],
         'Inter Miami': ['MIA', 'Miami'],
         'KC Wiz': ['SKC', 'Kansas City'],
         'KC Wizards': ['SKC', 'Kansas City'],
         'LA Galaxy': ['LA', 'Los Angeles'],
         'Los Angeles FC': ['LAFC', 'Los Angeles'],
         'MetroStars': ['RBNY', 'New York'],
         'Miami Fusion': ['MIAF', 'Miami'],
         'Minnesota Utd': ['MIN', 'Saint Paul'],
         'Montreal Impact': ['MTL', 'Montreal Impact'],
         'NY Red Bulls': ['RBNY', 'New York'],
         'NYCFC': ['NYC', 'New York'],
         'Nashville': ['NSH', 'Nashville'],
         'New England': ['NE', 'Foxborough'],
         'Orlando City': ['ORL', 'Orlando'],
         'Philadelphia': ['PHI', 'Chester'],
         'Portland Timbers': ['POR', 'Portland'],
         'Real Salt Lake': ['RSL', 'Sandy'],
         'San Jose': ['SJ', 'San Jose'],
         'Seattle': ['SEA', 'Seattle'],
         'Sporting KC': ['SKC', 'Kansas City'],
         'Tampa Bay': ['TB', 'Tampa Bay'],
         'Toronto FC': ['TOR', 'Toronto'],
         'Vancouver': ['VAN', 'Vancouver']
             }
elo = {}

In [1112]:
def get_distance(city1, city2):
    def get_lat_long(city):
        lat = gpd.tools.geocode(city).geometry.y.values[0]
        long = gpd.tools.geocode(city).geometry.x.values[0]
        return (lat, long)
    return int(geodesic(get_lat_long(city1), get_lat_long(city2)).mi)

## Elo Functions

In [1113]:
def win_chance(elo1, elo2):
    """Given elo1 and elo2, will return the chance of elo1 winning"""
    return  1 / ( 1 + 10**((elo2-elo1)/400) )

def get_g(score1, score2):
    if score2 > score1:
        score1, score2 = score2, score1
    if score1 - score2 <=1:
        return 1
    elif score1 - score2 == 2:
        return 1.5
    else:
        return (11 + score1 - score2) / 8

def update_elo(home_elo, away_elo, home_score, away_score):
    G = get_g(home_score, away_score)
    if home_score==away_score:
        W = 0.5
    elif home_score > away_score:
        W = 1
    else:
        W = 0
    We = win_chance(home_elo + HOME_ADV, away_elo)
    elo_change = K * G * (W - We)
    home_elo += elo_change
    away_elo -= elo_change
    return int(round(home_elo, 0)), int(round(away_elo, 0))

In [1114]:
for i in AllYears.index:
    AllYears.at[i, 'Home'] = teams[AllYears.at[i, 'Home']][0]
    AllYears.at[i, 'Away'] = teams[AllYears.at[i, 'Away']][0]
    if AllYears.at[i, 'Winner'] == AllYears.at[i, 'Winner']:
        AllYears.at[i, 'Winner'] = teams[AllYears.at[i, 'Winner']][0]
    else:
        AllYears.at[i, 'Winner'] = 'Draw'
    if AllYears.at[i, 'Home'] not in elo:
        elo[AllYears.at[i, 'Home']]=[INITIAL_ELO, AllYears.at[i, 'Season']]
    if AllYears.at[i, 'Away'] not in elo:
        elo[AllYears.at[i, 'Away']]=[INITIAL_ELO, AllYears.at[i, 'Season']]

In [1115]:
def get_regular_season(team_code):
    home_games = AllYears[AllYears.Home==team_code][AllYears[AllYears.Home==team_code].Round=='Regular Season']
    away_games = AllYears[AllYears.Away==team_code][AllYears[AllYears.Away==team_code].Round=='Regular Season']
    return pd.concat([home_games, away_games]).sort_values(by=['Date'])

In [1116]:
def get_overall_record(team_code):
    df = get_regular_season(team_code)
    wins = len(df[df.Winner==team_code])
    draws = len(df[df.Winner=='Draw'])
    losses = len(df) - wins - draws
    return wins, draws, losses

In [1117]:
def regress_elo(elo):
    return int(round(elo - (elo - INITIAL_ELO)/3, 0))

In [1118]:
for i in AllYears.index:
    # Define some variables
    season = AllYears.at[i, 'Season']
    home_code = AllYears.at[i, 'Home']
    home_score = AllYears.at[i, 'GHome']
    away_code = AllYears.at[i, 'Away']
    away_score = AllYears.at[i, 'GAway']

    # Check for new Season
    if season != elo[home_code][1]:
        elo[home_code][0] = regress_elo(elo[home_code][0])
        elo[home_code][1] = season
    if season != elo[away_code][1]:
        elo[home_code][0] = regress_elo(elo[home_code][0])
        elo[home_code][1] = season

    # Add current Elos to df
    AllYears.at[i, 'HomeElo'] = elo[home_code][0]
    AllYears.at[i, 'AwayElo'] = elo[away_code][0]
    AllYears.at[i, 'EloDiff'] = elo[home_code][0] - elo[away_code][0]

    # Update Elos
    new_home_elo, new_away_elo = update_elo(elo[home_code][0], elo[away_code][0], home_score, away_score)
    elo[home_code][0]= new_home_elo
    elo[away_code][0]= new_away_elo

## All-time Rolling Elos

Eventually, I will regress the teams to the mean towards the end of the season but for now, a rough this will represent a rough model to further work on some features. Mainly, creating a criteria in which the model will predict draws

In [1119]:
AllYears.HomeElo = AllYears.HomeElo.astype('int64')
AllYears.AwayElo = AllYears.AwayElo.astype('int64')
AllYears.EloDiff = AllYears.EloDiff.astype('int64')
AllYears

Unnamed: 0,Season,Round,Date,Home,GHome,GAway,Away,Winner,EloDiff,HomeElo,AwayElo
6848,1996,Regular Season,1996-04-06,SJ,1,0,DC,SJ,0,1400,1400
6849,1996,Regular Season,1996-04-13,LA,2,1,RBNY,LA,0,1400,1400
6850,1996,Regular Season,1996-04-13,TB,3,2,NE,TB,0,1400,1400
6851,1996,Regular Season,1996-04-13,SKC,3,0,COL,SKC,0,1400,1400
6852,1996,Regular Season,1996-04-13,CLB,4,0,DC,CLB,7,1400,1393
...,...,...,...,...,...,...,...,...,...,...,...
6844,2022,Regular Season,2022-02-27,ATL,3,1,SKC,ATL,-49,1406,1455
6845,2022,Regular Season,2022-02-27,ORL,2,0,MTL,ORL,23,1401,1378
6847,2022,Regular Season,2022-02-27,SEA,0,1,NSH,NSH,-56,1431,1487
6843,2022,Regular Season,2022-02-27,HOU,0,0,RSL,Draw,-42,1349,1391


In [1120]:
# Draw %
PostDraws = AllYears[AllYears.Season>=2000]
Draws = PostDraws[PostDraws.Winner=='Draw']
len(Draws) / len(PostDraws)

0.246286535697173

In [1121]:
# Home Team Win%
len(PostDraws[PostDraws.Home==PostDraws.Winner]) / len(PostDraws)

0.5067880530266731

In [1122]:
# Home Team Loss%
len(PostDraws[PostDraws.Away==PostDraws.Winner]) / len(PostDraws)

0.24692541127615397

In [1123]:
Predictions = AllYears[AllYears.Season >= 1998].copy(deep=True)
Predictions['Prediction']=np.nan
Predictions

Unnamed: 0,Season,Round,Date,Home,GHome,GAway,Away,Winner,EloDiff,HomeElo,AwayElo,Prediction
173,1998,Regular Season,1998-03-15,MIAF,0,2,DC,DC,-93,1400,1493,
175,1998,Regular Season,1998-03-21,DC,3,2,SKC,DC,59,1448,1389,
177,1998,Regular Season,1998-03-21,MIAF,0,2,CHI,CHI,-15,1385,1400,
174,1998,Regular Season,1998-03-21,LA,3,3,SJ,LA,66,1418,1352,
176,1998,Regular Season,1998-03-21,DAL,1,1,COL,DAL,-2,1399,1401,
...,...,...,...,...,...,...,...,...,...,...,...,...
6844,2022,Regular Season,2022-02-27,ATL,3,1,SKC,ATL,-49,1406,1455,
6845,2022,Regular Season,2022-02-27,ORL,2,0,MTL,ORL,23,1401,1378,
6847,2022,Regular Season,2022-02-27,SEA,0,1,NSH,NSH,-56,1431,1487,
6843,2022,Regular Season,2022-02-27,HOU,0,0,RSL,Draw,-42,1349,1391,


In [1124]:
for i in Predictions.index:
    if Predictions.at[i, 'HomeElo'] >= Predictions.at[i, 'AwayElo']:
        Predictions.at[i, 'Prediction'] = Predictions.at[i, 'Home']
    else:
        Predictions.at[i, 'Prediction'] = Predictions.at[i, 'Away']

In [1125]:
NonDraws =  Predictions[Predictions.Winner != 'Draw']
NonDraws

Unnamed: 0,Season,Round,Date,Home,GHome,GAway,Away,Winner,EloDiff,HomeElo,AwayElo,Prediction
173,1998,Regular Season,1998-03-15,MIAF,0,2,DC,DC,-93,1400,1493,DC
175,1998,Regular Season,1998-03-21,DC,3,2,SKC,DC,59,1448,1389,DC
177,1998,Regular Season,1998-03-21,MIAF,0,2,CHI,CHI,-15,1385,1400,CHI
174,1998,Regular Season,1998-03-21,LA,3,3,SJ,LA,66,1418,1352,LA
176,1998,Regular Season,1998-03-21,DAL,1,1,COL,DAL,-2,1399,1401,COL
...,...,...,...,...,...,...,...,...,...,...,...,...
6834,2022,Regular Season,2022-02-26,LAFC,3,0,COL,LAFC,-64,1407,1471,COL
6844,2022,Regular Season,2022-02-27,ATL,3,1,SKC,ATL,-49,1406,1455,SKC
6845,2022,Regular Season,2022-02-27,ORL,2,0,MTL,ORL,23,1401,1378,ORL
6847,2022,Regular Season,2022-02-27,SEA,0,1,NSH,NSH,-56,1431,1487,NSH


## Model Accuracy

In [1126]:
print(f'{round(len(NonDraws[NonDraws.Winner==NonDraws.Prediction])/len(NonDraws) * 100, 2)}%')

57.53%


This shows the model is somewhat effective. The Elo model is currently not accounting for rest time or distance travelled.

## Making Prediction

In [1127]:
def money_line(win_odds):
    if win_odds >= .5:
        return int(round((100 * win_odds) / (win_odds - 1), 0))
    else:
        return int(round(-(100 * (win_odds - 1)) / win_odds, 0))

In [1128]:
def get_odds(home_code, away_code):
    home_win_odds = win_chance(elo[home_code][0], elo[away_code][0])
    away_win_odds = 1 - home_win_odds
    return money_line(home_win_odds), money_line(away_win_odds)

In [1129]:
def profit(line, amount_wagered):
    if line > 0:
        return round(amount_wagered * line/100, 2)
    if line < 0:
        return round(-1 * amount_wagered * 100/line, 2)

In [1130]:
get_odds('SKC', 'HOU')

(-172, 172)

In [1131]:
profit(-525, 100)

19.05

In [1132]:
profit(-105, 100)

95.24

Like traditional American betting lines, negative odds indicate the favorite. A negative amount represents how much you have to bet to win $100 (i.e.  a $100 bet on a -525 line returns $19.05 + $100). A positive line means the team is not the favorite and the line represents the amount of profit on a $100 bet (i.e. a $100 bet on a +200 line returns $200 + $100).

In [1133]:
Schedule.dropna(inplace=True)
Schedule['ModelHome'] = np.nan
Schedule['ModelAway'] = np.nan
Schedule['HomeDiff'] = np.nan
Schedule['AwayDiff'] = np.nan

In [1134]:
for i in Schedule.index:
    Schedule.at[i, 'Home'] = teams[Schedule.at[i, 'Home']][0]
    Schedule.at[i, 'Away'] = teams[Schedule.at[i, 'Away']][0]
    Schedule.at[i, 'ModelHome'], Schedule.at[i, 'ModelAway'] = get_odds(Schedule.at[i, 'Home'], Schedule.at[i, 'Away'])
    Schedule.at[i, 'HomeDiff'] = Schedule.at[i, 'DraftKingsHome'] - Schedule.at[i, 'ModelHome']
    Schedule.at[i, 'AwayDiff'] = Schedule.at[i, 'DraftKingsAway'] - Schedule.at[i, 'ModelAway']

In [1135]:
columns = ['Day', 'Date', 'Home', 'DraftKingsHome', 'ModelHome', 'Away', 'DraftKingsAway', 'ModelAway', 'HomeDiff', 'AwayDiff']
Schedule = Schedule[columns]
Schedule

Unnamed: 0,Day,Date,Home,DraftKingsHome,ModelHome,Away,DraftKingsAway,ModelAway,HomeDiff,AwayDiff
0,Sat,2022-03-05,NE,-525.0,-250.0,DAL,370.0,250.0,-275.0,120.0
1,Sat,2022-03-05,TOR,-120.0,225.0,RBNY,-105.0,-225.0,-345.0,120.0
2,Sat,2022-03-05,SKC,-260.0,-172.0,HOU,205.0,172.0,-88.0,33.0
3,Sat,2022-03-05,MTL,-135.0,139.0,PHI,110.0,-139.0,-274.0,249.0
4,Sat,2022-03-05,SJ,-110.0,137.0,CLB,-110.0,-137.0,-247.0,27.0
5,Sat,2022-03-05,VAN,165.0,156.0,NYC,-210.0,-156.0,9.0,-54.0
6,Sat,2022-03-05,RSL,100.0,117.0,SEA,-120.0,-117.0,-17.0,-3.0
7,Sat,2022-03-05,COL,-195.0,-123.0,ATL,155.0,123.0,-72.0,32.0
8,Sat,2022-03-05,CHI,-295.0,153.0,ORL,225.0,-153.0,-448.0,378.0
9,Sat,2022-03-05,MIN,-145.0,154.0,NSH,115.0,-154.0,-299.0,269.0


Looking at my model's predictions vs. Draft Kings a few games standout as obvious differences. Austin vs. Miami has Austin heavy favorites after the beat Cincinnati 5-0. Both are below average teams but CIN is especially bad. I think Miami is a solid bet here. LAFC are also heavy favorites over Portland because of their 3-0 win over Colorado while my model sees the team as mostly even. Last is CHI over ORL, I'm honestly not sure why.

## Placing Bets

I'm going to go with the premise that I have $100 a week to bet on MLS game and I'm going to track how much money I make/lose over the course of a season.

In [1136]:
games = [8, 12]
wager_amounts = [25, 75]
wager_teams = ['ORL', 'MIA']

In [1137]:
Bets = Schedule.iloc[games].copy(deep=True)
Bets['Wagers'] = wager_amounts
Bets['BetTeam'] = wager_teams

In [1138]:
Bets

Unnamed: 0,Day,Date,Home,DraftKingsHome,ModelHome,Away,DraftKingsAway,ModelAway,HomeDiff,AwayDiff,Wagers,BetTeam
8,Sat,2022-03-05,CHI,-295.0,153.0,ORL,225.0,-153.0,-448.0,378.0,25,ORL
12,Sun,2022-03-06,ATX,-450.0,101.0,MIA,330.0,-101.0,-551.0,431.0,75,MIA
