In [2]:
import datetime
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup, Comment

## Import historical game by game data from FiveThirtyEight GitHub Repo

[FiveThirtyEight Data Repo](https://github.com/fivethirtyeight/data)

In [7]:
# Download FiveThirtyEight's game-by-game Elo history, then convert the
# "date" column to datetime64[ns] as a separate step.
url = "https://projects.fivethirtyeight.com/nba-model/nba_elo.csv"
df = pd.read_csv(url)
df = df.astype({"date": "datetime64[ns]"})
df.head()

Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,elo1_post,elo2_post,carmelo1_pre,carmelo2_pre,carmelo1_post,carmelo2_post,carmelo_prob1,carmelo_prob2,score1,score2
0,1946-11-01,1947,0,,TRH,NYK,1300.0,1300.0,0.640065,0.359935,1293.2767,1306.7233,,,,,,,66.0,68.0
1,1946-11-02,1947,0,,CHS,NYK,1300.0,1306.7233,0.631101,0.368899,1309.6521,1297.0712,,,,,,,63.0,47.0
2,1946-11-02,1947,0,,PRO,BOS,1300.0,1300.0,0.640065,0.359935,1305.1542,1294.8458,,,,,,,59.0,53.0
3,1946-11-02,1947,0,,STB,PIT,1300.0,1300.0,0.640065,0.359935,1304.6908,1295.3092,,,,,,,56.0,51.0
4,1946-11-02,1947,0,,DTF,WSC,1300.0,1300.0,0.640065,0.359935,1279.6189,1320.3811,,,,,,,33.0,50.0


### Analyze fields

See [FiveThirtyEight NBA Predictions](https://projects.fivethirtyeight.com/2018-nba-predictions/) for explanation of Elo and CARM-Elo

| Name           |  Type         | Description                                                                       |
| -------------  |-------------  | ----------------------------------------------------------------------------------|
| date           | datetime      |   Game date                                                                       |                      
| season         | int           |   End year of season                                                              |
| neutral        | bool          |   1=neutral home court                                                            |
| playoff        | string        |   1946-2015: t=playoffs. Starting in 2016: q=conference quarterfinals, s=conference semifinals, c=conference finals, f=finals |
| team1          | string        |   Home team initials                                                              |
| team2          | string        |   Away team initials                                                              |
| elo1_pre       | double        |   Home team Elo before game                                                       |
| elo2_pre       | double        |   Away team Elo before game                                                       |
| elo_prob1      | double        |   Home team win probability based on the pre-game Elo ratings                     |
| elo_prob2      | double        |   Away team win probability based on the pre-game Elo ratings                     |
| elo1_post      | double        |   Home team Elo after game                                                        |
| elo2_post      | double        |   Away team Elo after game                                                        |
| carmelo1_pre\* | double        |   Home team CARM-Elo before game                                                  |
| carmelo2_pre\* | double        |   Away team CARM-Elo before game                                                  |
| carmelo1_post\*| double        |   Home team CARM-Elo after game                                                   |
| carmelo2_post\*| double        |   Away team CARM-Elo after game                                                   |
| carmelo_prob1\*| double        |   Home team win probability based on the pre-game CARM-Elo ratings                |
| carmelo_prob2\*| double        |   Away team win probability based on the pre-game CARM-Elo ratings                |
| score1         | int           |   Home team final score                                                           |
| score2         | int           |   Away team final score                                                           |

\* data only available from 2015-2018

#### Determine CARM-Elo starting point

In [8]:
# Keep only the identifying columns plus the four CARM-Elo rating columns.
carm_elo_columns = [
    "date", "season", "team1", "team2",
    "carmelo1_pre", "carmelo2_pre", "carmelo1_post", "carmelo2_post",
]
df_carm_elo = df[carm_elo_columns]
# Earliest games (by date) for which a home-team CARM-Elo rating is present.
has_carm_elo = ~df_carm_elo["carmelo1_pre"].isnull()
df_carm_elo[has_carm_elo].sort_values("date").head()

Unnamed: 0,date,season,team1,team2,carmelo1_pre,carmelo2_pre,carmelo1_post,carmelo2_post
63157,2015-10-27,2016,ATL,DET,1542.664875,1451.827385,1521.809995,1472.682265
63158,2015-10-27,2016,CHI,CLE,1564.372491,1732.025482,1570.473937,1725.924036
63159,2015-10-27,2016,GSW,NOP,1730.513765,1555.126845,1734.342589,1551.298021
63173,2015-10-28,2016,LAL,MIN,1317.548331,1345.379348,1312.320723,1350.606956
63172,2015-10-28,2016,SAC,LAC,1487.260869,1671.590488,1481.391159,1677.460198


#### CARM-Elo data available starting with 2015-2016 season

## Import historical season data from basketball-reference

[Basketball-Reference Miscellaneous season stats](https://www.basketball-reference.com/leagues/NBA_2018.html#misc_stats::none)

In [9]:
def get_season_data(end_year):
    '''
    get cumulative statistics for season specified by end_year

    end_year: int, year to query (ex: 2018 queries 2017-2018 season)

    returns Pandas dataframe w/ basketball-reference.com's miscellaneous stats table for season
    specified by end_year, with an added "Season" column set to end_year

    raises requests.HTTPError if the page request returns an error status, and
    ValueError if the miscellaneous stats table cannot be located in the page
    '''
    season_url = "https://www.basketball-reference.com/leagues/NBA_{}.html".format(end_year)
    result = requests.get(season_url)
    # fail loudly on 4xx/5xx instead of trying to parse an error page
    result.raise_for_status()
    soup = BeautifulSoup(result.content, "html.parser")
    container = soup.find('div', id="all_misc_stats")
    if container is None:
        raise ValueError("No 'all_misc_stats' section found for season {}".format(end_year))
    # html tree is strange...table is wrapped inside a comment
    commented_tables = [c for c in container.children if isinstance(c, Comment)]
    if not commented_tables:
        raise ValueError("No commented-out stats table found for season {}".format(end_year))
    # parse table with pandas; wrap the markup in StringIO because passing a
    # literal HTML string to read_html is deprecated in recent pandas versions
    df = pd.read_html(StringIO(str(commented_tables[0])), header=1)[0]
    df["Season"] = end_year
    return df

#### Option 1) Download latest data

In [10]:
# First and last season (identified by end year, both inclusive) to download.
start_year = 1951
end_year = 2018
# One request per season; the range is driven by the two settings above so
# that changing them actually changes what is downloaded.
dfs_1951_2018 = [get_season_data(season) for season in range(start_year, end_year + 1)]
df_1951_2018 = pd.concat(dfs_1951_2018)
# Strip the "*" marker that appears in some team names.
df_1951_2018["Team"] =  df_1951_2018["Team"].map(lambda s: s.replace("*", ""))
df_1951_2018 = df_1951_2018.set_index(["Season", "Team"])
pd.set_option('display.max_columns', 30)
df_1951_2018.head()
# Save
# df_1951_2018.to_csv("../Data/nba_season_data.csv")

Unnamed: 0_level_0,Unnamed: 1_level_0,Rk,Age,W,L,PW,PL,MOV,SOS,SRS,ORtg,DRtg,Pace,FTr,3PAr,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Arena,Attend.,Attend./G
Season,Team,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
1951,Minneapolis Lakers,1.0,,44.0,24.0,49.0,19.0,5.41,-0.63,4.79,86.4,80.7,94.8,0.356,,0.436,0.373,,,0.262,,,,,Minneapolis Auditorium,,
1951,Philadelphia Warriors,2.0,,40.0,26.0,43.0,23.0,3.76,-0.36,3.4,84.8,81.0,99.3,0.385,,0.425,0.35,,,0.294,,,,,Philadelphia Arena,,
1951,Rochester Royals,3.0,,41.0,27.0,42.0,26.0,2.99,-0.44,2.54,89.3,86.2,92.3,0.418,,0.452,0.378,,,0.315,,,,,Edgerton Park Arena,,
1951,Syracuse Nationals,4.0,,32.0,34.0,34.0,32.0,0.53,0.09,0.62,86.4,85.9,98.1,0.491,,0.435,0.351,,,0.356,,,,,State Fair Coliseum,,
1951,New York Knicks,5.0,,36.0,30.0,34.0,32.0,0.41,0.07,0.49,88.0,87.6,94.8,0.415,,0.445,0.379,,,0.296,,,,,Madison Square Garden (III),,


#### Option 2) Use saved offline file

In [16]:
# Load the previously saved season table instead of re-scraping it.
df_1951_2018 = pd.read_csv("../Data/nba_season_data.csv")
# Strip the "*" marker from team names before using them as an index level.
df_1951_2018["Team"] = [team.replace("*", "") for team in df_1951_2018["Team"]]
df_1951_2018 = df_1951_2018.set_index(["Season", "Team"])
pd.set_option("display.max_columns", 30)
df_1951_2018.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Rk,Age,W,L,PW,PL,MOV,SOS,SRS,ORtg,DRtg,Pace,FTr,3PAr,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Arena,Attend.,Attend./G
Season,Team,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
1951,Minneapolis Lakers,1.0,,44.0,24.0,49.0,19.0,5.41,-0.63,4.79,86.4,80.7,94.8,0.356,,0.436,0.373,,,0.262,,,,,Minneapolis Auditorium,,
1951,Philadelphia Warriors,2.0,,40.0,26.0,43.0,23.0,3.76,-0.36,3.4,84.8,81.0,99.3,0.385,,0.425,0.35,,,0.294,,,,,Philadelphia Arena,,
1951,Rochester Royals,3.0,,41.0,27.0,42.0,26.0,2.99,-0.44,2.54,89.3,86.2,92.3,0.418,,0.452,0.378,,,0.315,,,,,Edgerton Park Arena,,
1951,Syracuse Nationals,4.0,,32.0,34.0,34.0,32.0,0.53,0.09,0.62,86.4,85.9,98.1,0.491,,0.435,0.351,,,0.356,,,,,State Fair Coliseum,,
1951,New York Knicks,5.0,,36.0,30.0,34.0,32.0,0.41,0.07,0.49,88.0,87.6,94.8,0.415,,0.445,0.379,,,0.296,,,,,Madison Square Garden (III),,


### Analyze fields

| Name      |  Type  | Description                                                                                   |
| --------- |--------| --------------------------------------------------------------------------------------------- |
| Rk        | double |   Rank (used to index for sorting within webpage)                                             |
| Age       | double |   Age of Player at the start of February 1st of that season.                                  |
| W         | double |   Wins                                                                                        |
| L         | double |   Losses                                                                                      |
| PW        | double |   Pythagorean wins, i.e., expected wins based on points scored and allowed                    |
| PL        | double |   Pythagorean losses, i.e., expected losses based on points scored and allowed                |
| MOV       | double |   Margin of Victory                                                                           |
| SOS       | double |   Strength of Schedule; a rating of strength of schedule. The rating is denominated in points above/below average, where zero is average.                                                                          |
| SRS       | double |   Simple Rating System; a team rating that takes into account average point differential and strength of schedule. The rating is denominated in points above/below average, where zero is average.                |
| ORtg      | double |   An estimate of points produced (players) or scored (teams) per 100 possessions              |  
| DRtg      | double |   An estimate of points allowed per 100 possessions                                           |
| Pace      | double |   An estimate of possessions per 48 minutes                                                   |
| FTr       | double |   Number of FT Attempts Per FG Attempt                                                        |
| 3PAr\*\*  | double |   Percentage of FG Attempts from 3-Point Range                                                |
| TS%       | double |   A measure of shooting efficiency that takes into account 2-point field goals, 3-point field goals, and free throws.                                                                                              |
| eFG%      | double |   Adjusts for the fact that a 3-point field goal is worth one more point than a 2-point field goal.                                                                                                                |
| TOV%\*    | double |   An estimate of turnovers committed per 100 plays.                                           |
| ORB%\*    | double |   An estimate of the percentage of available offensive rebounds a team grabbed.               |
| FT/FGA    | double |   Free Throws Per Field Goal Attempt                                                          |
| eFG%.1    | double |    Opponent Effective Field Goal Percentage                                                   |
| TOV%.1\*  | double |   Opponent Turnover Percentage                                                                |
| DRB%\*    | double |   An estimate of the percentage of available defensive rebounds a team grabbed.               |
| FT/FGA.1\*| double |   Opponent Free Throws Per Field Goal Attempt                                                 |
| Arena     | string |   Home Arena                                                                                  |
| Attend.   | double |   Cumulative home attendance                                                                  |
| Attend./G | double |   Attendance per home game at the team's primary arena                                        |

\* data only available from 1974-2018

\*\* data only available from 1980-2018

NOTE: No data available for 1954-1955 Baltimore Bullets

## Import additional historical game by game data from basketball-reference

Starting from the 1983-1984 season, basketball-reference.com has game by game advanced statistics such as ORtg, DRtg, and eFG%

[Example boxscore](https://www.basketball-reference.com/boxscores/201803050CHI.html)

In [None]:
def boxscore_links_for_date(date):
    '''
    get list of basketball-reference links to boxscores for games on given date

    date: datetime.datetime object with year, month, and day specified

    returns list of urls to basketball-reference single game boxscores for given date
    (empty list when the page contains no game links)

    raises requests.HTTPError if the page request returns an error status
    '''
    link = "https://www.basketball-reference.com/boxscores/?month={}&day={}&year={}".format(date.month, date.day, date.year)
    result = requests.get(link)
    # an error page would otherwise be indistinguishable from "no games that day"
    result.raise_for_status()
    soup = BeautifulSoup(result.content, "html.parser")
    boxscore_links = []
    for game in soup.find_all("td", {"class": "right gamelink"}):
        anchor = game.find("a")
        # skip cells that carry no usable link rather than failing on None
        if anchor is None or not anchor.get("href"):
            continue
        # https, consistent with every other basketball-reference URL in this notebook
        boxscore_links.append("https://www.basketball-reference.com" + anchor.get("href"))
    return boxscore_links

In [None]:
def boxscore_dict_for_link(link):
    '''
    advanced box score stats as dictionary from basketball-reference boxscore link

    link: string, link to single game basketball-reference boxscore

    returns dictionary with advanced stats for home (team1) and away (team2) teams for boxscore linked;
    keys are "team1_<column>" / "team2_<column>" for every column of the four-factors table
    plus the derived "DRtg" and "NetRtg" columns
    NOTE: basketball-reference only supports single game advanced stats starting from 1983-1984 NBA season

    raises AssertionError if the page has no four-factors section, and ValueError if
    that section does not contain a commented-out table
    '''
    result = requests.get(link)
    soup = BeautifulSoup(result.content, "html.parser")
    four_factors = soup.find("div", id="all_four_factors")
    if four_factors is None:
        # explicit raise (same exception type as before) so the check is not stripped under -O
        raise AssertionError("Advanced box score metrics only available for dates with at least 1 NBA game starting from 1983-1984 season")
    # the table markup is wrapped inside an HTML comment; use the first comment child
    table_html = None
    for c in four_factors.children:
        if isinstance(c, Comment):
            s_ind = c.index("<table")
            e_ind = c.index("</table>")
            table_html = c[s_ind:e_ind + len("</table>")]
            break
    if table_html is None:
        raise ValueError("No commented-out four factors table found at {}".format(link))
    # wrap in StringIO: passing a literal HTML string to read_html is deprecated in recent pandas
    df = pd.read_html(StringIO(str(table_html)), header=1, index_col=0)[0]
    # each team's DRtg is the other row's ORtg (row order reversed) -- assumes exactly two rows
    df["DRtg"] = df["ORtg"].values[::-1]
    df["NetRtg"] = df["ORtg"] - df["DRtg"]
    d = {}
    for i, team_name in enumerate(df.index):
        # first table row is stored as team2 (away), the other as team1 (home)
        # NOTE(review): presumes the away team is listed first -- confirm against the page layout
        prefix = "team2_" if i == 0 else "team1_"
        for col in df.columns:
            d[prefix+col] = df.loc[team_name, col]
    return d

#### Crawling through nba_elo data to find advanced box score metrics for all games since 1983-1984 NBA season

In [None]:
# Seasons to crawl, identified by end year (inclusive).
# NOTE(review): the heading above says 1983-1984 (season == 1984); 1993 looks like a resume point -- confirm.
start_year = 1993
end_year = 2018
url = "https://projects.fivethirtyeight.com/nba-model/nba_elo.csv"
df = pd.read_csv(url).astype({'date': 'datetime64[ns]'})


def _boxscore_links_with_retry(date, max_empty_attempts=5, empty_wait_seconds=5):
    '''
    boxscore_links_for_date with retry handling

    date: date passed through to boxscore_links_for_date
    max_empty_attempts: int, how many empty results to accept before giving up
    empty_wait_seconds: number, pause between attempts that returned no links

    returns non-empty list of boxscore links for date

    raises RuntimeError if no links are found after max_empty_attempts attempts
    '''
    empty_attempts = 0
    while True:
        try:
            links = boxscore_links_for_date(date)
        except (ConnectionError, requests.exceptions.ConnectionError):
            # requests raises its own ConnectionError, which is NOT a subclass of the
            # builtin one, so both must be listed for the retry to trigger
            print("Connection Error occured. Sleeping for 1 min and re-trying")
            time.sleep(60)
            continue
        if links:
            return links
        # bounded and throttled, instead of re-requesting forever in a tight loop
        empty_attempts += 1
        if empty_attempts >= max_empty_attempts:
            raise RuntimeError("No boxscore links found for {} after {} attempts".format(date, empty_attempts))
        time.sleep(empty_wait_seconds)


# First and last date with a recorded score for each season
season_start_dates = []
season_end_dates = []
for i in range(start_year, end_year+1):
    season_df = df[(df["season"] == i) & (~pd.isnull(df["score1"]))]
    season_start_dates.append(season_df["date"].iloc[0])
    season_end_dates.append(season_df["date"].iloc[-1])

# Query basketball reference game by game, and save results for each year
for season_start_date, season_end_date in zip(season_start_dates, season_end_dates):
    df_slice = df[(df["date"] >= season_start_date) & (df["date"] <= season_end_date)].copy()
    current_date = season_start_date
    boxscores_for_date = _boxscore_links_with_retry(current_date)
    data = []
    for _, row in df_slice.iterrows():
        print("{} vs. {} on {}".format(row["team1"], row["team2"], row["date"]))
        if current_date != row["date"]:
            # get new boxscores for date
            print("New day ({}), getting boxscores".format(row["date"]))
            boxscores_for_date = _boxscore_links_with_retry(row["date"])
            current_date = row["date"]
        # match the game by home team initials in the link, falling back to the away team
        candidates = [link for link in boxscores_for_date if row["team1"] in link]
        if not candidates:
            candidates = [link for link in boxscores_for_date if row["team2"] in link]
        if not candidates:
            raise IndexError("No boxscore link found for {} vs. {} on {}".format(row["team1"], row["team2"], row["date"]))
        boxscore_for_game = candidates[0]
        d = boxscore_dict_for_link(boxscore_for_game)
        # row is the per-iteration Series from iterrows; extend it with the advanced stats
        for key, val in d.items():
            row[key] = val
        data.append(row)
    # save data for year
    print("Saving for season from {} to {}".format(season_start_date, season_end_date))
    pd.DataFrame(data).to_csv("../Data/nba_game_data_{}-{}-{}_to_{}-{}-{}.csv".format(season_start_date.year, season_start_date.month, season_start_date.day, season_end_date.year, season_end_date.month, season_end_date.day))


In [None]:
# Saved crawl outputs to stitch back together (one CSV per crawled date range).
fns = [
    '../Data/nba_game_data_1983-10-28_to_1983-10-30.csv',
    '../Data/nba_game_data_1983-11-1_to_1983-11-30.csv',
    '../Data/nba_game_data_1983-12-1_to_1983-12-30.csv',
    '../Data/nba_game_data_1984-1-1_to_1984-1-26.csv',
    '../Data/nba_game_data_1984-1-27_to_1984-6-12.csv'
]
# Bind to a dedicated name: the cells below still need `df` to be the full Elo
# dataframe (they filter it for season 2018), so it must not be overwritten here.
df_boxscores = pd.concat([pd.read_csv(fn, index_col=0) for fn in fns])
df_boxscores.head()

### Calculate moving SRS

In [26]:
# Cross-section of the season table for end year 2018, indexed by team.
df_1951_2018.xs(2018, level="Season").head()

Unnamed: 0_level_0,Rk,Age,W,L,PW,PL,MOV,SOS,SRS,ORtg,DRtg,Pace,FTr,3PAr,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Arena,Attend.,Attend./G
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
Golden State Warriors,1.0,29.0,50.0,14.0,48.0,16.0,8.58,0.02,8.6,115.4,106.9,100.2,0.249,0.35,0.616,0.582,14.4,21.3,0.202,0.498,12.6,76.3,0.191,Oracle Arena,627072.0,19596.0
Houston Rockets,2.0,29.7,50.0,13.0,48.0,15.0,8.84,-0.26,8.58,116.0,107.0,98.1,0.309,0.504,0.597,0.555,12.8,21.8,0.245,0.528,13.6,80.6,0.178,Toyota Center,552135.0,17811.0
Toronto Raptors,3.0,25.8,46.0,17.0,48.0,15.0,8.75,-0.63,8.12,113.8,104.9,97.8,0.259,0.374,0.574,0.536,11.9,22.7,0.207,0.497,13.7,76.9,0.217,Air Canada Centre,634465.0,19827.0
Boston Celtics,4.0,24.6,45.0,20.0,42.0,23.0,4.31,-0.35,3.96,108.1,103.6,96.1,0.239,0.369,0.554,0.52,12.7,21.1,0.185,0.489,12.8,78.3,0.201,TD Garden,614592.0,18076.0
Minnesota Timberwolves,5.0,27.1,38.0,28.0,39.0,27.0,2.73,-0.1,2.63,113.8,111.0,95.8,0.286,0.26,0.569,0.526,11.5,24.5,0.23,0.541,14.2,76.2,0.186,Target Center,537953.0,16811.0


In [27]:
# Games from the 2018 season played strictly before 2018-03-07.
in_2018_season = df["season"] == 2018
before_cutoff = df["date"] < datetime.datetime(2018, 3, 7)
df_2018_season = df[in_2018_season & before_cutoff]
df_2018_season.head()

Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,elo1_post,elo2_post,carmelo1_pre,carmelo2_pre,carmelo1_post,carmelo2_post,carmelo_prob1,carmelo_prob2,score1,score2
65782,2017-10-17,2018,0,,CLE,BOS,1647.989805,1532.470014,0.775674,0.224326,1650.129184,1530.330635,1648.0,1549.0,1650.308911,1546.691089,0.74629,0.25371,102.0,99.0
65783,2017-10-17,2018,0,,GSW,HOU,1760.609663,1574.467471,0.838508,0.161492,1751.819016,1583.258119,1761.0,1675.0,1753.884111,1682.115889,0.747495,0.252505,121.0,122.0
65784,2017-10-18,2018,0,,ORL,MIA,1390.229357,1552.809706,0.410901,0.589099,1400.663642,1542.375421,1458.0,1483.0,1464.397752,1476.602248,0.598634,0.401366,116.0,109.0
65785,2017-10-18,2018,0,,DET,CHO,1456.654984,1473.216401,0.617821,0.382179,1464.992663,1464.878722,1427.0,1542.0,1439.104231,1529.895769,0.476536,0.523464,102.0,90.0
65786,2017-10-18,2018,0,,IND,BRK,1502.884837,1405.034022,0.757481,0.242519,1506.960938,1400.957921,1406.0,1381.0,1411.729285,1375.270715,0.671978,0.328022,140.0,131.0


In [28]:
# Full team name -> three-letter abbreviation, listed alphabetically by team name.
nba_initials = {
    "Atlanta Hawks": "ATL",
    "Boston Celtics": "BOS",
    "Brooklyn Nets": "BRK",
    "Charlotte Hornets": "CHO",
    "Chicago Bulls": "CHI",
    "Cleveland Cavaliers": "CLE",
    "Dallas Mavericks": "DAL",
    "Denver Nuggets": "DEN",
    "Detroit Pistons": "DET",
    "Golden State Warriors": "GSW",
    "Houston Rockets": "HOU",
    "Indiana Pacers": "IND",
    "Los Angeles Clippers": "LAC",
    "Los Angeles Lakers": "LAL",
    "Memphis Grizzlies": "MEM",
    "Miami Heat": "MIA",
    "Milwaukee Bucks": "MIL",
    "Minnesota Timberwolves": "MIN",
    "New Orleans Pelicans": "NOP",
    "New York Knicks": "NYK",
    "Oklahoma City Thunder": "OKC",
    "Orlando Magic": "ORL",
    "Philadelphia 76ers": "PHI",
    "Phoenix Suns": "PHO",
    "Portland Trail Blazers": "POR",
    "Sacramento Kings": "SAC",
    "San Antonio Spurs": "SAS",
    "Toronto Raptors": "TOR",
    "Utah Jazz": "UTA",
    "Washington Wizards": "WAS",
}

In [29]:
def margin_for_team(abbrev, margins):
    '''
    average scoring margin for a team

    abbrev: key into margins identifying the team
    margins: dict mapping team key to a list of per-game point margins

    returns the arithmetic mean of margins[abbrev]
    '''
    team_margins = margins[abbrev]
    total_margin = sum(team_margins)
    return total_margin / len(team_margins)

In [30]:
def sos_for_team(abbrev, schedule, margins):
    '''
    average opponent scoring margin for a team, weighted by games played

    abbrev: key into schedule identifying the team
    schedule: dict mapping team key to a dict of {opponent key: games played}
    margins: dict mapping team key to a list of per-game point margins

    returns the mean of each opponent's average margin, with every opponent
    counted once per game played against them
    '''
    opponent_margins = []
    for opponent, games_played in schedule[abbrev].items():
        # repeat the opponent's average margin once per meeting
        opponent_margins.extend([margin_for_team(opponent, margins)] * games_played)
    return sum(opponent_margins) / len(opponent_margins)

In [31]:
# Per-team list of game margins, and per-team {opponent: games played} counts.
margins = {abbrev: [] for abbrev in nba_initials.values()}
schedule = {abbrev: {} for abbrev in nba_initials.values()}
for _, game in df_2018_season.iterrows():
    home_team, away_team = game["team1"], game["team2"]
    # update rolling schedule for both teams
    schedule[home_team][away_team] = schedule[home_team].get(away_team, 0) + 1
    schedule[away_team][home_team] = schedule[away_team].get(home_team, 0) + 1
    # the away team's margin is the negation of the home team's
    mov_home_team = game["score1"] - game["score2"]
    margins[home_team].append(mov_home_team)
    margins[away_team].append(-mov_home_team)
print(margin_for_team("GSW", margins))
print(sos_for_team("GSW", schedule, margins))

8.578125
-0.021849781078296507


In [33]:
# Take an explicit copy of the 2018 rows (minus the last row) so the my_* columns
# are added to an independent frame, not to a slice of df_1951_2018.
# NOTE(review): the dropped last row is presumably a league-average summary row -- confirm.
df_2018_summary = df_1951_2018.loc[2018].iloc[:-1].copy()
# Recompute SOS and MOV for every team from the game-by-game data.
sos_lst, margin_lst = zip(*[(sos_for_team(nba_initials[name], schedule, margins), margin_for_team(nba_initials[name], margins)) for name in df_2018_summary.index])
df_2018_summary["my_MOV"] = [round(x, 2) for x in margin_lst]
df_2018_summary["my_SOS"] = [round(x, 2) for x in sos_lst]
# my_SRS is the sum of the already-rounded MOV and SOS values
df_2018_summary["my_SRS"] = df_2018_summary["my_MOV"] + df_2018_summary["my_SOS"]
df_2018_summary[['SRS','MOV', 'SOS', 'my_SRS', 'my_MOV', 'my_SOS']]

Unnamed: 0_level_0,SRS,MOV,SOS,my_SRS,my_MOV,my_SOS
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Golden State Warriors,8.6,8.58,0.02,8.56,8.58,-0.02
Houston Rockets,8.58,8.84,-0.26,8.52,8.84,-0.32
Toronto Raptors,8.12,8.75,-0.63,8.08,8.75,-0.67
Boston Celtics,3.96,4.31,-0.35,3.95,4.31,-0.36
Minnesota Timberwolves,2.63,2.73,-0.1,2.62,2.73,-0.11
Philadelphia 76ers,2.53,2.11,0.42,2.59,2.11,0.48
Oklahoma City Thunder,2.45,2.58,-0.13,2.39,2.58,-0.19
San Antonio Spurs,2.4,2.94,-0.54,2.32,2.94,-0.62
Utah Jazz,2.17,1.73,0.43,2.19,1.73,0.46
Portland Trail Blazers,1.82,2.23,-0.41,1.77,2.23,-0.46


In [35]:
# Column means of the published ratings next to the recomputed (my_*) ones.
comparison_columns = ['SRS', 'MOV', 'SOS', 'my_SRS', 'my_MOV', 'my_SOS']
df_2018_summary[comparison_columns].mean()

SRS       0.013333
MOV       0.012667
SOS       0.001333
my_SRS    0.013667
my_MOV    0.012667
my_SOS    0.001000
dtype: float64