In [71]:
import datetime
import glob
import io
import subprocess
import sys
import time
from subprocess import Popen, PIPE

import pandas as pd
import requests
from bs4 import BeautifulSoup
from bs4 import Comment
from IPython.core.display import display, HTML

# Stretch the notebook's .container element to the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Let pandas render up to 50 columns before truncating a displayed frame.
pd.options.display.max_columns = 50

## Import historical game by game data from FiveThirtyEight GitHub Repo

[FiveThirtyEight Data Repo](https://github.com/fivethirtyeight/data)

In [19]:
# FiveThirtyEight's game-by-game Elo file, read straight from the project URL.
url = "https://projects.fivethirtyeight.com/nba-model/nba_elo.csv"
df_elo_1941_2018 = (
    pd.read_csv(url)
    # cast the date column to timestamps so it can be compared with datetimes
    .astype({'date': 'datetime64[ns]'})
)
df_elo_1941_2018.head()

Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,elo1_post,elo2_post,carmelo1_pre,carmelo2_pre,carmelo1_post,carmelo2_post,carmelo_prob1,carmelo_prob2,score1,score2
0,1946-11-01,1947,0,,TRH,NYK,1300.0,1300.0,0.640065,0.359935,1293.2767,1306.7233,,,,,,,66.0,68.0
1,1946-11-02,1947,0,,CHS,NYK,1300.0,1306.7233,0.631101,0.368899,1309.6521,1297.0712,,,,,,,63.0,47.0
2,1946-11-02,1947,0,,PRO,BOS,1300.0,1300.0,0.640065,0.359935,1305.1542,1294.8458,,,,,,,59.0,53.0
3,1946-11-02,1947,0,,STB,PIT,1300.0,1300.0,0.640065,0.359935,1304.6908,1295.3092,,,,,,,56.0,51.0
4,1946-11-02,1947,0,,DTF,WSC,1300.0,1300.0,0.640065,0.359935,1279.6189,1320.3811,,,,,,,33.0,50.0


### Analyze fields

See [FiveThirtyEight NBA Predictions](https://projects.fivethirtyeight.com/2018-nba-predictions/) for explanation of Elo and CARM-Elo

| Name           |  Type         | Description                                                                        
| -------------  |-------------  | ----------------------------------------------------------------------------------|
| date           | datetime      |   Game date                                                                       |                      
| season         | int           |   End year of season                                                              |
| neutral        | bool          |   1=neutral home court                                                            |
| playoff        | string        |   1946-2015: t=playoffs. Starting in 2016: q=conference quarterfinals, s=conference semifinals, c=conference finals, f=finals |
| team1          | string        |   Home team initials                                                              |
| team2          | string        |   Away team initials                                                              |
| elo1_pre       | double        |   Home team Elo before game                                                       |
| elo2_pre       | double        |   Away team Elo before game                                                       |
| elo_prob1      | double        |   Win probability based on elo1_pre                                               |
| elo_prob2      | double        |   Win probability based on elo2_pre                                               |
| elo1_post      | double        |   Home team Elo after game                                                        |
| elo2_post      | double        |   Away team Elo after game                                                        |
| carmelo1_pre\* | double        |   Home team CARM-Elo before game                                                  |
| carmelo2_pre\* | double        |   Away team CARM-Elo before game                                                  |
| carmelo1_post\*| double        |   Home team CARM-Elo after game                                                   |
| carmelo2_post\*| double        |   Away team CARM-Elo after game                                                   |
| carmelo_prob1\*| double        |   Win probability based on carmelo1_pre                                           |
| carmelo_prob2\*| double        |   Win probability based on carmelo2_pre                                           |
| score1         | int           |   Home team final score                                                           |
| score2         | int           |   Away team final score                                                           |

\* data only available from 2015-2018

#### Determine CARM-Elo starting point

In [8]:
# Keep the identifying columns and the CARM-Elo ratings from the FiveThirtyEight Elo frame.
# (The frame is referenced by its real name; a bare `df` is never defined in this notebook.)
df_carm_elo = df_elo_1941_2018[["date", "season", "team1", "team2", "carmelo1_pre", "carmelo2_pre", "carmelo1_post", "carmelo2_post"]]
# The earliest games that carry a non-null CARM-Elo rating show where that data begins.
df_carm_elo[df_carm_elo["carmelo1_pre"].notnull()].sort_values("date").head()

Unnamed: 0,date,season,team1,team2,carmelo1_pre,carmelo2_pre,carmelo1_post,carmelo2_post
63157,2015-10-27,2016,ATL,DET,1542.664875,1451.827385,1521.809995,1472.682265
63158,2015-10-27,2016,CHI,CLE,1564.372491,1732.025482,1570.473937,1725.924036
63159,2015-10-27,2016,GSW,NOP,1730.513765,1555.126845,1734.342589,1551.298021
63173,2015-10-28,2016,LAL,MIN,1317.548331,1345.379348,1312.320723,1350.606956
63172,2015-10-28,2016,SAC,LAC,1487.260869,1671.590488,1481.391159,1677.460198


#### CARM-Elo data available starting with 2015-2016 season

## Import historical season data from basketball-reference

[Basketball-Reference Miscellaneous season stats](https://www.basketball-reference.com/leagues/NBA_2018.html#misc_stats::none)

**NOTE: ORtg and DRtg are available starting from the 1950-1951 NBA season**

In [204]:
def get_season_data(end_year):
    ''' 
    get cumulative statistics for season specified by end_year
    
    end_year: int, year to query (ex: 2018 queries 2017-2018 season)
    
    returns Pandas dataframe w/ basketball-reference.com's miscellaneous stats table for season specified by end_year,
    with an added "Season" column set to end_year

    raises requests.HTTPError if the season page request returns an error status
    raises ValueError if the miscellaneous stats table cannot be located in the page
    '''
    html = "https://www.basketball-reference.com/leagues/NBA_{}.html".format(end_year)
    result = requests.get(html)
    # fail loudly on 4xx/5xx responses instead of trying to parse an error page
    result.raise_for_status()
    soup = BeautifulSoup(result.content, "html.parser")
    wrapper = soup.find('div', id="all_misc_stats")
    if wrapper is None:
        raise ValueError("No 'all_misc_stats' section found for season {}".format(end_year))
    # html tree is strange...table is wrapped inside a comment
    table = next((c for c in wrapper.children if isinstance(c, Comment)), None)
    if table is None:
        raise ValueError("No commented-out stats table found for season {}".format(end_year))
    # parse table with pandas; the markup is wrapped in StringIO because passing
    # literal HTML text to read_html is deprecated. header=1 takes row 1 as column names.
    df = pd.read_html(io.StringIO(str(table)), header=1)[0]
    df["Season"] = end_year
    return df

### Option 1) Download latest data

In [209]:
def generate_season_summaries_for(start_year, end_year):
    '''
    generate dataframe with basketball-reference season summary data (see https://www.basketball-reference.com/leagues/NBA_2018.html#misc_stats::none) for all seasons [start_year, end_year] (inclusive)

    start_year: int, lower bound for year (ex: 2000 = 1999-2000 NBA season)
    end_year: int, upper bound for year

    returns dataframe indexed by (Season, Team) with season summary data for all seasons [start_year, end_year] (inclusive),
    including a NetRtg column (ORtg - DRtg) placed immediately before Pace

    raises AssertionError if start_year is earlier than 1950
    raises ValueError if end_year is earlier than start_year
    '''
    assert start_year >= 1950, "Start year must be 1950 or later"
    if end_year < start_year:
        raise ValueError("end_year must be greater than or equal to start_year")
    df_season_summaries = pd.concat([get_season_data(i) for i in range(start_year, end_year+1)])
    # Playoff teams labeled with * in basketball-reference data. Remove distinction for easier grouping.
    df_season_summaries["Team"] = df_season_summaries["Team"].map(lambda s: s.replace("*", ""))
    df_season_summaries["NetRtg"] = df_season_summaries["ORtg"] - df_season_summaries["DRtg"]
    df_season_summaries = df_season_summaries.set_index(["Season", "Team"])
    # Re-order columns of the frame built above (not a notebook-level global):
    # NetRtg was appended last, so drop it from the tail and re-insert it just before Pace.
    cols = df_season_summaries.columns.values.tolist()
    pace_pos = cols.index("Pace")
    ordered_cols = cols[:pace_pos] + ["NetRtg"] + cols[pace_pos:-1]
    # reindex(columns=...) replaces reindex_axis, which newer pandas no longer provides
    return df_season_summaries.reindex(columns=ordered_cols)

In [210]:
# ORtg and DRtg available starting from 1950-51 season 
start_year = 1951
end_year = 2018
generate_season_summaries_for(start_year, end_year).to_csv("../Data/nba_season_summaries_{}_{}.csv".format(start_year, end_year))

### Option 2) Use saved offline file

In [214]:
# Read the saved 1951-2018 summaries. The path is spelled out so this cell works on a
# fresh kernel without start_year/end_year having been set by the download cell.
# The first two CSV columns (Season, Team) form the index.
df_season_summaries_1951_2018 = pd.read_csv("../Data/nba_season_summaries_1951_2018.csv", index_col=[0, 1])
df_season_summaries_1951_2018.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Rk,Age,W,L,PW,PL,MOV,SOS,SRS,ORtg,DRtg,NetRtg,Pace,FTr,3PAr,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Arena,Attend.,Attend./G
Season,Team,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
1951,Minneapolis Lakers,1.0,,44.0,24.0,49.0,19.0,5.41,-0.63,4.79,86.4,80.7,5.7,94.8,0.356,,0.436,0.373,,,0.262,,,,,Minneapolis Auditorium,,
1951,Philadelphia Warriors,2.0,,40.0,26.0,43.0,23.0,3.76,-0.36,3.4,84.8,81.0,3.8,99.3,0.385,,0.425,0.35,,,0.294,,,,,Philadelphia Arena,,
1951,Rochester Royals,3.0,,41.0,27.0,42.0,26.0,2.99,-0.44,2.54,89.3,86.2,3.1,92.3,0.418,,0.452,0.378,,,0.315,,,,,Edgerton Park Arena,,
1951,Syracuse Nationals,4.0,,32.0,34.0,34.0,32.0,0.53,0.09,0.62,86.4,85.9,0.5,98.1,0.491,,0.435,0.351,,,0.356,,,,,State Fair Coliseum,,
1951,New York Knicks,5.0,,36.0,30.0,34.0,32.0,0.41,0.07,0.49,88.0,87.6,0.4,94.8,0.415,,0.445,0.379,,,0.296,,,,,Madison Square Garden (III),,


### Analyze fields

| Name      |  Type  | Description                                                                                   |
| --------- |--------| --------------------------------------------------------------------------------------------- |
| Rk        | double |   Rank (used to index for sorting within webpage)                                             |
| Age       | double |   Age of Player at the start of February 1st of that season.                                  |
| W         | double |   Wins                                                                                        |
| L         | double |   Losses                                                                                      |
| PW        | double |   Pythagorean wins, i.e., expected wins based on points scored and allowed                    |
| PL        | double |   Pythagorean losses, i.e., expected losses based on points scored and allowed                |
| MOV       | double |   Margin of Victory                                                                           |
| SOS       | double |   Strength of Schedule; a rating of strength of schedule. The rating is denominated in points above/below average, where zero is average.                                                                          |
| SRS       | double |   Simple Rating System; a team rating that takes into account average point differential and strength of schedule. The rating is denominated in points above/below average, where zero is average.                |
| ORtg      | double |   An estimate of points produced (players) or scored (teams) per 100 possessions              |  
| DRtg      | double |   An estimate of points allowed per 100 possessions                                           |
| NetRtg    | double |   Net rating, computed in this notebook as ORtg - DRtg                                        |
| Pace      | double |   An estimate of possessions per 48 minutes                                                   |
| FTr       | double |   Number of FT Attempts Per FG Attempt                                                        |
| 3PAr\*\*  | double |   Percentage of FG Attempts from 3-Point Range                                                |
| TS%       | double |   A measure of shooting efficiency that takes into account 2-point field goals, 3-point field goals, and free throws.                                                                                              |
| eFG%      | double |   Adjusts for the fact that a 3-point field goal is worth one more point than a 2-point field goal.                                                                                                                |
| TOV%\*    | double |   An estimate of turnovers committed per 100 plays.                                           |
| ORB%\*    | double |   An estimate of the percentage of available offensive rebounds a team grabbed.               |
| FT/FGA    | double |   Free Throws Per Field Goal Attempt                                                          |
| eFG%.1    | double |    Opponent Effective Field Goal Percentage                                                   |
| TOV%.1\*  | double |   Opponent Turnover Percentage                                                                |
| DRB%\*    | double |   An estimate of the percentage of available defensive rebounds a team grabbed.               |
| FT/FGA.1\*| double |   Opponent Free Throws Per Field Goal Attempt                                                 |
| Arena     | string |   Home Arena                                                                                  |
| Attend.   | double |   Cumulative home attendance                                                                  |
| Attend./G | double |   Attendance per home game at the team's primary arena                                        |

\* data only available from 1974-2018

\*\* data only available from 1980-2018

NOTE: No data available for 1954-1955 Baltimore Bullets

## Import additional historical game by game data from basketball-reference

Starting from the 1983-1984 season, basketball-reference.com has game by game advanced statistics such as ORtg, DRtg, and eFG%

[Example boxscore](https://www.basketball-reference.com/boxscores/201803050CHI.html)

In [None]:
def boxscore_links_for_date(date):
    '''
    get list of basketball-reference links to boxscores for games on given date
    
    date: datetime.datetime object with year, month, and day specified
    
    returns list of urls to basketball-reference single game boxscores for given date
    (empty list when the page lists no games)

    raises requests.HTTPError if the boxscore index request returns an error status
    '''
    link = "https://www.basketball-reference.com/boxscores/?month={}&day={}&year={}".format(date.month, date.day, date.year)
    result = requests.get(link)
    # an error response would otherwise look exactly like a day with no games
    result.raise_for_status()
    soup = BeautifulSoup(result.content, "html.parser")
    links = []
    for game in soup.find_all("td", {"class": "right gamelink"}):
        anchor = game.find("a")
        href = anchor.get("href") if anchor is not None else None
        # skip cells that carry no usable link instead of crashing on them
        if href:
            links.append("http://www.basketball-reference.com" + href)
    return links

In [None]:
def boxscore_dict_for_link(link):
    '''
    advanced box score stats as dictionary from basketball-reference boxscore link
    
    link: string, link to single game basketball-reference boxscore
    
    returns dictionary with advanced stats for home (team1) and away (team2) teams for boxscore linked;
    keys are "team1_<column>" / "team2_<column>" for every four-factors column plus DRtg and NetRtg
    NOTE: basketball-reference only supports single game advanced stats starting from 1983-1984 NBA season

    raises AssertionError if the page has no four factors section
    raises ValueError if the four factors section does not contain a commented-out table
    '''
    result = requests.get(link)
    soup = BeautifulSoup(result.content, "html.parser")
    four_factors = soup.find("div", id="all_four_factors")
    if four_factors is None:
        # raised explicitly (same type and message as the former assert) so the check survives python -O
        raise AssertionError("Advanced box score metrics only available for dates with at least 1 NBA game starting from 1983-1984 season")
    # the table markup is wrapped inside an html comment; cut out just the <table>...</table> span
    table_html = None
    for c in four_factors.children:
        if isinstance(c, Comment) and "<table" in c and "</table>" in c:
            s_ind = c.index("<table")
            e_ind = c.index("</table>") + len("</table>")
            table_html = c[s_ind:e_ind]
            break
    if table_html is None:
        raise ValueError("No four factors table found in boxscore page: {}".format(link))
    # wrapped in StringIO because passing literal HTML text to read_html is deprecated
    df = pd.read_html(io.StringIO(str(table_html)), header=1, index_col=0)[0]
    # each team's defensive rating is taken as the other row's offensive rating
    df["DRtg"] = df["ORtg"].values[::-1]
    df["NetRtg"] = df["ORtg"] - df["DRtg"]
    d = {}
    for i, team_name in enumerate(df.index):
        # first table row is stored as team2, every other row as team1
        prefix = "team2_" if i == 0 else "team1_"
        for col in df.columns:
            d[prefix+col] = df.loc[team_name, col]
    return d

### Option 1) Download data

#### Crawling through FiveThirtyEight nba_elo data to find advanced box score metrics for all games since 1983-1984 NBA season

In [None]:
start_year = 2017
end_year = 2018

# Popen only accepts string arguments, so the years are converted explicitly.
# sys.executable runs the script with the same interpreter as this kernel rather than
# whichever `python` happens to be first on PATH.
# universal_newlines=True returns stdout/stderr as text instead of bytes.
session = subprocess.Popen(
    [sys.executable, 'scrape_boxscores.py', str(start_year), str(end_year)],
    stdout=PIPE, stderr=PIPE, universal_newlines=True)
stdout, stderr = session.communicate()

print(stdout)
# Fail on a non-zero exit status as well as on any stderr output
if session.returncode != 0 or stderr:
    raise Exception("Error (exit code {}) ".format(session.returncode) + str(stderr))

#### Concatenate outputs from script to one csv

In [None]:
# Gather the script's per-run CSV outputs in filename order, then stack them into one frame
game_data_files = sorted(glob.glob("../Data/nba_game_data*.csv"))
game_data_frames = [
    pd.read_csv(fn, index_col=0, parse_dates=[1], infer_datetime_format=True)
    for fn in game_data_files
]
df_boxscores_1984_2018 = pd.concat(game_data_frames)
df_boxscores_1984_2018.to_csv("../Data/nba_boxscores_1984_2018.csv")

### Option 2) Load offline data

In [68]:
# Load the combined boxscore file; CSV column 0 is the index and column 1 is parsed as dates
df_boxscores_1984_2018 = pd.read_csv(
    "../Data/nba_boxscores_1984_2018.csv",
    index_col=0,
    parse_dates=[1],
    infer_datetime_format=True,
)
df_boxscores_1984_2018.tail()

Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,elo1_post,elo2_post,carmelo1_pre,carmelo2_pre,carmelo1_post,carmelo2_post,carmelo_prob1,carmelo_prob2,score1,score2,team2_Pace,team2_eFG%,team2_TOV%,team2_ORB%,team2_FT/FGA,team2_ORtg,team2_DRtg,team2_NetRtg,team1_Pace,team1_eFG%,team1_TOV%,team1_ORB%,team1_FT/FGA,team1_ORtg,team1_DRtg,team1_NetRtg
66740,2018-03-06,2018,0,,OKC,HOU,1565.863375,1750.210393,0.380945,0.619055,1558.456644,1757.617124,1586.852148,1768.879368,1579.48426,1776.247257,0.379293,0.620707,112.0,122.0,99.4,0.646,18.7,24.1,0.403,122.7,112.6,10.1,99.4,0.559,16.3,28.6,0.086,112.6,122.7,-10.1
66741,2018-03-06,2018,0,,DAL,DEN,1393.308064,1567.347589,0.395033,0.604967,1407.469925,1553.185728,1391.959448,1560.788674,1405.789978,1546.958144,0.405412,0.594588,118.0,107.0,104.5,0.473,11.9,20.8,0.217,102.4,112.9,-10.5,104.5,0.561,13.0,19.1,0.189,112.9,102.4,10.5
66742,2018-03-06,2018,0,,POR,NYK,1595.068944,1393.149331,0.850431,0.149569,1599.555787,1388.662489,1598.970316,1393.937172,1604.927424,1387.980064,0.808096,0.191904,111.0,87.0,93.0,0.464,13.5,20.0,0.12,93.6,119.4,-25.8,93.0,0.556,17.2,33.3,0.259,119.4,93.6,25.8
66743,2018-03-06,2018,0,,LAC,NOP,1579.868806,1564.508062,0.660174,0.339826,1569.632309,1574.744559,1581.976479,1571.585123,1571.828133,1581.733469,0.656241,0.343759,116.0,121.0,109.9,0.51,6.7,16.1,0.155,110.1,105.5,4.6,109.9,0.5,10.5,19.2,0.172,105.5,110.1,-4.6
66744,2018-03-06,2018,0,,GSW,BRK,1698.051976,1352.214256,0.928671,0.071329,1699.340397,1350.925835,1706.14366,1345.527537,1707.459744,1344.211453,0.927293,0.072707,114.0,101.0,97.1,0.476,11.1,21.7,0.235,104.0,117.4,-13.4,97.1,0.633,16.2,15.2,0.108,117.4,104.0,13.4


### Analyze Fields

Source: NBA game data (1984-2018) from [FiveThirtyEight Data Repo](https://github.com/fivethirtyeight/data) and [basketball-reference.com's boxscore data](https://www.basketball-reference.com/boxscores/)

See [FiveThirtyEight NBA Predictions](https://projects.fivethirtyeight.com/2018-nba-predictions/) for explanation of Elo and CARM-Elo

| Name           |  Type         | Description                                                                        
| -------------  |-------------  | ----------------------------------------------------------------------------------|
| date           | datetime      |   Game date                                                                       |                      
| season         | int           |   End year of season                                                              |
| neutral        | bool          |   1=neutral home court                                                            |
| playoff        | string        |   1946-2015: t=playoffs. Starting in 2016: q=conference quarterfinals, s=conference semifinals, c=conference finals, f=finals |
| team1          | string        |   Home team initials                                                              |
| team2          | string        |   Away team initials                                                              |
| elo1_pre       | double        |   Home team Elo before game                                                       |
| elo2_pre       | double        |   Away team Elo before game                                                       |
| elo_prob1      | double        |   Win probability based on elo1_pre                                               |
| elo_prob2      | double        |   Win probability based on elo2_pre                                               |
| elo1_post      | double        |   Home team Elo after game                                                        |
| elo2_post      | double        |   Away team Elo after game                                                        |
| carmelo1_pre\* | double        |   Home team CARM-Elo before game                                                  |
| carmelo2_pre\* | double        |   Away team CARM-Elo before game                                                  |
| carmelo1_post\*| double        |   Home team CARM-Elo after game                                                   |
| carmelo2_post\*| double        |   Away team CARM-Elo after game                                                   |
| carmelo_prob1\*| double        |   Win probability based on carmelo1_pre                                           |
| carmelo_prob2\*| double        |   Win probability based on carmelo2_pre                                           |
| score1         | int           |   Home team final score                                                           |
| score2         | int           |   Away team final score                                                           |
| team1_Pace     | double        |   Home team possessions per 48 mins                                               |
| team1_eFG%     | double        |   Home team eFG%                                                                  |
| team1_TOV%     | double        |   Home team TOV%                                                                  |
| team1_ORB%     | double        |   Home team ORB%                                                                  |
| team1_FT/FGA   | double        |   Home team FT/FGA                                                                |
| team1_ORtg     | double        |   Home team ORtg                                                                  |
| team1_DRtg     | double        |   Home team DRtg                                                                  |
| team1_NetRtg   | double        |   Home team NetRtg                                                                |
| team2_Pace     | double        |   Away team possessions per 48 mins                                               |
| team2_eFG%     | double        |   Away team eFG%                                                                  |
| team2_TOV%     | double        |   Away team TOV%                                                                  |
| team2_ORB%     | double        |   Away team ORB%                                                                  |
| team2_FT/FGA   | double        |   Away team FT/FGA                                                                |
| team2_ORtg     | double        |   Away team ORtg                                                                  |
| team2_DRtg     | double        |   Away team DRtg                                                                  |
| team2_NetRtg   | double        |   Away team NetRtg                                                                |


\* data only available from 2015-2018


### Calculate moving SRS

In [88]:
# Select 2018-season games dated before March 7, 2018
mask_2018 = df_boxscores_1984_2018["season"].eq(2018)
mask_max_date = df_boxscores_1984_2018["date"].lt(datetime.datetime(2018, 3, 7))

df_2018_boxscores = df_boxscores_1984_2018.loc[mask_2018 & mask_max_date]
df_2018_boxscores.head()

Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,elo1_post,elo2_post,carmelo1_pre,carmelo2_pre,carmelo1_post,carmelo2_post,carmelo_prob1,carmelo_prob2,score1,score2,team2_Pace,team2_eFG%,team2_TOV%,team2_ORB%,team2_FT/FGA,team2_ORtg,team2_DRtg,team2_NetRtg,team1_Pace,team1_eFG%,team1_TOV%,team1_ORB%,team1_FT/FGA,team1_ORtg,team1_DRtg,team1_NetRtg
65782,2017-10-17,2018,0,,CLE,BOS,1647.989805,1532.470014,0.775674,0.224326,1650.129184,1530.330635,1648.0,1549.0,1650.308911,1546.691089,0.74629,0.25371,102.0,99.0,99.3,0.455,9.2,18.0,0.216,99.7,102.7,-3.0,99.3,0.488,15.3,19.6,0.253,102.7,99.7,3.0
65783,2017-10-17,2018,0,,GSW,HOU,1760.609663,1574.467471,0.838508,0.161492,1751.819016,1583.258119,1761.0,1675.0,1753.884111,1682.115889,0.747495,0.252505,121.0,122.0,102.0,0.562,10.2,22.2,0.134,119.6,118.6,1.0,102.0,0.638,16.0,15.4,0.238,118.6,119.6,-1.0
65784,2017-10-18,2018,0,,ORL,MIA,1390.229357,1552.809706,0.410901,0.589099,1400.663642,1542.375421,1458.0,1483.0,1464.397752,1476.602248,0.598634,0.401366,116.0,109.0,105.2,0.475,10.7,22.0,0.129,103.6,110.3,-6.7,105.2,0.522,12.0,25.0,0.244,110.3,103.6,6.7
65785,2017-10-18,2018,0,,DET,CHO,1456.654984,1473.216401,0.617821,0.382179,1464.992663,1464.878722,1427.0,1542.0,1439.104231,1529.895769,0.476536,0.523464,102.0,90.0,98.5,0.459,16.5,7.3,0.315,91.4,103.6,-12.2,98.5,0.474,7.3,17.0,0.115,103.6,91.4,12.2
65786,2017-10-18,2018,0,,IND,BRK,1502.884837,1405.034022,0.757481,0.242519,1506.960938,1400.957921,1406.0,1381.0,1411.729285,1375.270715,0.671978,0.328022,140.0,131.0,113.3,0.543,15.6,25.0,0.309,115.7,123.6,-7.9,113.3,0.564,10.8,30.4,0.245,123.6,115.7,7.9


In [89]:
# Maps each full team name (as used in the season summary "Team" index) to the
# three-letter team code used in the team1/team2 columns of the game data.
nba_initials = {
    'Atlanta Hawks': 'ATL',
    'Boston Celtics': 'BOS',
    'Brooklyn Nets': 'BRK',
    'Charlotte Hornets': 'CHO',
    'Chicago Bulls': 'CHI',
    'Cleveland Cavaliers': 'CLE',
    'Dallas Mavericks': 'DAL',
    'Denver Nuggets': 'DEN',
    'Detroit Pistons': 'DET',
    'Golden State Warriors': 'GSW',
    'Houston Rockets': 'HOU',
    'Indiana Pacers': 'IND',
    'Los Angeles Clippers': 'LAC',
    'Los Angeles Lakers': 'LAL',
    'Memphis Grizzlies': 'MEM',
    'Miami Heat': 'MIA',
    'Milwaukee Bucks': 'MIL',
    'Minnesota Timberwolves': 'MIN',
    'New Orleans Pelicans': 'NOP',
    'New York Knicks': 'NYK',
    'Oklahoma City Thunder': 'OKC',
    'Orlando Magic': 'ORL',
    'Philadelphia 76ers': 'PHI',
    'Phoenix Suns': 'PHO',
    'Portland Trail Blazers': 'POR',
    'Sacramento Kings': 'SAC',
    'San Antonio Spurs': 'SAS',
    'Toronto Raptors': 'TOR',
    'Utah Jazz': 'UTA',
    'Washington Wizards': 'WAS'
}

In [90]:
def margin_for_team(abbrev, margins):
    '''
    average point margin for a single team

    abbrev: key identifying the team in margins
    margins: dictionary mapping each team key to a list of per-game point margins

    returns sum of the team's margins divided by the number of margins recorded
    '''
    team_margins = margins[abbrev]
    games_played = len(team_margins)
    return sum(team_margins) / games_played

In [91]:
def sos_for_team(abbrev, schedule, margins):
    '''
    average of a team's opponents' average margins, counting each opponent once per game played against it

    abbrev: key identifying the team in schedule
    schedule: dictionary mapping each team key to a dictionary of {opponent key: games played}
    margins: dictionary mapping each team key to a list of per-game point margins

    returns mean of the opponents' average margins, weighted by games played
    '''
    weighted_opp_margins = []
    for opponent, games in schedule[abbrev].items():
        opponent_margin = margin_for_team(opponent, margins)
        # repeat the opponent's average once per meeting so frequent opponents weigh more
        weighted_opp_margins.extend([opponent_margin] * games)
    return sum(weighted_opp_margins) / len(weighted_opp_margins)

In [92]:
# Per team: a list of point margins (one per game) and an {opponent: games played} tally
margins = {abbrev: [] for abbrev in nba_initials.values()}
schedule = {abbrev: {} for abbrev in nba_initials.values()}
for _, game in df_2018_boxscores.iterrows():
    home_team, away_team = game["team1"], game["team2"]
    # update rolling schedule for both teams
    schedule[home_team][away_team] = schedule[home_team].get(away_team, 0) + 1
    schedule[away_team][home_team] = schedule[away_team].get(home_team, 0) + 1
    # the away team's margin is the negative of the home team's
    mov_home_team = game["score1"] - game["score2"]
    margins[home_team].append(mov_home_team)
    margins[away_team].append(-mov_home_team)
print(margin_for_team("GSW", margins))
print(sos_for_team("GSW", schedule, margins))

8.578125
-0.021849781078296507


In [114]:
# Get 2018 season summary, dropping League Average row
df_2018_summary = df_season_summaries_1951_2018.loc[2018][:-1]
sos_lst, margin_lst = zip(*[(sos_for_team(nba_initials[name], schedule, margins), margin_for_team(nba_initials[name], margins)) for name in df_2018_summary.index])
df_2018_summary["my_MOV"] = list(map(lambda x: round(x,2), margin_lst))
df_2018_summary["my_SOS"] = list(map(lambda x: round(x,2), sos_lst))
df_2018_summary["my_SRS"] = df_2018_summary["my_MOV"] + df_2018_summary["my_SOS"]
df_2018_summary[['SRS','MOV', 'SOS', 'my_SRS', 'my_MOV', 'my_SOS']]

Unnamed: 0_level_0,SRS,MOV,SOS,my_SRS,my_MOV,my_SOS
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Golden State Warriors,8.61,8.58,0.03,8.56,8.58,-0.02
Houston Rockets,8.61,8.88,-0.26,8.52,8.84,-0.32
Toronto Raptors,8.02,8.64,-0.62,8.08,8.75,-0.67
Boston Celtics,3.96,4.31,-0.35,3.95,4.31,-0.36
Minnesota Timberwolves,2.64,2.73,-0.09,2.62,2.73,-0.11
Philadelphia 76ers,2.54,2.11,0.42,2.59,2.11,0.48
Oklahoma City Thunder,2.45,2.58,-0.12,2.39,2.58,-0.19
Utah Jazz,2.45,2.02,0.43,2.19,1.73,0.46
San Antonio Spurs,2.4,2.94,-0.54,2.32,2.94,-0.62
Portland Trail Blazers,1.82,2.23,-0.41,1.77,2.23,-0.46


In [115]:
# Average each published and recomputed rating column across the teams
comparison_columns = ['SRS','MOV', 'SOS', 'my_SRS', 'my_MOV', 'my_SOS']
df_2018_summary[comparison_columns].mean()

SRS       0.015333
MOV       0.015000
SOS       0.001667
my_SRS    0.013667
my_MOV    0.012667
my_SOS    0.001000
dtype: float64