In [1]:
import pandas as pd
import requests
import time
import numpy as np
import glob
import bs4
import matplotlib.pyplot as plt
import gspread

%matplotlib inline

In [2]:
#pandas params
# Display settings applied in order: no cap on rows, columns, or column width,
# and a 200-row floor for truncated reprs.
display_settings = {
    'display.max_rows': None,
    'display.min_rows': 200,
    'display.max_columns': None,
    'display.max_colwidth': None,
}
for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

In [3]:
# Season string for the current run; a later cell passes it to
# calculate_matchup_based_defense, which forwards it into the Season= query parameter.
this_year = '2020-21'

#  Scrape Data

In [4]:
#Function to scrape per-game values
def scrape_per_game(season, timeout=30):
    """Download per-game player stats for one regular season.

    Calls the stats.nba.com ``leaguedashplayerstats`` endpoint with
    ``PerMode=PerGame`` and converts the first result set of the JSON
    response into a DataFrame.

    Parameters
    ----------
    season : str
        Value interpolated into the ``Season`` query parameter
        (the notebook passes strings such as '2020-21').
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises
        instead of blocking the notebook indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Rows from the response's ``rowSet``, columns from its ``headers``,
        plus a ``Season`` column filled with ``season``.

    Raises
    ------
    requests.HTTPError
        If the server replies with an error status code.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    url = f"https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight="

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error rather than a confusing JSON/KeyError further down.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    df['Season'] = season
    return df

In [5]:
def scrape_matchup_data(teamid,season, timeout=30):
    """Download season matchup totals where ``teamid`` is the defending team.

    Calls the stats.nba.com ``leagueseasonmatchups`` endpoint with
    ``DefTeamID={teamid}`` and ``PerMode=Totals`` and converts the first
    result set of the JSON response into a DataFrame.

    Parameters
    ----------
    teamid : int or str
        Value interpolated into the ``DefTeamID`` query parameter and into
        the ``Referer`` header.
    season : str
        Value interpolated into the ``Season`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises
        instead of blocking the notebook indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Rows from the response's ``rowSet`` with columns from its
        ``headers``. No ``Season`` column is added by this function.

    Raises
    ------
    requests.HTTPError
        If the server replies with an error status code.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': f"https://stats.nba.com/team/{teamid}/matchups/",
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    url = f"https://stats.nba.com/stats/leagueseasonmatchups?DateFrom=&DateTo=&DefTeamID={teamid}&LeagueID=00&Outcome=&PORound=0&PerMode=Totals&Season={season}&SeasonType=Regular+Season"

    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP failures directly instead of failing later while parsing the body.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    return df

In [6]:
#Function to scrape team totals
def scrape_team_totals (season, timeout=30):
    """Download season-total team stats for one regular season.

    Calls the stats.nba.com ``leaguedashteamstats`` endpoint with
    ``MeasureType=Base`` and ``PerMode=Totals`` and converts the first
    result set of the JSON response into a DataFrame.

    Parameters
    ----------
    season : str
        Value interpolated into the ``Season`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises
        instead of blocking the notebook indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Rows from the response's ``rowSet``, columns from its ``headers``,
        plus a ``Season`` column filled with ``season``.

    Raises
    ------
    requests.HTTPError
        If the server replies with an error status code.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    url = f"https://stats.nba.com/stats/leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision="

    response = requests.get(url, headers=headers, timeout=timeout)
    # An error status would otherwise only show up as a JSON or KeyError below.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    df['Season'] = season
    return df

In [7]:
#Function to scrape player totals
def scrape_player_totals(season, timeout=30):
    """Download season-total player stats for one regular season.

    Calls the stats.nba.com ``leaguedashplayerstats`` endpoint with
    ``PerMode=Totals`` and converts the first result set of the JSON
    response into a DataFrame.

    Parameters
    ----------
    season : str
        Value interpolated into the ``Season`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises
        instead of blocking the notebook indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Rows from the response's ``rowSet``, columns from its ``headers``,
        plus a ``Season`` column filled with ``season``.

    Raises
    ------
    requests.HTTPError
        If the server replies with an error status code.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    url = f"https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight="

    response = requests.get(url, headers=headers, timeout=timeout)
    # Stop on an HTTP error before attempting to decode the body.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    df['Season'] = season
    return df

In [8]:
#Function to scrape hustle stats
def scrape_hustle_stats (season, timeout=30):
    """Download season-total player hustle stats for one regular season.

    Calls the stats.nba.com ``leaguehustlestatsplayer`` endpoint with
    ``PerMode=Totals`` and converts the first result set of the JSON
    response into a DataFrame.

    Parameters
    ----------
    season : str
        Value interpolated into the ``Season`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises
        instead of blocking the notebook indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Rows from the response's ``rowSet``, columns from its ``headers``,
        plus a ``Season`` column filled with ``season``.

    Raises
    ------
    requests.HTTPError
        If the server replies with an error status code.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    url = f"https://stats.nba.com/stats/leaguehustlestatsplayer?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Regular+Season&TeamID=0&VsConference=&VsDivision=&Weight="

    response = requests.get(url, headers=headers, timeout=timeout)
    # Raise on an HTTP error status so the failure is reported at its source.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    df['Season'] = season
    return df

In [9]:
#Function to scrape pace
def scrape_pace (season, timeout=30):
    """Download per-game advanced team stats for one regular season.

    Calls the stats.nba.com ``leaguedashteamstats`` endpoint with
    ``MeasureType=Advanced`` and ``PerMode=PerGame`` and converts the first
    result set of the JSON response into a DataFrame.

    Parameters
    ----------
    season : str
        Value interpolated into the ``Season`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises
        instead of blocking the notebook indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Rows from the response's ``rowSet``, columns from its ``headers``,
        plus a ``Season`` column filled with ``season``.

    Raises
    ------
    requests.HTTPError
        If the server replies with an error status code.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    url = f"https://stats.nba.com/stats/leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision="

    response = requests.get(url, headers=headers, timeout=timeout)
    # Report HTTP failures here rather than as a decode error on the next line.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    df['Season'] = season
    return df

In [10]:
#Function to scrape team opponent data
def scrape_opponent (season, timeout=30):
    """Download season-total opponent team stats for one regular season.

    Calls the stats.nba.com ``leaguedashteamstats`` endpoint with
    ``MeasureType=Opponent`` and ``PerMode=Totals`` and converts the first
    result set of the JSON response into a DataFrame.

    Parameters
    ----------
    season : str
        Value interpolated into the ``Season`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled connection raises
        instead of blocking the notebook indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Rows from the response's ``rowSet``, columns from its ``headers``,
        plus a ``Season`` column filled with ``season``.

    Raises
    ------
    requests.HTTPError
        If the server replies with an error status code.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    url = f"https://stats.nba.com/stats/leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision="

    response = requests.get(url, headers=headers, timeout=timeout)
    # Check the status first so an error page is not mistaken for malformed JSON.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    df['Season'] = season
    return df

In [11]:
def calculate_matchup_based_defense(season):
    """Build the matchup-based defense table for one season and write it to CSV.

    Scrapes team totals, player totals, per-game stats, opponent totals,
    hustle stats, pace, and one matchup table per defensive team id through
    the notebook's ``scrape_*`` helpers. From those it derives, per defender:
    defensive load, effectiveness, shooting / non-shooting points saved,
    pace-adjusted totals, and defensive wins / losses. The final frame is
    written to a per-season ``{season}_advanced_defense.csv`` file in the
    hard-coded output directory.

    Many columns are placed with ``DataFrame.insert(loc=...)`` using fixed
    positions, so the code depends on the column layout returned by the
    scrapers; statement order must be preserved.

    Parameters
    ----------
    season : str
        Season string forwarded to every ``scrape_*`` helper and used in the
        output file name (the notebook passes values such as '2020-21').

    Returns
    -------
    None
        The result is only written to disk.
    """
    # League-wide inputs. Each is fetched once here and reused further down.
    team_totals = scrape_team_totals(season)
    player_totals = scrape_player_totals(season)
    per_game = scrape_per_game(season)
    opponent = scrape_opponent(season)

    # One matchup request per defensive team id, pausing a random 0-5 seconds before each.
    # NOTE(review): this range spans 31 consecutive ids; confirm the upper bound
    # if only 30 teams are expected.
    csv_list = []
    for team_id in range (1610612737,1610612768):
        time.sleep(np.random.randint(0,5 + 1))
        frame = scrape_matchup_data(team_id, season)
        csv_list.append(frame)

    matchup_data = pd.concat(csv_list)

    #Calculate two-pointers
    per_game.insert(loc = 16, column = 'FG2M', value = (per_game.FGM - per_game.FG3M))
    per_game.insert(loc = 17, column = 'FG2A', value = (per_game.FGA - per_game.FG3A))
    per_game.insert(loc = 18, column = 'FG2_PCT', value = (per_game.FG2M / per_game.FG2A))

    #Calculate shooting efficiency
    per_game.insert(loc = 22, column = 'eFG', value = ((per_game.FGM + (0.5 * per_game.FG3M)) / per_game.FGA))
    per_game.insert(loc = 23, column = 'TS', value = (per_game.PTS / (2 * (per_game.FGA + (0.44 * per_game.FTA)))))

    per_game = per_game[['PLAYER_ID','PLAYER_NAME','Season','TEAM_ID','TEAM_ABBREVIATION','GP','MIN','PTS','FGM',
                         'FGA','FG_PCT','FG2M','FG2A','FG2_PCT','FG3M','FG3A','FG3_PCT','FTM','FTA','FT_PCT','eFG','TS',
                         'OREB','DREB','REB','AST','TOV','STL','BLK','BLKA','PF','PLUS_MINUS']]

    #Calculate two-pointers (matchup level); a zero denominator yields 0 instead of a division error
    matchup_data.insert(loc = 16, column = 'MATCHUP_FG2M', value = (matchup_data.MATCHUP_FGM - matchup_data.MATCHUP_FG3M))
    matchup_data.insert(loc = 17, column = 'MATCHUP_FG2A', value = (matchup_data.MATCHUP_FGA - matchup_data.MATCHUP_FG3A))
    matchup_data.insert(loc = 18, column = 'MATCHUP_FG2_PCT',
                        value = np.where(matchup_data.MATCHUP_FG2A.astype(float) == 0,
                                         0,
                                            (matchup_data.MATCHUP_FG2M.astype(float) / matchup_data.MATCHUP_FG2A.astype(float))))

    #Calculate shooting efficiency (matchup level)
    matchup_data.insert(loc = 22, column = 'MATCHUP_eFG',
                value = np.where(matchup_data.MATCHUP_FGA.astype(float) == 0,
                    0,
                        ((matchup_data.MATCHUP_FGM.astype(float) + (0.5 * matchup_data.MATCHUP_FG3M.astype(float)))
                         / matchup_data.MATCHUP_FGA.astype(float))))
    matchup_data.insert(loc = 23, column = 'MATCHUP_TS',
                        value = (matchup_data.PLAYER_PTS.astype(float)
                        / (2 * (matchup_data.MATCHUP_FGA.astype(float) + (0.44 * matchup_data.MATCHUP_FTA.astype(float))))))

    # Attach each offensive player's per-game line to every matchup row.
    df = matchup_data.merge(right = per_game, how = 'inner', left_on = 'OFF_PLAYER_ID', right_on = 'PLAYER_ID')

    # Defensive load: possession-weighted average of the opponents' per-game points, per defender.
    df.insert(loc = 63, column = 'weighted_difficulty', value = df['PTS']*df['PARTIAL_POSS'])
    df.insert(loc = 64, column = 'total_possessions', value = df.groupby(['DEF_PLAYER_ID'])['PARTIAL_POSS'].transform('sum'))
    df.insert(loc = 65, column = 'total_weight', value = df.groupby(['DEF_PLAYER_ID'])['weighted_difficulty'].transform('sum'))
    df.insert(loc = 66, column = 'defensive_load', value = df['total_weight']/df['total_possessions'])

    # Expected opponent points per defender, from each opponent's points per possession.
    df.insert(loc = 67, column = 'offensive_player_possessions', value = df.groupby(['OFF_PLAYER_ID'])['PARTIAL_POSS'].transform('sum'))
    df.insert(loc = 68, column = 'off_pts_poss', value = (df.PTS / (df.offensive_player_possessions / df.GP_y)))
    df.insert(loc = 69, column = 'weighted_opp_pts_poss', value = df.off_pts_poss * df.PARTIAL_POSS)
    df.insert(loc = 70, column = 'total_expected_opp_pts', value = df.groupby('DEF_PLAYER_ID')['weighted_opp_pts_poss'].transform('sum'))

    # Effectiveness = expected points / points actually allowed; defenders who allowed 0 get 1.5.
    df.insert(loc = 71, column = 'points_allowed', value = df.groupby(['DEF_PLAYER_ID'])['PLAYER_PTS'].transform('sum'))
    df.insert(loc = 72, column = 'effectiveness', value = df.total_expected_opp_pts / df.points_allowed)
    df['effectiveness'] = np.where(df['points_allowed'] == 0, 1.5, df['effectiveness'])

    # Look up each defender's team through the player/team pairs present on the offensive side.
    teams = df[['PLAYER_ID','TEAM_ID','TEAM_ABBREVIATION']].drop_duplicates()
    teams = teams.rename(columns = {'PLAYER_ID':'DEFENDER_ID', 'TEAM_ID':'DEF_TEAM_ID','TEAM_ABBREVIATION':'DEF_TEAM_NAME'})
    df2 = df.merge(right = teams, how = 'left', left_on = ['DEF_PLAYER_ID'], right_on = ['DEFENDER_ID'])

    # Team-level load, and each defender's load relative to it.
    df2.insert(loc = 75, column = 'team_total_possessions', value = df2.groupby(['DEF_TEAM_ID'])['PARTIAL_POSS'].transform('sum'))
    df2.insert(loc = 76, column = 'team_total_weight', value = df2.groupby(['DEF_TEAM_ID'])['weighted_difficulty'].transform('sum'))
    df2.insert(loc = 77, column = 'team_defensive_load', value = df2['team_total_weight']/df2['team_total_possessions'])

    df2.insert(loc = 78, column = 'relative_load', value = df2.defensive_load / df2.team_defensive_load)
    df2.insert(loc = 79, column = 'load_adj_effectiveness', value = df2.effectiveness * df2.relative_load)
    df2.insert(loc = 80, column = 'team_opp_fga', value = df2.groupby(['DEF_TEAM_ID'])['MATCHUP_FGA'].transform('sum'))
    df2.insert(loc = 80, column = 'team_opp_fgm', value = df2.groupby(['DEF_TEAM_ID'])['MATCHUP_FGM'].transform('sum'))

    # Team blocks split into direct (summed from matchups) and help (the remainder).
    team_blocks = team_totals[['TEAM_ID','BLK']]
    df3 = df2.merge(right = team_blocks, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID')
    df3 = df3.rename(columns = {'BLK_y':'team_blk'})
    df3 = df3.drop(['TEAM_ID_y'], axis = 1)
    df3.insert(loc = 84, column = 'team_direct_blk', value = df3.groupby(['DEF_TEAM_ID'])['MATCHUP_BLK'].transform('sum'))
    df3.insert(loc = 85, column = 'team_help_blk', value = df3.team_blk - df3.team_direct_blk)
    df3.insert(loc = 86, column = 'direct_blk', value = df3.groupby(['DEF_PLAYER_ID'])['MATCHUP_BLK'].transform('sum'))

    # Defender's own season blocks and steals (reuses the player totals fetched above).
    player_blk = player_totals[['PLAYER_ID','BLK','STL']].drop_duplicates()
    player_blk = player_blk.rename(columns = {'STL':'DEFENDER_STL'})
    df4 = df3.merge(right = player_blk, how = 'left', left_on = ['DEF_PLAYER_ID'], right_on = ['PLAYER_ID'])
    df4.insert(loc = 89, column = 'help_blk', value = df4.BLK - df4.direct_blk)

    # Sum of load-adjusted effectiveness per team, counting each defender once.
    eff = df4[['DEF_PLAYER_ID','DEF_TEAM_ID','load_adj_effectiveness']].drop_duplicates()
    eff.insert(loc = 3, column = 'team_load_adj_effectiveness',
               value = eff.groupby(['DEF_TEAM_ID'])['load_adj_effectiveness'].transform('sum'))
    eff = eff[['DEF_TEAM_ID','team_load_adj_effectiveness']].drop_duplicates()

    df5 = df4.merge(right = eff, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'DEF_TEAM_ID')
    df5.insert(loc = 91, column = 'opp_pts_per_fgm', value = ((df5.PTS - df5.FTM) / df5.FGM))
    # Use df5's own PARTIAL_POSS column rather than reaching back to the earlier `df` frame.
    df5.insert(loc = 92, column = 'weighted_opp_pts_per_fgm', value = df5.opp_pts_per_fgm * df5.PARTIAL_POSS)
    df5.insert(loc = 93, column = 'agg_opp_pts_per_fgm', value = df5.groupby(['DEF_PLAYER_ID'])['weighted_opp_pts_per_fgm'].transform('sum'))

    #changed df5.team_blk to df5.team_help_blk below to avoid double-crediting shotblockers for blocking their own man's shot
    df5.insert(loc = 92, column = 'missed_fg_points_saved',
               value = (
                   (df5.load_adj_effectiveness / df5.team_load_adj_effectiveness)
                   * (df5.total_possessions / df5.team_total_possessions)
                   * (df5.team_opp_fga - df5.team_opp_fgm - df5.team_help_blk)  #Here is the change
                   * (df5.agg_opp_pts_per_fgm / (df5.total_possessions / 5))
               )
              )

    #Changed df5.BLK to df5.HELP_BLK below to avoid double crediting shot blockers for blocking their own man's shot
    # NOTE(review): HELP_BLK is read as-is from the merged frame (presumably a matchup-data column);
    # it is distinct from the derived 'help_blk' column created above - confirm which one is intended.
    df5.insert(loc = 95, column = 'total_shooting_defense',
               value = (df5.missed_fg_points_saved + ((df5.agg_opp_pts_per_fgm / df5.total_possessions) * df5.HELP_BLK)))

    # Turnovers forced: steals not tied to loose-ball recoveries, charges drawn, and a share of the
    # team's remaining opponent turnovers ("ooto").
    hustle = scrape_hustle_stats(season)
    hustle = hustle.drop(columns = ['TEAM_ID','TEAM_ABBREVIATION']).drop_duplicates()
    df6 = df5.merge(right = hustle, how = 'left', left_on = 'DEF_PLAYER_ID', right_on = 'PLAYER_ID')
    df6.insert(loc = 96, column = 'solo_steals', value = df6.DEFENDER_STL - df6.DEF_LOOSE_BALLS_RECOVERED)
    # Use df6's own columns rather than reaching back to the earlier `df` frame.
    df6.insert(loc = 97, column = 'ind_exp_opp_tov', value = ((df6.TOV / (df6.offensive_player_possessions / df6.GP_y))*df6.PARTIAL_POSS))
    df6.insert(loc = 98, column = 'total_exp_opp_tov', value = df6.groupby(['DEFENDER_ID'])['ind_exp_opp_tov'].transform('sum'))
    df6.insert(loc = 99, column = 'ind_opp_tov', value = df6.groupby(['DEFENDER_ID'])['MATCHUP_TOV'].transform('sum'))
    df6.insert(loc = 100, column = 'ooto', value = df6.ind_opp_tov - df6.solo_steals - df6.CHARGES_DRAWN)

    team_ooto = df6[['DEF_TEAM_ID','DEF_PLAYER_ID','ooto']].drop_duplicates()
    team_ooto.insert(loc = 3, column = 'team_ooto', value = team_ooto.groupby(['DEF_TEAM_ID'])['ooto'].transform('sum'))
    ooto = team_ooto.drop(columns = ['DEF_PLAYER_ID','ooto']).drop_duplicates()

    df7 = df6.merge(right = ooto, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'DEF_TEAM_ID')
    df7.insert(loc = 102, column = 'player:team_ooto', value = df7.ooto /df7.team_ooto)
    df7.insert(loc = 101, column = 'player:expected_ooto', value = df7.ooto /df7.total_exp_opp_tov)
    df7.insert(loc = 104, column = 'ooto_factor', value = df7['player:team_ooto'] * df7['player:expected_ooto'])
    df7.insert(loc = 105, column = 'team_ooto_factor', value =
    df7[['DEF_PLAYER_ID','DEF_TEAM_ID','ooto_factor']].drop_duplicates().groupby(['DEF_TEAM_ID'])['ooto_factor'].transform('sum')
              )
    df7.insert(loc = 106, column = 'other_tov_forced', value = ((df7.ooto_factor / df7.team_ooto_factor) * df7.team_ooto))
    df7.insert(loc = 107, column = 'tov_forced', value = df7.CHARGES_DRAWN + df7.other_tov_forced + df7.solo_steals)
    df7.insert(loc = 108, column = 'adjusted_shooting_defense', value = 0.744 * df7.total_shooting_defense)
    df7.insert(loc = 109, column = 'points_saved', value = df7.adjusted_shooting_defense + df7.tov_forced)
    df7.insert(loc = 109, column = 'ShootingDefper100', value = ((df7.adjusted_shooting_defense / df7.total_possessions) * 100))
    df7.insert(loc = 109, column = 'TotalDefenseper100', value = ((df7.points_saved / df7.total_possessions) * 100))
    df7.insert(loc = 109, column = 'NonShootingDefper100', value = ((df7.tov_forced / df7.total_possessions) * 100))

    # Collapse to one row per defender and drop rows with any missing value.
    df8 = df7[['DEF_PLAYER_NAME','DEF_TEAM_NAME','SEASON_ID','defensive_load','relative_load','effectiveness',
               'load_adj_effectiveness','tov_forced','CHARGES_DRAWN','solo_steals','G','total_possessions',
               'adjusted_shooting_defense','ShootingDefper100','points_saved','TotalDefenseper100','NonShootingDefper100',
              'DEF_PLAYER_ID','DEF_TEAM_ID','points_allowed']].drop_duplicates()   #added points_allowed
    df8 = df8.dropna()

    #Manipulate pace
    pace = scrape_pace(season)
    pace.insert(loc = 1, column = 'weight', value = pace.MIN * pace.PACE)
    pace.insert(loc = 1, column = 'agg_weight', value = (pace['weight'].sum() / (30 * pace['MIN'].mean())))
    pace.insert(loc = 1, column = 'pace_factor', value = pace.PACE / pace.agg_weight)
    pace = pace[['TEAM_ID','pace_factor']]

    #Merge to df8
    mbd = df8.merge(right = pace, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID')
    # NOTE(review): the 'load_adj_efectiveness' key matches no column (the column created above is
    # 'load_adj_effectiveness'), so that entry renames nothing; left as-is to keep the output schema.
    mbd = mbd.rename(columns = {'DEF_TEAM_NAME':'DEF_TEAM_ABBREVIATION','load_adj_efectiveness':'loadadjustedeffectiveness',
                         'tov_forced':'Non_Shooting_Defense','solo_steals':'solosteals','total_possessions':'totposs',
                         'adjusted_shooting_defense':'ShootingDefense','points_saved':'TotalDefense'})
    # Pace-adjust by explicit assignment: Series.update on a selected column is chained assignment and
    # has no effect on the frame under pandas Copy-on-Write. fillna keeps the old value wherever the
    # quotient is missing, matching Series.update's "ignore NaN" behaviour.
    for pace_col in ('Non_Shooting_Defense', 'ShootingDefense', 'TotalDefense'):
        mbd[pace_col] = (mbd[pace_col] / mbd['pace_factor']).fillna(mbd[pace_col])

    mbd = mbd.drop(['TEAM_ID'], axis = 1)

    # League-wide opponent totals (reuses the opponent frame fetched at the top).
    opponent['LEAGUE_PTS'] = opponent['OPP_PTS'].sum()
    opponent['LEAGUE_FGA'] = opponent['OPP_FGA'].sum()
    opponent['LEAGUE_TOV'] = opponent['OPP_TOV'].sum()
    opponent['LEAGUE_FTA'] = opponent['OPP_FTA'].sum()

    mbd = mbd.merge(right = opponent, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID')
    mbd['TEAM_POINTS_SAVED'] = mbd.groupby(['DEF_TEAM_ID'])['TotalDefense'].transform('sum')

    #Tacking on player minutes (reuses the player totals fetched at the top)
    p = player_totals[['PLAYER_ID','MIN']]
    p = p.rename(columns = {"PLAYER_ID":"PID","MIN":"MP"})
    mbd = mbd.merge(right = p, how = 'left', left_on = 'DEF_PLAYER_ID', right_on = 'PID')
    mbd['POINTS_SAVED_48'] = (mbd['TotalDefense'] / mbd['MP'] * 48)
    #Changed to add ' / 5'
    mbd['TEAM_POINTS_SAVED_48'] = ((mbd['TEAM_POINTS_SAVED'] / mbd['MIN'] * 48)/5)

    #Handling edge cases with few possessions and non-shooting defense > 0
    mbd['TotalDefense'] = np.where(((mbd['TotalDefense'] > mbd['ShootingDefense']) & (mbd['totposs'] < 100)),
        mbd['ShootingDefense'], mbd['TotalDefense'])
    mbd['POINTS_SAVED_48'] = (mbd['TotalDefense'] / mbd['MP'] * 48)

    # Team and league points allowed per scoring opportunity, then team defensive wins.
    team_totals = team_totals.rename(columns = {"TEAM_ID":"TEAM_ID_NUMBER"})
    team_totals = team_totals['TEAM_ID_NUMBER']
    mbd = mbd.merge(right = team_totals, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID_NUMBER')
    mbd['TEAM_DEFENSE'] = (mbd['OPP_PTS'] / (mbd['OPP_FGA'] + mbd['OPP_TOV'] + (0.44 * mbd['OPP_FTA'])))
    mbd['LEAGUE_DEFENSE'] = (mbd['LEAGUE_PTS'] / (mbd['LEAGUE_FGA'] + mbd['LEAGUE_TOV'] + (0.44 * mbd['LEAGUE_FTA'])))
    mbd['TEAM_DEFENSIVE_WINS'] = ((mbd['LEAGUE_DEFENSE'] / mbd['TEAM_DEFENSE']) * (mbd['W']/2))

    #Next two changed
    mbd['DEFENSIVE_WINS'] = (mbd['TotalDefense'] /
                               ((1 / (mbd['POINTS_SAVED_48'].astype(float) / mbd['TEAM_POINTS_SAVED_48'].astype(float)))
                               * (mbd['TEAM_POINTS_SAVED'] / mbd['TEAM_DEFENSIVE_WINS'])))
    mbd['DEFENSIVE_WINS_48'] = (mbd['DEFENSIVE_WINS'] / mbd['MP'] * 48)

    # NOTE(review): this divides the same three columns by pace_factor a second time (they were
    # already pace-adjusted right after the pace merge); kept to preserve existing output - confirm intent.
    for pace_col in ('Non_Shooting_Defense', 'ShootingDefense', 'TotalDefense'):
        mbd[pace_col] = (mbd[pace_col] / mbd['pace_factor']).fillna(mbd[pace_col])

    #Defensive Losses
    mbd['TEAM_DEFENSIVE_LOSSES'] = ((mbd['TEAM_DEFENSE'] / mbd['LEAGUE_DEFENSE']) * (mbd['L']/2))
    mbd['TEAM_PTS_ALL_48'] = ((mbd['OPP_PTS'] / mbd['MIN'] * 48)/5)
    mbd['PTS_ALL_48'] = mbd['points_allowed'] / mbd['MP'] * 48
    mbd['DEFENSIVE_LOSSES'] = (mbd['points_allowed'] /
                               ((1 / (mbd['PTS_ALL_48'].astype(float) / mbd['TEAM_PTS_ALL_48'].astype(float)))
                               * (mbd['OPP_PTS'] / mbd['TEAM_DEFENSIVE_LOSSES'])))
    mbd['DEFENSIVE_LOSSES_48'] = (mbd['DEFENSIVE_LOSSES'] / mbd['MP'] * 48)
    mbd['DEFENSIVE_WIN_PCT'] = (mbd['DEFENSIVE_WINS']/(mbd['DEFENSIVE_WINS'] + mbd['DEFENSIVE_LOSSES']))

    # Every backslash is escaped so the literal contains no invalid escape sequences;
    # the resulting path is the same as before.
    mbd.to_csv(f"C:\\Users\\gsteele\\other\\MATCHUP_BASED_DEFENSE\\{season}_advanced_defense.csv")

In [12]:
# Build the current season first, then every earlier season, pausing
# 5 seconds before each run after the first.
seasons_to_build = [this_year, '2013-14','2014-15','2015-16','2016-17','2017-18','2018-19','2019-20']

for position, s in enumerate(seasons_to_build):
    if position > 0:
        time.sleep(5)
    calculate_matchup_based_defense(s)

In [13]:
# Directory scanned for per-season CSV files.
path = r'C:\Users\gsteele\Other\MATCHUP_BASED_DEFENSE\\'
# glob returns matches in arbitrary order; sort so the combined frame has a reproducible row order.
all_files = sorted(glob.glob(path + "/*.csv"))

csv_list = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    csv_list.append(df)

# Stack every file into one frame with a fresh 0..n-1 index, then blank out missing values.
frame = pd.concat(csv_list, axis=0, ignore_index=True)
frame = frame.fillna('')

In [14]:
# Compute both season masks BEFORE rewriting the column: on an integer column the first np.where
# turns every value into a string, after which `== 22020` would never match.
# NOTE(review): only codes 22019 and 22020 are mapped; other season codes are left as they are.
is_2019_20 = frame['SEASON_ID'] == 22019
is_2020_21 = frame['SEASON_ID'] == 22020
frame['SEASON_ID'] = np.where(is_2019_20,'2019-20',frame['SEASON_ID'])
frame['SEASON_ID'] = np.where(is_2020_21,'2020-21',frame['SEASON_ID'])

# Replace +/-inf with NaN, then blank out every NaN.
frame = frame.replace([np.inf, -np.inf], np.nan)
frame = frame.fillna('')

# Rebuild DEF_TEAM_ABBREVIATION from the (team id, abbreviation) pairs that have a non-blank
# abbreviation; the inner merge drops rows whose team id has no such pair.
team_frame = frame[['DEF_TEAM_ID','DEF_TEAM_ABBREVIATION']].drop_duplicates()
team_frame = team_frame[team_frame['DEF_TEAM_ABBREVIATION'] !='']
frame = frame.drop(['DEF_TEAM_ABBREVIATION'], axis = 1)
frame = frame.merge(right = team_frame, how = 'inner', on = ['DEF_TEAM_ID'])

In [15]:
# Authenticate with the service-account key file. A raw string avoids the invalid
# escape sequences the previous literal contained; the path itself is the same.
spread_client = gspread.service_account(filename=r'C:\Users\gsteele\Other\loyal-skill-297317-d353a7b193db.json')
# Open the 'DEFENSE' spreadsheet and its 'D WINS' worksheet.
sheet = spread_client.open('DEFENSE')
worksheet = sheet.worksheet("D WINS")
# Upload the header row followed by every data row of `frame`.
worksheet.update([frame.columns.values.tolist()] + frame.values.tolist())

{'spreadsheetId': '1a4XVlWRgYkyJLVEilAw6oT9F9yE2OmY3Py0kUTFLANI',
 'updatedRange': "'D WINS'!A1:CV4472",
 'updatedRows': 4472,
 'updatedColumns': 100,
 'updatedCells': 447200}

In [20]:
def calculate_old_matchup_based_defense(season):
    """Build the matchup-based defense table for one older-format season and save it as CSV.

    Reads the pre-exported ``{season}_matchups.csv`` file, combines it with the
    team-total, player-total, per-game, opponent, hustle and pace frames returned
    by the ``scrape_*`` helpers, derives per-defender shooting / non-shooting
    defense, defensive wins and defensive losses, and writes the result to
    ``{season}_advanced_defense.csv`` in the MATCHUP_BASED_DEFENSE folder.

    Parameters
    ----------
    season : str
        Season label such as ``'2018-19'``. It is passed to every ``scrape_*``
        helper and is part of both the input and the output file name.

    Returns
    -------
    None
        The result is only written to disk.

    Notes
    -----
    Intermediate columns are added with ``DataFrame.insert`` at fixed positions,
    which assumes the incoming frames have the column counts they had when this
    was written. Inserting or removing columns upstream can make an ``insert``
    call fail.
    """
    team_totals = scrape_team_totals(season)
    # Fetched once and reused below for defender blocks/steals and for minutes
    # played (assumes the helper returns the same data on every call for a season).
    player_totals = scrape_player_totals(season)
    per_game = scrape_per_game(season)
    # Fetched once; league-wide totals are added to it further down.
    opponent = scrape_opponent(season)

    # Raw f-string: backslashes are literal, {season} is still interpolated.
    matchup_data = pd.read_csv(rf"C:\Users\gsteele\Other\{season}_matchups.csv")

    #Calculate two-pointers
    per_game.insert(loc = 16, column = 'FG2M', value = (per_game.FGM - per_game.FG3M))
    per_game.insert(loc = 17, column = 'FG2A', value = (per_game.FGA - per_game.FG3A))
    per_game.insert(loc = 18, column = 'FG2_PCT', value = (per_game.FG2M / per_game.FG2A))

    #Calculate shooting efficiency
    per_game.insert(loc = 22, column = 'eFG', value = ((per_game.FGM + (0.5 * per_game.FG3M)) / per_game.FGA))
    per_game.insert(loc = 23, column = 'TS', value = (per_game.PTS / (2 * (per_game.FGA + (0.44 * per_game.FTA)))))

    per_game = per_game[['PLAYER_ID','PLAYER_NAME','Season','TEAM_ID','TEAM_ABBREVIATION','GP','MIN','PTS','FGM',
                         'FGA','FG_PCT','FG2M','FG2A','FG2_PCT','FG3M','FG3A','FG3_PCT','FTM','FTA','FT_PCT','eFG','TS',
                         'OREB','DREB','REB','AST','TOV','STL','BLK','BLKA','PF','PLUS_MINUS']]

    #Rename matchup columns so they do not collide with the per-game columns after the merge
    matchup_data = matchup_data.rename(columns = {"FGM":"MATCHUP_FGM","FGA":"MATCHUP_FGA",
                                                 "FG3M":"MATCHUP_FG3M","FG3A":"MATCHUP_FG3A","TOV":"MATCHUP_TOV",
                                                 "FTM":"MATCHUP_FTM","POSS":"PARTIAL_POSS","BLK":"MATCHUP_BLK"})
    # Free-throw attempts are approximated as two per SFL (presumably shooting fouls -- confirm).
    matchup_data['MATCHUP_FTA'] = matchup_data['SFL'] * 2

    #Calculate two-pointers (percentage is 0 when there were no attempts)
    matchup_data.insert(loc = 16, column = 'MATCHUP_FG2M', value = (matchup_data.MATCHUP_FGM - matchup_data.MATCHUP_FG3M))
    matchup_data.insert(loc = 17, column = 'MATCHUP_FG2A', value = (matchup_data.MATCHUP_FGA - matchup_data.MATCHUP_FG3A))
    matchup_data.insert(loc = 18, column = 'MATCHUP_FG2_PCT',
                        value = np.where(matchup_data.MATCHUP_FG2A.astype(float) == 0,
                                         0,
                                            (matchup_data.MATCHUP_FG2M.astype(float) / matchup_data.MATCHUP_FG2A.astype(float))))

    #Calculate shooting efficiency (eFG is 0 when there were no attempts; TS has no such guard)
    matchup_data.insert(loc = 22, column = 'MATCHUP_eFG',
                value = np.where(matchup_data['MATCHUP_FGA'].astype(float) == 0,
                    0,
                        ((matchup_data.MATCHUP_FGM.astype(float) + (0.5 * matchup_data.MATCHUP_FG3M.astype(float)))
                         / matchup_data.MATCHUP_FGA.astype(float))))
    matchup_data.insert(loc = 23, column = 'MATCHUP_TS',
                        value = (matchup_data.PLAYER_PTS.astype(float)
                        / (2 * (matchup_data.MATCHUP_FGA.astype(float) + (0.44 * matchup_data['MATCHUP_FTA'].astype(float))))))

    # One row per (defender, offensive player) matchup, with the offensive player's per-game line attached.
    df = matchup_data.merge(right = per_game, how = 'inner', left_on = 'OFF_PLAYER_ID', right_on = 'PLAYER_ID')

    # Defensive load: possession-weighted average of the guarded players' per-game points.
    df.insert(loc = 63, column = 'weighted_difficulty', value = df['PTS']*df['PARTIAL_POSS'])
    df.insert(loc = 64, column = 'total_possessions', value = df.groupby(['DEF_PLAYER_ID'])['PARTIAL_POSS'].transform('sum'))
    df.insert(loc = 65, column = 'total_weight', value = df.groupby(['DEF_PLAYER_ID'])['weighted_difficulty'].transform('sum'))
    df.insert(loc = 66, column = 'defensive_load', value = df['total_weight']/df['total_possessions'])

    # Effectiveness: points the guarded players would be expected to score on
    # these possessions divided by the points they actually scored.
    df = df.rename(columns = {"GP":"GP_y"})
    df.insert(loc = 67, column = 'offensive_player_possessions', value = df.groupby(['OFF_PLAYER_ID'])['PARTIAL_POSS'].transform('sum'))
    df.insert(loc = 68, column = 'off_pts_poss', value = (df.PTS / (df.offensive_player_possessions / df.GP_y)))
    df.insert(loc = 69, column = 'weighted_opp_pts_poss', value = df.off_pts_poss * df.PARTIAL_POSS)
    df.insert(loc = 70, column = 'total_expected_opp_pts', value = df.groupby('DEF_PLAYER_ID')['weighted_opp_pts_poss'].transform('sum'))

    df.insert(loc = 71, column = 'points_allowed', value = df.groupby(['DEF_PLAYER_ID'])['PLAYER_PTS'].transform('sum'))
    df.insert(loc = 72, column = 'effectiveness', value = df.total_expected_opp_pts / df.points_allowed)

    # The older matchup export already carries DEF_TEAM_ID, so no defender -> team
    # lookup is needed. df2 is the same object as df, not a copy.
    df2 = df

    # Team-level load, and each defender's load relative to the team's.
    df2.insert(loc = 75, column = 'team_total_possessions', value = df2.groupby(['DEF_TEAM_ID'])['PARTIAL_POSS'].transform('sum'))
    df2.insert(loc = 76, column = 'team_total_weight', value = df2.groupby(['DEF_TEAM_ID'])['weighted_difficulty'].transform('sum'))
    df2.insert(loc = 77, column = 'team_defensive_load', value = df2['team_total_weight']/df2['team_total_possessions'])

    df2.insert(loc = 78, column = 'relative_load', value = df2.defensive_load / df2.team_defensive_load)
    df2.insert(loc = 79, column = 'load_adj_effectiveness', value = df2.effectiveness * df2.relative_load)
    df2.insert(loc = 80, column = 'team_opp_fga', value = df2.groupby(['DEF_TEAM_ID'])['MATCHUP_FGA'].transform('sum'))
    df2.insert(loc = 80, column = 'team_opp_fgm', value = df2.groupby(['DEF_TEAM_ID'])['MATCHUP_FGM'].transform('sum'))

    # Team blocks, split into blocks on the direct matchup and all other ("help") blocks.
    team_blocks = team_totals[['TEAM_ID','BLK']]
    df3 = df2.merge(right = team_blocks, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID')
    df3 = df3.rename(columns = {'BLK_y':'team_blk'})
    df3 = df3.drop(['TEAM_ID_y'], axis = 1)
    df3.insert(loc = 84, column = 'team_direct_blk', value = df3.groupby(['DEF_TEAM_ID'])['MATCHUP_BLK'].transform('sum'))
    df3.insert(loc = 85, column = 'team_help_blk', value = df3.team_blk - df3.team_direct_blk)
    df3.insert(loc = 86, column = 'direct_blk', value = df3.groupby(['DEF_PLAYER_ID'])['MATCHUP_BLK'].transform('sum'))

    #HELP_BLK needs to be summed per defender
    df3['HELP_BLK'] = df3.groupby(['DEF_PLAYER_ID'])['HELP_BLK'].transform('sum')

    # Defender's own season blocks and steals.
    player_blk = player_totals[['PLAYER_ID','BLK','STL']].drop_duplicates()
    player_blk = player_blk.rename(columns = {'STL':'DEFENDER_STL'})
    df4 = df3.merge(right = player_blk, how = 'left', left_on = ['DEF_PLAYER_ID'], right_on = ['PLAYER_ID'])
    df4.insert(loc = 89, column = 'help_blk', value = df4.BLK - df4.direct_blk)

    # Sum of load-adjusted effectiveness over each team's defenders (one value per defender).
    eff = df4[['DEF_PLAYER_ID','DEF_TEAM_ID','load_adj_effectiveness']].drop_duplicates()
    eff.insert(loc = 3, column = 'team_load_adj_effectiveness',
               value = eff.groupby(['DEF_TEAM_ID'])['load_adj_effectiveness'].transform('sum'))
    eff = eff[['DEF_TEAM_ID','team_load_adj_effectiveness']].drop_duplicates()

    df5 = df4.merge(right = eff, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'DEF_TEAM_ID')
    df5.insert(loc = 91, column = 'opp_pts_per_fgm', value = ((df5.PTS - df5.FTM) / df5.FGM))
    # Uses df5's own PARTIAL_POSS so the product never depends on index alignment
    # with an upstream frame.
    df5.insert(loc = 92, column = 'weighted_opp_pts_per_fgm', value = df5.opp_pts_per_fgm * df5.PARTIAL_POSS)
    df5.insert(loc = 93, column = 'agg_opp_pts_per_fgm', value = df5.groupby(['DEF_PLAYER_ID'])['weighted_opp_pts_per_fgm'].transform('sum'))

    # team_help_blk (not team_blk) is subtracted so shot blockers are not credited
    # twice for blocking their own man's shot.
    df5.insert(loc = 92, column = 'missed_fg_points_saved',
               value = (
                   (df5.load_adj_effectiveness / df5.team_load_adj_effectiveness)
                   * (df5.total_possessions / df5.team_total_possessions)
                   * (df5.team_opp_fga - df5.team_opp_fgm - df5.team_help_blk)
                   * (df5.agg_opp_pts_per_fgm / (df5.total_possessions / 5))
               )
              )

    # HELP_BLK (not BLK) for the same double-credit reason.
    df5.insert(loc = 95, column = 'total_shooting_defense',
               value = (df5.missed_fg_points_saved + ((df5.agg_opp_pts_per_fgm / df5.total_possessions) * df5.HELP_BLK)))

    hustle = scrape_hustle_stats(season)
    hustle = hustle.drop(columns = ['TEAM_ID','TEAM_ABBREVIATION']).drop_duplicates()
    # Nulls (e.g. in DEF_LOOSE_BALLS_RECOVERED) would otherwise propagate into solo_steals.
    hustle = hustle.fillna(0)

    df6 = df5.merge(right = hustle, how = 'left', left_on = 'DEF_PLAYER_ID', right_on = 'PLAYER_ID')
    df6['DEFENDER_ID'] = df6['DEF_PLAYER_ID']

    # Defenders without a hustle row get zeros instead of NaN.
    df6 = df6.fillna(0)

    df6.insert(loc = 96, column = 'solo_steals', value = df6.DEFENDER_STL - df6.DEF_LOOSE_BALLS_RECOVERED)
    # Reads df6's own possession / games columns rather than an upstream frame's.
    df6.insert(loc = 97, column = 'ind_exp_opp_tov', value = ((df6.TOV / (df6.offensive_player_possessions / df6.GP_y))*df6.PARTIAL_POSS))
    df6.insert(loc = 98, column = 'total_exp_opp_tov', value = df6.groupby(['DEFENDER_ID'])['ind_exp_opp_tov'].transform('sum'))
    df6.insert(loc = 99, column = 'ind_opp_tov', value = df6.groupby(['DEFENDER_ID'])['MATCHUP_TOV'].transform('sum'))
    # "ooto": matchup turnovers not already explained by solo steals or charges drawn.
    df6.insert(loc = 100, column = 'ooto', value = df6.ind_opp_tov - df6.solo_steals - df6.CHARGES_DRAWN)

    team_ooto = df6[['DEF_TEAM_ID','DEF_PLAYER_ID','ooto']].drop_duplicates()
    team_ooto.insert(loc = 3, column = 'team_ooto', value = team_ooto.groupby(['DEF_TEAM_ID'])['ooto'].transform('sum'))
    ooto = team_ooto.drop(columns = ['DEF_PLAYER_ID','ooto']).drop_duplicates()

    df7 = df6.merge(right = ooto, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'DEF_TEAM_ID')
    df7.insert(loc = 102, column = 'player:team_ooto', value = df7.ooto /df7.team_ooto)
    df7.insert(loc = 101, column = 'player:expected_ooto', value = df7.ooto /df7.total_exp_opp_tov)
    df7.insert(loc = 104, column = 'ooto_factor', value = df7['player:team_ooto'] * df7['player:expected_ooto'])
    # The value below only has entries for the rows kept by drop_duplicates(); insert()
    # aligns on the index, so every other row gets NaN here. Those NaN rows are what
    # the dropna() on df8 removes later.
    df7.insert(loc = 105, column = 'team_ooto_factor', value =
    df7[['DEF_PLAYER_ID','DEF_TEAM_ID','ooto_factor']].drop_duplicates().groupby(['DEF_TEAM_ID'])['ooto_factor'].transform('sum')
              )
    df7.insert(loc = 106, column = 'other_tov_forced', value = ((df7.ooto_factor / df7.team_ooto_factor) * df7.team_ooto))
    df7.insert(loc = 107, column = 'tov_forced', value = df7.CHARGES_DRAWN + df7.other_tov_forced + df7.solo_steals)
    # NOTE(review): 0.744 is an unexplained scaling constant -- document its source.
    df7.insert(loc = 108, column = 'adjusted_shooting_defense', value = 0.744 * df7.total_shooting_defense)
    df7.insert(loc = 109, column = 'points_saved', value = df7.adjusted_shooting_defense + df7.tov_forced)
    df7.insert(loc = 109, column = 'ShootingDefper100', value = ((df7.adjusted_shooting_defense / df7.total_possessions) * 100))
    df7.insert(loc = 109, column = 'TotalDefenseper100', value = ((df7.points_saved / df7.total_possessions) * 100))
    df7.insert(loc = 109, column = 'NonShootingDefper100', value = ((df7.tov_forced / df7.total_possessions) * 100))

    # Reduce to the per-defender summary columns ('DEF_TEAM_NAME' is not present in this format).
    df8 = df7[['DEF_PLAYER_NAME','SEASON_ID','defensive_load','relative_load','effectiveness',
               'load_adj_effectiveness','tov_forced','CHARGES_DRAWN','solo_steals','G','total_possessions',
               'adjusted_shooting_defense','ShootingDefper100','points_saved','TotalDefenseper100','NonShootingDefper100',
              'DEF_PLAYER_ID','DEF_TEAM_ID','points_allowed']].drop_duplicates()
    df8 = df8.dropna()

    #Manipulate pace: each team's PACE relative to a league-wide aggregate
    # (the 30 is presumably the number of teams -- confirm).
    pace = scrape_pace(season)
    pace.insert(loc = 1, column = 'weight', value = pace.MIN * pace.PACE)
    pace.insert(loc = 1, column = 'agg_weight', value = (pace['weight'].sum() / (30 * pace['MIN'].mean())))
    pace.insert(loc = 1, column = 'pace_factor', value = pace.PACE / pace.agg_weight)
    pace = pace[['TEAM_ID','pace_factor']]

    #Merge to df8
    mbd = df8.merge(right = pace, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID')
    # NOTE(review): 'DEF_TEAM_NAME' is absent and 'load_adj_efectiveness' is misspelled,
    # so those two entries rename nothing. Left as-is because correcting them would
    # change the column names written to the CSV.
    mbd = mbd.rename(columns = {'DEF_TEAM_NAME':'DEF_TEAM_ABBREVIATION','load_adj_efectiveness':'loadadjustedeffectiveness',
                         'tov_forced':'Non_Shooting_Defense','solo_steals':'solosteals','total_possessions':'totposs',
                         'adjusted_shooting_defense':'ShootingDefense','points_saved':'TotalDefense'})

    # Pace-adjust the three defense totals. Explicit assignment replaces the chained
    # `mbd[col].update(...)`, which does nothing under pandas Copy-on-Write. Like
    # Series.update, the old value is kept wherever the quotient is missing.
    pace_adjusted_cols = ('Non_Shooting_Defense', 'ShootingDefense', 'TotalDefense')
    for col in pace_adjusted_cols:
        mbd[col] = (mbd[col] / mbd.pace_factor).fillna(mbd[col])

    mbd = mbd.drop(['TEAM_ID'], axis = 1)

    # League-wide opponent totals, repeated on every team row.
    opponent['LEAGUE_PTS'] = opponent['OPP_PTS'].sum()
    opponent['LEAGUE_FGA'] = opponent['OPP_FGA'].sum()
    opponent['LEAGUE_TOV'] = opponent['OPP_TOV'].sum()
    opponent['LEAGUE_FTA'] = opponent['OPP_FTA'].sum()

    mbd = mbd.merge(right = opponent, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID')
    mbd['TEAM_POINTS_SAVED'] = mbd.groupby(['DEF_TEAM_ID'])['TotalDefense'].transform('sum')

    #Tacking on player minutes (MP); 'MIN' on mbd is the column from the opponent frame
    p = player_totals[['PLAYER_ID','MIN']]
    p = p.rename(columns = {"PLAYER_ID":"PID","MIN":"MP"})
    mbd = mbd.merge(right = p, how = 'left', left_on = 'DEF_PLAYER_ID', right_on = 'PID')
    mbd['POINTS_SAVED_48'] = (mbd['TotalDefense'] / mbd['MP'] * 48)
    # Divided by 5 to put the team rate on a per-player basis.
    mbd['TEAM_POINTS_SAVED_48'] = ((mbd['TEAM_POINTS_SAVED'] / mbd['MIN'] * 48)/5)

    #Handling edge cases with few possessions and non-shooting defense > 0:
    # below 100 possessions, TotalDefense is capped at ShootingDefense.
    mbd['TotalDefense'] = np.where(((mbd['TotalDefense'] > mbd['ShootingDefense']) & (mbd['totposs'] < 100)),
        mbd['ShootingDefense'], mbd['TotalDefense'])
    mbd['POINTS_SAVED_48'] = (mbd['TotalDefense'] / mbd['MP'] * 48)

    # Only the team id is taken from team_totals here (merged in as a named Series).
    team_totals = team_totals.rename(columns = {"TEAM_ID":"TEAM_ID_NUMBER"})
    team_totals = team_totals['TEAM_ID_NUMBER']
    mbd = mbd.merge(right = team_totals, how = 'left', left_on = 'DEF_TEAM_ID', right_on = 'TEAM_ID_NUMBER')
    # Points allowed per (FGA + TOV + 0.44 * FTA), for the team and for the league.
    mbd['TEAM_DEFENSE'] = (mbd['OPP_PTS'] / (mbd['OPP_FGA'] + mbd['OPP_TOV'] + (0.44 * mbd['OPP_FTA'])))
    mbd['LEAGUE_DEFENSE'] = (mbd['LEAGUE_PTS'] / (mbd['LEAGUE_FGA'] + mbd['LEAGUE_TOV'] + (0.44 * mbd['LEAGUE_FTA'])))
    # Half of the team's wins, scaled by how the team's defense compares with the league's.
    mbd['TEAM_DEFENSIVE_WINS'] = ((mbd['LEAGUE_DEFENSE'] / mbd['TEAM_DEFENSE']) * (mbd['W']/2))

    # Each defender's share of the team's defensive wins.
    mbd['DEFENSIVE_WINS'] = (mbd['TotalDefense'] /
                               ((1 / (mbd['POINTS_SAVED_48'] / mbd['TEAM_POINTS_SAVED_48']))
                               * (mbd['TEAM_POINTS_SAVED'] / mbd['TEAM_DEFENSIVE_WINS'])))
    mbd['DEFENSIVE_WINS_48'] = (mbd['DEFENSIVE_WINS'] / mbd['MP'] * 48)

    # NOTE(review): this is a second division by pace_factor, applied after
    # DEFENSIVE_WINS was computed, so the three saved columns end up divided by
    # pace_factor twice -- confirm this is intended.
    for col in pace_adjusted_cols:
        mbd[col] = (mbd[col] / mbd.pace_factor).fillna(mbd[col])

    #Defensive Losses (mirror of the defensive-wins calculation, based on points allowed)
    mbd['TEAM_DEFENSIVE_LOSSES'] = ((mbd['TEAM_DEFENSE'] / mbd['LEAGUE_DEFENSE']) * (mbd['L']/2))
    mbd['TEAM_PTS_ALL_48'] = ((mbd['OPP_PTS'] / mbd['MIN'] * 48)/5)
    mbd['PTS_ALL_48'] = mbd['points_allowed'] / mbd['MP'] * 48
    mbd['DEFENSIVE_LOSSES'] = (mbd['points_allowed'] /
                               ((1 / (mbd['PTS_ALL_48'].astype(float) / mbd['TEAM_PTS_ALL_48'].astype(float)))
                               * (mbd['OPP_PTS'] / mbd['TEAM_DEFENSIVE_LOSSES'])))
    mbd['DEFENSIVE_LOSSES_48'] = (mbd['DEFENSIVE_LOSSES'] / mbd['MP'] * 48)
    mbd['DEFENSIVE_WIN_PCT'] = (mbd['DEFENSIVE_WINS']/(mbd['DEFENSIVE_WINS'] + mbd['DEFENSIVE_LOSSES']))

    # Raw f-string: same path as before, without invalid escape sequences.
    mbd.to_csv(rf"C:\Users\gsteele\other\MATCHUP_BASED_DEFENSE\{season}_advanced_defense.csv")

In [None]:
#df[df['SEASON_ID'] == '2013-14'].to_csv(f"C:\\Users\gsteele\Other\\2013-14_matchups.csv")
#df[df['SEASON_ID'] == '2014-15'].to_csv(f"C:\\Users\gsteele\Other\\2014-15_matchups.csv")
#df[df['SEASON_ID'] == '2015-16'].to_csv(f"C:\\Users\gsteele\Other\\2015-16_matchups.csv")
#df[df['SEASON_ID'] == '2016-17'].to_csv(f"C:\\Users\gsteele\Other\\2016-17_matchups.csv")
#df[df['SEASON_ID'] == '2017-18'].to_csv(f"C:\\Users\gsteele\Other\\2017-18_matchups.csv")
#df[df['SEASON_ID'] == '2018-19'].to_csv(f"C:\\Users\gsteele\Other\\2018-19_matchups.csv")

In [None]:
#df = pd.read_csv("C:\\Users\gsteele\Other\old_matchups.csv", low_memory = False)

In [None]:
#for season in ['2013-14','2014-15','2015-16','2016-17','2017-18','2018-19','2019-20']:
#    time.sleep(3)
#    calculate_matchup_based_defense(season)

In [21]:
# Rebuild the seasons 2013-14 through 2018-19 with the older-format calculation,
# waiting 3 seconds before each one (presumably to throttle the stats.nba.com requests).
for season in [f"{start}-{(start + 1) % 100:02d}" for start in range(2013, 2019)]:
    time.sleep(3)
    calculate_old_matchup_based_defense(season)