In [6]:
import pandas as pd
import requests
import time
import numpy as np
import glob
import bs4
import pypyodbc as podbc

In [7]:
#pandas display settings: no cap on printed rows/columns or on cell text width
for option_name, option_value in (
    ('display.max_rows', None),
    ('display.min_rows', 200),
    ('display.max_columns', None),
    ('display.max_colwidth', None),
):
    pd.set_option(option_name, option_value)

scrape_per_game is a simple function to scrape per-game values for a given season

In [8]:
#Function to scrape per-game values
def scrape_per_game(season, timeout=30):
    """Download league-wide per-game player stats for one regular season.

    Parameters
    ----------
    season : str
        Season label interpolated into the request URL, e.g. '2019-20'.
    timeout : float, optional
        Seconds to wait for stats.nba.com before giving up (default 30).
        Without a timeout a stalled connection would block the notebook forever.

    Returns
    -------
    pandas.DataFrame
        Rows of the first result set returned by the endpoint, using the
        endpoint's own header names as columns, plus a 'Season' column that
        holds the ``season`` argument.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9'}

    url = f"https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight="
    response = requests.get(url, headers=headers, timeout=timeout)
    #Fail loudly on an HTTP error instead of hitting a confusing JSON/KeyError later
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    df['Season'] = season
    return df

Create per_game, a DataFrame to hold per-game values for relevant seasons

In [9]:
#Combine per-game values for every season from 2013-14 through 2019-20
frame_list = []
for season in ('2013-14','2014-15','2015-16','2016-17','2017-18','2018-19','2019-20'):
    #Random pause of 0-5 whole seconds between requests
    time.sleep(np.random.randint(0, 6))
    df = scrape_per_game(season)
    frame_list.append(df)

per_game = pd.concat(frame_list)

#Two-pointers = all field goals minus three-pointers
per_game['FG2M'] = per_game['FGM'] - per_game['FG3M']
per_game['FG2A'] = per_game['FGA'] - per_game['FG3A']

#DraftKings average
per_game['draftkings'] = (
    per_game['FG3M'] * 3.5
    + per_game['FG2M'] * 2
    + per_game['FTM']
    + per_game['REB'] * 1.25
    + per_game['BLK'] * 2
    + per_game['STL'] * 2
    + per_game['TOV'] * (-0.5)
    + per_game['AST'] * 1.5
)

#FanDuel average
per_game['fanduel'] = (
    per_game['FG3M'] * 3
    + per_game['FG2M'] * 2
    + per_game['FTM']
    + per_game['REB'] * 1.2
    + per_game['BLK'] * 2
    + per_game['STL'] * 2
    + per_game['TOV'] * (-1)
    + per_game['AST'] * 1.5
)

#Keep only the columns used downstream, in a fixed order
per_game_columns = [
    'PLAYER_ID','PLAYER_NAME','Season','fanduel','draftkings','TEAM_ID','TEAM_ABBREVIATION','GP','MIN','PTS','FGM',
    'FGA','FG2M','FG2A','FG3M','FG3A','FG_PCT','FG3_PCT','FTM','FTA','FT_PCT','OREB','DREB','REB','AST','TOV',
    'STL','BLK','PF','PFD','DD2','TD3',
]
per_game = per_game[per_game_columns]

# Update matchup data

The following function pulls down matchup data for a single team in the current season; the loop below it calls the function for each team and concatenates the results into a single DataFrame

In [10]:
def scrape_matchup_data(teamid, season='2019-20', timeout=30):
    """Download season matchup totals with ``teamid`` as the defending team.

    Parameters
    ----------
    teamid : int
        Team id sent as the ``DefTeamID`` query parameter.
    season : str, optional
        Season label sent as the ``Season`` query parameter. Defaults to
        '2019-20', the value that used to be hard-coded in the URL.
    timeout : float, optional
        Seconds to wait for stats.nba.com before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Rows of the first result set returned by the endpoint, using the
        endpoint's own header names as columns.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': f"https://stats.nba.com/team/{teamid}/matchups/",
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9'}

    url = f"https://stats.nba.com/stats/leagueseasonmatchups?DateFrom=&DateTo=&DefTeamID={teamid}&LeagueID=00&Outcome=&PORound=0&PerMode=Totals&Season={season}&SeasonType=Regular+Season"
    response = requests.get(url, headers=headers, timeout=timeout)
    #Fail loudly on an HTTP error instead of hitting a confusing JSON/KeyError later
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]
    df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
    return df

#Scrape matchup data for every team and stack the results.
#The 30 NBA teams use consecutive ids 1610612737..1610612766; the previous upper bound
#(1610612768) also requested 1610612767, which is not a team.
csv_list = []
for team_id in range(1610612737, 1610612766 + 1):
    #Random pause of 0-5 whole seconds between requests
    time.sleep(np.random.randint(0,5 + 1))
    frame = scrape_matchup_data(team_id)
    csv_list.append(frame)

new_matchups = pd.concat(csv_list)

Combine current season's matchup data with previous data

In [11]:
#Import previously saved matchup data and stack it with the freshly scraped season
old_matchups = pd.read_csv(r'c:\users\gsteele\Other\matchup_data.csv', low_memory = False)
matchups = [new_matchups,old_matchups]
matchups_df = pd.concat(matchups)

#Handle season naming conventions: map the codes '22017'/'22018'/'22019' to 'YYYY-YY' labels
#so SEASON_ID can be joined against the per-game 'Season' column
matchups_df.loc[matchups_df['SEASON_ID'] == '22017', 'SEASON_ID'] = '2017-18'
matchups_df.loc[matchups_df['SEASON_ID'] == '22018', 'SEASON_ID'] = '2018-19'
matchups_df.loc[matchups_df['SEASON_ID'] == '22019', 'SEASON_ID'] = '2019-20'

#Calculate FG2M and FG2A (two-pointers = all field goals minus three-pointers)
matchups_df.insert(loc = 17, column = 'MATCHUP_FG2M', value = (matchups_df['MATCHUP_FGM'] - matchups_df['MATCHUP_FG3M']))
#Attempts must subtract three-point ATTEMPTS (FG3A); subtracting makes (FG3M) overstated FG2A
matchups_df.insert(loc = 17, column = 'MATCHUP_FG2A', value = (matchups_df['MATCHUP_FGA'] - matchups_df['MATCHUP_FG3A']))

In [12]:
#Attach every matchup row to the offensive player's per-game line for the same season.
#Left join: per-game rows without any matchup row are kept.
matchups2 = per_game.merge(
    matchups_df,
    how = 'left',
    left_on = ['PLAYER_ID','Season'],
    right_on = ['OFF_PLAYER_ID','SEASON_ID'],
)

In [13]:
#Total possessions per offensive player and season (PARTIAL_POSS summed over all of that
#player-season's matchup rows), followed by possessions per game played
player_season_poss = matchups2.groupby(['PLAYER_ID','Season'])['PARTIAL_POSS'].transform('sum')
matchups2.insert(loc = 7, column = 'total_possessions', value = player_season_poss)

games_played = matchups2['GP_x']
matchups2.insert(loc = 8, column = 'poss_per_game', value = matchups2['total_possessions'] / games_played)

In [14]:
#Per-possession values for the offensive player: each per-game stat divided by poss_per_game.
#(insert position, source column) pairs are applied strictly in this order so the resulting
#column layout is the same as inserting them one statement at a time.
poss_specs = [
    (9, 'fanduel'), (10, 'draftkings'), (11, 'PTS'), (12, 'FGM'), (13, 'FGA'),
    (14, 'REB'), (15, 'OREB'), (16, 'DREB'), (17, 'AST'), (18, 'TOV'),
    (19, 'STL'), (20, 'BLK'), (21, 'FTM'), (22, 'FTA'),
    (21, 'FG3M'), (22, 'FG3A'),
    (21, 'FG2M'), (22, 'FG2A'),
]
for poss_loc, poss_stat in poss_specs:
    matchups2.insert(
        loc = poss_loc,
        column = f'{poss_stat}_poss',
        value = matchups2[poss_stat] / matchups2['poss_per_game'],
    )

#Drop matchups with zero possessions
has_possessions = matchups2['PARTIAL_POSS'].ne(0)
matchups2 = matchups2.loc[has_possessions].copy()

In [15]:
#Fantasy points for each individual matchup row.
#Rebounds, blocks and steals are not read from MATCHUP_* columns here; they are estimated as the
#offensive player's per-possession rate multiplied by the possessions in the matchup.
matchup_possessions = matchups2['PARTIAL_POSS']
est_rebounds = matchups2['REB_poss'] * matchup_possessions
est_blocks = matchups2['BLK_poss'] * matchup_possessions
est_steals = matchups2['STL_poss'] * matchup_possessions

#DraftKings
matchup_dk_points = (
    matchups2['MATCHUP_FG3M'] * 3.5
    + matchups2['MATCHUP_FG2M'] * 2
    + matchups2['MATCHUP_FTM']
    + est_rebounds * 1.25
    + est_blocks * 2
    + est_steals * 2
    + matchups2['MATCHUP_TOV'] * (-0.5)
    + matchups2['MATCHUP_AST'] * 1.5
)
matchups2.insert(loc = 1, column = 'MATCHUP_draftkings', value = matchup_dk_points)

#FanDuel
matchup_fd_points = (
    matchups2['MATCHUP_FG3M'] * 3
    + matchups2['MATCHUP_FG2M'] * 2
    + matchups2['MATCHUP_FTM']
    + est_rebounds * 1.2
    + est_blocks * 2
    + est_steals * 2
    + matchups2['MATCHUP_TOV'] * (-1)
    + matchups2['MATCHUP_AST'] * 1.5
)
matchups2.insert(loc = 1, column = 'MATCHUP_fanduel', value = matchup_fd_points)


In [None]:
#Per-possession production of each offensive player against each defender, pooled over every
#row that shares the same (OFF_PLAYER_ID, DEF_PLAYER_ID) pair: pair total / pair possessions.
pair_keys = ['OFF_PLAYER_ID','DEF_PLAYER_ID']
pair_possessions = matchups2.groupby(pair_keys)['PARTIAL_POSS'].transform('sum')

#(new column, source column), inserted at position 9 in this order
matchup_rate_specs = [
    ('MATCHUP_fanduel_poss', 'MATCHUP_fanduel'),
    ('MATCHUP_draftkings_poss', 'MATCHUP_draftkings'),
    ('MATCHUP_PTS_poss', 'PLAYER_PTS'),
    ('MATCHUP_FGM_poss', 'MATCHUP_FGM'),
    ('MATCHUP_FGA_poss', 'MATCHUP_FGA'),
    ('MATCHUP_FG2M_poss', 'MATCHUP_FG2M'),
    ('MATCHUP_FG2A_poss', 'MATCHUP_FG2A'),
    ('MATCHUP_FG3M_poss', 'MATCHUP_FG3M'),
    ('MATCHUP_FG3A_poss', 'MATCHUP_FG3A'),
    ('MATCHUP_FTM_poss', 'MATCHUP_FTM'),
    ('MATCHUP_FTA_poss', 'MATCHUP_FTA'),
    ('MATCHUP_AST_poss', 'MATCHUP_AST'),
    ('MATCHUP_TOV_poss', 'MATCHUP_TOV'),
]
for rate_column, source_column in matchup_rate_specs:
    pair_total = matchups2.groupby(pair_keys)[source_column].transform('sum')
    matchups2.insert(loc = 9, column = rate_column, value = pair_total / pair_possessions)

In [17]:
#DO NOT RUN
#Recalculating above
#Alternative to the grouped per-possession cell above. Both cells insert columns with the same
#names (e.g. MATCHUP_fanduel_poss), so running both raises ValueError on the duplicate insert.
#Differences from the grouped cell: most ratios here are row-level (column / PARTIAL_POSS), and
#MATCHUP_fanduel_poss divides the pair total by the player-season total_possessions.
#NOTE(review): this is the only cell that creates MATCHUP_BLK_poss, which the differential
#cell below reads - confirm which of the two versions is the intended one.

#Calculate per-possession values for each matchup
matchups2.insert(loc = 9, column = 'MATCHUP_fanduel_poss', 
                 value = ((matchups2.groupby(['OFF_PLAYER_ID','DEF_PLAYER_ID'])['MATCHUP_fanduel'].transform('sum'))
                 /matchups2['total_possessions']))
matchups2.insert(loc = 10, column = 'MATCHUP_draftkings_poss', value = (matchups2['MATCHUP_draftkings']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 11, column = 'MATCHUP_PTS_poss', value = (matchups2['PLAYER_PTS']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 12, column = 'MATCHUP_FGM_poss', value = (matchups2['MATCHUP_FGM']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 13, column = 'MATCHUP_FGA_poss', value = (matchups2['MATCHUP_FGA']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 17, column = 'MATCHUP_AST_poss', value = (matchups2['MATCHUP_AST']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 18, column = 'MATCHUP_TOV_poss', value = (matchups2['MATCHUP_TOV']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 20, column = 'MATCHUP_BLK_poss', value = (matchups2['MATCHUP_BLK']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 21, column = 'MATCHUP_FTM_poss', value = (matchups2['MATCHUP_FTM']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 22, column = 'MATCHUP_FTA_poss', value = (matchups2['MATCHUP_FTA']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 21, column = 'MATCHUP_FG3M_poss', value = (matchups2['MATCHUP_FG3M']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 22, column = 'MATCHUP_FG3A_poss', value = (matchups2['MATCHUP_FG3A']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 21, column = 'MATCHUP_FG2M_poss', value = (matchups2['MATCHUP_FG2M']/matchups2['PARTIAL_POSS']))
matchups2.insert(loc = 22, column = 'MATCHUP_FG2A_poss', value = (matchups2['MATCHUP_FG2A']/matchups2['PARTIAL_POSS']))


#Matchup rebounding is not supported in current data reporting
#matchups2.insert(loc = 14, column = 'MATCHUP_REB_poss', value = (matchups2['MATCHUP_REB']/matchups2['PARTIAL_POSS']))
#matchups2.insert(loc = 15, column = 'MATCHUP_OREB_poss', value = (matchups2['MATCHUP_OREB']/matchups2['PARTIAL_POSS']))
#matchups2.insert(loc = 16, column = 'MATCHUP_DREB_poss', value = (matchups2['MATCHUP_DREB']/matchups2['PARTIAL_POSS']))

#MATCHUP_STL is not reported
#matchups2.insert(loc = 19, column = 'MATCHUP_STL_poss', value = (matchups2['MATCHUP_STL']/matchups2['PARTIAL_POSS']))

In [18]:
#Calculate differentials caused by defender:
#  MATCHUP_<stat>_diff = MATCHUP_<stat>_poss (rate against this defender) - <stat>_poss (overall rate)
#(insert position, stat) pairs are applied strictly in this order, which keeps the column layout
#identical to inserting them one statement at a time.
diff_specs = [
    (11, 'draftkings'), (12, 'fanduel'),
    (11, 'PTS'), (12, 'FGM'), (13, 'FGA'),
    (17, 'AST'), (18, 'TOV'), (20, 'BLK'),
    (21, 'FTM'), (22, 'FTA'),
    (21, 'FG3M'), (22, 'FG3A'),
    (21, 'FG2M'), (22, 'FG2A'),
]
for diff_loc, diff_stat in diff_specs:
    matchup_rate_column = f'MATCHUP_{diff_stat}_poss'
    if matchup_rate_column not in matchups2.columns:
        #The grouped per-possession cell does not create MATCHUP_BLK_poss; skipping the
        #differential avoids a KeyError that would abort the whole cell.
        print(f"Skipping MATCHUP_{diff_stat}_diff: column {matchup_rate_column} is missing")
        continue
    matchups2.insert(
        loc = diff_loc,
        column = f'MATCHUP_{diff_stat}_diff',
        value = matchups2[matchup_rate_column] - matchups2[f'{diff_stat}_poss'],
    )

# Scrape Schedule

In [19]:
def scrape_schedule(timeout=30):
    """Download the league schedule and flatten it to one row per game.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Indexed by 'gameId', with columns:
        'Home' - ``teamId`` of the game's ``homeTeam`` entry,
        'Away' - ``teamId`` of the game's ``awayTeam`` entry,
        'Date' - first ten characters of ``gameDateEst`` (the YYYY-MM-DD part
        of values such as '2019-10-22T00:00:00Z').

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If no response arrives within ``timeout`` seconds.
    KeyError
        If a game entry lacks one of the fields listed above.
    """
    headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    'Referer': 'https://www.nba.com/',
    }
    url = 'https://cdn.nba.com/static/json/staticData/scheduleLeagueV2.json'
    response = requests.get(url, headers=headers, allow_redirects = False, timeout=timeout)
    response.raise_for_status()
    date_list = response.json()['leagueSchedule']['gameDates']

    #Collect plain records and build the frame once, instead of creating and
    #concatenating a one-row DataFrame per game.
    game_records = [
        {
            'gameId': game['gameId'],
            'Home': game['homeTeam']['teamId'],
            'Away': game['awayTeam']['teamId'],
            'Date': str(game['gameDateEst'])[:10],
        }
        for date in date_list
        for game in date['games']
    ]
    schedule = pd.DataFrame(game_records, columns = ['gameId', 'Home', 'Away', 'Date']).set_index('gameId')
    return schedule

# Do not delete

In [20]:
#Pull schedule and filter for the date of interest
GAME_DATE = '2019-10-22'
schedule = scrape_schedule()
schedule['GAME_ID'] = schedule.index

#Characters 3-4 of GAME_ID are used as the two-digit starting year of the season ('19' -> '2019-20').
season_start = schedule['GAME_ID'].str[3:5]
#Wrap at 100 and zero-pad the ending year so that e.g. '08' gives '2008-09' rather than '2008-9'
season_end = ((season_start.astype(int) + 1) % 100).astype(str).str.zfill(2)
schedule.insert(loc = 4, column = 'Season', value = ("20" + season_start + "-" + season_end))
today = schedule[schedule['Date'] == GAME_DATE]

#Format from home team's POV
home = today.rename(columns = {"Home":"Team", "Away":"Opponent"})
home.insert(loc = 5,column = 'Location',value = 'H')

#Format from visitor's POV (columns reordered so both frames share the same layout)
away = today.rename(columns = {"Home":"Opponent", "Away":"Team"})
away = away[['Team','Opponent','Date','GAME_ID','Season']]
away.insert(loc = 5,column = 'Location',value = 'A')

#Combine home and away: one row per (team, game)
sched = [home,away]
today2 = pd.concat(sched)

Now that we have today's schedule, we need to join it with matchups2 on matchups2.TEAM_ID = today.Home. This will allow the table "today" to function as a hinge to connect the offensive team's players with all possible matchups on the opposing team. 

In [21]:
#Scrape commonteamrosters
def scrape_commonteamrosters(season='2019-20', timeout=30):
    """Download the roster of every team for one season.

    Parameters
    ----------
    season : str, optional
        Season label sent as the ``Season`` query parameter. Defaults to
        '2019-20', the value that used to be hard-coded in the URL.
    timeout : float, optional
        Seconds to wait for each response before giving up (default 30).

    Returns
    -------
    list of pandas.DataFrame
        One frame per requested team, built from the first result set of the
        response with the endpoint's own header names as columns.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': 'https://www.nba.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9'}

    rosters = []
    #The 30 NBA teams use consecutive ids 1610612737..1610612766; the previous upper bound
    #(1610612768) also requested 1610612767, which is not a team.
    for team in range(1610612737, 1610612766 + 1):
        #Random pause of 0-5 whole seconds between requests
        time.sleep(np.random.randint(0,5 + 1))
        url = f"https://stats.nba.com/stats/commonteamroster?LeagueID=00&Season={season}&TeamID={team}"
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        result_set = response.json()['resultSets'][0]
        df = pd.DataFrame(result_set['rowSet'], columns = result_set['headers'])
        rosters.append(df)

    return rosters

In [22]:
#Download every team's roster, stack them, and keep the players of teams on today's schedule
roster_list = scrape_commonteamrosters()
rosters = pd.concat(roster_list)
todays_rosters = today2.merge(rosters, how = 'inner', left_on = ['Team'], right_on = ['TeamID'])

In [23]:
#Get "offensive" rosters
#rosters and todays_rosters are reused from the previous cell. Scraping the rosters a second
#time here only repeated one throttled request per team and produced the same frames.

#Review whether to use trimmed form or thick form here
offense_columns = ['PLAYER_ID','PLAYER_NAME','Season','fanduel','draftkings','TEAM_ID','TEAM_ABBREVIATION',
                   'fanduel_poss','draftkings_poss','total_possessions','poss_per_game','GP_x',
                   'PTS_poss','FGM_poss','FGA_poss','FG3M_poss','FG3A_poss','FG2M_poss','FG2A_poss','FTM_poss',
                   'FTA_poss','REB_poss','AST_poss','TOV_poss','STL_poss','BLK_poss']
#One row per player-season: the selected columns repeat on every matchup row
matchups2_slim = matchups2[offense_columns].drop_duplicates()
matchups3 = pd.merge(left = matchups2_slim, right = todays_rosters, how = 'inner', left_on = ['PLAYER_ID'], right_on = ['PLAYER_ID'])
#Keep only the two most recent seasons of per-game data
matchups3 = matchups3[matchups3['Season_x'].isin(['2019-20', '2018-19'])].copy()
matchups3 = matchups3.drop(['SEASON','LeagueID','PLAYER','PLAYER_SLUG','BIRTH_DATE','EXP','SCHOOL','NUM'], axis = 1)

In [24]:
#Release intermediates that are no longer needed.
#globals().pop(name, None) is used instead of `del` so that re-running this cell does not
#raise NameError once the names are already gone.
for stale_name in ('away', 'csv_list', 'df', 'frame', 'frame_list', 'matchups', 'matchups2_slim',
                   'matchups_df', 'new_matchups', 'old_matchups', 'per_game', 'roster_list',
                   'sched', 'season', 'team_id', 'today'):
    globals().pop(stale_name, None)

#Per-matchup view: identifiers, possessions, per-possession rates and differentials.
#The former block of *_diff2 / PARTIAL_POSS2 inserts on this frame was removed: the frame was
#re-selected to exactly these columns right afterwards, so those columns were always discarded.
defenders = matchups2[['SEASON_ID','OFF_PLAYER_ID','OFF_PLAYER_NAME','DEF_PLAYER_ID','DEF_PLAYER_NAME','PARTIAL_POSS',
                       'MATCHUP_fanduel_poss','MATCHUP_draftkings_poss','MATCHUP_PTS_poss','MATCHUP_FGM_poss','MATCHUP_FGA_poss',
                      'MATCHUP_FG3M_poss','MATCHUP_FG3A_poss','MATCHUP_FG2M_poss','MATCHUP_FG2A_poss','MATCHUP_FTM_poss',
                      'MATCHUP_FTA_poss','MATCHUP_AST_poss','MATCHUP_TOV_poss','REB_poss','STL_poss','BLK_poss',
                       'MATCHUP_fanduel_diff','MATCHUP_draftkings_diff','MATCHUP_PTS_diff','MATCHUP_FGM_diff','MATCHUP_FGA_diff',
                      'MATCHUP_FG3M_diff','MATCHUP_FG3A_diff','MATCHUP_FG2M_diff','MATCHUP_FG2A_diff','MATCHUP_FTM_diff',
                      'MATCHUP_FTA_diff','MATCHUP_AST_diff','MATCHUP_TOV_diff']]

#Join schedule to opponent roster
opponent = pd.merge(left = today2, right = rosters, how = 'inner', left_on = ['Opponent'], right_on = ['TeamID'])

#Join matchups3 to opponent: pairs every offensive player with every player on the opposing roster
matchups4 = pd.merge (left = matchups3, right = opponent, how = 'outer', left_on = ['TEAM_ID'], right_on = ['Team'])

#Drop unnecessary columns
matchups4 = matchups4.drop(['Team_x','Opponent_x','Date_x','GAME_ID_x','Season_y','POSITION_x','HEIGHT_x','WEIGHT_x','AGE_x',
                           'PLAYER_SLUG','LeagueID','NUM','BIRTH_DATE','EXP','SCHOOL','TeamID_x','Location_y'], axis = 1)

#Join matchups4 to defenders: keep only pairs that have recorded matchup data
matchups5 = pd.merge(left = matchups4, right = defenders, 
                     how = 'inner', left_on = ['PLAYER_ID_x','PLAYER_ID_y'], right_on = ['OFF_PLAYER_ID','DEF_PLAYER_ID'])

In [46]:
#Per-matchup view: identifiers, possessions, per-possession rates and differentials
defender_columns = [
    'SEASON_ID','OFF_PLAYER_ID','OFF_PLAYER_NAME','DEF_PLAYER_ID','DEF_PLAYER_NAME','PARTIAL_POSS',
    'MATCHUP_fanduel_poss','MATCHUP_draftkings_poss','MATCHUP_PTS_poss','MATCHUP_FGM_poss','MATCHUP_FGA_poss',
    'MATCHUP_FG3M_poss','MATCHUP_FG3A_poss','MATCHUP_FG2M_poss','MATCHUP_FG2A_poss','MATCHUP_FTM_poss',
    'MATCHUP_FTA_poss','MATCHUP_AST_poss','MATCHUP_TOV_poss','REB_poss','STL_poss','BLK_poss',
    'MATCHUP_fanduel_diff','MATCHUP_draftkings_diff','MATCHUP_PTS_diff','MATCHUP_FGM_diff','MATCHUP_FGA_diff',
    'MATCHUP_FG3M_diff','MATCHUP_FG3A_diff','MATCHUP_FG2M_diff','MATCHUP_FG2A_diff','MATCHUP_FTM_diff',
    'MATCHUP_FTA_diff','MATCHUP_AST_diff','MATCHUP_TOV_diff',
]
defenders = matchups2[defender_columns]

#Roster of each scheduled team's opponent
opponent = today2.merge(rosters, how = 'inner', left_on = ['Opponent'], right_on = ['TeamID'])

#Pair offensive players (matchups3) with the opposing roster via the team id
matchups4 = matchups3.merge(opponent, how = 'outer', left_on = ['TEAM_ID'], right_on = ['Team'])

#Remove columns that are not needed after the join
unneeded_columns = [
    'Team_x','Opponent_x','Date_x','GAME_ID_x','Season_y','POSITION_x','HEIGHT_x','WEIGHT_x','AGE_x',
    'PLAYER_SLUG','LeagueID','NUM','BIRTH_DATE','EXP','SCHOOL','TeamID_x','Location_y',
]
matchups4 = matchups4.drop(unneeded_columns, axis = 1)

#Attach the matchup history of each (offensive player, defender) pair
matchups5 = matchups4.merge(
    defenders,
    how = 'inner',
    left_on = ['PLAYER_ID_x','PLAYER_ID_y'],
    right_on = ['OFF_PLAYER_ID','DEF_PLAYER_ID'],
)

In [51]:
#For every (OFF_PLAYER_ID, DEF_PLAYER_ID) pair, add the pair-wide sum of possessions and of each
#differential as new '<name>2' columns, placed at consecutive positions starting at 35.
pooled_sources = ['PARTIAL_POSS'] + [
    f'MATCHUP_{pooled_stat}_diff'
    for pooled_stat in ('fanduel', 'draftkings', 'PTS', 'FGM', 'FGA', 'FG2M', 'FG2A',
                        'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'TOV')
]
for pooled_loc, pooled_source in enumerate(pooled_sources, start = 35):
    pair_sum = matchups5.groupby(['OFF_PLAYER_ID','DEF_PLAYER_ID'])[pooled_source].transform('sum')
    matchups5.insert(loc = pooled_loc, column = pooled_source + '2', value = pair_sum)

In [53]:
#Preview the first five rows of the joined offensive-player / defender table
matchups5.head()

Unnamed: 0,PLAYER_ID_x,PLAYER_NAME,Season_x,fanduel,draftkings,TEAM_ID,TEAM_ABBREVIATION,fanduel_poss,draftkings_poss,total_possessions,poss_per_game,GP_x,PTS_poss,FGM_poss,FGA_poss,FG3M_poss,FG3A_poss,FG2M_poss,FG2A_poss,FTM_poss,FTA_poss,REB_poss_x,AST_poss,TOV_poss,STL_poss_x,BLK_poss_x,Location_x,Team_y,Opponent_y,Date_y,GAME_ID_y,Season,TeamID_y,SEASON,PLAYER,PARTIAL_POSS2,MATCHUP_fanduel_diff2,MATCHUP_draftkings_diff2,MATCHUP_PTS_diff2,MATCHUP_FGM_diff2,MATCHUP_FGA_diff2,MATCHUP_FG2M_diff2,MATCHUP_FG2A_diff2,MATCHUP_FG3M_diff2,MATCHUP_FG3A_diff2,MATCHUP_FTM_diff2,MATCHUP_FTA_diff2,MATCHUP_AST_diff2,MATCHUP_TOV_diff2,POSITION_y,HEIGHT_y,WEIGHT_y,AGE_y,PLAYER_ID_y,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,PARTIAL_POSS,MATCHUP_fanduel_poss,MATCHUP_draftkings_poss,MATCHUP_PTS_poss,MATCHUP_FGM_poss,MATCHUP_FGA_poss,MATCHUP_FG3M_poss,MATCHUP_FG3A_poss,MATCHUP_FG2M_poss,MATCHUP_FG2A_poss,MATCHUP_FTM_poss,MATCHUP_FTA_poss,MATCHUP_AST_poss,MATCHUP_TOV_poss,REB_poss_y,STL_poss_y,BLK_poss_y,MATCHUP_fanduel_diff,MATCHUP_draftkings_diff,MATCHUP_PTS_diff,MATCHUP_FGM_diff,MATCHUP_FGA_diff,MATCHUP_FG3M_diff,MATCHUP_FG3A_diff,MATCHUP_FG2M_diff,MATCHUP_FG2A_diff,MATCHUP_FTM_diff,MATCHUP_FTA_diff,MATCHUP_AST_diff,MATCHUP_TOV_diff
0,203076,Anthony Davis,2018-19,52.25,54.3,1610612740,NOP,1.100737,1.143923,2658.22,47.468214,56,0.545628,0.200134,0.385521,0.01896,0.054773,0.181174,0.330748,0.128507,0.162214,0.252801,0.08216,0.042133,0.033707,0.05056,A,1610612740,1610612761,2019-10-22,21900001,2019-20,1610612761,2019,Terence Davis,4.04,-0.974563,1.353318,1.441102,0.806269,1.614604,0.831055,1.686897,-0.024786,-0.072293,-0.148716,-0.175568,-0.066096,-0.051638,G,6-4,201,23.0,1629056,2019-20,203076,Anthony Davis,1629056,Terence Davis,2.02,0.000797,1.18865,0.990099,0.49505,0.990099,0,0,0.49505,0.990099,0,0,0,0,0.096046,0.015491,0.023753,-0.487282,0.676659,0.720551,0.403135,0.807302,-0.012393,-0.0361463,0.415528,0.843448,-0.0743581,-0.0877839,-0.033048,-0.0258188
1,203076,Anthony Davis,2018-19,52.25,54.3,1610612740,NOP,1.100737,1.143923,2658.22,47.468214,56,0.545628,0.200134,0.385521,0.01896,0.054773,0.181174,0.330748,0.128507,0.162214,0.252801,0.08216,0.042133,0.033707,0.05056,A,1610612740,1610612761,2019-10-22,21900001,2019-20,1610612761,2019,Terence Davis,4.04,-0.974563,1.353318,1.441102,0.806269,1.614604,0.831055,1.686897,-0.024786,-0.072293,-0.148716,-0.175568,-0.066096,-0.051638,G,6-4,201,23.0,1629056,2019-20,203076,Anthony Davis,1629056,Terence Davis,2.02,0.000797,1.18865,0.990099,0.49505,0.990099,0,0,0.49505,0.990099,0,0,0,0,0.096046,0.015491,0.023753,-0.487282,0.676659,0.720551,0.403135,0.807302,-0.012393,-0.0361463,0.415528,0.843448,-0.0743581,-0.0877839,-0.033048,-0.0258188
2,203076,Anthony Davis,2018-19,52.25,54.3,1610612740,NOP,1.100737,1.143923,2658.22,47.468214,56,0.545628,0.200134,0.385521,0.01896,0.054773,0.181174,0.330748,0.128507,0.162214,0.252801,0.08216,0.042133,0.033707,0.05056,A,1610612740,1610612761,2019-10-22,21900001,2019-20,1610612761,2019,OG Anunoby,34.05,-3.04751,-1.251545,-0.99765,-0.269656,-0.749474,-0.212337,-0.579751,-0.057319,-0.169722,-0.405192,-0.492895,-0.192852,-0.136427,F,6-7,232,22.0,1628384,2017-18,203076,Anthony Davis,1628384,OG Anunoby,5.71,0.004197,0.428017,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0,0,0.21522,0.029084,0.050412,-0.982132,-0.597188,-0.544837,-0.201648,-0.37809,-0.0135724,-0.0426563,-0.188075,-0.335433,-0.127969,-0.155114,-0.0445952,-0.0426563
3,203076,Anthony Davis,2018-19,52.25,54.3,1610612740,NOP,1.100737,1.143923,2658.22,47.468214,56,0.545628,0.200134,0.385521,0.01896,0.054773,0.181174,0.330748,0.128507,0.162214,0.252801,0.08216,0.042133,0.033707,0.05056,A,1610612740,1610612761,2019-10-22,21900001,2019-20,1610612761,2019,OG Anunoby,34.05,-3.04751,-1.251545,-0.99765,-0.269656,-0.749474,-0.212337,-0.579751,-0.057319,-0.169722,-0.405192,-0.492895,-0.192852,-0.136427,F,6-7,232,22.0,1628384,2018-19,203076,Anthony Davis,1628384,OG Anunoby,15.68,0.006107,0.484535,0.0,0.0,0.0637755,0,0,0.0,0.0637755,0,0,0,0,0.252801,0.033707,0.05056,-1.094629,-0.659389,-0.545628,-0.200134,-0.321746,-0.0189601,-0.0547735,-0.181174,-0.266972,-0.128507,-0.162214,-0.0821602,-0.0421335
4,203076,Anthony Davis,2018-19,52.25,54.3,1610612740,NOP,1.100737,1.143923,2658.22,47.468214,56,0.545628,0.200134,0.385521,0.01896,0.054773,0.181174,0.330748,0.128507,0.162214,0.252801,0.08216,0.042133,0.033707,0.05056,A,1610612740,1610612761,2019-10-22,21900001,2019-20,1610612761,2019,OG Anunoby,34.05,-3.04751,-1.251545,-0.99765,-0.269656,-0.749474,-0.212337,-0.579751,-0.057319,-0.169722,-0.405192,-0.492895,-0.192852,-0.136427,F,6-7,232,22.0,1628384,2019-20,203076,Anthony Davis,1628384,OG Anunoby,6.33,0.002704,0.514502,0.315956,0.157978,0.157978,0,0,0.157978,0.157978,0,0,0,0,0.096046,0.015491,0.023753,-0.485374,0.00251567,0.0464076,0.066063,-0.0248191,-0.012393,-0.0361463,0.078456,0.0113272,-0.0743581,-0.0877839,-0.033048,-0.0258188


# Scraping and Cleaning Data

In [None]:
def matchupteam(teamid, season, timeout=30):
    """Download one team's defensive matchup totals for a season and save them as CSV.

    Requests the stats.nba.com ``leagueseasonmatchups`` endpoint with
    ``DefTeamID=teamid`` and ``Season=season`` (``PerMode=Totals``,
    ``SeasonType=Regular Season``), loads the first result set into a
    DataFrame and writes it to ``{season}_matchups_{teamid}.csv`` inside the
    hard-coded matchups folder.

    Parameters
    ----------
    teamid : value interpolated into the ``DefTeamID`` query parameter and
        the ``Referer`` header.
    season : season string interpolated into the ``Season`` query parameter
        and the output file name (the notebook passes values like '2018-19').
    timeout : seconds passed to ``requests.get``; without it a stalled
        connection would block the scraping loop indefinitely.

    Returns
    -------
    None. The only result is the CSV file written to disk.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    headers = {
        'Host': 'stats.nba.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'x-nba-stats-token': 'true',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'x-nba-stats-origin': 'stats',
        'Referer': f"https://stats.nba.com/team/{teamid}/matchups/",
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9'}

    url = f"https://stats.nba.com/stats/leagueseasonmatchups?DateFrom=&DateTo=&DefTeamID={teamid}&LeagueID=00&Outcome=&PORound=0&PerMode=Totals&Season={season}&SeasonType=Regular+Season"

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError further down.
    response.raise_for_status()
    result_set = response.json()['resultSets'][0]

    df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])
    # Every backslash is escaped explicitly; the resulting path is unchanged.
    # The index is deliberately still written (to_csv default): the cleaning
    # cell later drops the resulting 'Unnamed: 0' column.
    df.to_csv(f"C:\\Users\\gsteele\\Other\\matchups\\{season}_matchups_{teamid}.csv")

In [None]:
# Scrape every (season, team id) pair, season-major, pausing before each
# request so the calls are spaced out.
for season, teamid in (
    (season_label, team_number)
    for season_label in ('2017-18', '2018-19', '2019-20')
    for team_number in range(1610612737, 1610612768)
):
    time.sleep(2.3)
    matchupteam(teamid, season)

In [None]:
# Combine every per-team matchup CSV in the matchups folder into one file.
path = r'C:\Users\gsteele\Other\matchups'
combined_name = "all_matchups.csv"

# The combined file is written into the same folder that is globbed, so it
# has to be skipped here: otherwise re-running this cell reads the previous
# output back in and duplicates every row. Sorting makes the row order
# independent of the order in which the filesystem lists the files.
all_files = sorted(
    candidate for candidate in glob.glob(path + "/*.csv")
    if candidate.replace("\\", "/").rsplit("/", 1)[-1].lower() != combined_name
)

csv_list = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    csv_list.append(df)

df2 = pd.concat(csv_list, axis=0, ignore_index=True)

# Same destination as before, built without invalid escape sequences.
df2.to_csv(path + "\\" + combined_name, index=False, encoding='ascii')

In [None]:
#Load the combined new-format matchups and the old-format export
new_format = pd.read_csv(r'C:\Users\gsteele\other\matchups\all_matchups.csv', low_memory = False)
old_format = pd.read_csv(r'C:\users\gsteele\Other\old_matchups.csv', low_memory = False)

#Restrict old_format to the four seasons it is used for
old_format_seasons = ['2013-14', '2014-15', '2015-16', '2016-17']
old_format_df = old_format[old_format['SEASON_ID'].isin(old_format_seasons)]

#Columns to add so old_format_df lines up with new_format.
#Each entry is (new column name, function building its values from the frame);
#entries are applied in order, each inserted at position 1.
new_format_column_builders = [
    ('PARTIAL_POSS',    lambda frame: frame['POSS']),
    ('MATCHUP_AST',     lambda frame: frame['AST']),
    ('MATCHUP_TOV',     lambda frame: frame['TOV']),
    ('MATCHUP_BLK',     lambda frame: frame['BLK']),
    ('MATCHUP_FGM',     lambda frame: frame['FGM']),
    ('MATCHUP_FGA',     lambda frame: frame['FGA']),
    ('MATCHUP_FG_PCT',  lambda frame: frame['FG_PCT']),
    ('MATCHUP_FG3M',    lambda frame: frame['FG3M']),
    ('MATCHUP_FG3A',    lambda frame: frame['FG3A']),
    ('MATCHUP_FG3_PCT', lambda frame: frame['FG3_PCT']),
    #No source column for these in this cell, so they are filled with NaN
    ('HELP_FGM',        lambda frame: np.nan),
    ('HELP_FGA',        lambda frame: np.nan),
    ('HELP_FG_PERC',    lambda frame: np.nan),
    ('MATCHUP_FTM',     lambda frame: frame['FTM']),
    #Derived as twice the SFL column
    ('MATCHUP_FTA',     lambda frame: frame['SFL'] * 2),
    ('GP',              lambda frame: np.nan),
    #Derived as half of the integer-cast POSS column
    ('MATCHUP_MIN',     lambda frame: frame['POSS'].astype(int) / 2),
]
for new_column, build_values in new_format_column_builders:
    old_format_df.insert(loc = 1, column = new_column, value = build_values(old_format_df))

#Drop the saved index column from new_format and put old_format_df's columns in the target order
new_df = new_format.drop(columns = 'Unnamed: 0')
target_column_order = [
    'SEASON_ID', 'OFF_PLAYER_ID', 'OFF_PLAYER_NAME', 'DEF_PLAYER_ID', 'DEF_PLAYER_NAME',
    'GP', 'MATCHUP_MIN', 'PARTIAL_POSS', 'PLAYER_PTS', 'TEAM_PTS',
    'MATCHUP_AST', 'MATCHUP_TOV', 'MATCHUP_BLK',
    'MATCHUP_FGM', 'MATCHUP_FGA', 'MATCHUP_FG_PCT',
    'MATCHUP_FG3M', 'MATCHUP_FG3A', 'MATCHUP_FG3_PCT',
    'HELP_BLK', 'HELP_FGM', 'HELP_FGA', 'HELP_FG_PERC',
    'MATCHUP_FTM', 'MATCHUP_FTA', 'SFL',
]
old_df = old_format_df[target_column_order]
data_list = [new_df, old_df]

#Stack new-format rows on top of old-format rows and write the result to csv
matchup_data = pd.concat(data_list)
matchup_data.to_csv(r'c:\users\gsteele\Other\matchup_data.csv')