# Scraping

NBA.com publishes a large amount of statistics, for example the advanced team box scores at https://www.nba.com/stats/teams/boxscores-advanced

## Team Game Logs

In [166]:
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional

import numpy as np
import pandas as pd
import requests

In [167]:
@dataclass
class BundledData:
    """Inputs that identify one request to the stats API.

    Every field defaults to ``None`` so a bundle can be built with keyword
    arguments in any order.

    Attributes:
        measure: Value sent as the ``MeasureType`` query parameter.
        season: Value sent as the ``Season`` query parameter.
        response: Not read or written by the visible code.
            NOTE(review): presumably meant to hold the fetched payload --
            confirm before relying on it.
        endpoint: Path segment appended to the base URL of the API.
    """

    measure: Optional[str] = None
    season: Optional[str] = None
    # Previously annotated with the ``json`` module, which is not a type.
    response: Any = None
    endpoint: Optional[str] = None

In [168]:
def get_params(bundle):
    """Return the query parameters for one request.

    Builds a new dict from the shared ``base_params`` defaults and sets
    ``MeasureType`` and ``Season`` from the bundle; ``base_params`` itself
    is left untouched.
    """
    return {
        **base_params,
        'MeasureType': bundle.measure,
        'Season': bundle.season,
    }

def get_response(bundle, timeout=30):
    """Send the GET request described by ``bundle`` and return the response.

    Args:
        bundle: Object with ``endpoint``, ``measure`` and ``season``
            attributes; the endpoint is appended to ``BASE_URL`` and the
            other two are turned into query parameters by ``get_params``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole scraping run.

    Returns:
        The ``requests.Response`` for the call. The HTTP status is not
        checked here.

    Raises:
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    return requests.get(
        BASE_URL + bundle.endpoint,
        headers=headers,
        params=get_params(bundle),
        timeout=timeout,
    )

def get_data(bundle):
    """Fetch one result set and return it as a DataFrame.

    The first entry of ``resultSets`` in the JSON payload supplies the rows
    (``rowSet``) and the column names (``headers``, lower-cased). Three
    extra columns record which request produced the rows: ``endpoint``,
    ``input_season`` and ``input_measure``.

    Args:
        bundle: Object with ``endpoint``, ``season`` and ``measure``
            attributes describing the request.

    Returns:
        A ``pandas.DataFrame`` with one row per entry of ``rowSet``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = get_response(bundle)
    # Fail with the HTTP status instead of an opaque JSON/KeyError later on.
    response.raise_for_status()

    # Decode the body once; each .json() call parses the payload again.
    result_set = response.json()['resultSets'][0]
    column_names = [header.lower() for header in result_set['headers']]

    return pd.DataFrame(result_set['rowSet'], columns=column_names).assign(
        endpoint=bundle.endpoint,
        input_season=bundle.season,
        input_measure=bundle.measure,
    )

def clean(input_string):
    """Return ``input_string`` with every space character removed.

    Only the plain space (' ') is dropped; tabs and newlines are kept.
    """
    pieces = input_string.split(' ')
    return ''.join(pieces)

In [169]:
# HTTP headers passed to requests.get() by get_response() on every call.
# The values imitate a same-site CORS request made by Chrome 108 on macOS
# from a page on https://www.nba.com.
# NOTE(review): presumably the stats API rejects or stalls requests without
# browser-like headers -- confirm before trimming this list.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.9',
    'Connection': 'keep-alive',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
}

# Query-string defaults shared by every request. get_params() copies this
# dict and then sets 'MeasureType' and 'Season' from the bundle, so the two
# per-request keys are intentionally absent here.
# NOTE(review): the empty strings and '0' values look like "no filter"
# settings for the stats API -- confirm against the endpoint's behaviour.
base_params = {
    'DateFrom': '',
    'DateTo': '',
    'GameSegment': '',
    'LastNGames': '0',
    'LeagueID': '00',
    'Location': '',
    'Month': '0',
    'OpponentTeamID': '0',
    'Outcome': '',
    'PORound': '0',
    'PaceAdjust': 'N',
    'PerMode': 'Totals',
    'Period': '0',
    'PlusMinus': 'N',
    'Rank': 'N',
    'SeasonSegment': '',
    'SeasonType': 'Regular Season',
    'ShotClockRange': '',
    'VsConference': '',
    'VsDivision': '',
}

# Root URL of the stats API; get_response() appends the endpoint name to it.
BASE_URL = 'https://stats.nba.com/stats/'

# Endpoint requested for every season/measure combination.
endpoint = 'teamgamelogs'

# Values sent as the 'MeasureType' query parameter, one request per value.
measures = ['Advanced', 'Four Factors', 'Base', 'Misc', 'Scoring']

# Season labels '2007-08' through '2022-23'. The suffix is the last two
# digits of the following year, zero-padded, so the seasons before 2009-10
# no longer need to be written out by hand.
seasons = [
    f'{start_year}-{(start_year + 1) % 100:02d}'
    for start_year in range(2007, 2023)
]

seasons

['2007-08',
 '2008-09',
 '2009-10',
 '2010-11',
 '2011-12',
 '2012-13',
 '2013-14',
 '2014-15',
 '2015-16',
 '2016-17',
 '2017-18',
 '2018-19',
 '2019-20',
 '2020-21',
 '2021-22',
 '2022-23']

In [171]:
# Download every season/measure combination and store each result as its own
# CSV file named '<season>_<endpoint>_<measure>.csv'.
output_dir = Path('../data/nba_stats')
# Create the target directory up front so a missing folder cannot abort the
# run after the first request has already been made.
output_dir.mkdir(parents=True, exist_ok=True)

for season in seasons:
    for measure in measures:
        bundle = BundledData(season=season, measure=measure, endpoint=endpoint)
        print(bundle)  # progress indicator: one line per request
        df = get_data(bundle)
        file_name = f'{season}_{endpoint}_{measure}.csv'
        df.to_csv(output_dir / file_name, index=False)

BundledData(measure='Advanced', season='2007-08', response=None, endpoint='teamgamelogs')
BundledData(measure='Four Factors', season='2007-08', response=None, endpoint='teamgamelogs')
BundledData(measure='Base', season='2007-08', response=None, endpoint='teamgamelogs')
BundledData(measure='Misc', season='2007-08', response=None, endpoint='teamgamelogs')
BundledData(measure='Scoring', season='2007-08', response=None, endpoint='teamgamelogs')
BundledData(measure='Advanced', season='2008-09', response=None, endpoint='teamgamelogs')
BundledData(measure='Four Factors', season='2008-09', response=None, endpoint='teamgamelogs')
BundledData(measure='Base', season='2008-09', response=None, endpoint='teamgamelogs')
BundledData(measure='Misc', season='2008-09', response=None, endpoint='teamgamelogs')
BundledData(measure='Scoring', season='2008-09', response=None, endpoint='teamgamelogs')
BundledData(measure='Advanced', season='2009-10', response=None, endpoint='teamgamelogs')
BundledData(measure=