## Scraping 'Player Index' data to obtain players' positions so it can be merged with other DataFrames later on

In [4]:
import pandas as pd
import requests
pd.set_option('display.max_columns', None) # so we can see all columns in a wide DataFrame
import time
import numpy as np

In [5]:
# Browser-like HTTP headers sent with the stats.nba.com request below
# (the User-Agent identifies as Firefox on macOS; Origin/Referer point at nba.com).
headers_bio = {
    'Accept': '*/*',
    # Only advertise encodings that `requests` can always decode on its own.
    # 'br' and 'zstd' are decoded only when optional packages are installed;
    # advertising them can otherwise yield a compressed body that .json() cannot parse.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection': 'keep-alive',
    'Host': 'stats.nba.com',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0'
}

In [6]:
# Scraping data from 'Player Index'.
# The query string requests Historical=1, LeagueID=00 and Season=2023-24; all other filters are left empty.
raw_api_index = "https://stats.nba.com/stats/playerindex?College=&Country=&DraftPick=&DraftRound=&DraftYear=&Height=&Historical=1&LeagueID=00&Season=2023-24&SeasonType=Regular%20Season&TeamID=0&Weight="

# Using requests to access the JSON.
# A timeout is set because requests otherwise waits indefinitely for a response,
# and raise_for_status() surfaces HTTP errors here instead of as a JSON decode error below.
response_index = requests.get(url=raw_api_index, headers=headers_bio, timeout=30)
response_index.raise_for_status()
r_index = response_index.json()

# Accessing the column names and data rows of the first result set in the JSON
df_cols_short_index = r_index['resultSets'][0]['headers']
data_bio_index = r_index['resultSets'][0]['rowSet']

In [7]:
# Defining list of European countries used to filter the COUNTRY column (exact string match).
# NOTE(review): confirm these spellings against the values actually present in COUNTRY
# (e.g. 'Czechia' vs 'Czech Republic') and whether further countries
# (e.g. Montenegro, Switzerland, United Kingdom) should be included.
european_countries = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia', 'Denmark','Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland',
'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands','Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Serbia', 'Ukraine', 'Russia', 'Turkey', 'Georgia','Bosnia and Herzegovina', 'Kosovo', 'Belarus', 'Albania', 'Armenia' ]
# Backward-compatible alias for the original (misspelled) name, in case other cells reference it.
european_counties = european_countries

# Earliest TO_YEAR a player may have to be kept.
MIN_TO_YEAR = 1996

# Converting JSON into pandas DF
data_link = pd.DataFrame(data_bio_index, columns=df_cols_short_index)

# Filtering to players whose TO_YEAR is 1996 or later and whose country is in the European list.
# TO_YEAR is compared numerically (not as a string); unparseable or missing values become NaN and are excluded.
to_year_numeric = pd.to_numeric(data_link['TO_YEAR'], errors='coerce')
played_since_min_year = to_year_numeric >= MIN_TO_YEAR
is_european = data_link['COUNTRY'].isin(european_countries)

# .copy() makes the result an independent frame, so later column assignments
# do not trigger SettingWithCopyWarning.
data_link_filtered = data_link[played_since_min_year & is_european].copy()

In [9]:
# Preview the first 40 rows of the filtered player index (rendered as the cell output)
data_link_filtered.head(n=40)


Unnamed: 0,PERSON_ID,PLAYER_LAST_NAME,PLAYER_FIRST_NAME,PLAYER_SLUG,TEAM_ID,TEAM_SLUG,IS_DEFUNCT,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,JERSEY_NUMBER,POSITION,HEIGHT,WEIGHT,COLLEGE,COUNTRY,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,ROSTER_STATUS,PTS,REB,AST,STATS_TIMEFRAME,FROM_YEAR,TO_YEAR
4,1505,Abdul-Wahad,Tariq,tariq-abdul-wahad,1610612758,kings,0,Sacramento,Kings,SAC,9,F-G,6-6,235,San Jose State,France,1997.0,1.0,11.0,,7.8,3.3,1.1,Career,1997,2003
9,203518,Abrines,Alex,alex-abrines,1610612760,thunder,0,Oklahoma City,Thunder,OKC,8,G,6-6,190,FC Barcelona,Spain,2013.0,2.0,32.0,,5.3,1.4,0.5,Career,2016,2018
34,201582,Ajinca,Alexis,alexis-ajinca,1610612740,pelicans,0,New Orleans,Pelicans,NOP,42,C,7-2,248,Hyeres-Toulon,France,2008.0,1.0,20.0,,5.3,3.9,0.5,Career,2008,2017
41,1630583,Aldama,Santi,santi-aldama,1610612763,grizzlies,0,Memphis,Grizzlies,MEM,7,F-C,7-0,215,Loyola-Maryland,Spain,2021.0,1.0,30.0,1.0,10.7,5.8,2.3,Season,2021,2023
42,203128,Aldemir,Furkan,furkan-aldemir,1610612755,sixers,0,Philadelphia,76ers,PHI,19,F-C,6-10,240,Galatasaray,Turkey,2012.0,2.0,53.0,,2.3,4.3,0.7,Career,2014,2014
106,101149,Andriuskevicius,Martynas,martynas-andriuskevicius,1610612739,cavaliers,0,Cleveland,Cavaliers,CLE,15,C,7-2,240,Zalgiris,Lithuania,2005.0,2.0,44.0,,0.0,0.7,0.0,Career,2005,2006
111,203507,Antetokounmpo,Giannis,giannis-antetokounmpo,1610612749,bucks,0,Milwaukee,Bucks,MIL,34,F,6-11,243,Filathlitikos,Greece,2013.0,1.0,15.0,1.0,30.8,11.5,6.4,Season,2013,2023
112,1628961,Antetokounmpo,Kostas,kostas-antetokounmpo,1610612741,bulls,0,Chicago,Bulls,CHI,37,F,6-10,200,Dayton,Greece,2018.0,2.0,60.0,,1.0,1.0,0.1,Career,2018,2020
113,203648,Antetokounmpo,Thanasis,thanasis-antetokounmpo,1610612749,bucks,0,Milwaukee,Bucks,MIL,43,F,6-7,219,Panathinaikos,Greece,2014.0,2.0,51.0,1.0,0.9,0.4,0.5,Season,2015,2023
145,201600,Asik,Omer,omer-asik,1610612740,pelicans,0,New Orleans,Pelicans,NOP,3,C,7-0,255,Fenerbahce,Turkey,2008.0,2.0,36.0,,5.3,7.1,0.6,Career,2010,2017
