## Scraping bio data (height, weight, nationality) from NBA Stats  

In [None]:
import pandas as pd
import requests
pd.set_option('display.max_columns', None) # so we can see all columns in a wide DataFrame
import time
import numpy as np

In [None]:
# Endpoint used for a single probe request: player bio stats for the
# 2023-24 regular season. The query string is split over several adjacent
# literals purely for readability; Python joins them into one string.
raw_api_url = (
    "https://stats.nba.com/stats/leaguedashplayerbiostats"
    "?College=&Conference=&Country=&DateFrom=&DateTo=&Division="
    "&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound="
    "&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0"
    "&Outcome=&PORound=0&PerMode=Totals&Period=0"
    "&PlayerExperience=&PlayerPosition="
    "&Season=2023-24&SeasonSegment=&SeasonType=Regular Season"
    "&ShotClockRange=&StarterBench=&TeamID=0"
    "&VsConference=&VsDivision=&Weight="
)

In [None]:
# Browser-like request headers sent with every stats.nba.com call.
headers = {
    'Accept': '*/*',
    # Only advertise encodings that `requests` can always decode. Brotli
    # ('br') and Zstandard ('zstd') are decoded only when optional extra
    # packages are installed; advertising them otherwise risks receiving a
    # body that `.json()` cannot parse.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection': 'keep-alive',
    # 'Host' is pinned to stats.nba.com, so this dict must not be reused
    # for requests to other domains.
    'Host': 'stats.nba.com',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0',
}

In [None]:
# Probe request: fetch one season from the bio-stats endpoint so the shape of
# the JSON payload can be inspected before scraping every season.
# The timeout stops a stalled connection from blocking the notebook forever.
response = requests.get(url=raw_api_url, headers=headers, timeout=60)
# Surface HTTP errors as such instead of as a confusing JSON decode failure.
response.raise_for_status()
r = response.json()  # parses the body as JSON and returns a Python dict
r

In [None]:
# Column names reported by the API for the first result set.
# `r['resultSets']` is a list (the other cells index it with [0]), so the
# entry has to be selected before looking up its 'headers' key; indexing the
# list directly with 'headers' raises TypeError.

df_cols_short = r['resultSets'][0]['headers']

df_cols_short

In [None]:
# Data rows of the first result set in the probe response.
# Despite its name, `col` holds the 'rowSet' entries (rows), not column labels.
_first_result_set = r["resultSets"][0]
col = _first_result_set["rowSet"]
col

In [None]:
# Column layout: two bookkeeping columns followed by the API's own headers.
df_cols = ['Year', 'Season_type', *df_cols_short]

# Season labels '1996-97' through '2021-22': the starting year followed by
# the last two digits of the next year.
years = [
    f"{start}-{(start + 1) % 100:02d}"
    for start in range(1996, 2022)
]

# Season types as they appear in the query string (space already URL-encoded).
season_types = [
    'Regular%20Season',
    'Playoffs',
]

# Empty frame with the target column layout, shown as a preview.
df = pd.DataFrame(columns=df_cols)
df

In [None]:
# Download the player bio stats for every season / season-type combination
# and stack them into a single DataFrame `df`.
#
# Reads `years`, `season_types` and `headers` from the earlier cells. Each
# downloaded chunk is tagged with the 'Year' and 'Season' it belongs to.
bio_url_template = (
    "https://stats.nba.com/stats/leaguedashplayerbiostats"
    "?College=&Conference=&Country=&DateFrom=&DateTo=&Division="
    "&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound="
    "&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0"
    "&Outcome=&PORound=0&PerMode=Totals&Period=0"
    "&PlayerExperience=&PlayerPosition="
    "&Season={season}&SeasonSegment=&SeasonType={season_type}"
    "&ShotClockRange=&StarterBench=&TeamID=0"
    "&VsConference=&VsDivision=&Weight="
)

# One DataFrame per request. They are concatenated once after the loop:
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration and starts from an empty frame.
frames = []
for y in years:
    for s in season_types:
        api_url = bio_url_template.format(season=y, season_type=s)
        # The timeout keeps one stalled request from hanging the whole scrape.
        response = requests.get(url=api_url, headers=headers, timeout=60)
        # Report HTTP failures directly rather than as a JSON decode error.
        response.raise_for_status()
        r = response.json()
        data = r['resultSets'][0]['rowSet']
        columns = r['resultSets'][0]['headers']
        temp_df1 = pd.DataFrame(data, columns=columns)
        temp_df1['Year'] = y
        temp_df1['Season'] = s
        frames.append(temp_df1)
        # NOTE: requests are sent back to back. If the server starts refusing
        # them, add a randomized pause here, e.g.
        # time.sleep(np.random.uniform(low=20, high=40)).

# ignore_index gives every row a unique label instead of restarting the
# index at 0 for each chunk.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [None]:
# Display the current contents of `df` as the cell output.
df

In [None]:
# Spot check: restrict to the 1996-97 rows, then show the entries whose
# PLAYER_ID is 893.
df_1996 = df.loc[df['Year'].eq('1996-97')]
df_1996.loc[df_1996['PLAYER_ID'].eq(893)]


In [None]:
# Keep only the rows that have a COUNTRY value.
# The previous `df[df['COUNTRY']]` passed the country strings themselves as
# column labels, which raises KeyError; filtering rows needs a boolean mask.
# NOTE(review): the intended condition could not be determined from this
# notebook. Replace `.notna()` with a comparison against a specific country
# if a nationality filter was meant.
df_cnt = df[df['COUNTRY'].notna()]
df_cnt

In [None]:
# Script to scrape data on European players using Beautiful Soup.
#
# For each year it downloads one RealGM Euroleague stats page (URL below),
# reads every row of the '.tablesaw' table, prefixes the row with the year,
# and finally writes all rows to 'euroleague_stats.csv'.
# NOTE: this rebinds `years`, `data`, `columns` and `df` from earlier cells.

import requests
from bs4 import BeautifulSoup
import pandas as pd

years = list(range(2000, 2025))

url_start = 'https://basketball.realgm.com/international/league/1/Euroleague/stats/{}/Averages/Qualified/All/points/All/desc/1/Regular_Season'

data = []  # one list per table row: [year, cell text, cell text, ...]

for year in years:
    url = url_start.format(year)
    # The timeout keeps one stalled request from hanging the whole scrape.
    response = requests.get(url, timeout=60)
    if not response.ok:
        # Make failed downloads visible instead of silently parsing an
        # error page, but keep going so the other years are still collected.
        print(f'Skipping {year}: HTTP {response.status_code}')
        continue
    soup = BeautifulSoup(response.content, 'html.parser')

    for row in soup.select('.tablesaw tbody tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if not cells:
            # Rows without <td> cells carry no stats; skipping them avoids
            # rows that contain nothing but the year.
            continue
        data.append([year] + cells)

# Creating DataFrame from the collected data
columns = ['Year', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
df = pd.DataFrame(data, columns=columns)

# Saving DataFrame to a CSV file
df.to_csv('euroleague_stats.csv', index=False)

# Display DataFrame
df
