In [1]:
import requests
import json
import pandas as pd
pd.set_option('display.max_rows', 999999)

player_stats_api = 'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2021-22&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight='
sixers_id = '1610612755'
player_misc_api = 'http://data.nba.net/data/10s/prod/v1/2022/players.json'
from difflib import get_close_matches

import re
from bs4 import BeautifulSoup
pattern = re.compile('([^\s\w]|_)+')
from tqdm.auto import tqdm 

In [14]:
headers_player_data = {
    'Host': 'stats.nba.com',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'sec-ch-ua-mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'sec-ch-ua-platform': "Linux",
    'Origin': 'https://www.nba.com',
    'Sec-Fetch-Site': 'same-site',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://www.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'pt,en;q=0.9',
}

In [15]:
player_stats = requests.get(
    player_stats_api.replace('\n', ''),
    headers=headers_player_data
).json()

In [16]:
columns = player_stats['resultSets'][0]['headers']
df_stats = pd.DataFrame(player_stats['resultSets'][0]['rowSet'], columns=columns)
df_stats = df_stats[['PLAYER_ID', 'MIN_RANK']]
df_stats.to_csv('player_min_rank.csv')

In [None]:
df_stats

In [5]:
df_player_info = requests.get(player_misc_api).json()
df_player_info = pd.DataFrame(df_player_info['league']['standard'])
df_player_info = df_player_info[[
    'personId', 'jersey', 'pos',
    'heightFeet', 'heightInches', 'weightPounds',
    'dateOfBirthUTC', 'firstName', 'lastName', 'teamId'
]]

df_stats['PLAYER_ID'] = df_stats['PLAYER_ID'].astype(str)
df_player_info['personId'] = df_player_info['personId'].astype(str)

df_stats = pd.merge(
    left=df_stats,
    right=df_player_info,
    left_on='PLAYER_ID',
    right_on='personId',
    how='right'
)
df_stats['MIN_RANK'] = df_stats['MIN_RANK'].fillna(99999).astype(int)

In [6]:
# Full name
df_stats.loc[:, "fullName"] = df_stats['firstName'] + ' ' + df_stats['lastName']

# Lower just for comparison when searching
df_stats.loc[:, 'fullName_comp'] = df_stats['fullName'].apply(
    lambda x: pattern.sub('', x).lower()
)
df_stats.loc[:, 'firstName_comp'] = df_stats['firstName'].apply(
    lambda x: pattern.sub('', x).lower()
)
df_stats.loc[:, 'lastName_comp'] = df_stats['lastName'].apply(
    lambda x: pattern.sub('', x).lower()
)

# Sixers priority
df_stats['flag_sixers'] = df_stats['teamId'].apply(lambda id_: 0 if id_ == sixers_id else 1)
df_stats['teamId'] = df_stats['teamId'].astype(str)

df_stats = df_stats.sort_values(['flag_sixers', 'MIN_RANK'])
df_stats = df_stats.fillna("")

In [7]:
from basketball_reference_web_scraper import client
df_players_bbref = pd.DataFrame(client.players_advanced_season_totals(season_end_year=2021))[['name', 'slug']]

In [8]:
df_players_bbref = df_players_bbref.rename(columns={'name': 'fullName'})

In [9]:
df_players_bbref = pd.merge(
    left=df_stats,
    right=df_players_bbref,
    how='inner',
    on='fullName'
)

In [10]:
df_players_bbref['slug'] = df_players_bbref['slug'].fillna("")
df_players_bbref = df_players_bbref[df_players_bbref['slug'] != ""]

In [11]:
base_bbref_url = 'https://www.basketball-reference.com/players'
df_players_bbref['bbref_url'] = df_players_bbref['slug'].apply(
    lambda x: "{}/{}/{}.html".format(base_bbref_url, x[0], x) if x != "" else ""
)

In [12]:
def get_nicknames(url):
    # collect HTML data
    html = requests.get(url)

    # create beautiful soup object from HTML
    soup = BeautifulSoup(html.content, features="lxml")

    for i, tag in enumerate(soup.find_all('p')[:3]):
        text = tag.get_text()
        if ('(' in text) and ('cm' not in text):
            return text
    
    return ""

In [13]:
tqdm.pandas()
df_players_bbref['nicknames'] = df_players_bbref['bbref_url'].progress_apply(get_nicknames)

  from pandas import Panel


HBox(children=(IntProgress(value=0, max=491), HTML(value='')))




In [192]:
df_players_bbref

Unnamed: 0,PLAYER_ID,MIN_RANK,personId,jersey,pos,heightFeet,heightInches,weightPounds,dateOfBirthUTC,firstName,lastName,teamId,fullName,fullName_comp,firstName_comp,lastName_comp,flag_sixers,slug,bbref_url,nicknames
0,202699,53,202699,12,F,6,7,226.0,1992-07-15,Tobias,Harris,1610612755.0,Tobias Harris,tobias harris,tobias,harris,0,harrito02,https://www.basketball-reference.com/players/h...,\n(Tobi)\n
1,1627732,55,1627732,25,G-F,6,11,240.0,1996-07-20,Ben,Simmons,1610612755.0,Ben Simmons,ben simmons,ben,simmons,0,simmobe01,https://www.basketball-reference.com/players/s...,"\n(Simmo the Savage, Young Socialite, Fresh Pr..."
2,203954,80,203954,21,C-F,7,0,280.0,1994-03-16,Joel,Embiid,1610612755.0,Joel Embiid,joel embiid,joel,embiid,0,embiijo01,https://www.basketball-reference.com/players/e...,"\n(JoJo, The Process, Do-a-180, Troel)\n"
3,203552,119,203552,31,G,6,2,185.0,1990-08-23,Seth,Curry,1610612755.0,Seth Curry,seth curry,seth,curry,0,curryse01,https://www.basketball-reference.com/players/c...,
4,201980,128,201980,14,G,6,6,215.0,1987-06-22,Danny,Green,1610612755.0,Danny Green,danny green,danny,green,0,greenda02,https://www.basketball-reference.com/players/g...,\n(Icy-Hot)\n
5,203083,148,203083,2,C,6,10,279.0,1993-08-10,Andre,Drummond,1610612755.0,Andre Drummond,andre drummond,andre,drummond,0,drumman01,https://www.basketball-reference.com/players/d...,\n(Big Penguin)\n
6,203083,148,203083,2,C,6,10,279.0,1993-08-10,Andre,Drummond,1610612755.0,Andre Drummond,andre drummond,andre,drummond,0,drumman01,https://www.basketball-reference.com/players/d...,\n(Big Penguin)\n
7,1629003,208,1629003,18,G-F,6,5,205.0,1996-09-26,Shake,Milton,1610612755.0,Shake Milton,shake milton,shake,milton,0,miltosh01,https://www.basketball-reference.com/players/m...,\n(Protein Shake)\n
8,1629680,263,1629680,22,G-F,6,5,201.0,1997-03-04,Matisse,Thybulle,1610612755.0,Matisse Thybulle,matisse thybulle,matisse,thybulle,0,thybuma01,https://www.basketball-reference.com/players/t...,"\n(Mathief, The Disruptor, Spiderman)\n"
9,1627788,279,1627788,30,G-F,6,7,202.0,1997-07-24,Furkan,Korkmaz,1610612755.0,Furkan Korkmaz,furkan korkmaz,furkan,korkmaz,0,korkmfu01,https://www.basketball-reference.com/players/k...,\n(Furky)\n
