In [2]:
import os
import sys
import time
import json
import requests
from loguru import logger
from datetime import datetime
import polars as pl
from polars import col, lit
from polars.datatypes import *
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Set up logging: alias loguru's ready-made `logger` object so the rest of the
# notebook can call `log.<level>(...)`.
log = logger

In [3]:
# Emit a single INFO record; the output below shows the logger alias works.
log.info("Starting up...")

[32m2024-02-19 23:43:06.191[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mStarting up...[0m


In [5]:
from backend.data.nba.constants import *

from nba_api.stats.endpoints import playerprofilev2


In [6]:
from nba_api.stats.static import players as static_players

# Player records from nba_api's `static` module. A later cell loads them into a
# DataFrame and reads their `id` and `full_name` fields.
players = static_players.get_players()

In [7]:
# Test player profile: build a PlayerProfileV2 object for one hard-coded player
# id (2544) to inspect what the endpoint exposes.
# NOTE(review): constructing the endpoint presumably performs the HTTP request
# right away - confirm against the nba_api docs.
player_profile = playerprofilev2.PlayerProfileV2(player_id=2544)

In [12]:
# Get the data into a dataframe: keep only the first frame the endpoint
# returns. In the output below that frame has one row per SEASON_ID.
player_profile_df = player_profile.get_data_frames()[0]

# Bare last expression so the notebook renders a preview of the first rows.
player_profile_df.head()

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2544,2003-04,0,1610612739,CLE,19.0,79,79,3120,622,...,0.754,99,333,432,465,130,58,273,149,1654
1,2544,2004-05,0,1610612739,CLE,20.0,80,80,3388,795,...,0.75,111,477,588,577,177,52,262,146,2175
2,2544,2005-06,0,1610612739,CLE,21.0,79,79,3361,875,...,0.738,75,481,556,521,123,66,260,181,2478
3,2544,2006-07,0,1610612739,CLE,22.0,78,78,3190,772,...,0.698,83,443,526,470,125,55,250,171,2132
4,2544,2007-08,0,1610612739,CLE,23.0,75,74,3027,794,...,0.712,133,459,592,539,138,81,255,165,2250


In [19]:
# Read csv data from file into a dataframe
# NOTE(review): the path is absolute and machine-specific, and `bball_ref_df`
# is not referenced by any other visible cell. Judging by the variable name
# this is presumably a Basketball-Reference export - confirm.
bball_ref_df = pd.read_csv(
    '/Users/joshuasturm/pyproject/src/backend/data/nba/processed/Data/Player Career Info.csv'
)

# Get the NBA_API version of the player data and compare
from nba_api.stats.endpoints import CommonPlayerInfo, CommonAllPlayers
from backend.data.nba.constants import *
from nba_api.stats.static import players as static_players

# Get the player data from the NBA_API (nba_api's `static` players module)
players = static_players.get_players()

# Send static data to csv
# NOTE: to_csv is called without index=False, so the DataFrame's row index is
# written to the file as an extra leading column.
players_df = pd.DataFrame(players)
players_df.to_csv(
    '/Users/joshuasturm/pyproject/src/backend/data/nba/processed/Data/NBA_API_Players.csv'
)

In [22]:
# Build a player_id -> full_name lookup from the static player table so later
# cells can loop over players by id.
player_ids = players_df['id']
player_full_name = players_df['full_name']

player_id_full_name = dict(zip(player_ids, player_full_name))

# Persist the mapping as a two-column CSV (player_id, full_name).
player_id_full_name_df = pd.DataFrame(list(player_id_full_name.items()),
                                      columns=['player_id', 'full_name'])
# index=False keeps the pandas row index out of the file. Writing the index and
# reading the file back does not remove it: read_csv loads it as an extra
# "Unnamed: 0" column.
player_id_full_name_df.to_csv(
    '/Users/joshuasturm/pyproject/src/backend/data/nba/Player_ID_Full_Name.csv',
    index=False)

# Reload from disk so the in-memory frame is exactly what was saved.
player_id_full_name_df = pd.read_csv(
    '/Users/joshuasturm/pyproject/src/backend/data/nba/Player_ID_Full_Name.csv'
)





In [None]:
# Extend the PlayerNameIDMap.csv data to include whether each player is active, plus their position, team, school, country, height, weight, birthdate, and draft year, round, and pick.
from nba_api.stats.endpoints import CommonPlayerInfo, CommonAllPlayers


def get_basic_player_info(player_id):
    """Return one player's basic biographical and draft details.

    Builds a ``CommonPlayerInfo`` endpoint object for ``player_id``, takes the
    first frame it returns, and trims it to a fixed set of columns.

    Args:
        player_id: Player identifier handed to ``CommonPlayerInfo``.

    Returns:
        pandas.DataFrame with lower-cased columns ``player_id``, ``full_name``,
        ``birthdate``, ``height``, ``weight``, ``position``, ``team`` (taken
        from ``TEAM_CITY``), ``team_name``, ``school``, ``country``,
        ``draft_year``, ``draft_round`` and ``draft_number``. Duplicate rows
        are removed and the index restarts at 0.
    """
    wanted_columns = [
        'PERSON_ID', 'DISPLAY_FIRST_LAST', 'BIRTHDATE', 'HEIGHT', 'WEIGHT',
        'POSITION', 'TEAM_CITY', 'TEAM_NAME', 'SCHOOL', 'COUNTRY',
        'DRAFT_YEAR', 'DRAFT_ROUND', 'DRAFT_NUMBER'
    ]
    # Applied after lower-casing; `team_name` already has its final name.
    friendly_names = {
        'person_id': 'player_id',
        'display_first_last': 'full_name',
        'team_city': 'team',
    }
    # Only the first frame of the response is used.
    raw_info = CommonPlayerInfo(player_id=player_id).get_data_frames()[0]
    return (raw_info[wanted_columns]
            .rename(columns=str.lower)
            .rename(columns=friendly_names)
            .drop_duplicates()
            .reset_index(drop=True))

# Function to load in player_id_full_name_df and get the basic player info
def get_basic_player_info_df(player_id_full_name_df):
    """Collect basic player info for every row of an id/name mapping frame.

    Calls ``get_basic_player_info`` once per row, in row order, using the
    row's ``player_id`` value, and stacks the results.

    Args:
        player_id_full_name_df: pandas.DataFrame with a ``player_id`` column.

    Returns:
        pandas.DataFrame holding all per-player frames stacked vertically. The
        per-player indexes are kept as returned (they are not renumbered). An
        input with no rows yields an empty DataFrame.

    Raises:
        KeyError: if a row has no ``player_id`` entry.
    """
    # Gather the frames first and concatenate once: growing a DataFrame with
    # pd.concat inside the loop copies all accumulated rows on every pass.
    per_player_frames = []
    for _, row in player_id_full_name_df.iterrows():
        per_player_frames.append(get_basic_player_info(row['player_id']))

    # pd.concat raises on an empty list, so keep the "no rows" result explicit.
    if not per_player_frames:
        return pd.DataFrame()
    return pd.concat(per_player_frames)

# Build the combined table for every player in the mapping. As the cell's last
# expression the result is displayed but not stored in a variable.
get_basic_player_info_df(player_id_full_name_df)

In [29]:
from nba_api.stats.endpoints import CommonAllPlayers, CommonTeamRoster, CommonPlayerInfo
from backend.core.constants import *

PLAYER_IDS_FILE = '/Users/joshuasturm/pyproject/src/backend/core/constants.py'

# Collect every PERSON_ID from the first CommonAllPlayers frame as a plain list.
players = CommonAllPlayers().get_data_frames()[0]
player_ids = players['PERSON_ID'].to_list()

# Write the ids to constants.py as a single `PLAYER_IDS = [...]` assignment.
# NOTE(review): mode 'w' truncates the file, so everything else stored in
# constants.py is replaced by this one line. Other cells import NBA_TEAM_IDS
# from backend.core.constants - confirm whether that is this same file.
with open(PLAYER_IDS_FILE, 'w') as constants_file:
    constants_file.write(f'PLAYER_IDS = {player_ids}')

In [30]:
def write_player_ids_to_file(player_ids, file_path):
    """Write ``player_ids`` to ``file_path`` as a Python assignment.

    The file ends up containing exactly ``NBA_PLAYER_IDS = <repr(player_ids)>``.

    Args:
        player_ids: Object whose ``repr`` is written as the right-hand side
            (a list of ids in this notebook).
        file_path: Destination file. The argument is now honoured; it used to
            be overwritten by a hard-coded path inside the function.

    Note:
        The file is opened in ``'w'`` mode, so any existing content of
        ``file_path`` is replaced.
    """
    with open(file_path, 'w') as f:
        f.write(f'NBA_PLAYER_IDS = {repr(player_ids)}')

# Rewrite the constants file (PLAYER_IDS_FILE) so it holds a single
# `NBA_PLAYER_IDS = [...]` assignment built from the ids collected above.
write_player_ids_to_file(player_ids, PLAYER_IDS_FILE)

def write_player_name_id_map_to_file(player_id_full_name_df, file_path):
    """Write a DataFrame's ``to_dict()`` form to ``file_path`` as a constant.

    The file ends up containing exactly
    ``NBA_PLAYER_NAME_ID_MAP = <player_id_full_name_df.to_dict()>``.

    Args:
        player_id_full_name_df: pandas.DataFrame to serialise. With the
            default ``to_dict()`` orientation the result is keyed by column
            name, then by index label - it is not a flat id -> name mapping.
        file_path: Destination file. The argument is now honoured; it used to
            be overwritten by a hard-coded path inside the function.

    Note:
        The file is opened in ``'w'`` mode, so any existing content of
        ``file_path`` is replaced.
    """
    # Player names can contain non-ASCII characters and the target is Python
    # source, which is read as UTF-8 by default, so write UTF-8 explicitly.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(f'NBA_PLAYER_NAME_ID_MAP = {player_id_full_name_df.to_dict()}')


In [35]:
# CommonTeamRoster
# Get the team rosters
from backend.core.constants import NBA_TEAM_IDS

# Fetch every roster first, then combine and save once. The CSV write used to
# sit inside the loop, so the whole file was rewritten after each team, and the
# frame was re-copied by pd.concat on every pass.
roster_frames = []
for team_id in NBA_TEAM_IDS:
    # Only the first frame of each CommonTeamRoster response is kept.
    team_roster = CommonTeamRoster(team_id=team_id).get_data_frames()[0]
    roster_frames.append(team_roster)

# pd.concat raises on an empty list; with no teams nothing is written, which
# matches the previous behaviour.
team_rosters = pd.concat(roster_frames) if roster_frames else pd.DataFrame()
if roster_frames:
    # to_csv returns None when given a path, so its result is not stored.
    team_rosters.to_csv(
        '/Users/joshuasturm/pyproject/src/backend/data/nba/processed/Data/NBA_Team_Rosters.csv'
    )


In [37]:
# TeamInfoCommon
# Get the team info
from nba_api.stats.endpoints import TeamInfoCommon
from nba_api.stats.static import teams as static_teams

# Static team records, keyed by full team name. Despite its name,
# `team_id_dict` maps each name to the whole team record, not just the id.
teams = static_teams.get_teams()
team_id_dict = dict(zip([team['full_name'] for team in teams], teams))

# Collect one TeamInfoCommon frame per team and concatenate once, instead of
# re-copying a growing DataFrame (seeded with an empty frame) on every pass.
team_info_frames = []
for team in team_id_dict.values():
    team_info = TeamInfoCommon(team_id=team['id']).get_data_frames()[0]
    team_info_frames.append(team_info)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = (pd.concat(team_info_frames, ignore_index=True)
      if team_info_frames else pd.DataFrame())

In [39]:
import polars as pl

# Create a polars dataframe
# (`df` is the pandas frame of TeamInfoCommon results built in the previous cell.)
team_info_df = pl.from_pandas(df)

# Write the team info to a csv file
# NOTE(review): absolute, machine-specific path.
team_info_df.write_csv(
    '/Users/joshuasturm/pyproject/src/backend/data/nba/processed/Data/nba/NBA_Team_Info.csv'
)

In [40]:
# CommonAllPlayers
# Get the player info

from nba_api.stats.endpoints import CommonAllPlayers

# NOTE(review): this module-level object is not read by any visible cell;
# get_common_all_players() below builds its own endpoint object per season.
common_all_players = CommonAllPlayers(is_only_current_season=0,
                                      league_id='00',
                                      season='2020-21')


def get_common_all_players(seasons=None):
    """Fetch the CommonAllPlayers table for each season and stack the results.

    For every season label a ``CommonAllPlayers`` endpoint object is built
    with ``is_only_current_season=0`` and ``league_id='00'``; the first frame
    it returns is converted to polars and all frames are concatenated.

    Args:
        seasons: Optional iterable of season labels such as ``'2019-20'``.
            Defaults to the 74 seasons from ``'2019-20'`` back to
            ``'1946-47'``, newest first.

    Returns:
        polars.DataFrame with the per-season frames stacked in request order;
        an empty DataFrame when ``seasons`` is empty.

    NOTE(review): every request passes ``is_only_current_season=0``. If the
    endpoint ignores ``season`` in that mode, each iteration returns the same
    rows and the result contains duplicates - verify against the API.
    """
    if seasons is None:
        # Generates '2019-20', '2018-19', ..., '1999-00', ..., '1946-47'.
        # The suffix is the two-digit following year.
        seasons = [
            f'{start_year}-{(start_year + 1) % 100:02d}'
            for start_year in range(2019, 1945, -1)
        ]

    # Gather the frames first and concatenate once rather than re-copying a
    # growing frame on every iteration.
    season_frames = []
    for season in seasons:
        response = CommonAllPlayers(is_only_current_season=0,
                                    league_id='00',
                                    season=season)
        season_frames.append(
            pl.from_pandas(response.get_data_frames()[0],
                           include_index=False))

    if not season_frames:
        return pl.DataFrame()
    return pl.concat(season_frames)


# Run the multi-season fetch defined above and keep the stacked polars frame.
common_all_players_df = get_common_all_players()

# Write the common_all_players_df to a csv file
# NOTE(review): absolute, machine-specific path.
common_all_players_df.write_csv(
    '/Users/joshuasturm/pyproject/src/backend/data/nba/processed/Data/nba/NBA_Players.csv'
)



In [42]:
# LeagueDashPlayerStats and LeagueDashTeamStats
from nba_api.stats.endpoints import LeagueDashPlayerStats, LeagueDashTeamStats
from backend.core.constants import NBA_TEAM_IDS

# Browser-like HTTP headers for stats.nba.com.
#
# A raw HTTP request line had been pasted directly after the User-Agent value
# with no comma in between, so Python's implicit string-literal concatenation
# appended it to the User-Agent header. It is not a header, so it is kept here
# only as a reference comment:
#   GET /stats/playerdashboardbygeneralsplits?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerID=2544&PlusMinus=N&Rank=N&Season=2016-17&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&VsConference=&VsDivision= HTTP/1.1
request_headers = {
    'Host':
    'stats.nba.com',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Accept':
    'application/json, text/plain, */*',
    'Accept-Language':
    'en-US,en;q=0.5',
    'Referer':
    'http://stats.nba.com/',
    'Connection':
    'keep-alive',
}



In [47]:
# League-wide player stats for the 2015-16 regular season.
# The stated goal is a per-quarter breakdown; this request passes period=1
# only. NOTE(review): presumably that selects just the first quarter - confirm
# and repeat for the other periods if a full breakdown is wanted.
league_dash_player_stats = LeagueDashPlayerStats(
    season='2015-16',
    season_type_all_star='Regular Season',
    league_id_nullable='00',
    team_id_nullable=0,
    period=1,
)

# Take the first frame of the response and convert it from pandas to polars.
league_dash_player_stats_df = pl.from_pandas(
    league_dash_player_stats.get_data_frames()[0])

# Save the polars frame as CSV in the 2015 folder.
stats_2015_csv_path = '/Users/joshuasturm/pyproject/src/backend/data/nba/processed/Data/nba/stats/years/2015/NBA_League_Dash_Player_Stats.csv'
league_dash_player_stats_df.write_csv(stats_2015_csv_path)


In [None]:
from backend.core.path_config import (BASE_PATH, DATA_PATH, NBA_DATA_PATH)
# Original note: BASE_PATH always has to be imported, otherwise the write does
# not know where to put the file.
# NOTE(review): the target below has no file name or .csv extension, while an
# earlier cell writes into a `stats/years/<year>/` folder. Confirm whether this
# should point at a file inside `stats/years` rather than at the folder itself.
league_dash_player_stats_df.write_csv(f"{NBA_DATA_PATH}/stats/years")

In [52]:
# Basic LeagueDashPlayerStats vs Advanced LeagueDashPlayerStats
# Basic LeagueDashPlayerStats
# Get the basic player stats for the 2016-17 season
from nba_api.stats.endpoints import LeagueDashPlayerStats

# Basic per-game player stats for the 2016-17 regular season.
# period is passed as the string '0' here. NOTE(review): presumably that means
# "all periods" - confirm against the endpoint documentation.
league_dash_player_stats = LeagueDashPlayerStats(
    season='2016-17',
    season_type_all_star='Regular Season',
    per_mode_detailed='PerGame',
    period='0',
    league_id_nullable='00',
)

# Take the first frame of the response and convert it from pandas to polars.
league_dash_player_stats_df = pl.from_pandas(
    league_dash_player_stats.get_data_frames()[0])

# Save the polars frame as CSV in the 2016 folder.
stats_2016_csv_path = '/Users/joshuasturm/pyproject/src/backend/data/nba/processed/Data/nba/stats/years/2016/NBA_League_Dash_Player_Stats.csv'
league_dash_player_stats_df.write_csv(stats_2016_csv_path)

