In [None]:
import json
import random
import time

import pandas as pd
import requests
from requests.exceptions import ReadTimeout
from sqlalchemy import create_engine

#full team stats, acquired and stored in a sql database
#note that the nba_api has rate limiting, so we can't go too fast
from nba_api.stats.static import teams
from nba_api.stats.endpoints import commonteamroster
from nba_api.stats.endpoints import playercareerstats



#start by creating an engine to add each dataframe to the sql database
from dotenv import load_dotenv
import os

# Pull settings from a .env file into the environment before reading them.
load_dotenv()

# Connection string for the target database; kept out of the notebook itself.
DATABASE_URL = os.getenv("DATABASE_URL")
if not DATABASE_URL:
    # Fail here with an actionable message instead of a less obvious error
    # raised from inside create_engine() when it is handed None.
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file"
    )

engine = create_engine(DATABASE_URL)

# Iterate through every NBA team. get_teams() returns a list of 30
# dictionaries, each describing one NBA team.
nba_teams = teams.get_teams()

# Seconds to wait before retrying a failed request. The retry messages print
# this same value so the log matches what the code actually does.
RETRY_DELAY_SECONDS = 60

# One DataFrame per team; concatenated a single time after the loop instead of
# re-copying an ever-growing frame on every iteration.
team_frames = []

for team in nba_teams:
    # Keep retrying this team until its roster request succeeds.
    while True:
        try:
            print(f"Getting roster for {team['full_name']} ---------------")
            roster = commonteamroster.CommonTeamRoster(team_id=team['id'])
            roster_df = roster.get_data_frames()[0]

            # Most recent season row and name for each player that has stats.
            # Both lists are appended to together so they always stay aligned.
            season_rows = []
            player_names = []

            # For every player on the roster, pull their career statistics.
            for _, row in roster_df.iterrows():
                player_id = row['PLAYER_ID']
                player_name = row['PLAYER']

                # Keep retrying this player until the request succeeds.
                while True:
                    try:
                        print(f"getting stats for {player_name}")
                        stats = playercareerstats.PlayerCareerStats(player_id=player_id)
                        player_stats = stats.get_data_frames()[0]

                        if not player_stats.empty:
                            # The last row of the career frame is kept as the
                            # player's most recent season.
                            season_rows.append(player_stats.iloc[-1])
                            player_names.append(player_name)
                        else:
                            print(f"No stats found for player")

                        # Randomised pause between players to stay under the
                        # API's rate limit.
                        time.sleep(round(random.uniform(2, 6), 1))
                        break
                    except ReadTimeout:
                        print(f"Timeout for {player_name} - retrying after {RETRY_DELAY_SECONDS} seconds")
                        time.sleep(RETRY_DELAY_SECONDS)
                    except json.decoder.JSONDecodeError as e:
                        # `json` must be imported for this clause to be usable;
                        # otherwise evaluating it raises NameError instead.
                        print(f"Error for {player_name}: {e} - retrying after {RETRY_DELAY_SECONDS} seconds")
                        time.sleep(RETRY_DELAY_SECONDS)

            # Attach player names and queue this team's frame for the final concat.
            if season_rows:
                team_roster = pd.DataFrame(season_rows)
                team_roster.insert(0, 'PLAYER_NAME', player_names)
                team_frames.append(team_roster)
            print(f"DataFrame created for {team['full_name']}")
            print()

            break

        except ReadTimeout:
            print(f"Timeout for {team['full_name']} - retrying after {RETRY_DELAY_SECONDS} seconds")
            time.sleep(RETRY_DELAY_SECONDS)
        except json.decoder.JSONDecodeError as e:
            # Same handling as the per-player request: a malformed roster
            # response is retried rather than aborting the whole run.
            print(f"Error for {team['full_name']}: {e} - retrying after {RETRY_DELAY_SECONDS} seconds")
            time.sleep(RETRY_DELAY_SECONDS)

# NOTE: rows are appended, so re-running this cell adds duplicate rows unless
# the table is cleared first. The disabled block below would do that (it needs
# `text` from sqlalchemy to be imported):
# with engine.connect() as conn:
#     #start sql transaction
#     with conn.begin():
#         # Delete recent records of teams if we already added them
#         delete_query = text("""
#         DELETE FROM "all_players_misc_stats"
#         """)
#
#         conn.execute(delete_query)

if team_frames:
    all_players = pd.concat(team_frames).set_index('PLAYER_ID')
    filename = "all_players_misc_stats"
    all_players.to_sql(filename, engine, if_exists='append', index=True)
    print('finished all NBA teams')
else:
    # Nothing was collected, so there is no PLAYER_ID column to index on and
    # nothing to write.
    all_players = pd.DataFrame()
    print('No player stats were collected; nothing written to the database')

In [None]:
# Inspect `roster_df` as left behind by the scraping loop (the roster of the last team it fetched).
roster_df

In [11]:
from nba_api.stats.endpoints import playercareerstats
from sqlalchemy import create_engine, text
from nba_api.stats.endpoints import leaguedashplayerstats
from nba_api.stats.endpoints import playerestimatedmetrics
from nba_api.stats.endpoints import playerdashboardbyyearoveryear
import pandas as pd

# Spot check: request career stats for player 1629027 and display the first
# frame the endpoint returns.
stats = playercareerstats.PlayerCareerStats(player_id=1629027)
career_frames = stats.get_data_frames()
player_stats = career_frames[0]
player_stats

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1629027,2018-19,0,1610612737,ATL,20.0,81,81,2503.0,525,...,0.829,64,237,301,653,72,15,308,140,1549
1,1629027,2019-20,0,1610612737,ATL,21.0,60,60,2120.0,546,...,0.86,32,223,255,560,65,8,289,104,1778
2,1629027,2020-21,0,1610612737,ATL,22.0,63,63,2125.0,487,...,0.886,38,207,245,594,53,12,261,111,1594
3,1629027,2021-22,0,1610612737,ATL,23.0,76,76,2652.0,711,...,0.904,50,234,284,737,72,7,303,128,2155
4,1629027,2022-23,0,1610612737,ATL,24.0,73,73,2541.0,597,...,0.886,56,161,217,741,80,9,300,104,1914
5,1629027,2023-24,0,1610612737,ATL,25.0,54,54,1942.0,433,...,0.855,23,126,149,583,72,11,235,109,1389
6,1629027,2024-25,0,1610612737,ATL,26.0,45,45,1619.0,315,...,0.858,23,125,148,518,58,10,208,87,1026


In [38]:
# League-wide "Advanced" per-game player stats for the 2024-25 regular season.
advanced_stats = leaguedashplayerstats.LeagueDashPlayerStats(
    measure_type_detailed_defense='Advanced',
    per_mode_detailed='PerGame',
    season='2024-25',
    season_type_all_star='Regular Season'
)
advanced_df = advanced_stats.get_data_frames()[0]

# Read the connection string from the environment (as the first cell does)
# rather than embedding a username and password in the notebook.
database_url = os.getenv("DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file"
    )
engine = create_engine(database_url)

filename = "all_players_advanced_stats"
# set_index() returns a new frame. Its result used to be discarded, so the
# table was written with the positional row index instead of PLAYER_ID. The
# indexed copy is written here, while advanced_df itself keeps PLAYER_ID as a
# regular column for the filter at the end of the cell.
# NOTE: if_exists='append' means re-running this cell adds the same rows again.
advanced_df.set_index('PLAYER_ID').to_sql(filename, engine, if_exists='append', index=True)

# Show every column of this wide frame when displaying the single-player row.
pd.set_option('display.max_columns', 79)
advanced_df[advanced_df["PLAYER_ID"] == 1629027]

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,E_OFF_RATING,OFF_RATING,sp_work_OFF_RATING,E_DEF_RATING,DEF_RATING,sp_work_DEF_RATING,E_NET_RATING,NET_RATING,sp_work_NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,E_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,sp_work_PACE,PIE,POSS,FGM,FGA,FGM_PG,FGA_PG,FG_PCT,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,E_OFF_RATING_RANK,OFF_RATING_RANK,sp_work_OFF_RATING_RANK,E_DEF_RATING_RANK,DEF_RATING_RANK,sp_work_DEF_RATING_RANK,E_NET_RATING_RANK,NET_RATING_RANK,sp_work_NET_RATING_RANK,AST_PCT_RANK,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,E_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,USG_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK,PACE_RANK,sp_work_PACE_RANK,PIE_RANK,FGM_RANK,FGA_RANK,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK
482,1629027,Trae Young,Trae,1610612737,ATL,26.0,45,20,25,0.444,36.0,111.6,112.7,112.7,112.1,113.1,113.1,-0.5,-0.4,-0.4,0.453,2.49,31.5,0.013,0.076,0.043,12.6,12.6,0.484,0.556,0.274,0.281,107.09,106.06,88.38,106.06,0.124,3577,315,786,7.0,17.5,0.401,80,165,458,321,13,181,151,151,329,359,359,248,239,239,2,97,29,462,457,485,411,408,390,269,39,39,28,61,61,91,37,17,51,28,403


In [40]:
# Estimated metrics for all players, as returned by the PlayerEstimatedMetrics
# endpoint with its default arguments.
metrics = playerestimatedmetrics.PlayerEstimatedMetrics()
metrics_df = metrics.get_data_frames()[0]

# Read the connection string from the environment (as the first cell does)
# rather than embedding a username and password in the notebook.
database_url = os.getenv("DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file"
    )
engine = create_engine(database_url)

filename = "all_players_estimated_stats"
# set_index() returns a new frame. Its result used to be discarded, so the
# table was written with the positional row index instead of PLAYER_ID. The
# indexed copy is written here, while metrics_df itself keeps PLAYER_ID as a
# regular column for the filter at the end of the cell.
metrics_df.set_index('PLAYER_ID').to_sql(filename, engine, if_exists='replace', index=True)

# Show every column when displaying the single-player row.
pd.set_option('display.max_columns', 55)
metrics_df[metrics_df["PLAYER_ID"] == 1629027]

Unnamed: 0,PLAYER_ID,PLAYER_NAME,GP,W,L,W_PCT,MIN,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,E_AST_RATIO,E_OREB_PCT,E_DREB_PCT,E_REB_PCT,E_TOV_PCT,E_USG_PCT,E_PACE,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TOV_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK
26,1629027,Trae Young,45,20,25,0.444,36.0,111.6,112.1,-0.5,31.4,0.015,0.086,0.049,12.622,0.281,107.09,80,165,458,321,13,180,328,248,30,460,458,485,34,39,27


In [10]:
import requests

# Endpoint that echoes the caller's public IPv4 address; used to confirm that
# traffic is leaving through the proxy.
url = 'https://ipv4.icanhazip.com'

# Proxy credentials in "username:password" form. They are read from the
# environment so the secret never lives in the notebook.
proxy_auth = os.getenv('IPROYAL_PROXY_AUTH')
if not proxy_auth:
    raise RuntimeError(
        "IPROYAL_PROXY_AUTH is not set; expected 'username:password' for the proxy"
    )
proxy = 'geo.iproyal.com:12321'  # host:port only; the scheme is added below

# Both entries point at the same proxy URL with an http:// scheme; the
# dictionary key selects which kind of target request is routed through it.
proxies = {
    'http': f'http://{proxy_auth}@{proxy}',
    'https': f'http://{proxy_auth}@{proxy}'
}

try:
    response = requests.get(url, proxies=proxies, timeout=10)
    print(response.text)
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

80.142.92.77



In [25]:
import requests
from urllib.parse import quote

# Endpoint that reports details about the address the request arrived from;
# used to confirm that traffic is leaving through the proxy.
url = 'https://ip.smartproxy.com/json'

# Proxy credentials are read from the environment so the secret never lives in
# the notebook.
username = os.getenv('SMARTPROXY_USERNAME')
password = os.getenv('SMARTPROXY_PASSWORD')
if not username or not password:
    raise RuntimeError(
        "SMARTPROXY_USERNAME and SMARTPROXY_PASSWORD must both be set"
    )

# Percent-encode the credentials so characters such as '+', '@', ':' or '/'
# cannot be misread as URL structure.
proxy = f"http://{quote(username, safe='')}:{quote(password, safe='')}@gate.smartproxy.com:10001"
result = requests.get(
    url,
    proxies={
        'http': proxy,
        'https': proxy
    },
    timeout=10,  # without a timeout an unresponsive proxy blocks the cell indefinitely
)
print(result.text)

{
    "browser": {
        "name": "",
        "version": ""
    },
    "platform": {
        "os": "undefined undefined"
    },
    "engine": {},
    "isp": {
        "isp": "Unifique Telecomunicacoes SA",
        "asn": 28343,
        "domain": "unifique.net",
        "organization": "Unifique Telecomunicacoes SA"
    },
    "city": {
        "name": "Itajaí",
        "code": "SC",
        "state": "Santa Catarina",
        "time_zone": "America/Sao_Paulo",
        "zip_code": "88301",
        "latitude": -26.9065,
        "longitude": -48.6657
    },
    "proxy": {
        "ip": "191.6.89.212"
    },
    "country": {
        "code": "BR",
        "name": "Brazil",
        "continent": "South America"
    }
}


In [12]:
    def setup_logging(self):
        """Configure logging via basicConfig: INFO level, written to nba_scraper.log."""
        # Each record is rendered as "<timestamp> - <level> - <message>".
        record_layout = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(filename='nba_scraper.log', level=logging.INFO, format=record_layout)

In [None]:
def get_games(self, start_date, end_date):
    """Look up games between two dates and return the first result frame.

    Args:
        start_date: Passed to LeagueGameFinder as ``date_from_nullable``.
        end_date: Passed to LeagueGameFinder as ``date_to_nullable``.

    Returns:
        The first element of the finder's ``get_data_frames()`` result.

    Raises:
        Exception: Any error raised along the way is logged and re-raised
            unchanged.
    """
    try:
        # Short fixed pause ahead of every request.
        time.sleep(0.6)
        finder = leaguegamefinder.LeagueGameFinder(
            date_from_nullable=start_date,
            date_to_nullable=end_date,
            proxy=self.proxies,
            timeout=30,
        )
        frames = finder.get_data_frames()
        return frames[0]
    except Exception as err:
        logging.error(f"Failed to get games: {str(err)}")
        raise