In [None]:
#script to run every 3 days in order to scrape recent games for players

In [10]:
from sqlalchemy import create_engine, text
from datetime import datetime, timedelta

import pandas as pd
import random
import time 

from nba_api.stats.static import teams
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog

# Build the engine used to write each dataframe to the SQL database.
# The connection URL can be overridden with the TEAMDATA_DB_URL environment
# variable so real credentials never have to be saved in the notebook; the
# fallback is the original local placeholder URL.
import os

engine = create_engine(
    os.environ.get(
        'TEAMDATA_DB_URL',
        'postgresql://chase:yourpassword@localhost:5433/TeamData',
    )
)

# Keep only the players flagged as active in nba_api's static player list.
active_players = [p for p in players.get_players() if p['is_active']]


# Helper used by set_team_id.
# Picks one of the two team abbreviations out of a MATCHUP string.
def parse_team_from_matchup(matchup, is_home):
    """Return one side of a MATCHUP string such as 'PHI vs. HOU' or 'PHI @ HOU'.

    Both separators are treated the same way: the string is split into a
    left and a right abbreviation. The left one is returned when ``is_home``
    is truthy, otherwise the right one.

    NOTE(review): set_team_id passes ``is_home = '@' not in matchup``, so for
    '@' rows the right-hand abbreviation is returned -- confirm that is the
    team TEAM_ID is meant to hold.
    """
    normalized = matchup.replace(' vs. ', ' @ ')
    sides = normalized.split(' @ ')
    position = 0 if is_home else 1
    return sides[position]

def set_team_id(games_df):
    """Add a TEAM_ID column to a game-log DataFrame, in place.

    Each row's MATCHUP string is passed to parse_team_from_matchup with
    ``is_home`` set to whether the string lacks an '@', and the resulting
    abbreviation is looked up in nba_api's static team list.

    Args:
        games_df: DataFrame with a MATCHUP column of strings. It is modified
            in place and may be empty.

    Raises:
        KeyError: if a parsed abbreviation is not in the static team list.
    """
    team_mapping = {team['abbreviation']: team['id'] for team in teams.get_teams()}

    # Build the column from a plain list instead of DataFrame.apply(axis=1):
    # on an empty frame apply() hands back a DataFrame rather than a Series,
    # and assigning that to a single column fails for players with no games.
    # NOTE(review): rows containing '@' resolve to the right-hand abbreviation
    # of MATCHUP -- confirm that is the team TEAM_ID should hold.
    games_df['TEAM_ID'] = [
        team_mapping[parse_team_from_matchup(matchup, '@' not in matchup)]
        for matchup in games_df['MATCHUP']
    ]


# Helpers for fetching each player's most recent games.
def get_recent_games(player_id, days_back=20, season='2024-25'):
    """Fetch a player's season game log and keep only the recent games.

    Args:
        player_id: Player id passed to the PlayerGameLog endpoint.
        days_back: Size of the look-back window in days, measured from the
            current local time.
        season: Season string passed to the endpoint. Defaults to '2024-25',
            the value that used to be hard-coded.

    Returns:
        A ``(recent_games, cutoff_date)`` tuple: the filtered DataFrame (with
        GAME_DATE parsed to datetime and TEAM_ID added by set_team_id) and
        the datetime used as the threshold.
    """
    # The cutoff keeps the current time of day, while parsed game dates sit at
    # midnight, so a game played exactly `days_back` days ago is excluded
    # unless this runs at midnight.
    cutoff_date = datetime.now() - timedelta(days=days_back)

    game_log = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    df = game_log.get_data_frames()[0]

    # The endpoint's dates are parsed with the 'Mon DD, YYYY' format.
    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'], format='%b %d, %Y')

    set_team_id(df)

    # .copy() gives callers an independent frame they can modify without
    # pandas chained-assignment warnings.
    recent_games = df[df['GAME_DATE'] >= cutoff_date].copy()

    return recent_games, cutoff_date


def update_player_stats(engine):
    """Refresh the last 20 days of stored game rows for every active player.

    For each entry of the module-level ``active_players`` list, the recent
    games are fetched with get_recent_games. When there are any, that
    player's rows from the cutoff date onward are deleted from
    ``all_player_game_stats`` and the fresh rows appended, both inside one
    transaction.

    A failure for one player is printed and the loop moves on. The pacing
    sleeps run after every player, including failed ones, so an error never
    causes the next request to be sent immediately.

    Args:
        engine: SQLAlchemy engine connected to the target database.
    """
    # Loop-invariant statement, built once. Parameters are bound at execute
    # time rather than formatted into the SQL text.
    delete_query = text("""
        DELETE FROM "all_player_game_stats"
        WHERE "PLAYER_ID" = :player_id
        AND "GAME_DATE"::date >= :cutoff_date
    """)

    # enumerate keeps the progress counter in step with the player list even
    # when a player fails.
    for i, player in enumerate(active_players, start=1):
        player_id = player['id']
        try:
            print(f"Scraping recent games for {player['full_name']} - {i}")
            recent_games, cutoff_date = get_recent_games(player_id, days_back=20)

            if not recent_games.empty:
                # The fetched frame may name the column 'Player_ID'; the
                # table is queried with "PLAYER_ID", so align the name.
                if 'Player_ID' in recent_games.columns:
                    recent_games = recent_games.rename(columns={'Player_ID': 'PLAYER_ID'})

                # engine.begin() commits on success and rolls back on error,
                # so the delete and the append succeed or fail together.
                with engine.begin() as conn:
                    conn.execute(delete_query,
                                 {"player_id": player_id,
                                  "cutoff_date": cutoff_date})
                    recent_games.to_sql('all_player_game_stats',
                                        conn,
                                        if_exists='append',
                                        index=False)

        except Exception as e:
            # Best effort: report the failure and continue with the next player.
            print(f"Error updating player {player_id}: {e}")

        # Rate limiting. The per-player delays are unchanged (two short pauses,
        # or a short plus a long one every 40th player); only their placement
        # outside the try block is new.
        time.sleep(round(random.uniform(2, 3), 1))
        if i % 40 == 0:
            print()
            print("Long Sleep!")
            print()
            time.sleep(round(random.uniform(60, 120), 1))
        else:
            time.sleep(round(random.uniform(2, 3), 1))


# Run the refresh using the engine created earlier in this cell.
update_player_stats(engine)

Scraping recent games for Precious Achiuwa - 1
Got team mappings...
Processing 27 records...
Scraping recent games for Steven Adams - 2
Got team mappings...
Processing 31 records...
Scraping recent games for Bam Adebayo - 3
Got team mappings...
Processing 45 records...


KeyboardInterrupt: 

In [None]:
from nba_api.stats.static import teams

def parse_team_from_matchup(matchup, is_home):
    """Return one side of a MATCHUP string such as 'PHI vs. HOU' or 'PHI @ HOU'.

    The ' vs. ' separator is rewritten to ' @ ' so both forms split the same
    way; the left abbreviation is returned when ``is_home`` is truthy and the
    right one otherwise.
    """
    left_and_right = matchup.replace(' vs. ', ' @ ').split(' @ ')
    if is_home:
        return left_and_right[0]
    return left_and_right[1]

def set_team_id(games_df):
    """Add a TEAM_ID column to a game-log DataFrame, in place.

    Each row's MATCHUP string is passed to parse_team_from_matchup with
    ``is_home`` set to whether the string lacks an '@', and the resulting
    abbreviation is looked up in nba_api's static team list.

    Args:
        games_df: DataFrame with a MATCHUP column of strings. It is modified
            in place and may be empty.

    Raises:
        KeyError: if a parsed abbreviation is not in the static team list.
    """
    team_mapping = {team['abbreviation']: team['id'] for team in teams.get_teams()}

    # A list comprehension is used instead of DataFrame.apply(axis=1) because
    # apply() on an empty frame returns a DataFrame, which cannot be assigned
    # to a single column.
    # NOTE(review): rows containing '@' resolve to the right-hand abbreviation
    # of MATCHUP -- confirm that is the team TEAM_ID should hold.
    games_df['TEAM_ID'] = [
        team_mapping[parse_team_from_matchup(matchup, '@' not in matchup)]
        for matchup in games_df['MATCHUP']
    ]

In [18]:
from sqlalchemy import create_engine, text
from datetime import datetime, timedelta

import pandas as pd
import random
import time 

from nba_api.stats.static import teams
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog

# Build the engine used to write each dataframe to the SQL database.
# The connection URL can be overridden with the TEAMDATA_DB_URL environment
# variable so real credentials never have to be saved in the notebook; the
# fallback is the original local placeholder URL.
import os

engine = create_engine(
    os.environ.get(
        'TEAMDATA_DB_URL',
        'postgresql://chase:yourpassword@localhost:5433/TeamData',
    )
)

# Keep only the players flagged as active in nba_api's static player list.
active_players = [p for p in players.get_players() if p['is_active']]


# Helper used by set_team_id.
# Picks one of the two team abbreviations out of a MATCHUP string.
def parse_team_from_matchup(matchup, is_home):
    """Return one side of a MATCHUP string such as 'PHI vs. HOU' or 'PHI @ HOU'.

    Both separators are treated the same way: the string is split into a
    left and a right abbreviation. The left one is returned when ``is_home``
    is truthy, otherwise the right one.

    NOTE(review): set_team_id passes ``is_home = '@' not in matchup``, so for
    '@' rows the right-hand abbreviation is returned -- confirm that is the
    team TEAM_ID is meant to hold.
    """
    unified = matchup.replace(' vs. ', ' @ ')
    pair = unified.split(' @ ')
    which = 0 if is_home else 1
    return pair[which]

def set_team_id(games_df):
    """Add a TEAM_ID column to a game-log DataFrame, in place.

    Each row's MATCHUP string is passed to parse_team_from_matchup with
    ``is_home`` set to whether the string lacks an '@', and the resulting
    abbreviation is looked up in nba_api's static team list.

    Args:
        games_df: DataFrame with a MATCHUP column of strings. It is modified
            in place and may be empty.

    Raises:
        KeyError: if a parsed abbreviation is not in the static team list.
    """
    team_mapping = {team['abbreviation']: team['id'] for team in teams.get_teams()}

    # Build the column from a plain list instead of DataFrame.apply(axis=1):
    # on an empty frame apply() hands back a DataFrame rather than a Series,
    # and assigning that to a single column fails for players with no games.
    # NOTE(review): rows containing '@' resolve to the right-hand abbreviation
    # of MATCHUP -- confirm that is the team TEAM_ID should hold.
    games_df['TEAM_ID'] = [
        team_mapping[parse_team_from_matchup(matchup, '@' not in matchup)]
        for matchup in games_df['MATCHUP']
    ]


# Helpers for fetching a player's most recent games.
def get_recent_games(player_id, days_back, season='2024-25'):
    """Fetch a player's season game log and keep only the recent games.

    This variant prints the parsed and the re-formatted GAME_DATE columns for
    inspection, and returns GAME_DATE as upper-case 'MON DD, YYYY' strings.

    Args:
        player_id: Player id passed to the PlayerGameLog endpoint.
        days_back: Size of the look-back window in days, measured from the
            current local time.
        season: Season string passed to the endpoint. Defaults to '2024-25',
            the value that used to be hard-coded.

    Returns:
        A ``(recent_games, cutoff_date)`` tuple: the filtered DataFrame (with
        TEAM_ID added by set_team_id) and the datetime used as the threshold.
    """
    # The cutoff keeps the current time of day, while parsed game dates sit at
    # midnight, so a game played exactly `days_back` days ago is excluded
    # unless this runs at midnight.
    cutoff_date = datetime.now() - timedelta(days=days_back)

    game_log = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    df = game_log.get_data_frames()[0]

    # Parse to datetime so the rows can be compared with the cutoff.
    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'], format='%b %d, %Y')
    print(df['GAME_DATE'])

    set_team_id(df)

    # .copy() so the column rewrite below works on an independent frame.
    recent_games = df[df['GAME_DATE'] >= cutoff_date].copy()

    # Convert the dates back to upper-case text, e.g. 'FEB 01, 2025'.
    # NOTE(review): presumably this matches how GAME_DATE is stored in the
    # target table -- confirm against the schema.
    recent_games['GAME_DATE'] = recent_games['GAME_DATE'].dt.strftime('%b %d, %Y').str.upper()
    print(recent_games['GAME_DATE'])

    return recent_games, cutoff_date


def update_player_stats(engine, player_id='1627827', days_back=30):
    """Refresh the stored recent games for a single player.

    Single-player variant used to try the update on one id. The recent games
    are fetched with get_recent_games. When there are any, that player's rows
    from the cutoff date onward are deleted from ``all_player_game_stats``
    and the fresh rows appended, both inside one transaction. Errors are
    printed rather than raised.

    Args:
        engine: SQLAlchemy engine connected to the target database.
        player_id: Player id to refresh. Defaults to the id that used to be
            hard-coded in the body.
        days_back: Look-back window in days. Defaults to the previously
            hard-coded 30.
    """
    try:
        # The old progress line printed a blank name and a constant counter;
        # report the id that is actually being processed.
        print(f"Scraping recent games for player {player_id}")
        recent_games, cutoff_date = get_recent_games(player_id, days_back=days_back)

        if not recent_games.empty:
            # The fetched frame may name the column 'Player_ID'; the table is
            # queried with "PLAYER_ID", so align the name.
            if 'Player_ID' in recent_games.columns:
                recent_games = recent_games.rename(columns={'Player_ID': 'PLAYER_ID'})

            delete_query = text("""
                DELETE FROM "all_player_game_stats"
                WHERE "PLAYER_ID" = :player_id
                AND "GAME_DATE"::date >= :cutoff_date
            """)

            # engine.begin() commits on success and rolls back on error, so
            # the delete and the append succeed or fail together.
            with engine.begin() as conn:
                conn.execute(delete_query,
                             {"player_id": player_id,
                              "cutoff_date": cutoff_date})
                recent_games.to_sql('all_player_game_stats',
                                    conn,
                                    if_exists='append',
                                    index=False)

    except Exception as e:
        # Best effort: report the failure instead of raising.
        print(f"Error updating player {player_id}: {e}")

    # One short pause in case this is called repeatedly. The every-40th-call
    # long sleep was unreachable here because the counter was fixed at 1.
    time.sleep(round(random.uniform(2, 3), 1))

# Run the single-player version defined in this cell.
update_player_stats(engine)

Scraping recent games for   - 1
0    2025-02-01
1    2025-01-28
2    2025-01-27
3    2025-01-25
4    2025-01-23
5    2025-01-21
6    2025-01-19
7    2025-01-13
8    2025-01-07
9    2025-01-05
10   2025-01-03
11   2025-01-02
12   2024-12-31
13   2024-12-27
14   2024-12-19
15   2024-12-16
16   2024-12-13
17   2024-12-08
18   2024-11-27
19   2024-11-22
20   2024-11-19
21   2024-11-17
22   2024-11-15
23   2024-11-09
24   2024-11-08
25   2024-11-04
26   2024-11-03
27   2024-11-01
28   2024-10-30
29   2024-10-29
30   2024-10-27
31   2024-10-25
32   2024-10-23
Name: GAME_DATE, dtype: datetime64[ns]
0    FEB 01, 2025
1    JAN 28, 2025
2    JAN 27, 2025
3    JAN 25, 2025
4    JAN 23, 2025
5    JAN 21, 2025
6    JAN 19, 2025
7    JAN 13, 2025
8    JAN 07, 2025
9    JAN 05, 2025
Name: GAME_DATE, dtype: object
