In [5]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashplayerstats
import time

# Season (NBA 'YYYY-YY' label) and season type requested from the stats API
season = '2024-25'
season_type = 'Regular Season'

# Columns kept for the player-minutes export
relevant_columns = ['PLAYER_NAME', 'TEAM_ABBREVIATION', 'GP', 'MIN']

try:
    # Season totals for every player
    player_stats = leaguedashplayerstats.LeagueDashPlayerStats(
        season=season,
        season_type_all_star=season_type,
        per_mode_detailed='Totals'
    )

    # The first frame returned by the endpoint is the one used here
    df_player_totals = player_stats.get_data_frames()[0]

    # Keep only the requested columns that the response actually contains
    cols_to_show = [col for col in relevant_columns if col in df_player_totals.columns]
    df = df_player_totals[cols_to_show]

except Exception as e:
    print(f"An error occurred while fetching data: {e}")
    print("This could be due to API changes, network issues, or invalid parameters.")
    # Re-raise so the notebook stops here: otherwise `df` is either undefined or
    # left over from a previous run, and the export cell would write stale data.
    raise



In [7]:
# Write the player-minutes table to disk. index=False keeps the positional
# row index out of the file, matching every other CSV export in this notebook.
# NOTE(review): this path does not go through the
# '../../Project-8---Sports-Analysis/data/rawdata/' base used by the other cells,
# so it may resolve outside the project directory -- confirm the intended location.
df.to_csv('../../data/rawdata/PlayerMinutes.csv', index=False)

In [4]:
# Install nba_api if you haven't already
# pip install nba_api

import pandas as pd
from nba_api.stats.endpoints import leaguedashteamstats
import time
import os # Import os for directory creation and path handling
import warnings

# Keep FutureWarning chatter (e.g. from nba_api or pandas) out of the cell output
warnings.simplefilter('ignore', FutureWarning)

# Inclusive range of season start years; 2024 stands for the 2024-25 season
start_year = 2021
end_year = 2024

# Season labels in the 'YYYY-YY' form, e.g. 2021 -> '2021-22'
seasons = [f"{y}-{(y + 1) % 100:02d}" for y in range(start_year, end_year + 1)]

# Output locations, relative to the notebook's working directory
base_save_path = '../../Project-8---Sports-Analysis/data/rawdata/'
clean_save_path = '../../Project-8---Sports-Analysis/data/cleandata/'

# Create the output directories (and any missing parents) if they don't exist
try:
    os.makedirs(base_save_path, exist_ok=True)
    os.makedirs(clean_save_path, exist_ok=True)
    print(f"Ensured raw data directory exists: {os.path.abspath(base_save_path)}")
    print(f"Ensured clean data directory exists: {os.path.abspath(clean_save_path)}")
except OSError as e:
    print(f"Error creating base directories: {e}")
    # Re-raise rather than calling exit(): inside a notebook exit() asks the
    # kernel to shut down (discarding all state), and as a plain script it
    # would terminate with status 0 as if nothing had gone wrong.
    raise

# List to potentially store dataframes if needed later (optional now)
# all_seasons_data = [] 

print(f"\nFetching data for seasons: {seasons}")

# Seasons that could not be fully processed; reported at the end so the
# closing message does not imply that every season was saved.
failed_seasons = []

for season in seasons:
    # Season-specific directory name (e.g. '21-22' from '2021-22')
    season_folder_name = f"{season[2:4]}-{season[-2:]}"
    season_raw_save_dir = os.path.join(base_save_path, season_folder_name)

    # Create the season-specific raw directory
    try:
        os.makedirs(season_raw_save_dir, exist_ok=True)
        print(f"\nProcessing {season}... Saving raw data to: {season_raw_save_dir}")
    except OSError as e:
        print(f"Error creating directory {season_raw_save_dir}: {e}")
        failed_seasons.append(season)
        continue  # Skip this season if directory creation fails

    try:
        # --- Fetch Base Stats ---
        print(f"  Fetching Base stats for {season}...")
        base_stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            measure_type_detailed_defense='Base',
            per_mode_detailed='Totals'
        )
        base_df = base_stats.get_data_frames()[0]

        # --- Save Raw Base Stats ---
        base_filename = os.path.join(season_raw_save_dir, 'base_stats.csv')
        base_df.to_csv(base_filename, index=False)
        print(f"    Saved raw base stats to {base_filename}")
        time.sleep(1)  # pause between API requests

        # --- Fetch Advanced Stats ---
        print(f"  Fetching Advanced stats for {season}...")
        advanced_stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            measure_type_detailed_defense='Advanced',
            per_mode_detailed='Totals'
        )
        advanced_df = advanced_stats.get_data_frames()[0]

        # --- Save Raw Advanced Stats ---
        advanced_filename = os.path.join(season_raw_save_dir, 'advanced_stats.csv')
        advanced_df.to_csv(advanced_filename, index=False)
        print(f"    Saved raw advanced stats to {advanced_filename}")
        time.sleep(1)  # pause between API requests

        # --- Process Data for Clean Output ---

        # Select the columns needed from each raw frame
        base_subset = base_df[['TEAM_ID', 'TEAM_NAME', 'W', 'FG_PCT', 'FG3_PCT', 'FGA', 'FG3A']].copy()
        advanced_subset = advanced_df[[
            'TEAM_ID', 'TEAM_NAME', 'OFF_RATING', 'DEF_RATING', 'TS_PCT',
            'EFG_PCT', 'TM_TOV_PCT'
        ]].copy()

        # One row per team present in both frames
        season_df = pd.merge(base_subset, advanced_subset, on=['TEAM_ID', 'TEAM_NAME'], how='inner')

        # Two-point attempts are not a raw column: total attempts minus threes
        season_df['FG2A'] = season_df['FGA'] - season_df['FG3A']
        season_df['SEASON'] = season  # Tag rows with the season before renaming/saving

        # Rename columns for the clean DataFrame (assignment instead of inplace=True)
        season_df = season_df.rename(columns={
            'W': 'Wins', 'OFF_RATING': 'ORTG', 'DEF_RATING': 'DRTG',
            'TS_PCT': 'TS_Percent', 'FG3_PCT': 'FG3_Percent', 'FG_PCT': 'FG_Percent',
            'FG3A': 'Team_3s_Attempted', 'FG2A': 'Team_2s_Attempted',
            'EFG_PCT': 'eFG_Percent', 'TM_TOV_PCT': 'TOV_Percent'
        })

        # Select and reorder columns for the clean DataFrame (FGA is dropped here)
        season_df = season_df[[
            'SEASON', 'TEAM_NAME', 'TEAM_ID', 'Wins', 'ORTG', 'DRTG',
            'eFG_Percent', 'TS_Percent', 'TOV_Percent', 'FG3_Percent',
            'FG_Percent', 'Team_3s_Attempted', 'Team_2s_Attempted'
        ]]

        # --- Save Clean Season DataFrame ---
        clean_filename = os.path.join(clean_save_path, f"{season_folder_name}.csv")
        season_df.to_csv(clean_filename, index=False)
        print(f"    Saved clean data for {season} to {clean_filename}")

        # Optional: Append to list if you still want a combined DataFrame later
        # all_seasons_data.append(season_df)

        print(f"  Successfully processed and saved data for {season}")

    except Exception as e:
        # Best-effort: report, remember the failure, and move on to the next season
        print(f"  Could not fetch, process, or save data for season {season}: {e}")
        failed_seasons.append(season)

    time.sleep(2)  # Longer delay between seasons

print("\nScript finished. Individual cleaned season files saved in:", os.path.abspath(clean_save_path))
if failed_seasons:
    # Make partial failure visible instead of leaving only the success line above
    print(f"WARNING: no complete output was produced for seasons: {failed_seasons}")

# --- Combine all seasons into one DataFrame for analysis (Optional - uncomment if needed) ---
# if all_seasons_data:
#     final_df = pd.concat(all_seasons_data, ignore_index=True)
#     print("\n--- Sample of the final combined DataFrame ---")
#     print(final_df.head())
#     print(f"\nShape of final combined DataFrame: {final_df.shape}")
    
#     # Optionally save the combined dataframe (relative to notebook)
#     # combined_filename = '../../data/processed/combined_team_stats_2021_onwards.csv'
#     # os.makedirs(os.path.dirname(combined_filename), exist_ok=True) # Ensure processed dir exists
#     # final_df.to_csv(combined_filename, index=False)
#     # print(f"\nSaved combined data to {combined_filename}")

# else:
#      print("\nNo data was processed successfully to create a combined DataFrame.")

Ensured raw data directory exists: /home/grenadi3/Project-8---Sports-Analysis/data/rawdata
Ensured clean data directory exists: /home/grenadi3/Project-8---Sports-Analysis/data/cleandata

Fetching data for seasons: ['2021-22', '2022-23', '2023-24', '2024-25']

Processing 2021-22... Saving raw data to: ../../Project-8---Sports-Analysis/data/rawdata/21-22
  Fetching Base stats for 2021-22...
    Saved raw base stats to ../../Project-8---Sports-Analysis/data/rawdata/21-22/base_stats.csv
  Fetching Advanced stats for 2021-22...
    Saved raw advanced stats to ../../Project-8---Sports-Analysis/data/rawdata/21-22/advanced_stats.csv
    Saved clean data for 2021-22 to ../../Project-8---Sports-Analysis/data/cleandata/21-22.csv
  Successfully processed and saved data for 2021-22

Processing 2022-23... Saving raw data to: ../../Project-8---Sports-Analysis/data/rawdata/22-23
  Fetching Base stats for 2022-23...
    Saved raw base stats to ../../Project-8---Sports-Analysis/data/rawdata/22-23/base_s

In [7]:

import pandas as pd
# Make sure to import the correct endpoint for player stats
from nba_api.stats.endpoints import leaguedashplayerstats 
import time
import os 
import warnings

# Keep FutureWarning chatter out of the cell output
warnings.simplefilter('ignore', FutureWarning)

# Inclusive range of season start years; 2024 stands for the 2024-25 season
start_year = 2021
end_year = 2024

# Season labels in the 'YYYY-YY' form, e.g. 2021 -> '2021-22'
seasons = [f"{y}-{(y + 1) % 100:02d}" for y in range(start_year, end_year + 1)]

# Output locations, relative to the working directory of the notebook/script.
# IMPORTANT: if the notebook lives somewhere else (e.g. inside
# Project-8---Sports-Analysis/notebooks/), these may need adjusting
# (e.g. by removing '../../').
base_save_path = '../../Project-8---Sports-Analysis/data/rawdata/'
clean_save_path = '../../Project-8---Sports-Analysis/data/cleandata/'

# --- Create Base Directories ---
# Parent ('data') directory of each output path. The paths end with a slash, so
# os.path.dirname alone would return the same directory rather than its parent;
# normalising first strips the trailing separator.
raw_base_dir = os.path.dirname(os.path.normpath(base_save_path))      # '../../Project-8---Sports-Analysis/data'
clean_base_dir = os.path.dirname(os.path.normpath(clean_save_path))  # '../../Project-8---Sports-Analysis/data'

try:
    # Create the parent directories ('data') if needed
    os.makedirs(raw_base_dir, exist_ok=True)
    os.makedirs(clean_base_dir, exist_ok=True)
    # Now ensure the 'rawdata' and 'cleandata' subdirectories exist
    os.makedirs(base_save_path, exist_ok=True)
    os.makedirs(clean_save_path, exist_ok=True)
    print(f"Ensured raw data directory exists: {os.path.abspath(base_save_path)}")
    print(f"Ensured clean data directory exists: {os.path.abspath(clean_save_path)}")
except OSError as e:
    print(f"Error creating base directories: {e}")
    # Re-raise rather than calling exit(): inside a notebook exit() asks the
    # kernel to shut down (discarding all state), and as a plain script it
    # would terminate with status 0 as if nothing had gone wrong.
    raise


print(f"\nFetching player minutes data for seasons: {seasons}")

# Seasons that could not be fully processed; reported at the end so the
# closing message does not imply that every season was saved.
failed_seasons = []

for season in seasons:
    # Season-specific directory name (e.g. '21-22' from '2021-22')
    season_folder_name = f"{season[2:4]}-{season[-2:]}"
    # Raw files for a season live in their own folder under base_save_path
    season_raw_save_dir = os.path.join(base_save_path, season_folder_name)

    # Create the season-specific raw directory
    try:
        os.makedirs(season_raw_save_dir, exist_ok=True)
        print(f"\nProcessing {season}... Saving raw player data to: {season_raw_save_dir}")
    except OSError as e:
        print(f"Error creating directory {season_raw_save_dir}: {e}")
        failed_seasons.append(season)
        continue  # Skip this season if directory creation fails

    try:
        # --- Fetch Player Stats (including Minutes) ---
        print(f"  Fetching Base player stats for {season}...")
        player_stats = leaguedashplayerstats.LeagueDashPlayerStats(
            season=season,
            measure_type_detailed_defense='Base',  # the MIN column is read from this result below
            per_mode_detailed='Totals'  # season totals rather than per-game values
        )
        player_stats_df = player_stats.get_data_frames()[0]

        # --- Save Raw Player Stats ---
        raw_filename = os.path.join(season_raw_save_dir, f'player_minutes_raw_{season_folder_name}.csv')
        player_stats_df.to_csv(raw_filename, index=False)
        print(f"    Saved raw player stats to {raw_filename}")
        time.sleep(1)  # pause after the API request

        # --- Create Clean Player Minutes DataFrame ---
        # NOTE(review): how players traded mid-season are represented (several
        # rows vs. one combined row) is not established by this code -- check
        # the raw CSV before aggregating by TEAM_ABBREVIATION.
        clean_df = player_stats_df[[
            'PLAYER_ID',
            'PLAYER_NAME',
            'TEAM_ID',
            'TEAM_ABBREVIATION',
            'GP',   # Games Played
            'MIN'   # Minutes Played
        ]].copy()  # independent copy so adding SEASON cannot touch the raw frame

        # Tag every row with the season it belongs to
        clean_df['SEASON'] = season

        # --- Save Clean Player Minutes DataFrame ---
        clean_filename = os.path.join(clean_save_path, f"player_minutes_{season_folder_name}.csv")
        clean_df.to_csv(clean_filename, index=False)
        print(f"    Saved clean player minutes data for {season} to {clean_filename}")

        print(f"  Successfully processed and saved player minutes for {season}")

    except Exception as e:
        # Best-effort: report, remember the failure, and move on to the next season
        print(f"  Could not fetch, process, or save player data for season {season}: {e}")
        failed_seasons.append(season)

    time.sleep(2)  # Longer delay between seasons

print("\nScript finished. Individual raw and cleaned player minutes files saved.")
if failed_seasons:
    # Make partial failure visible instead of leaving only the success line above
    print(f"WARNING: no complete output was produced for seasons: {failed_seasons}")

Ensured raw data directory exists: /home/grenadi3/Project-8---Sports-Analysis/data/rawdata
Ensured clean data directory exists: /home/grenadi3/Project-8---Sports-Analysis/data/cleandata

Fetching player minutes data for seasons: ['2021-22', '2022-23', '2023-24', '2024-25']

Processing 2021-22... Saving raw player data to: ../../Project-8---Sports-Analysis/data/rawdata/21-22
  Fetching Base player stats for 2021-22...
    Saved raw player stats to ../../Project-8---Sports-Analysis/data/rawdata/21-22/player_minutes_raw_21-22.csv
    Saved clean player minutes data for 2021-22 to ../../Project-8---Sports-Analysis/data/cleandata/player_minutes_21-22.csv
  Successfully processed and saved player minutes for 2021-22

Processing 2022-23... Saving raw player data to: ../../Project-8---Sports-Analysis/data/rawdata/22-23
  Fetching Base player stats for 2022-23...
    Saved raw player stats to ../../Project-8---Sports-Analysis/data/rawdata/22-23/player_minutes_raw_22-23.csv
    Saved clean playe