#### Import libraries

In [1]:
import time
from pathlib import Path

import pandas as pd
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players

#### Fetch Data

Fetch data for target seasons

In [2]:
# Target season(s) to fetch for each player. Keys are the full names handed to
# the nba_api player lookup; values are lists of season strings.
players_seasons = {
    "Shaquille O'Neal": ["2007-08"],
    "Kobe Bryant": ["2003-04"],
    "Derrick Rose": ["2016-17"],
    "Dwight Howard": ["2012-13"],
    "Carmelo Anthony": ["2016-17"],
    "Allen Iverson": ["2000-01"],
    "Gilbert Arenas": ["2009-10"],
    "Paul Pierce": ["2000-01"],
    "Kevin Love": ["2018-19"],
    "DeMar DeRozan": ["2018-19"],
}


Design the data fetching process

Data Validation: Ensure that the retrieved data is complete and accurate by checking for empty DataFrames, missing values, and inconsistent (non-numeric) data types.  
Rate Limiting: Introduce a delay between consecutive requests to the API.

In [3]:
def fetch_data(fetch_players_seasons, target_stats = ['PTS', 'AST', 'REB', 'FG_PCT'], request_delay = 0.1):
    """Fetch game logs and compute per-season average stats for each player.

    Args:
        fetch_players_seasons: dict mapping a player's full name to a list of
            season strings (e.g. ``'2007-08'``).
        target_stats: game-log columns that must be present, free of missing
            values and numeric. The four averaged columns (PTS, AST, REB,
            FG_PCT) are always validated as well. The default list is only
            read, never mutated.
        request_delay: seconds to sleep before each API request.

    Returns:
        dict mapping player name to a dict with the keys ``'PPG'``, ``'APG'``,
        ``'RPG'`` and ``'FG%'``. Players that cannot be found and seasons that
        fail validation are reported with ``print`` and skipped. If several
        seasons are listed for one player, every valid season overwrites the
        previous entry, so only the last valid season is kept.
    """
    # Game-log column -> key used in the result. Insertion order fixes the
    # key (and later DataFrame column) order: PPG, APG, RPG, FG%.
    stat_labels = {'PTS': 'PPG', 'AST': 'APG', 'REB': 'RPG', 'FG_PCT': 'FG%'}

    # Validate what the caller asked for plus everything that is averaged
    # below; dict.fromkeys de-duplicates while preserving order.
    required_stats = list(dict.fromkeys([*target_stats, *stat_labels]))

    average_stats = {}

    for name, seasons in fetch_players_seasons.items():
        matches = players.find_players_by_full_name(name)
        if not matches:
            # Previously skipped silently; report it like every other skip.
            print(f"No player found matching '{name}'.")
            continue

        # The lookup is a pattern search and can return several players.
        # Prefer an exact full-name match; otherwise fall back to the first
        # hit, which is what was always used before.
        wanted = name.casefold()
        player = next(
            (m for m in matches if str(m.get('full_name', '')).casefold() == wanted),
            matches[0],
        )
        player_id = player['id']

        for season in seasons:
            # Delay each API request
            time.sleep(request_delay)

            gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
            df = gamelog.get_data_frames()[0]

            # Data validation checks

            # Check for Empty DataFrames
            if df.empty:
                print(f"No data available for {name} in the {season} season.")
                continue

            # Check for Missing Columns (would otherwise raise KeyError below)
            missing_columns = [col for col in required_stats if col not in df.columns]
            if missing_columns:
                print(f"Missing columns {missing_columns} for {name} in the {season} season.")
                continue

            # Check for Missing Values
            if df[required_stats].isnull().values.any():
                print(f"Missing data for {name} in the {season} season.")
                continue

            # Verify Data Consistency
            if not all(pd.api.types.is_numeric_dtype(df[col]) for col in required_stats):
                print(f"Incorrect data types for {name} in the {season} season.")
                continue

            # Calculate and store the average stats. Note that 'FG%' is the
            # plain mean of the FG_PCT column over the game-log rows, not
            # total makes divided by total attempts.
            average_stats[name] = {
                label: df[col].mean() for col, label in stat_labels.items()
            }

    return average_stats

In [4]:
# Fetch the game logs for every player/season in players_seasons and reduce
# them to season averages (default 0.1 s pause before each API request).
target_average_stats = fetch_data(players_seasons)

In [5]:
# Show the per-player averages returned by fetch_data as a plain dict.
print(target_average_stats)

{"Shaquille O'Neal": {'PPG': 13.639344262295081, 'APG': 1.5245901639344261, 'RPG': 9.081967213114755, 'FG%': 0.609327868852459}, 'Kobe Bryant': {'PPG': 23.953846153846154, 'APG': 5.076923076923077, 'RPG': 5.523076923076923, 'FG%': 0.42938461538461536}, 'Derrick Rose': {'PPG': 18.03125, 'APG': 4.421875, 'RPG': 3.84375, 'FG%': 0.463984375}, 'Dwight Howard': {'PPG': 17.05263157894737, 'APG': 1.4210526315789473, 'RPG': 12.43421052631579, 'FG%': 0.563407894736842}, 'Carmelo Anthony': {'PPG': 22.41891891891892, 'APG': 2.8783783783783785, 'RPG': 5.918918918918919, 'FG%': 0.4342297297297298}, 'Allen Iverson': {'PPG': 31.08450704225352, 'APG': 4.577464788732394, 'RPG': 3.8450704225352115, 'FG%': 0.4128450704225352}, 'Gilbert Arenas': {'PPG': 22.5625, 'APG': 7.1875, 'RPG': 4.15625, 'FG%': 0.412125}, 'Paul Pierce': {'PPG': 25.25609756097561, 'APG': 3.0853658536585367, 'RPG': 6.365853658536586, 'FG%': 0.4525243902439025}, 'Kevin Love': {'PPG': 17.0, 'APG': 2.1818181818181817, 'RPG': 10.86363636363

In [6]:
# Convert the dictionary of average stats to a DataFrame
average_stats_df = pd.DataFrame.from_dict(target_average_stats, orient='index')

# Save the DataFrame to a CSV file
average_stats_df.to_csv('OriginalDatasets/target_average_player_stats.csv')

print("Data saved to 'OriginalDatasets/target_average_player_stats.csv'")

Data saved to 'OriginalDatasets/target_average_player_stats.csv'


Fetch data for baseline seasons

In [7]:
# Baseline seasons for each player: the two seasons before and the two seasons
# after that player's target season in players_seasons.
players_baseline_seasons = {
    "Shaquille O'Neal": ["2005-06", "2006-07", "2008-09", "2009-10"],
    "Kobe Bryant": ["2001-02", "2002-03", "2004-05", "2005-06"],
    "Derrick Rose": ["2014-15", "2015-16", "2017-18", "2018-19"],
    "Dwight Howard": ["2010-11", "2011-12", "2013-14", "2014-15"],
    "Carmelo Anthony": ["2014-15", "2015-16", "2017-18", "2018-19"],
    "Allen Iverson": ["1998-99", "1999-00", "2001-02", "2002-03"],
    "Gilbert Arenas": ["2007-08", "2008-09", "2010-11", "2011-12"],
    "Paul Pierce": ["1998-99", "1999-00", "2001-02", "2002-03"],
    "Kevin Love": ["2016-17", "2017-18", "2019-20", "2020-21"],
    "DeMar DeRozan": ["2016-17", "2017-18", "2019-20", "2020-21"],
}

In [8]:
def fetch_baseline_data(fetch_players_seasons, target_stats = ['PTS', 'AST', 'REB', 'FG_PCT'], request_delay = 0.1):
    """Fetch game logs and collect per-season average stats for each player.

    Args:
        fetch_players_seasons: dict mapping a player's full name to a list of
            season strings (e.g. ``'2005-06'``).
        target_stats: game-log columns that must be present, free of missing
            values and numeric. The four averaged columns (PTS, AST, REB,
            FG_PCT) are always validated as well. The default list is only
            read, never mutated.
        request_delay: seconds to sleep before each API request.

    Returns:
        dict mapping player name to a dict with the keys ``'PPG'``, ``'APG'``,
        ``'RPG'`` and ``'FG%'``, each holding a list with one average per
        valid season, in the order the seasons were given. Players that cannot
        be found and seasons that fail validation are reported with ``print``
        and skipped; a player with no valid season does not appear at all.
    """
    # Game-log column -> key used in the result. Insertion order fixes the
    # key (and later DataFrame column) order: PPG, APG, RPG, FG%.
    stat_labels = {'PTS': 'PPG', 'AST': 'APG', 'REB': 'RPG', 'FG_PCT': 'FG%'}

    # Validate what the caller asked for plus everything that is averaged
    # below; dict.fromkeys de-duplicates while preserving order.
    required_stats = list(dict.fromkeys([*target_stats, *stat_labels]))

    baseline_average_stats = {}

    for name, seasons in fetch_players_seasons.items():
        matches = players.find_players_by_full_name(name)
        if not matches:
            # Previously skipped silently; report it like every other skip.
            print(f"No player found matching '{name}'.")
            continue

        # The lookup is a pattern search and can return several players.
        # Prefer an exact full-name match; otherwise fall back to the first
        # hit, which is what was always used before.
        wanted = name.casefold()
        player = next(
            (m for m in matches if str(m.get('full_name', '')).casefold() == wanted),
            matches[0],
        )
        player_id = player['id']

        for season in seasons:
            # Delay each API request
            time.sleep(request_delay)

            gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
            df = gamelog.get_data_frames()[0]

            # Data validation checks

            # Check for Empty DataFrames
            if df.empty:
                print(f"No data available for {name} in the {season} season.")
                continue

            # Check for Missing Columns (would otherwise raise KeyError below)
            missing_columns = [col for col in required_stats if col not in df.columns]
            if missing_columns:
                print(f"Missing columns {missing_columns} for {name} in the {season} season.")
                continue

            # Check for Missing Values
            if df[required_stats].isnull().values.any():
                print(f"Missing data for {name} in the {season} season.")
                continue

            # Verify Data Consistency
            if not all(pd.api.types.is_numeric_dtype(df[col]) for col in required_stats):
                print(f"Incorrect data types for {name} in the {season} season.")
                continue

            # Create the player's entry only once a season has passed
            # validation, then append this season's averages. 'FG%' is the
            # plain mean of the FG_PCT column over the game-log rows, not
            # total makes divided by total attempts.
            season_averages = baseline_average_stats.setdefault(
                name, {label: [] for label in stat_labels.values()}
            )
            for col, label in stat_labels.items():
                season_averages[label].append(df[col].mean())

    return baseline_average_stats

In [9]:
# Per-player lists of season averages (one list entry per valid season)
baseline_avg_stats = fetch_baseline_data(players_baseline_seasons)

# Collapse every list into a single number: the unweighted mean of the
# per-season averages. Only values of existing keys are replaced, so updating
# the dict while iterating over it is safe.
for season_lists in baseline_avg_stats.values():
    for stat_name, per_season in season_lists.items():
        season_lists[stat_name] = sum(per_season) / len(per_season)

In [10]:
# Show the per-player baseline averages (each stat already reduced to one value).
print(baseline_avg_stats)

{"Shaquille O'Neal": {'PPG': 16.760070621468927, 'APG': 1.7699220498880717, 'RPG': 7.933151183242724, 'FG%': 0.5900916562733184}, 'Kobe Bryant': {'PPG': 29.552575295639322, 'APG': 5.467789172209904, 'RPG': 5.910610679970436, 'FG%': 0.4537438077420547}, 'Derrick Rose': {'PPG': 15.107379679144387, 'APG': 3.8568538324420674, 'RPG': 2.6777629233511586, 'FG%': 0.42432275401069514}, 'Dwight Howard': {'PPG': 19.376652294421103, 'APG': 1.59057585384449, 'RPG': 12.83083458362572, 'FG%': 0.5912839724749721}, 'Carmelo Anthony': {'PPG': 18.890972222222224, 'APG': 2.2558226495726497, 'RPG': 6.382478632478632, 'FG%': 0.418889423076923}, 'Allen Iverson': {'PPG': 28.533246225319395, 'APG': 5.0961999128919855, 'RPG': 4.352351916376307, 'FG%': 0.40829231271777006}, 'Gilbert Arenas': {'PPG': 11.851405946994182, 'APG': 5.012508080155139, 'RPG': 3.045475113122172, 'FG%': 0.3345525371687137}, 'Paul Pierce': {'PPG': 22.024375992118035, 'APG': 3.255969735465735, 'RPG': 6.520263063709837, 'FG%': 0.434786631865

In [11]:
# Convert the dictionary of average stats to a DataFrame
average_stats_df = pd.DataFrame.from_dict(baseline_avg_stats, orient='index')

# Save the DataFrame to a CSV file
average_stats_df.to_csv('OriginalDatasets/baseline_average_player_stats.csv')

print("Data saved to 'OriginalDatasets/baseline_average_player_stats.csv'")

Data saved to 'OriginalDatasets/baseline_average_player_stats.csv'
