#### Import libraries

In [1]:
from pathlib import Path

import pandas as pd

import fetchData_OffCourt
from dataProcessing_OffCourt import split_player_season, calculate_averages

#### Fetch Data

Fetch data for target seasons

In [2]:
# Target season to fetch for each player (player name -> list of season labels).
players_seasons = {
    "Shaquille O'Neal": ["2007-08"],
    "Kobe Bryant": ["2003-04"],
    "Derrick Rose": ["2016-17"],
    "Dwight Howard": ["2012-13"],
    "Carmelo Anthony": ["2016-17"],
    "Allen Iverson": ["2000-01"],
    "Gilbert Arenas": ["2009-10"],
    "Paul Pierce": ["2000-01"],
    "Kevin Love": ["2018-19"],
    "DeMar DeRozan": ["2018-19"],
}


Design the data fetching process

Data Validation: Ensure that the retrieved data is complete and accurate by checking for empty DataFrames and missing values, and by verifying data consistency.  
Rate Limiting: Introduce a delay between successive requests to the API.

In [3]:
# Fetch the selected stats for every player/season pair in the target set.
target_average_stats = fetchData_OffCourt.fetch_data(
    fetch_players_seasons=players_seasons,
    target_stats=['PTS', 'AST', 'REB', 'FG_PCT'],
)

In [4]:
# Quick visual check of the fetched target-season averages: a dict keyed by
# "<player>_<season>" whose values map each stat name to its average.
print(target_average_stats)

{"Shaquille O'Neal_2007-08": {'PTS': 13.639344262295081, 'AST': 1.5245901639344261, 'REB': 9.081967213114755, 'FG_PCT': 0.609327868852459}, 'Kobe Bryant_2003-04': {'PTS': 23.953846153846154, 'AST': 5.076923076923077, 'REB': 5.523076923076923, 'FG_PCT': 0.42938461538461536}, 'Derrick Rose_2016-17': {'PTS': 18.03125, 'AST': 4.421875, 'REB': 3.84375, 'FG_PCT': 0.463984375}, 'Dwight Howard_2012-13': {'PTS': 17.05263157894737, 'AST': 1.4210526315789473, 'REB': 12.43421052631579, 'FG_PCT': 0.563407894736842}, 'Carmelo Anthony_2016-17': {'PTS': 22.41891891891892, 'AST': 2.8783783783783785, 'REB': 5.918918918918919, 'FG_PCT': 0.4342297297297298}, 'Allen Iverson_2000-01': {'PTS': 31.08450704225352, 'AST': 4.577464788732394, 'REB': 3.8450704225352115, 'FG_PCT': 0.4128450704225352}, 'Gilbert Arenas_2009-10': {'PTS': 22.5625, 'AST': 7.1875, 'REB': 4.15625, 'FG_PCT': 0.412125}, 'Paul Pierce_2000-01': {'PTS': 25.25609756097561, 'AST': 3.0853658536585367, 'REB': 6.365853658536586, 'FG_PCT': 0.4525243

In [5]:
# Convert the dictionary of average stats to a DataFrame: each
# "<player>_<season>" key becomes a row label and each stat becomes a column.
average_stats_df = pd.DataFrame.from_dict(target_average_stats, orient='index')

# Save the DataFrame to a CSV file. The output folder is created first because
# to_csv raises OSError when the parent directory is missing, which would break
# a "Restart & Run All" on a fresh checkout.
target_csv_path = Path('OriginalDatasets/target_average_player_stats.csv')
target_csv_path.parent.mkdir(parents=True, exist_ok=True)
average_stats_df.to_csv(target_csv_path)

# as_posix() keeps the message identical on every platform.
print(f"Data saved to '{target_csv_path.as_posix()}'")

Data saved to 'OriginalDatasets/target_average_player_stats.csv'


Fetch data for baseline seasons

In [6]:
# Baseline seasons per player: the two seasons on either side of the
# player's target season (player name -> list of season labels).
players_baseline_seasons = {
    "Shaquille O'Neal": ["2005-06", "2006-07", "2008-09", "2009-10"],
    "Kobe Bryant": ["2001-02", "2002-03", "2004-05", "2005-06"],
    "Derrick Rose": ["2014-15", "2015-16", "2017-18", "2018-19"],
    "Dwight Howard": ["2010-11", "2011-12", "2013-14", "2014-15"],
    "Carmelo Anthony": ["2014-15", "2015-16", "2017-18", "2018-19"],
    "Allen Iverson": ["1998-99", "1999-00", "2001-02", "2002-03"],
    "Gilbert Arenas": ["2007-08", "2008-09", "2010-11", "2011-12"],
    "Paul Pierce": ["1998-99", "1999-00", "2001-02", "2002-03"],
    "Kevin Love": ["2016-17", "2017-18", "2019-20", "2020-21"],
    "DeMar DeRozan": ["2016-17", "2017-18", "2019-20", "2020-21"],
}

In [7]:
# Fetch the same four stats for every player/season pair in the baseline set.
baseline_avg_stats = fetchData_OffCourt.fetch_data(
    fetch_players_seasons=players_baseline_seasons,
    target_stats=['PTS', 'AST', 'REB', 'FG_PCT'],
)

In [8]:
# Quick visual check of the fetched baseline-season averages: a dict keyed by
# "<player>_<season>" whose values map each stat name to its average.
print(baseline_avg_stats)

{"Shaquille O'Neal_2005-06": {'PTS': 20.016949152542374, 'AST': 1.9152542372881356, 'REB': 9.169491525423728, 'FG_PCT': 0.5963220338983052}, "Shaquille O'Neal_2006-07": {'PTS': 17.25, 'AST': 1.975, 'REB': 7.425, 'FG_PCT': 0.58515}, "Shaquille O'Neal_2008-09": {'PTS': 17.773333333333333, 'AST': 1.68, 'REB': 8.44, 'FG_PCT': 0.6110266666666666}, "Shaquille O'Neal_2009-10": {'PTS': 12.0, 'AST': 1.509433962264151, 'REB': 6.69811320754717, 'FG_PCT': 0.5678679245283018}, 'Kobe Bryant_2001-02': {'PTS': 25.2375, 'AST': 5.475, 'REB': 5.5125, 'FG_PCT': 0.4760999999999999}, 'Kobe Bryant_2002-03': {'PTS': 30.01219512195122, 'AST': 5.865853658536586, 'REB': 6.878048780487805, 'FG_PCT': 0.45297560975609763}, 'Kobe Bryant_2004-05': {'PTS': 27.560606060606062, 'AST': 6.03030303030303, 'REB': 5.9393939393939394, 'FG_PCT': 0.43371212121212116}, 'Kobe Bryant_2005-06': {'PTS': 35.4, 'AST': 4.5, 'REB': 5.3125, 'FG_PCT': 0.4521875000000001}, 'Derrick Rose_2014-15': {'PTS': 17.725490196078432, 'AST': 4.921568

In [9]:
# Convert the dictionary of average stats to a DataFrame: each
# "<player>_<season>" key becomes a row label and each stat becomes a column.
average_stats_df = pd.DataFrame.from_dict(baseline_avg_stats, orient='index')

# Save the DataFrame to a CSV file. The output folder is created first because
# to_csv raises OSError when the parent directory is missing, which would break
# a "Restart & Run All" on a fresh checkout.
baseline_csv_path = Path('OriginalDatasets/baseline_average_player_stats.csv')
baseline_csv_path.parent.mkdir(parents=True, exist_ok=True)
average_stats_df.to_csv(baseline_csv_path)

# as_posix() keeps the message identical on every platform.
print(f"Data saved to '{baseline_csv_path.as_posix()}'")

Data saved to 'OriginalDatasets/baseline_average_player_stats.csv'


#### Data Processing

In [10]:
# Locations of the two CSV files written by the fetch section above
baseline_path = './OriginalDatasets/baseline_average_player_stats.csv'
target_path = './OriginalDatasets/target_average_player_stats.csv'

# Load both files (baseline first, then target) into DataFrames
base_df, tgt_df = (pd.read_csv(csv_path) for csv_path in (baseline_path, target_path))

In [11]:
# Break 'Player_Season' apart into 'Player' and 'Season' in each dataset
# (baseline first, then target)
base_df, tgt_df = (split_player_season(frame) for frame in (base_df, tgt_df))

# Compute the two per-player average tables from the baseline and target frames
before_df, after_df = calculate_averages(base_df, tgt_df)

# Optional preview: first rows of each result, one table after the other
print(before_df.head(), after_df.head(), sep='\n')

                        PTS       AST        REB    FG_PCT
Shaquille O'Neal  18.633475  1.945127   8.297246  0.590736
Kobe Bryant       27.624848  5.670427   6.195274  0.464538
Derrick Rose      17.044563  4.816845   3.282977  0.407634
Dwight Howard     21.741453  1.648860  14.306980  0.583527
Carmelo Anthony   22.998611  3.601389   7.161111  0.444208
                        PTS       AST        REB    FG_PCT
Shaquille O'Neal  14.886667  1.594717   7.569057  0.589447
Kobe Bryant       31.480303  5.265152   5.625947  0.442950
Derrick Rose      13.170196  2.896863   2.072549  0.441011
Dwight Howard     17.011852  1.532291  11.354689  0.599041
Carmelo Anthony   14.783333  0.910256   5.603846  0.393571


In [12]:
# Persist the two per-player average tables produced by calculate_averages.
# NOTE(review): judging by the printed output, before_df/after_df hold the mean
# of the baseline seasons before/after each player's target season. Confirm in
# dataProcessing_OffCourt.
before_df.to_csv('./OriginalDatasets/before_averages.csv')
after_df.to_csv('./OriginalDatasets/after_averages.csv')