In [105]:

import pandas as pd
import os
import numpy as np

In [106]:
all_years_data = pd.read_csv('all_player_playoff_stats.csv',index_col=0)
all_years_data['Player'] = all_years_data['Player'].str.strip('*')

all_years_data

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Year
0,Kareem Abdul-Jabbar,C,32,LAL,15,0,41.2,13.2,23.1,0.572,...,28.5,2.2,1.0,3.3,0.253,7.4,1.5,8.9,1.7,1980
1,Alvan Adams,C,25,PHO,8,0,31.4,7.0,12.4,0.566,...,21.6,0.5,0.3,0.8,0.157,4.2,2.1,6.2,0.5,1980
2,Tiny Archibald,PG,31,BOS,9,0,36.9,5.0,9.9,0.506,...,18.6,0.4,0.4,0.7,0.105,1.9,-1.8,0.1,0.2,1980
3,James Bailey,PF,22,SEA,12,0,11.5,1.8,3.7,0.477,...,18.4,0.1,0.2,0.3,0.113,-1.1,2.3,1.2,0.1,1980
4,Greg Ballard,SF,25,WSB,2,0,36.5,4.5,14.0,0.321,...,21.9,-0.3,0.0,-0.3,-0.170,-7.4,-1.7,-9.1,-0.1,1980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8369,Ziaire Williams,SF,21,MEM,4,0,3.0,0.5,1.8,0.286,...,31.4,-0.1,0.0,-0.1,-0.275,-6.3,-1.9,-8.2,0.0,2023
8370,Trae Young,PG,24,ATL,6,6,38.3,10.0,24.8,0.403,...,34.3,0.3,0.0,0.3,0.066,4.7,-1.1,3.6,0.3,2023
8371,Omer Yurtseven,C,24,MIA,8,0,2.0,0.3,0.9,0.286,...,22.5,-0.1,0.0,0.0,-0.113,-8.6,-4.5,-13.1,0.0,2023
8372,Cody Zeller,C,30,MIA,21,0,8.3,1.0,1.7,0.571,...,14.1,-0.1,0.2,0.1,0.024,-6.7,-0.1,-6.8,-0.2,2023


In [116]:
import pandas as pd
import os

# Create an empty DataFrame to store combined player stats
combined_player_stats = pd.DataFrame()

# Directory path where player stats CSV files are located
player_stats_directory = '../Basketball Reference Stat Scraper/player_stats'

# List of years based on the filenames in the directory
years = range(1980,2024)

# Loop through the list of years and read player stats CSV files
for year in years:

    csv_filename = os.path.join(player_stats_directory, f'{year}_player_stats.csv')

    if os.path.exists(csv_filename):

        print(year)
        year_data = pd.read_csv(csv_filename, index_col=0)
        year_data['Player'] = year_data['Player'].str.strip('*')

        # Concatenate the player stats for the current year to the combined_player_stats DataFrame
        combined_player_stats = pd.concat([combined_player_stats, year_data], ignore_index=True)

# Now, you have the combined_player_stats DataFrame with all player stats

# Merge combined_player_stats with all_years_data based on both 'Year' and 'Player'
merged_data = pd.merge(combined_player_stats, all_years_data, on=['Year', 'Player'], how='outer',suffixes=('','_playoffs'))

# Now, you have a merged DataFrame containing both playoff stats and player stats,
# where player data is correctly matched to the corresponding player and team for each year.
merged_data.to_csv('all_player_stats_reg_and_playoffs.csv')

1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023


In [108]:
merged_data = merged_data.sort_values(['Year','Player']).copy()

In [110]:
import pandas as pd
import numpy as np

# Assuming you have filtered the data as mentioned in your previous code
filtered_data = merged_data[(merged_data['PTS'] >= 15) & (merged_data['G'] > 25)].copy()

# Function to calculate weighted average PPG
def weighted_average_ppg(group, points_col, games_col):
    values = group[points_col] / group[games_col]
    weights = group[games_col]
    return np.average(values, weights=weights)

# Calculate the weighted average PPG in the playoffs for each player across all the years
weighted_playoff_ppg = filtered_data.groupby('Player').apply(weighted_average_ppg, 'PTS_playoffs', 'G_playoffs')

# Calculate the weighted average PPG during the regular season for each player across all the years
weighted_regular_season_ppg = filtered_data.groupby('Player').apply(weighted_average_ppg, 'PTS', 'G')

# Calculate the career increase in average PPG during the playoffs for each player
career_playoff_ppg_increase = weighted_playoff_ppg - weighted_regular_season_ppg

# Create a DataFrame to store the results
result_df = pd.DataFrame({
    'Player': career_playoff_ppg_increase.index,
    'Career_Playoff_PPG_Increase': career_playoff_ppg_increase
})

# Sort the DataFrame by Career Playoff PPG Increase in descending order
result_df = result_df.sort_values(by='Career_Playoff_PPG_Increase', ascending=False).reset_index(drop=True)

# Display the top 25 players with the largest career increase in average PPG during the playoffs
top_25_players = result_df.head(25)
top_25_players.reset_index(drop=True)


Unnamed: 0,Player,Career_Playoff_PPG_Increase
0,Campy Russell,11.292405
1,Albert King,10.28481
2,Ray Williams,9.375932
3,Billy Knight,9.036585
4,Walter Berry,7.094977
5,Michael Adams,7.082051
6,Ron Anderson,6.702439
7,Elvin Hayes,6.560123
8,Billy Owens,6.376793
9,World B. Free,6.258099


In [114]:
# Columns that you want to include in the result DataFrame
additional_cols = ['FG%', '3P%', 'FT%', 'AST', 'TRB', 'STL', 'BLK']

# Function to calculate weighted average for a particular column
def weighted_average(group, col, weight_col):
    values = group[col]
    weights = group[weight_col]
    return np.average(values, weights=weights)

# Create a dictionary to store career stats for each player
career_stats_dict = {}
for col in additional_cols:
    career_stats_dict[col] = filtered_data.groupby('Player').apply(weighted_average, col, 'G')

# Convert the dictionary to a DataFrame
career_stats_df = pd.DataFrame(career_stats_dict)

# Merge with the existing result DataFrame
final_result_df = pd.merge(result_df, career_stats_df, left_on='Player', right_index=True, how='left')

# The final_result_df will now have additional career stats for each player
final_result_df


Unnamed: 0,Player,Career_Playoff_PPG_Increase,FG%,3P%,FT%,AST,TRB,STL,BLK
0,Campy Russell,11.292405,0.464000,0.308000,0.78100,3.300000,4.500000,1.300000,0.1
1,Albert King,10.284810,0.475000,0.261000,0.77500,3.700000,5.800000,1.200000,0.5
2,Ray Williams,9.375932,0.461509,0.200366,0.82464,5.754658,4.049068,2.350932,0.5
3,Billy Knight,9.036585,0.533000,0.158000,0.83200,1.900000,5.000000,1.000000,0.1
4,Walter Berry,7.094977,0.563000,0.000000,0.60000,1.500000,5.400000,0.800000,0.9
...,...,...,...,...,...,...,...,...,...
436,Jameer Nelson,0.362381,0.503000,0.453000,0.88700,5.400000,3.500000,1.200000,0.1
437,Kevin Gamble,0.355211,0.587000,0.000000,0.81500,3.100000,3.300000,1.200000,0.4
438,Kendrick Nunn,0.178308,0.439000,0.350000,0.85000,3.300000,2.700000,0.800000,0.2
439,Richard Dumas,0.144746,0.524000,0.333000,0.70700,1.300000,4.600000,1.800000,0.8


In [111]:
result_df

Unnamed: 0,Player,Career_Playoff_PPG_Increase
0,Campy Russell,11.292405
1,Albert King,10.284810
2,Ray Williams,9.375932
3,Billy Knight,9.036585
4,Walter Berry,7.094977
...,...,...
436,Jameer Nelson,0.362381
437,Kevin Gamble,0.355211
438,Kendrick Nunn,0.178308
439,Richard Dumas,0.144746
