In [None]:
# Importing needed packages

import pandas as pd
import numpy as np

In [None]:
def add_year_column(df, year):
    """
    Tag every row of a DataFrame with the same year value.

    The value is broadcast into a column called 'year'; if a column with that
    name already exists it is overwritten.

    Parameters:
    - df (DataFrame): Table to annotate; it is mutated directly.
    - year (int): Value written into each row of the 'year' column.

    Returns:
    - None: The caller's DataFrame is updated in place.
    """
    year_column = 'year'
    # Scalar assignment broadcasts the value to all rows
    df[year_column] = year

In [None]:
def inclusion_criteria(df, min_games_started=25, min_minutes=15, min_games=50):
    """
    Keep only the rows that meet all three minimum thresholds:
    - 'GS' needs to be greater than or equal to min_games_started
    - 'MP' needs to be greater than or equal to min_minutes
    - 'G' needs to be greater than or equal to min_games

    Rows with a missing value in any of these columns are dropped, because a
    comparison against NaN evaluates to False.

    Parameters:
    - df (DataFrame): DataFrame to be filtered; must contain 'GS', 'MP' and 'G'.
    - min_games_started (number): Lower bound (inclusive) for 'GS'. Default 25.
    - min_minutes (number): Lower bound (inclusive) for 'MP'. Default 15.
    - min_games (number): Lower bound (inclusive) for 'G'. Default 50.

    Returns:
    - filtered_df (DataFrame): New DataFrame with the qualifying rows; the
      input is not modified and the original row labels are kept.
    """
    meets_all = (
        (df['GS'] >= min_games_started)
        & (df['MP'] >= min_minutes)
        & (df['G'] >= min_games)
    )
    # Explicit copy so later column assignments on the result neither touch
    # the input nor trigger pandas' SettingWithCopyWarning
    filtered_df = df.loc[meets_all].copy()
    return filtered_df


In [None]:
def add_all_nba_column(df, names_list):
    """
    Add a new column named 'All_NBA' to the DataFrame: 1 when the player's name is in the provided list, 0 otherwise.

    Names are compared after light normalisation so that purely typographic
    differences do not cause a silent miss:
    - diacritics are ignored ("Luka Dončić" matches "Luka Doncic")
    - typographic apostrophes are treated as "'" ("Amar’e" matches "Amar'e")
    - leading/trailing whitespace is ignored
    The values stored in the 'Player' column itself are left unchanged.

    Parameters:
    - df (DataFrame): DataFrame to which the new column will be added; must contain a 'Player' column.
    - names_list (list): List of player names.

    Returns:
    - None: Modifies the DataFrame in place.
    """
    import unicodedata

    def _name_key(name):
        # Non-string entries (e.g. NaN for a missing name) are passed through
        # untouched so they never match a real name
        if not isinstance(name, str):
            return name
        # NFKD splits an accented letter into base letter + combining mark;
        # dropping the combining marks leaves the unaccented spelling
        decomposed = unicodedata.normalize('NFKD', name)
        unaccented = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
        return unaccented.replace('\u2019', "'").replace('\u2018', "'").strip()

    wanted_keys = list({_name_key(name) for name in names_list})
    is_all_nba = df['Player'].map(_name_key).isin(wanted_keys)

    # 1 for yes, 0 for no
    df['All_NBA'] = is_all_nba.astype('int64')

In [None]:
# Read one CSV per season from the working directory, oldest season first.
# Each table is bound to its own name because later cells refer to them individually.

player_data_2004_2005 = pd.read_csv("2004-2005 NBA Player Data.csv")
player_data_2005_2006 = pd.read_csv("2005-2006 NBA Player Data.csv")
player_data_2006_2007 = pd.read_csv("2006-2007 NBA Player Data.csv")
player_data_2007_2008 = pd.read_csv("2007-2008 NBA Player Data.csv")
player_data_2008_2009 = pd.read_csv("2008-2009 NBA Player Data.csv")
player_data_2009_2010 = pd.read_csv("2009-2010 NBA Player Data.csv")
player_data_2010_2011 = pd.read_csv("2010-2011 NBA Player Data.csv")
player_data_2011_2012 = pd.read_csv("2011-2012 NBA Player Data.csv")
player_data_2012_2013 = pd.read_csv("2012-2013 NBA Player Data.csv")
player_data_2013_2014 = pd.read_csv("2013-2014 NBA Player Data.csv")
player_data_2014_2015 = pd.read_csv("2014-2015 NBA Player Data.csv")
player_data_2015_2016 = pd.read_csv("2015-2016 NBA Player Data.csv")
player_data_2016_2017 = pd.read_csv("2016-2017 NBA Player Data.csv")
player_data_2017_2018 = pd.read_csv("2017-2018 NBA Player Data.csv")
player_data_2018_2019 = pd.read_csv("2018-2019 NBA Player Data.csv")
player_data_2019_2020 = pd.read_csv("2019-2020 NBA Player Data.csv")
player_data_2020_2021 = pd.read_csv("2020-2021 NBA Player Data.csv")
player_data_2021_2022 = pd.read_csv("2021-2022 NBA Player Data.csv")
player_data_2022_2023 = pd.read_csv("2022-2023 NBA Player Data.csv")


In [None]:
def remove_star(df):
    """
    Remove every '*' character from the names in the 'Player' column.

    The work is done with vectorised string operations rather than a Python
    loop over rows. Rows are selected by a boolean mask (position), not by
    index label, so a DataFrame with repeated index labels is handled
    correctly. Entries that are missing or not strings are left untouched.

    Parameters:
    - df (DataFrame): Table with a 'Player' column; modified in place.

    Returns:
    - df (DataFrame): The same DataFrame object that was passed in.
    """
    # Nothing to clean; also avoids using the .str accessor on an empty column
    if len(df) == 0:
        return df

    player_names = df['Player']
    # regex=False: '*' is a literal character here, not a regex quantifier.
    # na=False: missing / non-string entries count as "no star".
    has_star = player_names.str.contains('*', regex=False, na=False)
    cleaned = player_names[has_star].str.replace('*', '', regex=False)
    # Assign the raw values so pandas does not try to align on index labels
    df.loc[has_star, 'Player'] = cleaned.to_numpy()
    return df

# Season tables, ordered from the oldest season to the newest
list_of_dataframes = [
    player_data_2004_2005,
    player_data_2005_2006,
    player_data_2006_2007,
    player_data_2007_2008,
    player_data_2008_2009,
    player_data_2009_2010,
    player_data_2010_2011,
    player_data_2011_2012,
    player_data_2012_2013,
    player_data_2013_2014,
    player_data_2014_2015,
    player_data_2015_2016,
    player_data_2016_2017,
    player_data_2017_2018,
    player_data_2018_2019,
    player_data_2019_2020,
    player_data_2020_2021,
    player_data_2021_2022,
    player_data_2022_2023,
]

# Clean the player names of every season table. remove_star edits each
# DataFrame in place and returns that same object, so the entries of
# modified_dataframes are the tables listed above.
modified_dataframes = list(map(remove_star, list_of_dataframes))


In [None]:
# Stamp each season table with the second year of its season label
# (e.g. the 2022-2023 table gets year 2023)
for season_frame, season_year in (
    (player_data_2022_2023, 2023),
    (player_data_2021_2022, 2022),
    (player_data_2020_2021, 2021),
    (player_data_2019_2020, 2020),
    (player_data_2018_2019, 2019),
    (player_data_2017_2018, 2018),
    (player_data_2016_2017, 2017),
    (player_data_2015_2016, 2016),
    (player_data_2014_2015, 2015),
    (player_data_2013_2014, 2014),
    (player_data_2012_2013, 2013),
    (player_data_2011_2012, 2012),
    (player_data_2010_2011, 2011),
    (player_data_2009_2010, 2010),
    (player_data_2008_2009, 2009),
    (player_data_2007_2008, 2008),
    (player_data_2006_2007, 2007),
    (player_data_2005_2006, 2006),
    (player_data_2004_2005, 2005),
):
    add_year_column(season_frame, season_year)

In [None]:
# Names flagged as All_NBA in the 2022-2023 table (15 entries).
# NOTE(review): "Luka Doncic" / "Nikola Jokic" are written without diacritics
# here but with them in All_NBA_2022 — confirm which spelling the CSV uses.
All_NBA_2023 = [
    "Giannis Antetokounmpo",
    "Jayson Tatum",
    "Joel Embiid",
    "Shai Gilgeous-Alexander",
    "Luka Doncic",
    "Jimmy Butler",
    "Jaylen Brown",
    "Nikola Jokic",
    "Donovan Mitchell",
    "Stephen Curry",
    "Julius Randle",
    "LeBron James",
    "Domantas Sabonis",
    "De'Aaron Fox",
    "Damian Lillard",
]

# Names flagged as All_NBA in the 2021-2022 table (15 entries).
# Every entry ends with a comma: a missing comma between two adjacent string
# literals makes Python join them into a single (non-matching) name.
# NOTE(review): "Luka Dončić" / "Nikola Jokić" carry diacritics here but not
# in All_NBA_2023, All_NBA_2021 or All_NBA_2020 — confirm which spelling the
# CSV uses.
All_NBA_2022 = [
    "Giannis Antetokounmpo",
    "Devin Booker",
    "Luka Dončić",
    "Nikola Jokić",
    "Jayson Tatum",
    "Stephen Curry",
    "DeMar DeRozan",
    "Kevin Durant",
    "Joel Embiid",
    "Ja Morant",
    "LeBron James",
    "Chris Paul",
    "Pascal Siakam",
    "Karl-Anthony Towns",
    "Trae Young",
]

# Names flagged as All_NBA in the 2020-2021 table (15 entries).
# NOTE(review): "Nikola Jokic" / "Luka Doncic" are written without diacritics
# here but with them in All_NBA_2022 — confirm which spelling the CSV uses.
All_NBA_2021 = [
    "Giannis Antetokounmpo",
    "Kawhi Leonard",
    "Nikola Jokic",
    "Stephen Curry",
    "Luka Doncic",
    "Julius Randle",
    "LeBron James",
    "Joel Embiid",
    "Chris Paul",
    "Damian Lillard",
    "Jimmy Butler",
    "Paul George",
    "Rudy Gobert",
    "Bradley Beal",
    "Kyrie Irving",
]

# Names flagged as All_NBA in the 2019-2020 table (15 entries).
# NOTE(review): "Luka Doncic" / "Nikola Jokic" are written without diacritics
# here but with them in All_NBA_2022 — confirm which spelling the CSV uses.
All_NBA_2020 = [
    "Giannis Antetokounmpo",
    "LeBron James",
    "Anthony Davis",
    "James Harden",
    "Luka Doncic",
    "Kawhi Leonard",
    "Pascal Siakam",
    "Nikola Jokic",
    "Damian Lillard",
    "Chris Paul",
    "Jayson Tatum",
    "Jimmy Butler",
    "Rudy Gobert",
    "Ben Simmons",
    "Russell Westbrook",
]

# Names flagged as All_NBA in the 2018-2019 table (15 entries).
# NOTE(review): "Nikola Jokić" carries a diacritic here but is written
# "Nikola Jokic" in All_NBA_2023, All_NBA_2021 and All_NBA_2020 — confirm
# which spelling the CSV uses.
All_NBA_2019 = [
    "Giannis Antetokounmpo",
    "Paul George",
    "Nikola Jokić",
    "James Harden",
    "Stephen Curry",
    "Kevin Durant",
    "Kawhi Leonard",
    "Joel Embiid",
    "Damian Lillard",
    "Kyrie Irving",
    "Blake Griffin",
    "LeBron James",
    "Rudy Gobert",
    "Russell Westbrook",
    "Kemba Walker",
]

# Names flagged as All_NBA in the 2017-2018 table (15 entries)
All_NBA_2018 = [
    "LeBron James",
    "Kevin Durant",
    "Anthony Davis",
    "James Harden",
    "Damian Lillard",
    "LaMarcus Aldridge",
    "Giannis Antetokounmpo",
    "Joel Embiid",
    "DeMar DeRozan",
    "Russell Westbrook",
    "Paul George",
    "Jimmy Butler",
    "Karl-Anthony Towns",
    "Stephen Curry",
    "Victor Oladipo",
]

# Names flagged as All_NBA in the 2016-2017 table (15 entries)
All_NBA_2017 = [
    "LeBron James",
    "Kawhi Leonard",
    "Anthony Davis",
    "James Harden",
    "Russell Westbrook",
    "Giannis Antetokounmpo",
    "Kevin Durant",
    "Rudy Gobert",
    "Stephen Curry",
    "Isaiah Thomas",
    "Draymond Green",
    "Jimmy Butler",
    "DeAndre Jordan",
    "John Wall",
    "DeMar DeRozan",
]

# Names flagged as All_NBA in the 2015-2016 table (15 entries)
All_NBA_2016 = [
    "LeBron James",
    "Kawhi Leonard",
    "DeAndre Jordan",
    "Stephen Curry",
    "Russell Westbrook",
    "Kevin Durant",
    "Draymond Green",
    "DeMarcus Cousins",
    "Chris Paul",
    "Damian Lillard",
    "Paul George",
    "LaMarcus Aldridge",
    "Andre Drummond",
    "Klay Thompson",
    "Kyle Lowry",
]

# Names flagged as All_NBA in the 2014-2015 table (15 entries)
All_NBA_2015 = [
    "LeBron James",
    "Anthony Davis",
    "Marc Gasol",
    "Stephen Curry",
    "James Harden",
    "LaMarcus Aldridge",
    "DeMarcus Cousins",
    "Pau Gasol",
    "Russell Westbrook",
    "Chris Paul",
    "Blake Griffin",
    "Tim Duncan",
    "DeAndre Jordan",
    "Klay Thompson",
    "Kyrie Irving",
]

# Names flagged as All_NBA in the 2013-2014 table (15 entries)
All_NBA_2014 = [
    "Kevin Durant",
    "LeBron James",
    "Joakim Noah",
    "James Harden",
    "Chris Paul",
    "Blake Griffin",
    "Kevin Love",
    "Dwight Howard",
    "Stephen Curry",
    "Tony Parker",
    "Paul George",
    "LaMarcus Aldridge",
    "Al Jefferson",
    "Goran Dragic",
    "Damian Lillard",
]

# Names flagged as All_NBA in the 2012-2013 table (15 entries)
All_NBA_2013 = [
    "LeBron James",
    "Kevin Durant",
    "Tim Duncan",
    "Kobe Bryant",
    "Chris Paul",
    "Carmelo Anthony",
    "Blake Griffin",
    "Marc Gasol",
    "Tony Parker",
    "Russell Westbrook",
    "David Lee",
    "Paul George",
    "Dwight Howard",
    "Dwyane Wade",
    "James Harden",
]

# Names flagged as All_NBA in the 2011-2012 table (15 entries)
All_NBA_2012 = [
    "Kevin Durant",
    "LeBron James",
    "Dwight Howard",
    "Kobe Bryant",
    "Chris Paul",
    "Kevin Love",
    "Blake Griffin",
    "Andrew Bynum",
    "Tony Parker",
    "Russell Westbrook",
    "Carmelo Anthony",
    "Dirk Nowitzki",
    "Tyson Chandler",
    "Dwyane Wade",
    "Rajon Rondo",
]

# Names flagged as All_NBA in the 2010-2011 table (15 entries).
# NOTE(review): "Amar’e Stoudemire" uses the typographic apostrophe (U+2019);
# the lookup is an exact string match, so confirm the CSV uses the same
# character.
All_NBA_2011 = [
    "Kevin Durant",
    "LeBron James",
    "Dwight Howard",
    "Kobe Bryant",
    "Derrick Rose",
    "Pau Gasol",
    "Dirk Nowitzki",
    "Amar’e Stoudemire",
    "Dwyane Wade",
    "Russell Westbrook",
    "LaMarcus Aldridge",
    "Zach Randolph",
    "Al Horford",
    "Manu Ginobili",
    "Chris Paul",
]

# Names flagged as All_NBA in the 2009-2010 table (15 entries).
# NOTE(review): "Amar’e Stoudemire" uses the typographic apostrophe (U+2019);
# the lookup is an exact string match, so confirm the CSV uses the same
# character.
All_NBA_2010 = [
    "Kevin Durant",
    "LeBron James",
    "Dwight Howard",
    "Kobe Bryant",
    "Dwyane Wade",
    "Carmelo Anthony",
    "Dirk Nowitzki",
    "Amar’e Stoudemire",
    "Steve Nash",
    "Deron Williams",
    "Tim Duncan",
    "Pau Gasol",
    "Andrew Bogut",
    "Joe Johnson",
    "Brandon Roy",
]

# Names flagged as All_NBA in the 2008-2009 table (15 entries).
# NOTE(review): "Shaquille O’Neal" uses the typographic apostrophe (U+2019);
# the lookup is an exact string match, so confirm the CSV uses the same
# character.
All_NBA_2009 = [
    "Dirk Nowitzki",
    "LeBron James",
    "Dwight Howard",
    "Kobe Bryant",
    "Dwyane Wade",
    "Tim Duncan",
    "Paul Pierce",
    "Yao Ming",
    "Chris Paul",
    "Brandon Roy",
    "Carmelo Anthony",
    "Pau Gasol",
    "Shaquille O’Neal",
    "Chauncey Billups",
    "Tony Parker",
]

# Names flagged as All_NBA in the 2007-2008 table (15 entries).
# NOTE(review): "Amar’e Stoudemire" uses the typographic apostrophe (U+2019);
# the lookup is an exact string match, so confirm the CSV uses the same
# character.
All_NBA_2008 = [
    "Kevin Garnett",
    "LeBron James",
    "Dwight Howard",
    "Kobe Bryant",
    "Chris Paul",
    "Tim Duncan",
    "Dirk Nowitzki",
    "Amar’e Stoudemire",
    "Steve Nash",
    "Deron Williams",
    "Carlos Boozer",
    "Paul Pierce",
    "Yao Ming",
    "Manu Ginobili",
    "Tracy McGrady",
]

# Names flagged as All_NBA in the 2006-2007 table (15 entries).
# NOTE(review): "Amar’e Stoudemire" uses the typographic apostrophe (U+2019);
# the lookup is an exact string match, so confirm the CSV uses the same
# character.
All_NBA_2007 = [
    "Tim Duncan",
    "Dirk Nowitzki",
    "Amar’e Stoudemire",
    "Kobe Bryant",
    "Steve Nash",
    "Chris Bosh",
    "LeBron James",
    "Yao Ming",
    "Gilbert Arenas",
    "Tracy McGrady",
    "Carmelo Anthony",
    "Kevin Garnett",
    "Dwight Howard",
    "Chauncey Billups",
    "Dwyane Wade",
]

# Names flagged as All_NBA in the 2005-2006 table (15 entries).
# NOTE(review): "Shaquille O’Neal" uses the typographic apostrophe (U+2019);
# the lookup is an exact string match, so confirm the CSV uses the same
# character.
All_NBA_2006 = [
    "LeBron James",
    "Dirk Nowitzki",
    "Shaquille O’Neal",
    "Kobe Bryant",
    "Steve Nash",
    "Elton Brand",
    "Tim Duncan",
    "Ben Wallace",
    "Chauncey Billups",
    "Dwyane Wade",
    "Carmelo Anthony",
    "Shawn Marion",
    "Yao Ming",
    "Gilbert Arenas",
    "Allen Iverson",
]

# Names flagged as All_NBA in the 2004-2005 table (15 entries).
# NOTE(review): "Shaquille O’Neal" and "Amar’e Stoudemire" use the typographic
# apostrophe (U+2019); the lookup is an exact string match, so confirm the CSV
# uses the same character.
All_NBA_2005 = [
    "Tim Duncan",
    "Dirk Nowitzki",
    "Shaquille O’Neal",
    "Allen Iverson",
    "Steve Nash",
    "Kevin Garnett",
    "LeBron James",
    "Amar’e Stoudemire",
    "Ray Allen",
    "Dwyane Wade",
    "Shawn Marion",
    "Tracy McGrady",
    "Ben Wallace",
    "Gilbert Arenas",
    "Kobe Bryant",
]


In [None]:
# Flag the All-NBA players in every season table, pairing each table with the
# name list for the same season
for season_frame, all_nba_names in (
    (player_data_2022_2023, All_NBA_2023),
    (player_data_2021_2022, All_NBA_2022),
    (player_data_2020_2021, All_NBA_2021),
    (player_data_2019_2020, All_NBA_2020),
    (player_data_2018_2019, All_NBA_2019),
    (player_data_2017_2018, All_NBA_2018),
    (player_data_2016_2017, All_NBA_2017),
    (player_data_2015_2016, All_NBA_2016),
    (player_data_2014_2015, All_NBA_2015),
    (player_data_2013_2014, All_NBA_2014),
    (player_data_2012_2013, All_NBA_2013),
    (player_data_2011_2012, All_NBA_2012),
    (player_data_2010_2011, All_NBA_2011),
    (player_data_2009_2010, All_NBA_2010),
    (player_data_2008_2009, All_NBA_2009),
    (player_data_2007_2008, All_NBA_2008),
    (player_data_2006_2007, All_NBA_2007),
    (player_data_2005_2006, All_NBA_2006),
    (player_data_2004_2005, All_NBA_2005),
):
    add_all_nba_column(season_frame, all_nba_names)

In [None]:
# Stack the per-season tables, oldest season first, into a single DataFrame.
# NOTE(review): ignore_index is not set, so every table keeps its own row
# labels and the same labels repeat across seasons — confirm that is intended.
NBA_Player_Data = pd.concat(
    objs=[
        player_data_2004_2005,
        player_data_2005_2006,
        player_data_2006_2007,
        player_data_2007_2008,
        player_data_2008_2009,
        player_data_2009_2010,
        player_data_2010_2011,
        player_data_2011_2012,
        player_data_2012_2013,
        player_data_2013_2014,
        player_data_2014_2015,
        player_data_2015_2016,
        player_data_2016_2017,
        player_data_2017_2018,
        player_data_2018_2019,
        player_data_2019_2020,
        player_data_2020_2021,
        player_data_2021_2022,
        player_data_2022_2023,
    ]
)

In [None]:
# Keep only the rows that pass the thresholds checked by inclusion_criteria;
# the combined table is replaced by its filtered version
NBA_Player_Data = inclusion_criteria(NBA_Player_Data)

In [None]:
# Show the filtered table as this cell's output
NBA_Player_Data

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,TRB,AST,STL,BLK,TOV,PF,PTS,Player-additional,year,All_NBA
0,1,Shareef Abdur-Rahim,PF,28,POR,54,49,34.6,6.2,12.4,...,7.3,2.1,0.9,0.5,2.2,2.8,16.8,abdursh01,2005,0
5,4,Ray Allen,SG,29,SEA,78,78,39.3,8.2,19.2,...,4.4,3.7,1.1,0.1,2.2,2.1,23.9,allenra02,2005,1
6,5,Tony Allen,SG,23,BOS,77,34,16.4,2.4,5.0,...,2.9,0.8,1.0,0.3,1.0,2.0,6.4,allento01,2005,0
7,6,Rafer Alston,PG,28,TOR,80,78,34.0,5.0,12.2,...,3.5,6.4,1.5,0.1,2.1,2.7,14.2,alstora01,2005,0
16,11,Carmelo Anthony,SF,20,DEN,75,75,34.8,7.1,16.4,...,5.7,2.6,0.9,0.4,3.0,3.1,20.8,anthoca01,2005,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646,509,Derrick White,SG,28,BOS,82,70,28.3,4.3,9.2,...,3.6,3.9,0.7,0.9,1.2,2.2,12.4,,2023,0
655,518,Jalen Williams,SG,21,OKC,75,62,30.3,5.5,10.6,...,4.5,3.3,1.4,0.5,1.6,2.5,14.1,,2023,0
660,523,Patrick Williams,PF,21,CHI,82,65,28.3,3.8,8.3,...,4.0,1.2,0.9,0.9,1.2,1.8,10.2,,2023,0
675,536,Trae Young,PG,24,ATL,73,73,34.8,8.2,19.0,...,3.0,10.2,1.1,0.1,4.1,1.4,26.2,,2023,0


In [None]:
# Write the filtered table to a CSV file in the working directory.
# No index argument is passed, so the row labels are written as the first column.
NBA_Player_Data.to_csv('NBA_Player_Data.csv')