In [1]:
import pandas as pd
import numpy as np

In [2]:
all_star = pd.read_csv("final_data_with_stats_2023_2025.csv")

In [16]:
# Combine first and last name into full_name
all_star['Name'] = all_star['first'].str.strip() + ' ' + all_star['last'].str.strip()

# Create a new DataFrame with only full_name and year
name_year_df = all_star[['Name', 'year']].copy()

# Optional: drop duplicates if you only want one entry per player-year
name_year_df = name_year_df.drop_duplicates()

In [17]:
name_year_df.rename(columns={'year': 'Season'}, inplace=True)

In [18]:
name_year_df.head(10)

Unnamed: 0,Name,Season
0,Julius Erving,1980
1,Eddie Johnson,1980
2,Artis Gilmore,1980
3,Reggie Theus,1980
4,Larry Bird,1980
5,Robert Parish,1980
6,Nate Archibald,1980
7,Micheal Ray Richardson,1980
8,Marques Johnson,1980
9,Bobby Jones,1980


In [21]:
subset_df = name_year_df[(name_year_df['Season'] >= 2000) & (name_year_df['Season'] <= 2025)]

In [22]:
subset_df = subset_df.reset_index(drop=True)

In [23]:
subset_df.head()

Unnamed: 0,Name,Season
0,Allen Iverson,2000
1,Vince Carter,2000
2,Tracy McGrady,2000
3,Antonio Davis,2000
4,Anthony Mason,2000


In [9]:
subset_df.to_csv("all_star.csv", index=False)

In [10]:
all_stats = pd.read_csv("BBGM_League_1_all_seasons_Average_Stats.csv")

In [11]:
subset_stats = all_stats[(all_stats['Season'] >= 2000) & (all_stats['Season'] <= 2025)]

In [12]:
subset_stats.head()

Unnamed: 0,pid,Name,Pos,DraftPick,Age,Salary,Team,Season,G,GS,...,Ins,Dnk,FT.1,2Pt,3Pt,oIQ,dIQ,Drb,Pss,Reb
842,11,Ray Allen,SG,5.0,25,9.0,MIL,2000,82,82.0,...,51,60,78,76,75,63,52,67,61,53
843,36,Charles Barkley,F,5.0,37,9.0,HOU,2000,20,18.0,...,63,62,36,39,40,61,57,57,56,80
844,61,Chauncey Billups,G,3.0,24,3.1,DEN,2000,13,5.0,...,44,43,55,45,57,45,41,60,57,48
845,103,Kobe Bryant,GF,13.0,22,9.0,LAL,2000,66,62.0,...,55,60,64,68,36,57,50,70,65,51
846,122,Vince Carter,GF,5.0,23,2.27,TOR,2000,82,82.0,...,54,68,60,78,51,64,48,67,59,49


### Merge

In [27]:
# Create a set of (Name, Season) pairs from the All-Star subset
all_star_set = set(zip(subset_df['Name'], subset_df['Season']))

# Add a new column to mark All-Star status
subset_stats['is_all_star'] = subset_stats.apply(
    lambda row: 1 if (row['Name'], row['Season']) in all_star_set else 0,
    axis=1
)

# Preview
subset_stats.head(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_stats['is_all_star'] = subset_stats.apply(


Unnamed: 0,pid,Name,Pos,DraftPick,Age,Salary,Team,Season,G,GS,...,Dnk,FT.1,2Pt,3Pt,oIQ,dIQ,Drb,Pss,Reb,is_all_star
842,11,Ray Allen,SG,5.0,25,9.0,MIL,2000,82,82.0,...,60,78,76,75,63,52,67,61,53,1
843,36,Charles Barkley,F,5.0,37,9.0,HOU,2000,20,18.0,...,62,36,39,40,61,57,57,56,80,0
844,61,Chauncey Billups,G,3.0,24,3.1,DEN,2000,13,5.0,...,43,55,45,57,45,41,60,57,48,0
845,103,Kobe Bryant,GF,13.0,22,9.0,LAL,2000,66,62.0,...,60,64,68,36,57,50,70,65,51,1
846,122,Vince Carter,GF,5.0,23,2.27,TOR,2000,82,82.0,...,68,60,78,51,64,48,67,59,49,1
847,194,Vlade Divac,C,26.0,32,8.84,SAC,2000,82,81.0,...,57,39,40,40,44,57,61,62,58,1
848,227,Tim Duncan,FC,1.0,24,3.86,SA,2000,74,74.0,...,69,56,55,40,58,48,62,61,74,1
849,245,Patrick Ewing,FC,1.0,38,15.0,NYC,2000,62,62.0,...,66,46,44,20,52,68,47,42,79,0
850,270,Kevin Garnett,FC,5.0,24,16.81,MIN,2000,81,81.0,...,54,55,69,43,53,56,66,68,73,1
851,314,Tim Hardaway,PG,14.0,34,4.8,MIA,2000,52,52.0,...,34,66,63,80,62,51,78,81,53,0


In [30]:
subset_stats.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3954 entries, 842 to 5448
Data columns (total 90 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   pid          3954 non-null   int64  
 1   Name         3954 non-null   object 
 2   Pos          3954 non-null   object 
 3   DraftPick    3261 non-null   float64
 4   Age          3954 non-null   int64  
 5   Salary       3954 non-null   float64
 6   Team         3954 non-null   object 
 7   Season       3954 non-null   int64  
 8   G            3954 non-null   int64  
 9   GS           3954 non-null   float64
 10  MP           3954 non-null   float64
 11  FG           3954 non-null   float64
 12  FGA          3954 non-null   float64
 13  FG%          3954 non-null   float64
 14  3P           3954 non-null   float64
 15  3PA          3954 non-null   float64
 16  3P%          3954 non-null   float64
 17  2P           3954 non-null   float64
 18  2PA          3954 non-null   float64
 19  2P%      

In [40]:
# Drop all columns that contain at least one NaN
cleaned_df = subset_stats.dropna(axis=1)
cleaned_df = cleaned_df.loc[:, cleaned_df.nunique() > 1]
cleaned_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3954 entries, 842 to 5448
Data columns (total 77 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   pid          3954 non-null   int64  
 1   Name         3954 non-null   object 
 2   Pos          3954 non-null   object 
 3   Age          3954 non-null   int64  
 4   Salary       3954 non-null   float64
 5   Team         3954 non-null   object 
 6   Season       3954 non-null   int64  
 7   G            3954 non-null   int64  
 8   GS           3954 non-null   float64
 9   MP           3954 non-null   float64
 10  FG           3954 non-null   float64
 11  FGA          3954 non-null   float64
 12  FG%          3954 non-null   float64
 13  3P           3954 non-null   float64
 14  3PA          3954 non-null   float64
 15  3P%          3954 non-null   float64
 16  2P           3954 non-null   float64
 17  2PA          3954 non-null   float64
 18  2P%          3954 non-null   float64
 19  eFG%     

In [41]:
pd.set_option('display.max_columns', None)
cleaned_df.describe()

Unnamed: 0,pid,Age,Salary,Season,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,TOV,STL,BLK,BA,PF,PTS,TD,5x5,PER,EWA,TS%,3PAr,FT/FGA,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,+/-,On-Off,DRtg,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Ovr,Pot,Hgt,Str,Spd,Jmp,End,Ins,Dnk,FT.1,2Pt,3Pt,oIQ,dIQ,Drb,Pss,Reb,is_all_star
count,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0,3954.0
mean,420.35129,26.58346,8.751434,2019.484573,52.060698,31.953465,23.547723,4.150319,8.891472,46.228036,1.041422,2.913193,30.350487,3.108897,5.978279,51.732805,52.155711,1.96139,2.540755,73.400295,1.059872,3.34425,4.404122,2.531051,1.400663,0.774187,0.518303,0.494193,1.881289,11.303451,0.35913,0.000506,15.080602,3.339056,55.487037,0.343246,0.273767,5.285635,15.603338,10.455134,15.157233,1.630172,1.944841,12.455524,19.923318,-0.477213,0.055438,110.983561,1.975089,1.510116,3.485205,0.099673,-0.251897,0.069196,-0.182701,1.108675,53.31133,58.616085,49.864188,50.698027,54.782246,54.095346,46.21649,48.683612,58.62873,49.922104,55.232423,46.637329,53.403895,45.735205,50.048306,48.783763,58.391502,0.120637
std,244.280998,4.52642,10.014563,5.834846,24.408911,29.945458,9.727946,2.558691,5.347766,9.400854,0.908936,2.344366,14.256901,2.241481,4.311024,10.937358,9.33086,1.780409,2.213891,17.820246,0.866337,2.051091,2.739996,2.146504,0.938329,0.45943,0.50972,0.335639,0.774093,7.180483,2.004551,0.022488,5.978524,4.583585,8.898369,0.219444,0.172721,4.207345,6.87795,4.919959,9.831043,0.983099,1.916039,5.474222,6.122317,12.056732,12.171109,6.004929,2.514623,1.327319,3.543101,0.090776,3.500047,1.684406,4.342403,1.772538,9.512155,8.898779,12.892304,14.821097,11.71406,13.666745,18.13025,16.247526,16.440842,13.912651,17.16008,17.295909,11.253512,10.266001,13.631166,13.059925,12.877008,0.325747
min,1.0,20.0,0.0,2000.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-45.2,-3.66806,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-300.0,-303.0,44.0,-3.0,-0.5,-1.9,-2.4,-28.1,-14.5,-42.6,-2.0,25.0,31.0,7.0,7.0,3.0,0.0,0.0,7.0,2.0,0.0,0.0,0.0,16.0,15.0,8.0,13.0,15.0,0.0
25%,225.0,23.0,1.91,2017.0,31.0,3.0,16.099893,2.097307,4.647933,41.979658,0.25,0.905495,27.272727,1.32103,2.567419,47.252747,48.614242,0.666667,0.921182,68.896396,0.450773,1.891155,2.461538,1.0,0.675676,0.444444,0.180082,0.238273,1.4,5.622466,0.0,0.0,11.5,0.11798,52.404126,0.175244,0.169358,2.3,10.8,6.8,7.8,1.1,0.7,9.697747,15.5,-5.1,-4.0,107.0,0.2,0.4,0.7,0.059667,-2.1,-0.8,-2.3,0.0,45.0,53.0,40.0,40.0,47.0,46.0,35.0,37.0,48.0,42.0,44.0,37.0,46.0,39.0,40.0,40.0,50.0,0.0
50%,426.0,26.0,4.375,2021.0,60.0,22.0,24.911837,3.664414,7.928571,45.541394,0.888194,2.58435,33.953502,2.579329,4.876524,51.586624,52.393419,1.402597,1.890774,77.272727,0.796699,2.961538,3.8375,1.811655,1.201351,0.711325,0.363636,0.433333,1.923077,9.897428,0.0,0.0,14.7,1.49403,56.17999,0.352099,0.249483,3.9,14.1,9.2,12.3,1.5,1.4,11.982173,19.1,0.4,0.9,112.0,1.2,1.2,2.5,0.102352,-0.3,0.0,-0.2,0.4,54.0,59.0,48.0,50.0,55.0,55.0,45.0,46.0,58.0,51.0,55.0,47.0,53.0,45.0,50.0,47.0,57.0,0.0
75%,613.0,29.0,12.6,2024.0,73.0,63.0,31.82489,5.839451,12.604741,50.0,1.632218,4.475307,38.113093,4.475995,8.602206,56.896552,56.547248,2.683448,3.425891,83.333333,1.394657,4.33701,5.687662,3.5,1.971109,1.029412,0.669831,0.701493,2.4,16.026764,0.0,0.0,18.6,4.94191,59.73945,0.493231,0.347456,7.3,19.5,13.2,20.5,2.0,2.6,14.585411,23.8,5.2,5.3,115.0,2.9,2.3,5.2,0.147151,1.6,0.9,2.0,1.6,60.0,65.0,60.0,61.0,63.0,63.0,57.0,59.0,69.0,60.0,66.0,58.0,61.0,52.0,58.0,56.0,66.0,0.0
max,852.0,43.0,59.32,2025.0,82.0,82.0,43.7,12.225,27.816667,100.0,5.349206,13.179487,100.0,12.101266,23.35,100.0,150.0,10.230769,13.135135,100.0,5.432099,11.410959,15.987179,11.738462,5.728395,3.013158,3.826087,2.147541,5.0,36.128205,42.0,1.0,65.6,25.769075,150.0,1.0,2.666667,53.9,100.0,56.4,65.0,24.2,44.1,100.0,72.5,93.3,97.4,127.0,14.8,9.1,20.2,0.8,37.9,32.7,48.6,11.8,82.0,86.0,90.0,100.0,92.0,96.0,100.0,100.0,100.0,85.0,100.0,97.0,100.0,88.0,100.0,100.0,98.0,1.0


In [42]:
cleaned_df.to_csv("nba_stats_2000-2025.csv", index=False)