# Performing EDA on NBA stats and awards data

### Importing packages

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)


### Importing original data

In [118]:
def _read_indexed(csv_path):
    """Read a CSV file, using its first column as the DataFrame index."""
    return pd.read_csv(csv_path, index_col=[0])


# Award voting tables (one per award)
mvp_df = _read_indexed("MVP_Data/mvp_awards.csv")
roy_df = _read_indexed("MVP_Data/roy_awards.csv")
dpoy_df = _read_indexed("MVP_Data/dpoy_awards.csv")
smoy_df = _read_indexed("MVP_Data/smoy_awards.csv")

# Player per-game stats, player advanced stats, and team standings
ppg_df = _read_indexed("NBA_Stats/ppg_data.csv")
advanced_stats_df = _read_indexed("NBA_Stats/advanced_stats_data.csv")
team_standings_df = _read_indexed("NBA_Stats/team_standings.csv")

In [27]:
# Display label for each loaded table; df_names and df_list are kept in the
# same order so they can be paired positionally.
df_names = ["MVP", "ROY", "DPOY", "SMOY", "PPG", "ADVANCED_STATS", "TEAM_STANDINGS"]

df_list = [
    mvp_df,
    roy_df,
    dpoy_df,
    smoy_df,
    ppg_df,
    advanced_stats_df,
    team_standings_df,
]

# Report the size of every table.
for label, frame in zip(df_names, df_list):
    n_rows, n_cols = frame.shape
    print(f"{label} dataframe contains: {n_rows} rows & {n_cols} columns\n")


MVP dataframe contains: 339 rows & 21 columns

ROY dataframe contains: 183 rows & 21 columns

DPOY dataframe contains: 400 rows & 24 columns

SMOY dataframe contains: 354 rows & 21 columns

PPG dataframe contains: 14423 rows & 31 columns

ADVANCED_STATS dataframe contains: 14423 rows & 28 columns

TEAM_STANDINGS dataframe contains: 715 rows & 29 columns



In [5]:
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly: wrapping it in print() emits a stray "None" after every
# report. Each report is labelled so it is clear which table it describes.
for name, df in zip(df_names, df_list):
    print(f"--- {name} ---")
    df.info()
    print()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 339 entries, 0 to 338
Data columns (total 21 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Rank     339 non-null    object 
 1   Player   339 non-null    object 
 2   Age      339 non-null    int64  
 3   Tm       339 non-null    object 
 4   First    339 non-null    float64
 5   Pts Won  339 non-null    float64
 6   Pts Max  339 non-null    int64  
 7   Share    339 non-null    float64
 8   G        339 non-null    int64  
 9   MP       339 non-null    float64
 10  PTS      339 non-null    float64
 11  TRB      339 non-null    float64
 12  AST      339 non-null    float64
 13  STL      339 non-null    float64
 14  BLK      339 non-null    float64
 15  FG%      339 non-null    float64
 16  3P%      330 non-null    float64
 17  FT%      339 non-null    float64
 18  WS       339 non-null    float64
 19  WS/48    339 non-null    float64
 20  Year     339 non-null    int64  
dtypes: float64(14), 

In [6]:
# Percentage of missing values per column, for every table in df_list.
for frame in df_list:
    missing_pct = frame.isna().sum() / len(frame) * 100
    print(missing_pct.round(2))

Rank       0.00
Player     0.00
Age        0.00
Tm         0.00
First      0.00
Pts Won    0.00
Pts Max    0.00
Share      0.00
G          0.00
MP         0.00
PTS        0.00
TRB        0.00
AST        0.00
STL        0.00
BLK        0.00
FG%        0.00
3P%        2.65
FT%        0.00
WS         0.00
WS/48      0.00
Year       0.00
dtype: float64
Rank       0.00
Player     0.00
Age        0.00
Tm         0.00
First      0.00
Pts Won    0.00
Pts Max    0.00
Share      0.00
G          0.00
MP         0.00
PTS        0.00
TRB        0.00
AST        0.00
STL        0.00
BLK        0.00
FG%        0.00
3P%        3.28
FT%        0.00
WS         0.00
WS/48      0.00
Year       0.00
dtype: float64
Rank       0.00
Player     0.00
Age        0.00
Tm         0.00
First      0.00
Pts Won    0.00
Pts Max    0.00
Share      0.00
G          0.00
MP         0.00
PTS        0.00
TRB        0.00
AST        0.00
STL        0.00
BLK        0.00
FG%        0.00
3P%        7.75
FT%        0.00
WS        

### Performing data pre-processing

#### 1. Merging players dataframe with the advanced stats table

In [56]:
# Join the advanced stats onto the per-game stats, one row per
# (Player, Year, Tm). Columns that exist in both tables get the "_remove"
# suffix on the right-hand copy so the duplicates can be dropped afterwards.
merge_keys = ['Player', 'Year', 'Tm']
players_df = ppg_df.merge(
    advanced_stats_df,
    how='left',
    on=merge_keys,
    suffixes=('', '_remove'),
)

duplicate_cols = [col for col in players_df.columns if "remove" in col]
players_df = players_df.drop(columns=duplicate_cols)

# Final column order: identifiers + per-game stats, then advanced stats, then season.
per_game_cols = [
    'Player', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%',
    '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%',
    'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
]
advanced_cols = [
    'PER', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%',
    'BLK%', 'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM',
    'BPM', 'VORP',
]
players_df = players_df[per_game_cols + advanced_cols + ['Year']]

#### 2. Merging players + advanced stats dataframe with the team standings table

In [8]:
# Creating functions to rename NBA teams that have updated their team names to be consistent across both tables
def rename_teams_short(col):
    """Map a team abbreviation onto the single code used for that franchise.

    'CHO' and 'CHH' become 'CHA'; 'NOH' and 'NOK' become 'NOP'; 'TOT'
    becomes 'DEN'. Any other value is returned unchanged.
    """
    # NOTE(review): 'TOT' is presumably the combined row for a player who
    # changed teams mid-season; relabelling it as 'DEN' attributes those rows
    # to Denver. The positional short->long team mapping built later relies on
    # 'TOT' not appearing as its own code, so confirm before changing this.
    canonical_codes = {
        'CHO': "CHA",
        'CHH': "CHA",
        'NOH': "NOP",
        'NOK': "NOP",
        'TOT': "DEN",
    }
    return canonical_codes.get(col, col)

def rename_teams_long(col):
    """Map a full team name onto the single name used for that franchise.

    "Charlotte Bobcats" becomes "Charlotte Hornets"; "New Orleans Hornets"
    and "New Orleans/Oklahoma City Hornets" become "New Orleans Pelicans".
    Any other value is returned unchanged.
    """
    canonical_names = {
        "Charlotte Bobcats": "Charlotte Hornets",
        "New Orleans Hornets": "New Orleans Pelicans",
        "New Orleans/Oklahoma City Hornets": "New Orleans Pelicans",
    }
    return canonical_names.get(col, col)

In [57]:
# Strip the literal "*" marker from team names. regex=False states that the
# pattern is a plain string, which avoids the str.replace warning about
# single-character patterns and keeps the behaviour stable across pandas versions.
team_standings_df['Team'] = team_standings_df['Team'].str.replace('*', '', regex=False)

# Collapse renamed franchises onto one code / one name in both tables.
players_df['Tm'] = players_df['Tm'].apply(rename_teams_short)
team_standings_df['Team'] = team_standings_df['Team'].apply(rename_teams_long)

team_long = sorted(team_standings_df['Team'].unique())
team_short = sorted(players_df['Tm'].unique())

# The short -> long mapping pairs the two sorted lists position by position.
# zip() silently truncates to the shorter list, so fail loudly if the counts
# differ instead of building a shifted mapping.
# NOTE(review): equal lengths do not prove the alphabetical orders line up;
# an explicit abbreviation -> name table would be more robust.
if len(team_short) != len(team_long):
    raise ValueError(
        f"Cannot pair team codes with team names: {len(team_short)} codes "
        f"vs {len(team_long)} names"
    )

team_dict = dict(zip(team_short, team_long))

players_df['Team_Long'] = players_df['Tm'].map(team_dict)

  team_standings_df['Team'] = team_standings_df['Team'].str.replace('*','')


In [58]:
# Team-level columns to carry over to the player table.
standings_columns = [
    'Team', 'W', 'L', 'MOV', 'SOS', 'SRS', 'ORtg', 'DRtg', 'NRtg', 'Pace',
    'FTr', '3PAr', 'TS%', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA',
    'eFG%.1', 'TOV%.1', 'DRB%', 'FT/FGA.1', 'Year',
]

# Prefix every column with "Team_" so the names cannot clash with the player
# stats, then restore the two join keys ("Team", "Year") to their plain names.
team_standings_filtered_df = (
    team_standings_df[standings_columns]
    .add_prefix('Team_')
    .rename(columns={"Team_Team": "Team", "Team_Year": "Year"})
)


In [59]:
# Preview the prefixed team standings table before merging it onto the players.
team_standings_filtered_df

Unnamed: 0,Team,Team_W,Team_L,Team_MOV,Team_SOS,Team_SRS,Team_ORtg,Team_DRtg,Team_NRtg,Team_Pace,Team_FTr,Team_3PAr,Team_TS%,Team_eFG%,Team_TOV%,Team_ORB%,Team_FT/FGA,Team_eFG%.1,Team_TOV%.1,Team_DRB%,Team_FT/FGA.1,Year
0,Los Angeles Lakers,67,15,8.55,-0.14,8.41,107.3,98.2,9.1,93.3,0.346,0.153,0.525,0.484,12.7,30.6,0.241,0.443,13.4,73.1,0.222,2000
1,Portland Trail Blazers,59,23,6.40,-0.04,6.36,107.9,100.8,7.1,89.9,0.316,0.175,0.546,0.501,14.5,30.3,0.240,0.461,13.8,72.4,0.217,2000
2,San Antonio Spurs,53,29,5.94,-0.02,5.92,105.0,98.6,6.4,90.8,0.346,0.138,0.535,0.488,14.3,27.8,0.258,0.451,13.5,73.0,0.188,2000
3,Phoenix Suns,53,29,5.22,0.02,5.24,104.6,99.0,5.6,94.0,0.286,0.184,0.532,0.491,15.2,29.3,0.217,0.454,15.7,70.5,0.245,2000
4,Utah Jazz,55,27,4.46,0.05,4.52,107.3,102.3,5.0,89.6,0.337,0.134,0.540,0.490,14.3,29.5,0.260,0.477,15.0,73.2,0.256,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
710,Portland Trail Blazers,33,49,-4.01,0.05,-3.96,114.8,118.8,-4.0,98.6,0.289,0.413,0.589,0.549,13.1,22.4,0.230,0.563,12.1,74.9,0.217,2023
711,Charlotte Hornets,27,55,-6.24,0.35,-5.89,109.2,115.3,-6.1,100.8,0.261,0.360,0.550,0.516,12.3,23.8,0.195,0.544,12.5,75.5,0.211,2023
712,Houston Rockets,22,60,-7.85,0.24,-7.62,111.4,119.3,-7.9,99.0,0.285,0.359,0.554,0.516,14.0,30.2,0.215,0.564,11.8,75.8,0.218,2023
713,Detroit Pistons,17,65,-8.22,0.49,-7.73,110.7,118.9,-8.2,99.0,0.295,0.372,0.561,0.520,13.3,24.9,0.227,0.557,11.9,74.0,0.231,2023


In [60]:
# Attach each team's season-level stats to every player row, matching on the
# full team name and the season.
players_df = players_df.merge(team_standings_filtered_df,
                 how='left',
                 left_on=['Team_Long', 'Year'],
                 right_on=['Team', 'Year'])

# Some player names carry a "*" marker; strip it so names match the award
# tables when merging later. regex=False treats "*" as a literal character and
# avoids the str.replace warning about single-character patterns.
players_df['Player'] = players_df['Player'].str.replace('*', '', regex=False)

# Team games played and win percentage (rounded to 2 decimals).
players_df['Total_Games'] = players_df['Team_W'] + players_df['Team_L']
players_df['Team_Win_PCT'] = (players_df['Team_W'] / players_df['Total_Games']).round(2)

# Final column order; this also drops the helper column 'Team_Long'.
players_df = players_df[['Player', 'Pos', 'Age', 'Team', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P',
       '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB',
       'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PER', 'TS%',
       '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%',
       'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM', 'VORP',
       'Total_Games', 'Team_W', 'Team_L', 'Team_Win_PCT', 'Team_MOV', 'Team_SOS',
       'Team_SRS', 'Team_ORtg', 'Team_DRtg', 'Team_NRtg', 'Team_Pace',
       'Team_FTr', 'Team_3PAr', 'Team_TS%', 'Team_eFG%', 'Team_TOV%',
       'Team_ORB%', 'Team_FT/FGA', 'Team_eFG%.1', 'Team_TOV%.1', 'Team_DRB%',
       'Team_FT/FGA.1', 'Year']]


  players_df['Player'] = players_df['Player'].str.replace('*','')


### 3. Cleaning NBA award tables

In [119]:
def clean_rank(rank):
    """Convert an award ``Rank`` column to the nullable ``Int32`` dtype.

    String ranks may contain a 'T' (presumably marking a tie, e.g. "5T");
    only the text before the first 'T' is kept and converted. A column that
    is already integer-typed is simply returned as ``Int32``, so the cell can
    be re-run without the ``.str`` accessor failing on non-string data.
    """
    if pd.api.types.is_integer_dtype(rank):
        return rank.astype('Int32')
    return rank.str.split('T').str[0].astype('Int32')


# Removing 'T' from the rank column of every award table
for award_df in (mvp_df, dpoy_df, roy_df, smoy_df):
    award_df['Rank'] = clean_rank(award_df['Rank'])

#### Finding the minimum criteria to be considered an award candidate
- By finding the minimum criteria, we can reduce the dataset to only those players who could plausibly be considered award candidates. This should help when training the model and balance the data by removing the majority of players / stats that would not qualify for the award, since only a handful of players are nominated.

#### Minimum Criteria for MVP winner:
1. Lowest Games played: 62 games 
2. Lowest Minutes Played: 30 minutes
3. Lowest PPG: 15.5 Pts

#### Minimum Criteria for top 5 MVP candidates:
1. Lowest Games played: 51 games
2. Lowest Minutes Played: 30 minutes
3. Lowest PPG: 12.6 Pts

Note: We look at the stats of each MVP winner to find the minimum minutes played (MP), games played, and points per game. We also look at the minimum stats needed to qualify as a top 5 MVP candidate, since those are the candidates most likely to win.

In [62]:
# Summary statistics for MVP winners only (Rank 1), rounded to 2 decimals.
mvp_df.loc[mvp_df['Rank'].eq(1)].describe().round(2)

Unnamed: 0,Rank,Age,First,Pts Won,Pts Max,Share,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,1.0,26.5,90.71,1074.5,1174.17,0.92,76.04,36.54,27.07,8.69,6.66,1.38,1.04,0.51,0.33,0.81,15.5,0.27,2011.5
std,0.0,2.06,23.09,131.8,113.4,0.07,5.98,2.91,4.12,3.66,2.29,0.52,0.84,0.05,0.1,0.09,2.81,0.04,7.07
min,1.0,22.0,57.0,875.0,1000.0,0.74,62.0,30.4,15.5,3.3,3.4,0.5,0.1,0.42,0.0,0.52,10.9,0.19,2000.0
25%,1.0,25.0,72.0,960.0,1010.0,0.88,72.0,34.52,24.9,5.4,4.9,1.0,0.48,0.49,0.32,0.77,13.1,0.24,2005.75
50%,1.0,26.5,85.5,1087.0,1210.0,0.93,78.5,36.8,27.4,7.95,6.45,1.4,0.8,0.5,0.34,0.83,15.5,0.28,2011.5
75%,1.0,28.0,113.75,1199.75,1252.5,0.98,81.0,38.92,29.8,12.55,8.0,1.72,1.2,0.55,0.39,0.87,18.0,0.3,2017.25
max,1.0,31.0,131.0,1310.0,1310.0,1.0,82.0,42.0,33.1,13.9,11.5,2.5,3.0,0.58,0.45,0.92,20.3,0.32,2023.0


In [63]:
# Summary statistics for the top 5 MVP vote-getters, rounded to 2 decimals.
mvp_df.loc[mvp_df['Rank'].le(5)].describe().round(2)

Unnamed: 0,Rank,Age,First,Pts Won,Pts Max,Share,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
count,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,118.0,120.0,120.0,120.0,120.0
mean,3.0,26.74,23.43,572.79,1174.17,0.49,74.39,36.56,25.95,8.18,5.8,1.38,1.03,0.5,0.31,0.79,13.18,0.23,2011.5
std,1.42,3.28,36.68,338.87,111.48,0.29,6.87,2.67,4.4,3.07,2.59,0.49,0.84,0.05,0.12,0.1,2.87,0.04,6.95
min,1.0,20.0,0.0,46.0,1000.0,0.05,51.0,30.4,12.6,2.7,1.3,0.5,0.1,0.39,0.0,0.46,6.2,0.13,2000.0
25%,2.0,24.0,0.0,306.25,1010.0,0.26,69.0,34.48,23.28,5.5,3.7,1.0,0.4,0.46,0.3,0.74,11.2,0.2,2005.75
50%,3.0,27.0,3.5,521.0,1210.0,0.43,76.0,36.45,26.8,7.85,5.5,1.4,0.75,0.49,0.34,0.81,13.05,0.23,2011.5
75%,4.0,29.0,25.25,878.25,1252.5,0.73,80.0,38.6,28.7,10.72,7.5,1.7,1.42,0.52,0.38,0.87,15.2,0.26,2017.25
max,5.0,36.0,131.0,1310.0,1310.0,1.0,82.0,42.5,36.1,14.2,11.6,2.8,3.7,0.63,0.46,0.93,20.3,0.32,2023.0


### A player who was traded to another team during the NBA season appears in multiple rows, so the same name occurs more than once in a season. We still need to decide how to handle this.

In [64]:
# Number of distinct player names in the per-game stats table.
ppg_df['Player'].nunique()

2338

### Creating a dataframe for MVP data

In [65]:
# Minimum thresholds a player-season must meet to be kept as an MVP candidate.
mvp_filter_gp = players_df['G'].ge(50)     # games played
mvp_filter_mp = players_df['MP'].ge(30)    # MP column
mvp_filter_ppg = players_df['PTS'].ge(12)  # PTS column

mvp_candidate_mask = mvp_filter_gp & mvp_filter_mp & mvp_filter_ppg
mvp_players_df = players_df[mvp_candidate_mask]

In [67]:
# Number of distinct player names that pass the MVP candidate filters.
mvp_players_df['Player'].nunique()

446

In [66]:
# Keep only the award columns needed for the join keys and the targets.
mvp_subset = mvp_df[['Rank','Player', 'Tm','Share', 'Year']].copy()

# players_df['Tm'] has been normalised with rename_teams_short, so the award
# table's team codes get the same treatment. Without it, any award row that
# still uses an original code (e.g. 'NOH', 'CHO') can never match on 'Tm' and
# the player would silently get Rank 0 / Share 0.
mvp_subset['Tm'] = mvp_subset['Tm'].apply(rename_teams_short)

mvp_players_df = mvp_players_df.merge(mvp_subset,
                 how='left',
                 on=['Player', 'Year', 'Tm'])

# Player-seasons with no matching award row are treated as Share 0 and Rank 0.
mvp_players_df['Share'] = mvp_players_df['Share'].fillna(0)
mvp_players_df['Rank'] = mvp_players_df['Rank'].fillna(0)

mvp_players_df

Unnamed: 0,Player,Pos,Age,Team,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Total_Games,Team_W,Team_L,Team_Win_PCT,Team_MOV,Team_SOS,Team_SRS,Team_ORtg,Team_DRtg,Team_NRtg,Team_Pace,Team_FTr,Team_3PAr,Team_TS%,Team_eFG%,Team_TOV%,Team_ORB%,Team_FT/FGA,Team_eFG%.1,Team_TOV%.1,Team_DRB%,Team_FT/FGA.1,Year,Rank,Share
0,Shareef Abdur-Rahim,SF,23,Vancouver Grizzlies,VAN,82,82,39.3,7.2,15.6,0.465,0.4,1.2,0.302,6.9,14.4,0.478,0.477,5.4,6.7,0.809,2.7,7.4,10.1,3.3,1.1,1.1,3.0,3.0,20.3,20.2,0.547,0.075,0.431,8.0,22.7,15.3,15.5,1.5,1.9,14.1,25.0,6.2,2.6,8.8,0.132,2.6,-0.4,2.2,3.4,82,22,60,0.27,-5.62,0.52,-5.10,102.3,108.5,-6.2,91.0,0.320,0.139,0.524,0.474,15.8,30.1,0.247,0.502,14.1,70.6,0.231,2000,0,0.0
1,Ray Allen,SG,24,Milwaukee Bucks,MIL,82,82,37.4,7.8,17.2,0.455,2.1,5.0,0.423,5.7,12.2,0.468,0.516,4.3,4.9,0.887,1.0,3.4,4.4,3.8,1.3,0.2,2.2,2.3,22.1,20.6,0.570,0.288,0.282,3.2,10.5,6.8,17.0,1.9,0.5,10.3,25.6,9.0,1.0,10.1,0.157,4.7,-1.1,3.6,4.3,82,42,40,0.51,0.22,-0.28,-0.06,108.2,107.9,0.3,92.7,0.290,0.157,0.539,0.494,13.8,30.0,0.228,0.496,14.6,69.6,0.254,2000,0,0.0
2,Derek Anderson,SG,25,Los Angeles Clippers,LAC,64,58,34.4,5.9,13.4,0.438,0.9,2.8,0.309,5.0,10.7,0.472,0.470,4.2,4.8,0.877,1.3,2.8,4.0,3.4,1.4,0.2,2.6,2.3,16.9,16.9,0.542,0.207,0.359,3.9,9.3,6.5,17.9,2.1,0.3,14.4,23.4,3.1,0.3,3.3,0.073,1.2,-1.2,-0.1,1.1,82,15,67,0.18,-11.52,0.80,-10.73,97.8,110.1,-12.3,93.9,0.270,0.188,0.499,0.458,14.9,26.1,0.202,0.507,13.1,69.6,0.220,2000,0,0.0
3,Kenny Anderson,PG,29,Boston Celtics,BOS,82,82,31.6,5.3,12.0,0.440,1.0,2.7,0.386,4.3,9.3,0.456,0.483,2.4,3.1,0.775,0.7,2.1,2.7,5.1,1.7,0.1,1.6,2.8,14.0,17.4,0.524,0.223,0.257,2.3,7.9,4.9,26.7,2.7,0.2,10.6,20.5,5.4,1.9,7.3,0.136,1.7,0.1,1.8,2.5,82,35,47,0.43,-0.76,-0.24,-1.00,104.8,105.6,-0.8,94.6,0.316,0.183,0.520,0.474,13.8,30.4,0.236,0.498,15.8,73.8,0.306,2000,0,0.0
4,Shandon Anderson,SF,26,Houston Rockets,HOU,82,82,32.9,4.5,9.5,0.473,1.0,2.7,0.351,3.5,6.7,0.523,0.524,2.4,3.1,0.767,1.1,3.6,4.7,2.9,1.2,0.4,2.4,2.2,12.3,13.8,0.567,0.289,0.325,3.9,11.9,8.0,14.3,1.8,0.8,17.9,17.6,3.0,1.9,4.9,0.087,-0.3,-0.2,-0.5,1.1,82,34,48,0.41,-0.87,0.30,-0.57,104.8,105.7,-0.9,94.2,0.322,0.244,0.536,0.494,15.8,29.1,0.236,0.481,12.5,71.6,0.201,2000,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1894,Nikola VuÄeviÄ‡,C,32,Chicago Bulls,CHI,82,82,33.5,7.3,14.0,0.520,1.5,4.2,0.349,5.8,9.8,0.594,0.573,1.6,1.9,0.835,1.9,9.1,11.0,3.2,0.7,0.7,1.7,2.2,17.6,19.1,0.594,0.302,0.138,6.7,30.2,18.6,14.6,1.1,2.0,10.2,21.9,3.9,4.4,8.3,0.145,1.9,0.7,2.7,3.2,82,40,42,0.49,1.29,0.07,1.37,113.5,112.2,1.3,98.5,0.251,0.333,0.587,0.550,12.2,20.1,0.203,0.544,13.5,77.8,0.197,2023,0,0.0
1895,Franz Wagner,SF,21,Orlando Magic,ORL,80,80,32.6,6.8,14.0,0.485,1.6,4.5,0.361,5.2,9.5,0.544,0.543,3.4,4.0,0.842,0.9,3.2,4.1,3.5,1.0,0.2,2.1,2.3,18.6,15.9,0.589,0.322,0.288,3.0,11.2,7.1,17.2,1.4,0.6,11.7,23.5,3.1,2.3,5.4,0.099,0.4,-0.5,-0.1,1.3,82,34,48,0.41,-2.56,0.17,-2.39,111.6,114.2,-2.6,99.3,0.290,0.361,0.573,0.532,13.4,23.8,0.227,0.550,13.1,77.7,0.211,2023,0,0.0
1896,P.J. Washington,PF,24,Charlotte Hornets,CHA,73,73,32.6,5.9,13.4,0.444,2.0,5.9,0.348,3.9,7.5,0.518,0.520,1.7,2.4,0.730,1.0,3.9,4.9,2.4,0.9,1.1,1.5,2.6,15.7,12.9,0.542,0.438,0.178,3.2,13.0,8.0,11.0,1.3,2.9,9.4,20.6,0.4,2.2,2.6,0.052,-1.0,-0.3,-1.3,0.4,82,27,55,0.33,-6.24,0.35,-5.89,109.2,115.3,-6.1,100.8,0.261,0.360,0.550,0.516,12.3,23.8,0.195,0.544,12.5,75.5,0.211,2023,0,0.0
1897,Jalen Williams,SG,21,Oklahoma City Thunder,OKC,75,62,30.3,5.5,10.6,0.521,1.0,2.7,0.356,4.5,7.9,0.579,0.567,2.1,2.5,0.812,1.1,3.4,4.5,3.3,1.4,0.5,1.6,2.5,14.1,15.6,0.601,0.258,0.241,3.9,12.2,7.9,15.4,2.1,1.4,12.3,18.4,3.0,2.6,5.6,0.119,-0.4,0.7,0.3,1.3,82,40,42,0.49,1.09,-0.12,0.96,115.2,114.2,1.0,101.1,0.256,0.369,0.570,0.531,11.2,24.7,0.207,0.547,14.4,72.9,0.222,2023,0,0.0


### Creating dataframe for DPOY

#### Minimum Criteria for DPOY winner:
1. Lowest Games played: 56 games 
2. Lowest Minutes Played: 20 minutes
3. Lowest DWS: 3.60
4. Lowest DBPM: 1.10

#### Minimum Criteria for top 5 DPOY candidates:
1. Lowest Games played: 54 games
2. Lowest Minutes Played: 20 minutes
3. Lowest DWS: 2.70
4. Lowest DBPM: 0.30

Note: We look at the stats of each DPOY winner to find the minimum minutes played (MP), games played, DWS, and DBPM. We also look at the minimum stats needed to qualify as a top 5 DPOY candidate, since those are the candidates most likely to win.

In [96]:
# Summary statistics for the top 5 DPOY vote-getters, rounded to 2 decimals.
dpoy_df.loc[dpoy_df['Rank'].le(5)].describe().round(2)

Unnamed: 0,Rank,Age,First,Pts Won,Pts Max,Share,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,DWS,DBPM,DRtg,Year
count,123.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0,112.0,123.0,123.0,123.0,123.0,123.0,123.0,123.0
mean,3.03,27.13,21.59,159.5,517.63,0.3,74.02,34.21,16.15,8.98,3.27,1.27,1.67,0.51,0.25,0.7,9.75,0.18,4.79,2.16,100.08,2011.49
std,1.43,3.63,29.72,156.85,163.0,0.28,7.87,3.52,6.56,3.33,2.32,0.55,1.0,0.07,0.15,0.12,3.49,0.05,1.21,0.86,4.53,6.99
min,1.0,21.0,0.0,1.0,120.0,0.01,54.0,20.8,6.0,2.7,0.4,0.3,0.1,0.4,0.0,0.34,3.1,0.07,2.7,0.3,87.0,2000.0
25%,2.0,24.0,2.0,44.5,500.0,0.09,68.5,32.0,10.7,6.5,1.4,0.8,0.8,0.46,0.12,0.64,7.2,0.15,3.9,1.45,97.0,2005.5
50%,3.0,27.0,8.0,102.0,595.0,0.19,77.0,34.3,15.3,8.9,2.7,1.2,1.5,0.49,0.31,0.73,9.4,0.18,4.6,2.2,100.0,2012.0
75%,4.0,30.0,28.0,229.5,620.0,0.45,81.0,36.7,20.6,11.75,4.25,1.7,2.45,0.55,0.36,0.79,11.95,0.22,5.45,2.75,103.0,2017.0
max,5.0,36.0,116.0,585.0,650.0,0.98,85.0,41.8,30.2,15.4,11.2,2.7,3.7,0.71,0.44,0.9,20.3,0.32,9.1,4.1,112.0,2023.0


In [102]:
# Minimum thresholds a player-season must meet to be kept as a DPOY candidate.
dpoy_filter_gp = players_df['G'].ge(50)       # games played
dpoy_filter_mp = players_df['MP'].ge(20)      # MP column
dpoy_filter_DWS = players_df['DWS'].ge(2.70)  # DWS column
dpoy_filter_DBPM = players_df['DBPM'].ge(0.30)  # DBPM column

dpoy_candidate_mask = (
    dpoy_filter_gp
    & dpoy_filter_mp
    & dpoy_filter_DBPM
    & dpoy_filter_DWS
)
dpoy_players_df = players_df[dpoy_candidate_mask]

In [103]:
# Preview the player-seasons that pass the DPOY candidate filters.
dpoy_players_df

Unnamed: 0,Player,Pos,Age,Team,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Total_Games,Team_W,Team_L,Team_Win_PCT,Team_MOV,Team_SOS,Team_SRS,Team_ORtg,Team_DRtg,Team_NRtg,Team_Pace,Team_FTr,Team_3PAr,Team_TS%,Team_eFG%,Team_TOV%,Team_ORB%,Team_FT/FGA,Team_eFG%.1,Team_TOV%.1,Team_DRB%,Team_FT/FGA.1,Year
15,Darrell Armstrong,PG,31,Orlando Magic,ORL,82,82,31.6,5.9,13.6,0.433,1.7,4.9,0.340,4.2,8.7,0.485,0.494,2.7,3.0,0.911,0.8,2.5,3.3,6.1,2.1,0.1,3.0,1.7,16.2,19.5,0.542,0.360,0.221,2.7,8.6,5.7,31.4,3.2,0.2,16.8,23.9,4.6,3.4,8.0,0.149,3.6,0.9,4.5,4.3,82,41,41,0.50,0.68,-0.26,0.43,102.4,101.7,0.7,97.3,0.305,0.124,0.516,0.473,15.4,31.6,0.224,0.476,15.9,69.9,0.226,2000
55,Shawn Bradley,C,27,Dallas Mavericks,DAL,77,54,24.7,3.5,7.2,0.479,0.0,0.1,0.200,3.4,7.1,0.482,0.480,1.5,1.9,0.765,2.1,4.4,6.5,0.8,0.9,2.5,1.0,3.4,8.4,17.0,0.521,0.009,0.268,9.1,18.9,14.0,4.7,1.9,6.8,10.7,16.1,2.3,2.7,5.1,0.128,-1.0,1.9,0.8,1.4,82,40,42,0.49,-0.57,0.29,-0.29,106.6,107.2,-0.6,94.9,0.248,0.188,0.532,0.490,12.6,25.5,0.200,0.488,14.2,66.1,0.210,2000
58,Terrell Brandon,PG,29,Minnesota Timberwolves,MIN,71,71,36.4,6.8,14.7,0.466,0.7,1.9,0.402,6.1,12.8,0.476,0.492,2.6,2.9,0.899,0.6,2.7,3.4,8.9,1.9,0.4,2.6,2.2,17.1,20.8,0.535,0.127,0.200,2.0,8.8,5.4,39.1,2.7,0.8,14.0,23.0,5.8,2.8,8.6,0.159,3.9,0.3,4.3,4.1,82,50,32,0.61,2.52,0.14,2.67,106.1,103.4,2.7,91.8,0.256,0.104,0.525,0.485,12.9,29.4,0.200,0.474,14.0,73.0,0.250,2000
64,P.J. Brown,PF,30,Miami Heat,MIA,80,80,28.8,4.0,8.4,0.480,0.0,0.0,0.000,4.0,8.4,0.481,0.480,1.5,2.0,0.755,2.7,4.8,7.5,1.8,0.8,0.8,1.3,3.3,9.6,15.3,0.516,0.001,0.237,11.4,18.4,15.1,10.3,1.5,1.9,11.9,17.1,3.3,3.6,6.9,0.143,-0.3,0.6,0.3,1.4,82,52,30,0.63,3.11,-0.36,2.75,104.5,101.0,3.5,89.7,0.283,0.186,0.533,0.495,14.5,28.2,0.208,0.453,13.3,72.9,0.229,2000
67,Kobe Bryant,SG,21,Los Angeles Lakers,LAL,66,62,38.2,8.4,17.9,0.468,0.7,2.2,0.319,7.7,15.7,0.489,0.488,5.0,6.1,0.821,1.6,4.7,6.3,4.9,1.6,0.9,2.8,3.3,22.5,21.7,0.546,0.122,0.341,4.6,12.9,8.8,22.4,2.2,1.7,11.8,26.8,6.1,4.5,10.6,0.202,3.5,1.6,5.1,4.5,82,67,15,0.82,8.55,-0.14,8.41,107.3,98.2,9.1,93.3,0.346,0.153,0.525,0.484,12.7,30.6,0.241,0.443,13.4,73.1,0.222,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14334,Jayson Tatum,SF,24,Boston Celtics,BOS,74,74,36.9,9.8,21.1,0.466,3.2,9.3,0.350,6.6,11.8,0.558,0.543,7.2,8.4,0.854,1.1,7.7,8.8,4.6,1.1,0.7,2.9,2.2,30.1,23.7,0.607,0.440,0.399,3.2,22.5,13.0,20.9,1.4,1.6,10.4,32.7,6.2,4.3,10.5,0.185,4.8,0.7,5.5,5.1,82,57,25,0.70,6.52,-0.15,6.38,118.0,111.5,6.5,98.5,0.243,0.480,0.600,0.566,12.0,22.1,0.197,0.528,11.3,78.5,0.180,2023
14362,Fred VanVleet,PG,28,Toronto Raptors,TOR,69,69,36.7,6.3,16.1,0.393,3.0,8.8,0.342,3.3,7.3,0.455,0.486,3.7,4.1,0.898,0.4,3.6,4.1,7.2,1.8,0.6,2.0,2.8,19.3,17.0,0.540,0.545,0.254,1.2,12.0,6.3,28.1,2.4,1.5,10.2,23.2,3.7,2.8,6.5,0.123,2.0,0.5,2.5,2.9,82,41,41,0.50,1.48,0.12,1.59,115.5,114.0,1.5,97.1,0.257,0.351,0.555,0.517,10.3,27.8,0.201,0.565,15.3,76.7,0.223,2023
14366,Nikola VuÄeviÄ‡,C,32,Chicago Bulls,CHI,82,82,33.5,7.3,14.0,0.520,1.5,4.2,0.349,5.8,9.8,0.594,0.573,1.6,1.9,0.835,1.9,9.1,11.0,3.2,0.7,0.7,1.7,2.2,17.6,19.1,0.594,0.302,0.138,6.7,30.2,18.6,14.6,1.1,2.0,10.2,21.9,3.9,4.4,8.3,0.145,1.9,0.7,2.7,3.2,82,40,42,0.49,1.29,0.07,1.37,113.5,112.2,1.3,98.5,0.251,0.333,0.587,0.550,12.2,20.1,0.203,0.544,13.5,77.8,0.197,2023
14390,Derrick White,SG,28,Boston Celtics,BOS,82,70,28.3,4.3,9.2,0.462,1.8,4.8,0.381,2.5,4.5,0.548,0.560,2.0,2.3,0.875,0.6,2.9,3.6,3.9,0.7,0.9,1.2,2.2,12.4,15.4,0.604,0.516,0.253,2.5,11.2,6.9,19.4,1.1,2.8,10.1,17.7,4.4,3.1,7.4,0.154,1.1,1.3,2.4,2.6,82,57,25,0.70,6.52,-0.15,6.38,118.0,111.5,6.5,98.5,0.243,0.480,0.600,0.566,12.0,22.1,0.197,0.528,11.3,78.5,0.180,2023


In [104]:
# Keep only the award columns needed for the join keys and the targets.
dpoy_subset = dpoy_df[['Rank','Player', 'Tm', 'Share', 'DRtg','Year']].copy()

# players_df['Tm'] has been normalised with rename_teams_short, so the award
# table's team codes get the same treatment. Without it, any award row that
# still uses an original code (e.g. 'NOH', 'CHO') can never match on 'Tm' and
# the player would silently get Rank 0 / Share 0.
dpoy_subset['Tm'] = dpoy_subset['Tm'].apply(rename_teams_short)

dpoy_players_df = dpoy_players_df.merge(dpoy_subset,
                                  how='left',
                                  on=['Player', 'Year', 'Tm'])

# Player-seasons with no matching award row get 0 for the award columns.
# NOTE(review): a DRtg of 0 is a placeholder, not a real rating - confirm that
# downstream use treats it accordingly.
dpoy_players_df['Share'] = dpoy_players_df['Share'].fillna(0)
dpoy_players_df['DRtg'] = dpoy_players_df['DRtg'].fillna(0)
dpoy_players_df['Rank'] = dpoy_players_df['Rank'].fillna(0)
dpoy_players_df

Unnamed: 0,Player,Pos,Age,Team,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Total_Games,Team_W,Team_L,Team_Win_PCT,Team_MOV,Team_SOS,Team_SRS,Team_ORtg,Team_DRtg,Team_NRtg,Team_Pace,Team_FTr,Team_3PAr,Team_TS%,Team_eFG%,Team_TOV%,Team_ORB%,Team_FT/FGA,Team_eFG%.1,Team_TOV%.1,Team_DRB%,Team_FT/FGA.1,Year,Rank,Share,DRtg
0,Darrell Armstrong,PG,31,Orlando Magic,ORL,82,82,31.6,5.9,13.6,0.433,1.7,4.9,0.340,4.2,8.7,0.485,0.494,2.7,3.0,0.911,0.8,2.5,3.3,6.1,2.1,0.1,3.0,1.7,16.2,19.5,0.542,0.360,0.221,2.7,8.6,5.7,31.4,3.2,0.2,16.8,23.9,4.6,3.4,8.0,0.149,3.6,0.9,4.5,4.3,82,41,41,0.50,0.68,-0.26,0.43,102.4,101.7,0.7,97.3,0.305,0.124,0.516,0.473,15.4,31.6,0.224,0.476,15.9,69.9,0.226,2000,0,0.000,0.0
1,Shawn Bradley,C,27,Dallas Mavericks,DAL,77,54,24.7,3.5,7.2,0.479,0.0,0.1,0.200,3.4,7.1,0.482,0.480,1.5,1.9,0.765,2.1,4.4,6.5,0.8,0.9,2.5,1.0,3.4,8.4,17.0,0.521,0.009,0.268,9.1,18.9,14.0,4.7,1.9,6.8,10.7,16.1,2.3,2.7,5.1,0.128,-1.0,1.9,0.8,1.4,82,40,42,0.49,-0.57,0.29,-0.29,106.6,107.2,-0.6,94.9,0.248,0.188,0.532,0.490,12.6,25.5,0.200,0.488,14.2,66.1,0.210,2000,0,0.000,0.0
2,Terrell Brandon,PG,29,Minnesota Timberwolves,MIN,71,71,36.4,6.8,14.7,0.466,0.7,1.9,0.402,6.1,12.8,0.476,0.492,2.6,2.9,0.899,0.6,2.7,3.4,8.9,1.9,0.4,2.6,2.2,17.1,20.8,0.535,0.127,0.200,2.0,8.8,5.4,39.1,2.7,0.8,14.0,23.0,5.8,2.8,8.6,0.159,3.9,0.3,4.3,4.1,82,50,32,0.61,2.52,0.14,2.67,106.1,103.4,2.7,91.8,0.256,0.104,0.525,0.485,12.9,29.4,0.200,0.474,14.0,73.0,0.250,2000,0,0.000,0.0
3,P.J. Brown,PF,30,Miami Heat,MIA,80,80,28.8,4.0,8.4,0.480,0.0,0.0,0.000,4.0,8.4,0.481,0.480,1.5,2.0,0.755,2.7,4.8,7.5,1.8,0.8,0.8,1.3,3.3,9.6,15.3,0.516,0.001,0.237,11.4,18.4,15.1,10.3,1.5,1.9,11.9,17.1,3.3,3.6,6.9,0.143,-0.3,0.6,0.3,1.4,82,52,30,0.63,3.11,-0.36,2.75,104.5,101.0,3.5,89.7,0.283,0.186,0.533,0.495,14.5,28.2,0.208,0.453,13.3,72.9,0.229,2000,0,0.000,0.0
4,Kobe Bryant,SG,21,Los Angeles Lakers,LAL,66,62,38.2,8.4,17.9,0.468,0.7,2.2,0.319,7.7,15.7,0.489,0.488,5.0,6.1,0.821,1.6,4.7,6.3,4.9,1.6,0.9,2.8,3.3,22.5,21.7,0.546,0.122,0.341,4.6,12.9,8.8,22.4,2.2,1.7,11.8,26.8,6.1,4.5,10.6,0.202,3.5,1.6,5.1,4.5,82,67,15,0.82,8.55,-0.14,8.41,107.3,98.2,9.1,93.3,0.346,0.153,0.525,0.484,12.7,30.6,0.241,0.443,13.4,73.1,0.222,2000,5,0.033,98.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1046,Jayson Tatum,SF,24,Boston Celtics,BOS,74,74,36.9,9.8,21.1,0.466,3.2,9.3,0.350,6.6,11.8,0.558,0.543,7.2,8.4,0.854,1.1,7.7,8.8,4.6,1.1,0.7,2.9,2.2,30.1,23.7,0.607,0.440,0.399,3.2,22.5,13.0,20.9,1.4,1.6,10.4,32.7,6.2,4.3,10.5,0.185,4.8,0.7,5.5,5.1,82,57,25,0.70,6.52,-0.15,6.38,118.0,111.5,6.5,98.5,0.243,0.480,0.600,0.566,12.0,22.1,0.197,0.528,11.3,78.5,0.180,2023,0,0.000,0.0
1047,Fred VanVleet,PG,28,Toronto Raptors,TOR,69,69,36.7,6.3,16.1,0.393,3.0,8.8,0.342,3.3,7.3,0.455,0.486,3.7,4.1,0.898,0.4,3.6,4.1,7.2,1.8,0.6,2.0,2.8,19.3,17.0,0.540,0.545,0.254,1.2,12.0,6.3,28.1,2.4,1.5,10.2,23.2,3.7,2.8,6.5,0.123,2.0,0.5,2.5,2.9,82,41,41,0.50,1.48,0.12,1.59,115.5,114.0,1.5,97.1,0.257,0.351,0.555,0.517,10.3,27.8,0.201,0.565,15.3,76.7,0.223,2023,0,0.000,0.0
1048,Nikola VuÄeviÄ‡,C,32,Chicago Bulls,CHI,82,82,33.5,7.3,14.0,0.520,1.5,4.2,0.349,5.8,9.8,0.594,0.573,1.6,1.9,0.835,1.9,9.1,11.0,3.2,0.7,0.7,1.7,2.2,17.6,19.1,0.594,0.302,0.138,6.7,30.2,18.6,14.6,1.1,2.0,10.2,21.9,3.9,4.4,8.3,0.145,1.9,0.7,2.7,3.2,82,40,42,0.49,1.29,0.07,1.37,113.5,112.2,1.3,98.5,0.251,0.333,0.587,0.550,12.2,20.1,0.203,0.544,13.5,77.8,0.197,2023,0,0.000,0.0
1049,Derrick White,SG,28,Boston Celtics,BOS,82,70,28.3,4.3,9.2,0.462,1.8,4.8,0.381,2.5,4.5,0.548,0.560,2.0,2.3,0.875,0.6,2.9,3.6,3.9,0.7,0.9,1.2,2.2,12.4,15.4,0.604,0.516,0.253,2.5,11.2,6.9,19.4,1.1,2.8,10.1,17.7,4.4,3.1,7.4,0.154,1.1,1.3,2.4,2.6,82,57,25,0.70,6.52,-0.15,6.38,118.0,111.5,6.5,98.5,0.243,0.480,0.600,0.566,12.0,22.1,0.197,0.528,11.3,78.5,0.180,2023,0,0.000,0.0


In [109]:
# Columns kept for the DPOY table, in their final order: identifiers first,
# then the retained player stats, the team stats, and the 'Share' column last.
dpoy_columns = [
    'Year', 'Rank', 'Player', 'Pos', 'Age', 'Team', 'Tm', 'G', 'GS', 'MP',
    'ORB', 'DRB', 'TRB', 'STL', 'BLK', 'TOV', 'PF', 'PER',
    'ORB%', 'DRB%', 'TRB%', 'STL%', 'BLK%', 'TOV%', 'USG%',
    'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'DRtg', 'BPM', 'VORP',
    'Total_Games', 'Team_W', 'Team_L', 'Team_Win_PCT',
    'Team_MOV', 'Team_SOS', 'Team_SRS', 'Team_ORtg', 'Team_DRtg',
    'Team_NRtg', 'Team_Pace', 'Team_TOV%', 'Team_ORB%',
    'Team_TOV%.1', 'Team_DRB%', 'Share',
]
dpoy_players_df = dpoy_players_df[dpoy_columns]

### Creating dataframe for SMOY

#### Minimum Criteria for SMOY winner:
1. Lowest Games played: 59 games 
2. Lowest Minutes Played: 21.80 minutes
3. Lowest PPG: 11.60


#### Minimum Criteria for top 5 SMOY candidates:
1. Lowest Games played: 34 games
2. Lowest Minutes Played: 20 minutes
3. Lowest PPG: 7.00

Note: We look at the stats of each SMOY winner to find the minimum minutes played (MP), games played, and points per game. We also look at the minimum stats needed to qualify as a top 5 SMOY candidate, since those are the candidates most likely to win.

In [125]:
# Summary statistics for the top 5 SMOY vote-getters, rounded to 2 decimals.
smoy_df.loc[smoy_df['Rank'].le(5)].describe().round(2)

Unnamed: 0,Rank,Age,First,Pts Won,Pts Max,Share,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
count,122.0,122.0,122.0,122.0,122.0,122.0,122.0,122.0,122.0,122.0,122.0,122.0,122.0,122.0,119.0,122.0,122.0,122.0,122.0
mean,3.02,27.52,22.31,169.52,517.58,0.31,73.73,27.57,14.0,4.18,2.74,0.89,0.36,0.46,0.34,0.8,5.54,0.13,2011.43
std,1.42,3.66,32.27,170.09,162.45,0.29,8.63,3.3,3.09,1.96,1.29,0.28,0.35,0.05,0.09,0.08,1.82,0.04,6.95
min,1.0,20.0,0.0,2.0,121.0,0.02,34.0,20.0,7.0,1.4,0.4,0.3,0.0,0.4,0.0,0.56,2.4,0.06,2000.0
25%,2.0,24.25,1.0,33.0,500.0,0.07,68.0,25.85,12.3,2.7,1.7,0.7,0.2,0.43,0.33,0.76,4.2,0.1,2005.25
50%,3.0,27.5,5.5,96.0,592.5,0.2,76.0,27.7,14.1,3.65,2.65,0.9,0.3,0.45,0.36,0.83,5.3,0.12,2011.5
75%,4.0,30.0,33.0,271.25,615.0,0.46,81.0,29.9,16.0,5.3,3.68,1.08,0.4,0.48,0.39,0.86,6.68,0.16,2017.0
max,5.0,36.0,123.0,615.0,650.0,0.99,82.0,33.7,22.6,10.4,6.1,1.8,2.2,0.62,0.53,0.92,11.1,0.26,2023.0


In [131]:
# Minimum thresholds a player-season must meet to be kept as a SMOY candidate.
smoy_filter_gp = players_df['G'] >= 34
smoy_filter_mp = players_df['MP'] >= 20
smoy_filter_ppg = players_df['PTS'] >= 11

# .copy() gives an independent frame, so adding the 'GB' column below no
# longer triggers pandas' SettingWithCopyWarning (assignment on a filtered
# slice of players_df).
smoy_players_df = players_df[smoy_filter_gp &
                                smoy_filter_mp &
                                smoy_filter_ppg].copy()

# GB = games played minus games started; keep only player-seasons where that
# number exceeds the games started.
smoy_players_df['GB'] = smoy_players_df['G'] - smoy_players_df['GS']
smoy_players_df = smoy_players_df[smoy_players_df['GB'] > smoy_players_df['GS']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  smoy_players_df['GB'] = smoy_players_df['G'] - smoy_players_df['GS']


In [133]:
# Keep only the award columns needed for the join keys and the targets.
smoy_subset = smoy_df[['Rank','Player', 'Tm', 'Share', 'Year']].copy()

# players_df['Tm'] has been normalised with rename_teams_short, so the award
# table's team codes get the same treatment. Without it, any award row that
# still uses an original code (e.g. 'NOH', 'CHO') can never match on 'Tm' and
# the player would silently get Rank 0 / Share 0.
smoy_subset['Tm'] = smoy_subset['Tm'].apply(rename_teams_short)

smoy_players_df = smoy_players_df.merge(smoy_subset,
                                  how='left',
                                  on=['Player', 'Year', 'Tm'])

# Player-seasons with no matching award row are treated as Share 0 and Rank 0.
smoy_players_df['Share'] = smoy_players_df['Share'].fillna(0)
smoy_players_df['Rank'] = smoy_players_df['Rank'].fillna(0)
smoy_players_df

Unnamed: 0,Player,Pos,Age,Team,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Total_Games,Team_W,Team_L,Team_Win_PCT,Team_MOV,Team_SOS,Team_SRS,Team_ORtg,Team_DRtg,Team_NRtg,Team_Pace,Team_FTr,Team_3PAr,Team_TS%,Team_eFG%,Team_TOV%,Team_ORB%,Team_FT/FGA,Team_eFG%.1,Team_TOV%.1,Team_DRB%,Team_FT/FGA.1,Year,GB,Rank,Share
0,Cedric Ceballos,SF,30,Dallas Mavericks,DAL,69,25,29.9,6.5,14.5,0.446,0.6,1.9,0.328,5.8,12.6,0.464,0.468,3.0,3.6,0.843,2.5,4.2,6.7,1.3,0.8,0.3,1.8,2.4,16.6,18.1,0.516,0.134,0.248,9.0,15.0,12.0,7.4,1.4,0.8,10.1,26.4,2.7,1.3,4.0,0.094,1.2,-2.2,-1.0,0.5,82,40,42,0.49,-0.57,0.29,-0.29,106.6,107.2,-0.6,94.9,0.248,0.188,0.532,0.490,12.6,25.5,0.200,0.488,14.2,66.1,0.210,2000,44,3,0.025
1,Chris Gatling,PF,32,Denver Nuggets,DEN,85,0,21.3,4.3,9.4,0.455,0.2,0.8,0.257,4.1,8.6,0.474,0.466,3.1,4.4,0.713,1.8,4.1,5.9,0.8,1.0,0.3,2.0,2.9,11.9,17.3,0.525,0.087,0.465,9.2,21.0,15.1,6.7,2.3,0.9,14.9,26.7,1.3,2.6,4.0,0.105,-0.7,-0.2,-1.0,0.5,82,35,47,0.43,-2.12,0.36,-1.76,103.5,105.8,-2.3,94.7,0.306,0.202,0.517,0.476,14.0,29.2,0.222,0.479,13.0,71.6,0.235,2000,85,,0.000
2,Chris Gatling,PF,32,Orlando Magic,ORL,45,0,23.1,4.7,10.3,0.455,0.2,0.5,0.304,4.5,9.8,0.462,0.462,3.8,5.4,0.698,2.0,4.6,6.6,0.9,1.1,0.2,2.3,3.2,13.3,17.4,0.525,0.050,0.530,9.5,21.5,15.5,6.4,2.3,0.7,15.6,27.2,0.8,1.7,2.5,0.115,-0.8,-0.1,-0.9,0.3,82,41,41,0.50,0.68,-0.26,0.43,102.4,101.7,0.7,97.3,0.305,0.124,0.516,0.473,15.4,31.6,0.224,0.476,15.9,69.9,0.226,2000,45,,0.000
3,Larry Hughes,SG,21,Denver Nuggets,DEN,82,37,28.3,5.6,14.0,0.400,0.4,1.5,0.232,5.2,12.5,0.421,0.413,3.4,4.6,0.740,1.4,2.9,4.3,2.5,1.4,0.3,2.4,2.3,15.0,14.7,0.467,0.109,0.329,5.0,11.4,8.1,15.8,2.5,0.9,12.9,27.8,-0.5,2.2,1.7,0.036,-0.3,-0.6,-0.9,0.7,82,35,47,0.43,-2.12,0.36,-1.76,103.5,105.8,-2.3,94.7,0.306,0.202,0.517,0.476,14.0,29.2,0.222,0.479,13.0,71.6,0.235,2000,45,,0.000
4,Voshon Lenard,SG,26,Miami Heat,MIA,53,13,27.1,4.3,10.6,0.407,1.7,4.3,0.390,2.6,6.3,0.419,0.487,1.6,2.0,0.792,0.7,2.2,2.9,2.6,0.8,0.3,1.5,2.4,11.9,13.9,0.518,0.407,0.189,3.1,8.9,6.2,16.1,1.5,0.8,11.7,22.3,1.5,1.5,3.0,0.100,0.5,-0.4,0.1,0.8,82,52,30,0.63,3.11,-0.36,2.75,104.5,101.0,3.5,89.7,0.283,0.186,0.533,0.495,14.5,28.2,0.208,0.453,13.3,72.9,0.229,2000,40,,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,Max Strus,SF,26,Miami Heat,MIA,80,33,28.4,4.1,9.9,0.410,2.5,7.0,0.350,1.6,2.8,0.559,0.535,1.0,1.1,0.876,0.6,2.6,3.2,2.1,0.5,0.2,0.9,2.1,11.5,10.8,0.557,0.713,0.113,2.4,11.2,6.7,11.2,0.9,0.5,7.8,17.6,1.5,1.9,3.4,0.071,-0.8,-0.9,-1.7,0.2,82,44,38,0.54,-0.32,0.18,-0.13,113.0,113.3,-0.3,96.3,0.270,0.408,0.574,0.530,12.4,22.8,0.224,0.561,14.5,77.7,0.198,2023,47,,0.000
497,John Wall,PG,32,Los Angeles Clippers,LAC,34,3,22.2,4.1,9.9,0.408,1.0,3.2,0.303,3.1,6.7,0.459,0.457,2.3,3.3,0.681,0.4,2.3,2.7,5.2,0.8,0.4,2.4,1.7,11.4,13.6,0.498,0.322,0.334,2.1,11.4,6.8,35.3,1.8,1.4,17.1,27.0,-0.4,0.7,0.3,0.020,-0.8,-0.4,-1.2,0.1,82,44,38,0.54,0.50,-0.19,0.31,115.0,114.5,0.5,98.0,0.278,0.387,0.588,0.551,12.8,22.9,0.217,0.543,11.7,76.6,0.195,2023,31,,0.000
498,Russell Westbrook,PG,34,Denver Nuggets,DEN,73,24,29.1,5.9,13.6,0.436,1.2,3.9,0.311,4.7,9.7,0.487,0.481,2.8,4.3,0.656,1.2,4.6,5.8,7.5,1.0,0.5,3.5,2.2,15.9,16.1,0.513,0.289,0.317,4.7,16.5,10.8,38.6,1.7,1.3,18.4,27.7,-0.6,2.6,1.9,0.044,0.3,-0.1,0.2,1.2,82,53,29,0.65,3.33,-0.29,3.04,117.6,114.2,3.4,98.1,0.259,0.361,0.601,0.573,13.1,24.8,0.194,0.543,12.2,76.4,0.201,2023,49,,0.000
499,Russell Westbrook,PG,34,Los Angeles Lakers,LAL,52,3,28.7,5.8,14.0,0.417,1.2,4.1,0.296,4.6,9.9,0.467,0.460,3.0,4.6,0.655,1.1,5.0,6.2,7.5,1.0,0.4,3.5,2.3,15.9,15.3,0.496,0.292,0.326,4.4,18.2,11.5,38.5,1.7,1.3,18.1,28.8,-1.2,1.9,0.7,0.023,-0.2,0.1,-0.1,0.7,82,43,39,0.52,0.57,-0.15,0.43,114.5,113.9,0.6,101.3,0.299,0.351,0.582,0.542,12.3,22.8,0.232,0.535,10.9,76.3,0.171,2023,49,,0.000


In [139]:
# Display only the candidate rows whose 'Share' value is greater than zero.
smoy_players_df.loc[smoy_players_df['Share'].gt(0)]

Unnamed: 0,Player,Pos,Age,Team,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Total_Games,Team_W,Team_L,Team_Win_PCT,Team_MOV,Team_SOS,Team_SRS,Team_ORtg,Team_DRtg,Team_NRtg,Team_Pace,Team_FTr,Team_3PAr,Team_TS%,Team_eFG%,Team_TOV%,Team_ORB%,Team_FT/FGA,Team_eFG%.1,Team_TOV%.1,Team_DRB%,Team_FT/FGA.1,Year,GB,Rank,Share
0,Cedric Ceballos,SF,30,Dallas Mavericks,DAL,69,25,29.9,6.5,14.5,0.446,0.6,1.9,0.328,5.8,12.6,0.464,0.468,3.0,3.6,0.843,2.5,4.2,6.7,1.3,0.8,0.3,1.8,2.4,16.6,18.1,0.516,0.134,0.248,9.0,15.0,12.0,7.4,1.4,0.8,10.1,26.4,2.7,1.3,4.0,0.094,1.2,-2.2,-1.0,0.5,82,40,42,0.49,-0.57,0.29,-0.29,106.6,107.2,-0.6,94.9,0.248,0.188,0.532,0.490,12.6,25.5,0.200,0.488,14.2,66.1,0.210,2000,44,3,0.025
5,Tracy McGrady,SF,20,Toronto Raptors,TOR,79,34,31.2,5.8,12.9,0.451,0.2,0.8,0.277,5.6,12.1,0.463,0.460,3.5,5.0,0.707,2.4,4.0,6.3,3.3,1.1,1.9,2.0,2.5,15.4,20.0,0.509,0.064,0.385,8.3,14.7,11.4,18.8,1.9,4.3,11.8,24.3,3.7,2.9,6.6,0.129,2.6,0.9,3.5,3.4,82,45,37,0.55,-0.16,-0.30,-0.46,104.7,104.9,-0.2,92.5,0.300,0.170,0.511,0.464,12.7,30.1,0.230,0.480,14.2,71.8,0.247,2000,45,3,0.025
7,Cuttino Mobley,SG,24,Houston Rockets,HOU,81,8,30.8,5.4,12.5,0.430,1.3,3.6,0.356,4.1,8.9,0.460,0.481,3.7,4.4,0.847,0.7,2.8,3.6,2.6,1.1,0.4,2.3,2.1,15.8,16.0,0.545,0.287,0.347,2.7,10.1,6.5,14.3,1.8,0.8,13.7,23.9,3.6,1.6,5.2,0.099,0.7,-0.8,-0.1,1.2,82,34,48,0.41,-0.87,0.30,-0.57,104.8,105.7,-0.9,94.2,0.322,0.244,0.536,0.494,15.8,29.1,0.236,0.481,12.5,71.6,0.201,2000,73,2,0.058
8,Rodney Rogers,PF,28,Phoenix Suns,PHO,82,7,27.9,5.2,10.7,0.486,1.4,3.2,0.439,3.8,7.5,0.506,0.551,1.9,3.0,0.639,1.7,3.8,5.5,2.1,1.1,0.6,2.0,3.5,13.8,17.1,0.570,0.297,0.283,6.9,14.8,10.9,12.5,2.1,1.5,14.1,22.2,3.4,3.9,7.4,0.154,1.5,1.1,2.6,2.7,82,53,29,0.65,5.22,0.02,5.24,104.6,99.0,5.6,94.0,0.286,0.184,0.532,0.491,15.2,29.3,0.217,0.454,15.7,70.5,0.245,2000,75,1,0.860
9,Peja StojakoviÄ‡,SF,22,Sacramento Kings,SAC,74,11,23.6,4.3,9.7,0.448,1.4,3.6,0.375,3.0,6.1,0.491,0.517,1.8,2.1,0.882,1.0,2.7,3.7,1.4,0.7,0.1,1.2,1.3,11.9,16.3,0.559,0.372,0.213,4.4,12.1,8.2,9.4,1.4,0.3,10.1,20.8,3.2,1.7,4.8,0.133,2.0,-0.4,1.6,1.6,82,44,38,0.54,2.91,0.12,3.04,105.0,102.1,2.9,99.3,0.277,0.227,0.526,0.486,13.9,27.7,0.209,0.479,15.1,69.7,0.198,2000,63,6,0.008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
490,Bobby Portis,PF,27,Milwaukee Bucks,MIL,70,22,26.0,5.7,11.5,0.496,1.4,3.7,0.370,4.3,7.8,0.555,0.555,1.4,1.8,0.768,2.2,7.4,9.6,1.5,0.4,0.2,1.2,1.6,14.1,17.8,0.575,0.320,0.156,9.2,28.6,19.3,8.7,0.8,0.7,8.6,21.7,2.5,2.8,5.3,0.139,1.2,-0.6,0.7,1.2,82,58,24,0.71,3.63,-0.02,3.61,115.4,111.9,3.5,100.5,0.248,0.446,0.583,0.555,12.7,25.0,0.184,0.520,10.4,77.8,0.175,2023,48,3,0.194
491,Norman Powell,SG,29,Los Angeles Clippers,LAC,60,8,26.1,5.7,11.8,0.479,1.9,4.8,0.397,3.8,7.1,0.534,0.559,3.8,4.6,0.812,0.4,2.5,2.9,1.8,0.8,0.3,1.7,2.1,17.0,17.1,0.612,0.404,0.390,1.7,10.7,6.3,11.0,1.6,1.0,10.8,26.0,2.1,1.4,3.5,0.107,0.9,-0.7,0.3,0.9,82,44,38,0.54,0.50,-0.19,0.31,115.0,114.5,0.5,98.0,0.278,0.387,0.588,0.551,12.8,22.9,0.217,0.543,11.7,76.6,0.195,2023,52,4,0.048
492,Immanuel Quickley,SG,23,New York Knicks,NYK,81,21,28.9,5.2,11.6,0.448,2.1,5.6,0.370,3.1,6.0,0.521,0.537,2.5,3.1,0.819,0.7,3.4,4.2,3.4,1.0,0.2,1.2,2.0,14.9,16.3,0.578,0.485,0.265,2.7,13.1,7.9,17.4,1.7,0.6,8.7,20.9,4.4,2.3,6.7,0.137,1.3,0.4,1.7,2.1,82,47,35,0.57,2.93,0.06,2.99,117.8,114.8,3.0,97.1,0.285,0.400,0.577,0.541,11.4,28.3,0.217,0.536,11.4,77.1,0.210,2023,60,2,0.652
493,Austin Reaves,SG,24,Los Angeles Lakers,LAL,64,22,28.8,4.0,7.7,0.529,1.3,3.4,0.398,2.7,4.3,0.631,0.616,3.6,4.1,0.864,0.5,2.5,3.0,3.4,0.5,0.3,1.5,1.7,13.0,14.9,0.687,0.441,0.541,2.0,9.0,5.6,15.7,0.8,0.9,14.0,16.2,4.1,1.4,5.4,0.142,0.6,0.1,0.7,1.2,82,43,39,0.52,0.57,-0.15,0.43,114.5,113.9,0.6,101.3,0.299,0.351,0.582,0.542,12.3,22.8,0.232,0.535,10.9,76.3,0.171,2023,42,7,0.018


### TO DO:

- [x] Convert the rank column to a numeric type in all of the award dataframes
- [ ] Filter each award dataframe by its respective minimum criteria to reduce the data and create a more balanced dataset
- [ ] Merge player data with each awards table (Finish SMOY table)
- [ ] Perform data cleaning
- [ ] Plot distributions
- [ ] Analyze correlation
- [ ] Plot statistics