In [7]:
from nba_api.stats.endpoints import leaguedashplayerstats as stats
from nba_api.stats.static import players
from nba_api.stats.library.parameters import SeasonType
from unidecode import unidecode
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
# Constants

# Keyword arguments passed unchanged to every LeagueDashPlayerStats request;
# only the season is supplied separately at the call site.
const_stats_object_kwargs = dict(
    last_n_games=0,
    per_mode_detailed='PerGame',
    season_type_all_star=SeasonType.regular,
    measure_type_detailed_defense='Base',
)

# Columns removed from each season's stats frame right after download.
# Line continuations are implicit inside the brackets, so no backslashes needed.
const_stats_columns_to_drop = [
    'NICKNAME', 'GP', 'W', 'L', 'W_PCT', 'MIN',
    'FGM', 'FGA', 'FG3A', 'FTA', 'FTM', 'DREB', 'PF',
    'L_RANK', 'FGM_RANK', 'FGA_RANK', 'FG3A_RANK', 'FTM_RANK', 'FTA_RANK',
    'NBA_FANTASY_PTS', 'NBA_FANTASY_PTS_RANK',
    'CFID', 'CFPARAMS',
]

# Calendar years 1996..2021; every adjacent pair of years yields one season label.
years = list(range(1996, 2022))

# Label format: full start year, a dash, then the last two digits of the
# following year (e.g. 1999 and 2000 -> '1999-00').
season_years = [
    f'{start_year}-{str(end_year)[2:]}'
    for start_year, end_year in zip(years, years[1:])
]

In [32]:
# Functions

def extract_mvp_votes(years):
    """Scrape the MVP voting table from basketball-reference.com for each year.

    Parameters
    ----------
    years : iterable
        Award years; each one is substituted into the ``awards_{year}.html`` URL.

    Returns
    -------
    dict or int
        On success, a dict mapping each year to a DataFrame with the columns
        ``PLAYER``, ``VOTES`` (the ``points_won`` cell truncated to int) and
        ``VOTES_PCT`` (the ``award_share`` cell as float).
        If any request answers with a status other than 200, scraping stops
        immediately and that status code is returned instead of the dict.

    Raises
    ------
    ValueError
        If a page was fetched but holds no ``<table id="mvp">`` with a body.
    """
    mvp_votes_by_season = {}

    for year in years:
        url = f'https://www.basketball-reference.com/awards/awards_{year}.html#mvp'
        # A timeout keeps a single stalled connection from hanging the whole scrape.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            # NOTE(review): callers receive an int here, not a dict, and must
            # check for it; kept as-is so existing callers are not broken.
            return response.status_code

        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed (and warns), so results can differ
        # between machines.
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table', {'id': 'mvp'})
        table_body = table.find('tbody') if table is not None else None
        if table_body is None:
            raise ValueError(f'No MVP voting table found at {url}')

        mvp_votes = []
        for row in table_body.find_all('tr'):
            player_cell = row.find('td')
            if player_cell is None:
                # Rows without any <td> (e.g. repeated header rows) carry no vote data.
                continue

            # Prefer the linked player name; fall back to the raw cell text
            # when the name is not wrapped in a link.
            link = player_cell.find('a')
            player = link.text if link is not None else player_cell.get_text(strip=True)

            votes = row.find('td', {'data-stat': 'points_won'}).text
            percentage = row.find('td', {'data-stat': 'award_share'}).text
            # Vote points may be rendered like '986.0', hence float() before int().
            mvp_votes.append([player, int(float(votes)), float(percentage)])

        mvp_votes_by_season[year] = pd.DataFrame(mvp_votes, columns = ['PLAYER', 'VOTES', 'VOTES_PCT'])

    return mvp_votes_by_season

def find_player_id(player):
    """Look up the id of the player whose full name is exactly ``player``.

    The name is escaped before being embedded in the ``^...$`` pattern, so
    characters such as ``.`` or ``(`` are matched literally instead of being
    interpreted as regular-expression syntax.

    Parameters
    ----------
    player : str
        Full player name to search for.

    Returns
    -------
    int or float
        ``np.nan`` when no player matches; otherwise the smallest id among
        the matching players (which is simply the id when there is one match).
    """
    import re  # local import: the notebook's import cell does not bring in `re`

    pattern = f'^{re.escape(str(player))}$'
    matches = players.find_players_by_full_name(pattern)

    if not matches:
        return np.nan

    # Several players can share a full name; keep the original tie-break of
    # taking the lowest id.
    return min(match['id'] for match in matches)

def steve_smith(df):
    """Set ``PLAYER_ID`` to 120 on every row whose ``PLAYER`` is 'Steve Smith'.

    The frame is modified in place and nothing is returned. Rows are selected
    with the boolean mask itself rather than via their index labels, so rows
    of other players that happen to share an index label are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``PLAYER`` column; ``PLAYER_ID`` is written to.
    """
    is_steve_smith = df['PLAYER'] == 'Steve Smith'
    df.loc[is_steve_smith, 'PLAYER_ID'] = 120


In [4]:
# Skip the first entry of `years` (1996) so the MVP scrape starts at 1997.
# NOTE(review): presumably each award year corresponds to the year a season
# ends (e.g. season '1996-97' -> 1997) — confirm against the award pages.
mvp_years = years[1:]

In [37]:
mvp_votes_stats_seasons = extract_mvp_votes(mvp_years)

# extract_mvp_votes hands back a bare HTTP status code (not a dict) when a
# request fails; stop here with a clear message instead of letting a later
# cell fail while trying to iterate over an int.
if not isinstance(mvp_votes_stats_seasons, dict):
    raise RuntimeError(
        f'MVP scrape failed: basketball-reference returned HTTP status {mvp_votes_stats_seasons}'
    )

In [38]:
# Normalise player names to ASCII and attach an integer PLAYER_ID to every
# season's MVP voting frame.
for year, votes_df in mvp_votes_stats_seasons.items():
    votes_df['PLAYER'] = votes_df['PLAYER'].apply(unidecode)
    player_ids = votes_df['PLAYER'].apply(find_player_id)
    # Assign the filled result back instead of calling fillna(inplace=True) on
    # a column selection: that chained in-place form is not guaranteed to
    # update the parent frame (and does not under pandas copy-on-write).
    # NOTE(review): every unmatched name receives id 120, the id this notebook
    # uses for 'Steve Smith' — verify no other player falls through the lookup.
    votes_df['PLAYER_ID'] = player_ids.fillna(120).astype(int)
    print(votes_df)

              PLAYER  VOTES  VOTES_PCT  PLAYER_ID
0        Karl Malone    986      0.857        252
1     Michael Jordan    957      0.832        893
2         Grant Hill    376      0.327        255
3       Tim Hardaway    238      0.207        896
4          Glen Rice    134      0.117        779
5        Gary Payton    105      0.091         56
6    Hakeem Olajuwon     95      0.083        165
7      Patrick Ewing     57      0.050        121
8      Anthony Mason      7      0.006        193
9   Shaquille O'Neal      7      0.006        406
10    Scottie Pippen      6      0.005        937
11   Alonzo Mourning      5      0.004        297
12   Dikembe Mutombo      4      0.003         87
13    Mitch Richmond      4      0.003        782
14     John Stockton      3      0.003        304
15   Charles Barkley      2      0.002        787
16     Tom Gugliotta      1      0.001        339
17     Allen Iverson      1      0.001        947
18     Kevin Johnson      1      0.001        134


              PLAYER  VOTES  VOTES_PCT  PLAYER_ID
0         Steve Nash    924      0.739        959
1       LeBron James    688      0.550       2544
2      Dirk Nowitzki    544      0.435       1717
3        Kobe Bryant    483      0.386        977
4   Chauncey Billups    430      0.344       1497
5        Dwyane Wade     87      0.070       2548
6        Elton Brand     50      0.040       1882
7         Tim Duncan     33      0.026       1495
8        Tony Parker      9      0.007       2225
9      Allen Iverson      1      0.001        947
10      Shawn Marion      1      0.001       1890
               PLAYER  VOTES  VOTES_PCT  PLAYER_ID
0       Dirk Nowitzki   1138      0.882       1717
1          Steve Nash   1013      0.785        959
2         Kobe Bryant    521      0.404        977
3          Tim Duncan    286      0.222       1495
4        LeBron James    183      0.142       2544
5       Tracy McGrady    110      0.085       1503
6          Chris Bosh     43      0.033    

                   PLAYER  VOTES  VOTES_PCT  PLAYER_ID
0            James Harden    965      0.955     201935
1            LeBron James    738      0.731       2544
2           Anthony Davis    445      0.441     203076
3          Damian Lillard    207      0.205     203081
4       Russell Westbrook     76      0.075     201566
5   Giannis Antetokounmpo     75      0.074     203507
6            Kevin Durant     66      0.065     201142
7           DeMar DeRozan     32      0.032     201942
8       LaMarcus Aldridge      6      0.006     200746
9            Jimmy Butler      5      0.005     202710
10          Stephen Curry      5      0.005     201939
11            Joel Embiid      4      0.004     203954
12         Victor Oladipo      2      0.002     203506
                   PLAYER  VOTES  VOTES_PCT  PLAYER_ID
0   Giannis Antetokounmpo    941      0.932     203507
1            James Harden    776      0.768     201935
2             Paul George    356      0.352     202331
3         

In [7]:
# League-wide per-player stats for every season, keyed by season label.
stats_dfs_by_season = {}
for season in season_years:
    season_frames = stats.LeagueDashPlayerStats(season = season, **const_stats_object_kwargs).get_data_frames()
    # First returned frame -> drop unused columns -> drop rows with any null
    # -> store AGE as an integer.
    stats_df = (
        season_frames[0]
        .drop(columns = const_stats_columns_to_drop)
        .dropna()
        .assign(AGE = lambda frame: frame['AGE'].astype(int))
    )
    stats_dfs_by_season[season] = stats_df

In [8]:
# Print a structural summary and the total null count for each season's frame.
for season, season_stats in stats_dfs_by_season.items():
    print(f'Season: {season}')
    # DataFrame.info() writes its report to stdout itself and returns None, so
    # wrapping it in print() only added a stray "None" line to the output.
    season_stats.info()
    print(f'Null values: {season_stats.isnull().sum().sum()}')
    print('\n---------------------------\n')

Season: 1996-97
<class 'pandas.core.frame.DataFrame'>
Int64Index: 441 entries, 4 to 444
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          441 non-null    int64  
 1   PLAYER_NAME        441 non-null    object 
 2   TEAM_ID            441 non-null    float64
 3   TEAM_ABBREVIATION  441 non-null    object 
 4   AGE                441 non-null    int32  
 5   FG_PCT             441 non-null    float64
 6   FG3M               441 non-null    float64
 7   FG3_PCT            441 non-null    float64
 8   FT_PCT             441 non-null    float64
 9   OREB               441 non-null    float64
 10  REB                441 non-null    float64
 11  AST                441 non-null    float64
 12  TOV                441 non-null    float64
 13  STL                441 non-null    float64
 14  BLK                441 non-null    float64
 15  BLKA               441 non-null    float64
 16  PFD       

Null values: 0

---------------------------

Season: 2007-08
<class 'pandas.core.frame.DataFrame'>
Int64Index: 451 entries, 0 to 450
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          451 non-null    int64  
 1   PLAYER_NAME        451 non-null    object 
 2   TEAM_ID            451 non-null    int64  
 3   TEAM_ABBREVIATION  451 non-null    object 
 4   AGE                451 non-null    int32  
 5   FG_PCT             451 non-null    float64
 6   FG3M               451 non-null    float64
 7   FG3_PCT            451 non-null    float64
 8   FT_PCT             451 non-null    float64
 9   OREB               451 non-null    float64
 10  REB                451 non-null    float64
 11  AST                451 non-null    float64
 12  TOV                451 non-null    float64
 13  STL                451 non-null    float64
 14  BLK                451 non-null    float64
 15  BLKA         

Null values: 0

---------------------------

Season: 2013-14
<class 'pandas.core.frame.DataFrame'>
Int64Index: 482 entries, 0 to 481
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          482 non-null    int64  
 1   PLAYER_NAME        482 non-null    object 
 2   TEAM_ID            482 non-null    int64  
 3   TEAM_ABBREVIATION  482 non-null    object 
 4   AGE                482 non-null    int32  
 5   FG_PCT             482 non-null    float64
 6   FG3M               482 non-null    float64
 7   FG3_PCT            482 non-null    float64
 8   FT_PCT             482 non-null    float64
 9   OREB               482 non-null    float64
 10  REB                482 non-null    float64
 11  AST                482 non-null    float64
 12  TOV                482 non-null    float64
 13  STL                482 non-null    float64
 14  BLK                482 non-null    float64
 15  BLKA         

None
Null values: 0

---------------------------

Season: 2020-21
<class 'pandas.core.frame.DataFrame'>
Int64Index: 540 entries, 0 to 539
Data columns (total 43 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          540 non-null    int64  
 1   PLAYER_NAME        540 non-null    object 
 2   TEAM_ID            540 non-null    int64  
 3   TEAM_ABBREVIATION  540 non-null    object 
 4   AGE                540 non-null    int32  
 5   FG_PCT             540 non-null    float64
 6   FG3M               540 non-null    float64
 7   FG3_PCT            540 non-null    float64
 8   FT_PCT             540 non-null    float64
 9   OREB               540 non-null    float64
 10  REB                540 non-null    float64
 11  AST                540 non-null    float64
 12  TOV                540 non-null    float64
 13  STL                540 non-null    float64
 14  BLK                540 non-null    float64
 15  BLKA    