In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import seaborn as sns
import requests
import re
import datetime as dt

## Grabbing Season Per Game Data

In [None]:
# NBA seasons to analyze: range(2017, 2020) yields 2017, 2018 and 2019
# (a year labels the season that ends in it, i.e. 2016-17 through 2018-19)
# NOTE(review): `years` is not referenced again in the visible cells
years = [year for year in range(2017,2020)]

# URL page that I want to scrape:
# the per-season functions take the year as an argument; 2019 is used here as a single test season
year = 2019

season_url = f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html'

season_html = requests.get(season_url).text

season_soup = BeautifulSoup(season_html, 'lxml')

In [None]:
# grabbing column headers

# headers = season_soup.find('table', id='per_game_stats').find('thead').find_all('th')

headers = season_soup.find('thead').find_all('th')

header_list = [th.text for th in headers]

# we don't need the rank column, so we will drop it
header_list = header_list[1:]

header_list

In [None]:
# grabbing row data from cells
rows = season_soup.find_all('tr')[1:]
season_stats = [[td.text for td in rows[i].find_all('td')] for i in range(len(rows))]
season_stats

In [None]:
season_stats = pd.DataFrame(season_stats, columns=header_list)

season_stats

In [None]:
# checking number of unique players throughout the season
len(season_stats['Player'].unique())

In [None]:
# Player data with multiple teams indicate that they were traded during the season
# we want the TOT stats, as they are the average across all games for all teams the player played on

traded_players = season_stats[season_stats['Tm']=='TOT']

traded_players

In [None]:
# list of traded players
traded_list = [player for player in traded_players['Player']]

# remove all rows of players in traded list
season_stats = season_stats[~season_stats['Player'].isin(traded_list)]

# season_stats.drop_duplicates(subset='Player', keep='first')
season_stats = pd.concat([season_stats,traded_players])

In [None]:
# checking for any extra duplicated rows
season_stats.duplicated().values.any()

### Function: Player Averages Over A Season

The `year` argument is the calendar year in which a season ends: for example, 2019 indicates the 2018-19 NBA season.

In [2]:
# putting everything into a single function

def season_per_game(year):
    """Scrape the per-game player table of one season from basketball-reference.

    ``year`` is substituted into the ``NBA_{year}_per_game.html`` URL.

    Returns a DataFrame of cell texts (strings) in which a player who has a
    'TOT' team row is represented only by that row; every other player keeps
    their own rows. Exact duplicate rows and rows with missing values are
    removed and the index is renumbered from 0.
    """
    page_url = f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html'
    soup = BeautifulSoup(requests.get(page_url).text, 'lxml')

    # column names come from the first <thead>; the leading (rank) header is skipped
    column_names = [th.text for th in soup.find('thead').find_all('th')][1:]

    # every <tr> after the first contributes the texts of its <td> cells
    records = [[td.text for td in tr.find_all('td')] for tr in soup.find_all('tr')[1:]]
    per_game = pd.DataFrame(records, columns=column_names)

    # 'TOT' rows are the season totals of players listed under several teams
    traded_rows = per_game[per_game['Tm'] == 'TOT']

    # discard all rows of those players, then put only their 'TOT' rows back
    has_total_row = per_game['Player'].isin(list(traded_rows['Player']))
    untraded_rows = per_game[~has_total_row]
    combined = pd.concat([untraded_rows, traded_rows])

    # dropna() removes rows with missing cells (presumably rows that had no <td> at all)
    return combined.drop_duplicates().dropna().reset_index(drop=True)

In [None]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [None]:
season_2018 = season_per_game(2018)

season_2018

In [None]:
season_2019 = season_per_game(2019)

season_2019

In [3]:
def season_adv(year):
    """Scrape the advanced-stats player table of one season from basketball-reference.

    ``year`` is substituted into the ``NBA_{year}_advanced.html`` URL.

    Returns a DataFrame of cell texts (strings). Players that have a 'TOT'
    team row keep only that row; exact duplicates and rows with missing
    values are removed and the index is renumbered from 0.
    """
    page_url = f'https://www.basketball-reference.com/leagues/NBA_{year}_advanced.html'
    soup = BeautifulSoup(requests.get(page_url).text, 'lxml')

    # header texts of the first <thead>, without the leading (rank) column
    header_cells = soup.find('thead').find_all('th')
    column_names = [cell.text for cell in header_cells][1:]

    # one record per <tr> after the first, built from its <td> texts
    records = []
    for table_row in soup.find_all('tr')[1:]:
        records.append([cell.text for cell in table_row.find_all('td')])
    advanced = pd.DataFrame(records, columns=column_names)

    # players with a 'TOT' (season total) row are reduced to that single row
    traded_rows = advanced[advanced['Tm'] == 'TOT']
    traded_names = list(traded_rows['Player'])
    untraded_rows = advanced[~advanced['Player'].isin(traded_names)]

    combined = pd.concat([untraded_rows, traded_rows])
    return combined.drop_duplicates().dropna().reset_index(drop=True)

In [None]:
season_adv_2019 = season_adv(2019)

season_adv_2019

In [4]:
def adv_shooting(year):
    """Scrape the shooting table of one season and keep its shot-distribution columns.

    ``year`` is substituted into the ``NBA_{year}_shooting.html`` URL.

    Returns a DataFrame of cell texts (strings) with one row per player
    (players with a 'TOT' row keep only that row). Only the first 15 scraped
    columns are considered; 'Pos', 'Age', 'Tm', 'G', 'MP' and 'FG%' are then
    dropped, and every column from the fourth remaining one onward gets a
    '%' prefix.
    """
    page_url = f'https://www.basketball-reference.com/leagues/NBA_{year}_shooting.html'
    soup = BeautifulSoup(requests.get(page_url).text, 'lxml')

    def _not_over_header(css_class):
        # used as a class filter: accept anything except the 'over_header' class
        return css_class != 'over_header'

    # column names come from the first header row that is not an 'over_header' row;
    # the leading (rank) header is skipped
    header_row = soup.find('thead').find('tr', class_=_not_over_header)
    column_names = [th.text for th in header_row.find_all('th')][1:]

    # every <tr> after the first contributes the texts of its <td> cells
    records = [[td.text for td in tr.find_all('td')] for tr in soup.find_all('tr')[1:]]
    shooting = pd.DataFrame(records, columns=column_names)

    # players with a 'TOT' (season total) row are reduced to that single row
    traded_rows = shooting[shooting['Tm'] == 'TOT']
    untraded_rows = shooting[~shooting['Player'].isin(list(traded_rows['Player']))]
    shooting = (
        pd.concat([untraded_rows, traded_rows])
        .drop_duplicates()
        .dropna()
        .reset_index(drop=True)
    )

    # keep the first 15 columns, minus the ones the other season tables already carry
    shooting = shooting.iloc[:, 0:15].drop(columns=['Pos', 'Age', 'Tm', 'G', 'MP', 'FG%'])

    # prefix '%' onto every column from the fourth one onward
    prefixed = {label: '%' + label for label in shooting.columns[3:]}
    return shooting.rename(columns=prefixed)

#     return adv_shooting_stats.columns[2:9]

In [None]:
# NOTE(review): the variable name says 2017 but the call requests the 2019 season —
# confirm which season is intended and make the name and the argument agree.
adv_shooting_2017 = adv_shooting(2019)
adv_shooting_2017

In [None]:
adv_shooting_2017.dtypes

## Grabbing Individual Player Attributes (Height, Weight)

In [None]:
players_2018 = list(season_per_game(2018)['Player'])

len(players_2018)

In [None]:
# plan: search for player id number based on player name
player = 'Jaylen Adams'

player_info_tag = season_soup.find('td', string=player)

player_id = player_info_tag['data-append-csv']

player_id

# retrieve player info on individual page based on id

In [5]:
# generates a dictionary of player names & id's based on season dataframe
def player_id_dict_generator(df,year):
    """Map every player name in ``df`` to the player id used on basketball-reference.

    The ids are read from the ``data-append-csv`` attribute of the ``<td>``
    cells of the season's per-game page.

    Parameters
    ----------
    df : object supporting ``df['Player']``
        Season data whose 'Player' entries are the names to look up.
    year : int or str
        Value substituted into the ``NBA_{year}_per_game.html`` URL.

    Returns
    -------
    dict
        ``{player name: player id}`` with surrounding whitespace stripped from the id.

    Raises
    ------
    KeyError
        If a name from ``df['Player']`` has no id cell on the page.
    """
    player_list = list(df['Player'])

    season_html = requests.get(f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html').text

    season_soup = BeautifulSoup(season_html, 'lxml')

    # Walk the page once and index the id cells by their text, instead of
    # searching the whole document again for every player. setdefault keeps the
    # first cell seen for a name, which is the cell a per-name search would return.
    ids_by_name = {}
    for cell in season_soup.find_all('td', attrs={'data-append-csv': True}):
        ids_by_name.setdefault(cell.text, cell['data-append-csv'].strip())

    player_id_dict = {}
    for player in player_list:
        if player not in ids_by_name:
            raise KeyError(f'no player id found for {player!r} on the {year} per-game page')
        player_id_dict[player] = ids_by_name[player]

    return player_id_dict

In [None]:
player_id_2018 = player_id_dict_generator(season_2018, 2018)

In [None]:
player_id_2018

In [6]:
def height_to_inches(height):
    """Convert a height string such as ``'6-10'`` into total inches.

    The first character is read as the number of feet; everything from the
    third character onward is read as the number of inches.
    """
    feet_text = height[0]
    inches_text = height[2:]
    return 12 * int(feet_text) + int(inches_text)

In [7]:
# grabs individual player height & weight according to webpage ID
def player_ht_wt(player, player_id):
    """Fetch one player's page and return their height and weight.

    Parameters
    ----------
    player : str
        Player name, copied into the result unchanged.
    player_id : str
        Page id; its first character selects the URL sub-directory.

    Returns
    -------
    dict
        Keys 'Player', 'player_id', 'Height' and 'Weight'. 'Height' is the
        value returned by ``height_to_inches``; 'Weight' is the leading
        integer of the weight text. Either value is ``None`` when the page
        does not contain the corresponding ``itemprop`` span.
    """
    player_html = requests.get(f'https://www.basketball-reference.com/players/{player_id[0]}/{player_id}.html').text

    soup = BeautifulSoup(player_html, 'lxml')

    # Look the spans up first and test them for None: a non-empty page that
    # lacks them (for example an error page) must yield None values rather than
    # an AttributeError on `.text`.
    height_tag = soup.find('span', attrs={"itemprop":"height"})
    weight_tag = soup.find('span', attrs={"itemprop":"weight"})

    plyr_height = None
    if height_tag is not None:
        plyr_height = height_to_inches(height_tag.text)

    plyr_weight = None
    if weight_tag is not None:
        # take the leading run of digits instead of a fixed 3-character slice,
        # so weights with fewer or more than three digits are parsed too
        weight_digits = re.match(r'\s*(\d+)', weight_tag.text)
        if weight_digits is not None:
            plyr_weight = int(weight_digits.group(1))

    return {
        'Player': player,
        'player_id': player_id,
        'Height': plyr_height,
        'Weight': plyr_weight,
    }

In [None]:
# showcases what happens to height & weight columns for player pages
# with len(html) == 0
player_ht_wt('Damian Jones','jonesda03')

In [8]:
import time
# combines all player metadata dictionaries into a list
def player_ht_wt_looper(id_list, delay=0):
    """Collect the height/weight dictionary of every player in ``id_list``.

    Parameters
    ----------
    id_list : dict
        ``{player name: player id}``; one ``player_ht_wt`` call is made per item.
    delay : int or float, optional
        Seconds to sleep between consecutive calls so the site is not hit with
        back-to-back requests. The default of 0 makes no pause at all.

    Returns
    -------
    list
        The ``player_ht_wt`` results, in the iteration order of ``id_list``.
    """
    player_ht_wt_list = []

    for position, (player, player_id) in enumerate(id_list.items()):
        # pause only between requests, never before the first one
        if delay and position:
            time.sleep(delay)
        player_ht_wt_list.append(player_ht_wt(player, player_id))

    return player_ht_wt_list

In [None]:
# use the id dictionary that is actually built above (player_id_2018);
# no `player_id_2019` is defined before this cell, so the old call raised NameError
player_ht_wt_looper(player_id_2018)

In [None]:
player_test_html = requests.get('https://www.basketball-reference.com/players/j/jonesda03.html').text
test_soup = BeautifulSoup(player_test_html, 'lxml')

if len(test_soup)!=0:
    print("worked!")
    print(len(test_soup))
    
else:
    print('nope!')

In [None]:
url = 'https://www.basketball-reference.com/robots.txt'

response = requests.get(url)
print(response.text)

In [None]:
# converting the list of dictionaries into a dataframe
# NOTE(review): `players_5_2019` is not assigned anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel — presumably it held a short list returned by
# player_ht_wt_looper; confirm and define it before this cell (or remove the cell).
pd.DataFrame(players_5_2019)

### Function: Creating Player Metadata DF

In [9]:
def player_metadata(player_df,year):
    """Build a DataFrame of player ids, heights and weights for one season.

    The names in ``player_df`` are resolved to page ids with
    ``player_id_dict_generator(player_df, year)``; the resulting dictionary is
    passed to ``player_ht_wt_looper`` and the list of records it returns is
    turned into a DataFrame (one row per record).
    """
    return pd.DataFrame(player_ht_wt_looper(player_id_dict_generator(player_df, year)))

In [None]:
metadata_2019 = player_metadata(season_2019,2019)

In [None]:
metadata_2019

In [10]:
def player_webpage_checker(player_id_dict):
    """Return the ids whose player page does not answer with HTTP status 200.

    ``player_id_dict`` maps player names to page ids; one GET request is made
    per entry and the failing ids are returned in iteration order.
    """
    def _status_of(player_id):
        page_url = f'https://www.basketball-reference.com/players/{player_id[0]}/{player_id}.html'
        return requests.get(page_url).status_code

    return [
        player_id
        for _name, player_id in player_id_dict.items()
        if _status_of(player_id) != 200
    ]

In [None]:
# the checker needs the {name: id} dictionary; calling it with no argument raised TypeError
player_webpage_checker(player_id_2018)

### Function: Combining All DataFrames by Player

In [11]:
def df_combiner(df1, df2):
    """Left-join ``df2`` onto ``df1`` on their shared 'Player' column.

    Every row of ``df1`` is kept; columns of ``df2`` are missing (NaN) for
    players that do not appear in ``df2``.
    """
    return df1.merge(df2, how='left', on='Player')

In [None]:
season_2019_combined = df_combiner(season_2019, metadata_2019)

In [None]:
season_2019_combined

In [12]:
def single_season_combined(year):
    """Assemble one wide per-player DataFrame for a single season.

    The per-game table is the base; the advanced table (without its 'Pos',
    'Age', 'Tm', 'G' and 'MP' columns), the player metadata and the shooting
    table are left-joined onto it, in that order, through ``df_combiner``.
    """
    # the four source tables, fetched in this order
    per_game_df = season_per_game(year)
    advanced_df = season_adv(year).drop(columns=['Pos', 'Age', 'Tm', 'G', 'MP'])
    metadata_df = player_metadata(per_game_df, year)
    shooting_df = adv_shooting(year)

    # join each additional table onto the running result, one after another
    merged = per_game_df
    for extra_df in (advanced_df, metadata_df, shooting_df):
        merged = df_combiner(merged, extra_df)

    # the joins leave suffixed copies of the '\xa0'-named column behind; remove them
    return merged.drop(columns=['\xa0_x', '\xa0_y'])

In [None]:
combined_df_2017 = single_season_combined(2017)

combined_df_2017

In [None]:
combined_df_2017.columns

In [None]:
combined_df_2017.dtypes

In [None]:
combined_df_2017.columns[41]

In [None]:
combined_df_2017.columns[46]

In [None]:
combined_df_2017.columns[55]

In [None]:
combined_df_2017.drop(columns=['\xa0_x', '\xa0_y'])

In [None]:
combined_df_2018 = single_season_combined(2018)

combined_df_2018

In [None]:
combined_df_2019 = single_season_combined(2019)
combined_df_2019

In [13]:
def multiple_seasons_combined(year_list):
    """Stack the combined single-season DataFrames of several seasons.

    ``single_season_combined`` is called once per entry of ``year_list``, in
    order, and the results are concatenated row-wise.
    """
    season_frames = [single_season_combined(season) for season in year_list]
    stacked = pd.concat(season_frames)
    # reset_index() without drop keeps each row's old per-season index in an 'index' column
    return stacked.reset_index()

In [15]:
multiple_df = multiple_seasons_combined([2017,2018,2019])

In [None]:
# to make sure that multiple years are included in the dataset
multiple_df['Player'].duplicated().any()

In [None]:
multiple_df

In [None]:
multiple_df.shape

In [31]:
def df_cleaner(df, games_in_season=82):
    """Return a cleaned, numeric copy of the combined multi-season DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Combined frame; must contain 'G', 'Height', 'Weight', 'FGA', '%0-3',
        '%3-10', '%10-16' and '%16-3P'. It is not modified.
    games_in_season : int, optional
        Number of games used for 'Games_Missed' and 'Pct_Games_Missed'
        (default 82).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which
        * 'G' is an integer, 'Games_Missed' is ``games_in_season - G`` and
          'Pct_Games_Missed' is that difference divided by ``games_in_season``;
        * every column from the third one onward is converted to a numeric
          dtype when all of its values can be parsed, and left as is otherwise;
        * 'BMI' is ``Weight * 703 / Height**2``;
        * '0-3_A', '3-10_A', '10-16_A' and '16-3P_A' are the matching '%…'
          column multiplied by 'FGA';
        * the 'index' column (if present) is removed, rows without 'Height' or
          'Weight' are dropped, the index is renumbered and remaining missing
          values are replaced by 0.
    """
    # Work on a copy: assigning columns on the argument itself would silently
    # change the caller's frame and make re-running the calling cell unsafe.
    final_df = df.copy()

    final_df['G'] = final_df['G'].astype(int)
    final_df['Games_Missed'] = games_in_season - final_df['G']
    final_df['Pct_Games_Missed'] = final_df['Games_Missed'] / games_in_season

    # The first two columns are skipped (presumably 'index' and 'Player' in the
    # frame produced by multiple_seasons_combined).
    for column in list(final_df.columns[2:]):
        try:
            final_df[column] = pd.to_numeric(final_df[column])
        except (ValueError, TypeError):
            # text column (e.g. a position or team code): keep it unchanged.
            # This replaces the deprecated errors='ignore' option.
            continue

    final_df['BMI'] = (final_df['Weight'] * 703) / (final_df['Height']**2)
    final_df['0-3_A'] = final_df['%0-3']*final_df['FGA']
    final_df['3-10_A'] = final_df['%3-10']*final_df['FGA']
    final_df['10-16_A'] = final_df['%10-16']*final_df['FGA']
    final_df['16-3P_A'] = final_df['%16-3P']*final_df['FGA']

    # errors='ignore' lets the function also accept a frame without an 'index' column
    final_df = final_df.drop(columns=['index'], errors='ignore')

    return final_df.dropna(subset=['Height', 'Weight']).reset_index(drop=True).fillna(0)

In [32]:
NBA_final_df = df_cleaner(multiple_df)

# final_df = final_df.reset_index(drop=True).drop(columns=['index'])

NBA_final_df

Unnamed: 0,Player,Pos,Age,Tm,BMI,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,player_id,Height,Weight,Dist.,%2P,%0-3,%3-10,%10-16,%16-3P,%3P,Games_Missed,Pct_Games_Missed,0-3_A,3-10_A,10-16_A,16-3P_A
0,Álex Abrines,SG,23,OKC,23.109796,68,6,15.5,2.0,5.0,0.393,1.4,3.6,0.381,0.6,1.4,0.426,0.531,0.6,0.7,0.898,0.3,1.0,1.3,0.6,0.5,0.1,0.5,1.7,6.0,10.1,0.56,0.724,0.144,1.9,7.1,4.5,5.5,1.7,0.6,8.3,15.9,1.2,0.9,2.1,0.096,-1.3,-0.4,-1.6,0.1,abrinal01,78.0,200.0,20.6,0.276,0.12,0.047,0.044,0.065,0.724,14,0.170732,0.6,0.235,0.22,0.325
1,Steven Adams,C,23,OKC,27.042386,80,80,29.9,4.7,8.2,0.571,0.0,0.0,0.0,4.7,8.2,0.572,0.571,2.0,3.2,0.611,3.5,4.2,7.7,1.1,1.1,1.0,1.8,2.4,11.3,16.5,0.589,0.002,0.392,13.0,15.4,14.2,5.4,1.8,2.6,16.0,16.2,3.3,3.1,6.5,0.13,-0.2,0.0,-0.2,1.1,adamsst01,83.0,265.0,3.7,0.998,0.557,0.388,0.046,0.005,0.002,2,0.02439,4.5674,3.1816,0.3772,0.041
2,Arron Afflalo,SG,31,SAC,24.899646,61,45,25.9,3.0,6.9,0.44,1.0,2.5,0.411,2.0,4.4,0.457,0.514,1.4,1.5,0.892,0.1,1.9,2.0,1.3,0.3,0.1,0.7,1.7,8.4,8.9,0.559,0.36,0.221,0.7,8.4,4.6,7.4,0.7,0.3,8.4,14.4,1.2,0.2,1.4,0.043,-2.1,-1.5,-3.6,-0.7,afflaar01,77.0,210.0,16.8,0.64,0.1,0.117,0.186,0.236,0.36,21,0.256098,0.69,0.8073,1.2834,1.6284
3,Alexis Ajinça,C,28,NOP,23.572742,39,15,15.0,2.3,4.6,0.5,0.0,0.1,0.0,2.3,4.5,0.511,0.5,0.7,1.0,0.725,1.2,3.4,4.5,0.3,0.5,0.6,0.8,2.0,5.3,12.9,0.529,0.022,0.225,8.3,23.8,16.0,3.1,1.7,3.1,13.7,17.2,0.0,0.9,1.0,0.08,-4.0,0.7,-3.3,-0.2,ajincal01,86.0,248.0,9.0,0.978,0.348,0.253,0.107,0.27,0.022,43,0.52439,1.6008,1.1638,0.4922,1.242
4,Cole Aldrich,C,28,MIN,25.511685,62,0,8.6,0.7,1.4,0.523,0.0,0.0,0.0,0.7,1.4,0.523,0.523,0.2,0.4,0.682,0.8,1.7,2.5,0.4,0.4,0.4,0.3,1.4,1.7,12.7,0.549,0.0,0.256,11.0,23.9,17.4,6.4,2.4,3.7,15.1,9.4,0.6,0.7,1.3,0.116,-2.3,1.5,-0.8,0.2,aldrico01,83.0,250.0,3.0,1.0,0.581,0.407,0.012,0.0,0.0,20,0.243902,0.8134,0.5698,0.0168,0.0
5,LaMarcus Aldridge,PF,31,SAS,25.511685,72,72,32.4,6.9,14.6,0.477,0.3,0.8,0.411,6.6,13.8,0.48,0.488,3.1,3.8,0.812,2.4,4.9,7.3,1.9,0.6,1.2,1.4,2.2,17.3,18.6,0.532,0.053,0.258,8.5,16.6,12.7,9.9,1.0,3.0,7.7,24.5,3.5,3.7,7.2,0.149,0.7,0.1,0.8,1.7,aldrila01,83.0,250.0,11.8,0.947,0.222,0.194,0.202,0.328,0.053,10,0.121951,3.2412,2.8324,2.9492,4.7888
6,Lavoy Allen,PF,27,IND,27.858558,61,5,14.3,1.3,2.8,0.458,0.0,0.0,0.0,1.3,2.7,0.461,0.458,0.4,0.5,0.697,1.7,1.9,3.6,0.9,0.3,0.4,0.5,1.3,2.9,11.6,0.485,0.006,0.196,13.7,14.5,14.1,9.1,1.0,2.4,13.7,10.9,0.9,0.8,1.7,0.093,-1.5,-0.2,-1.7,0.1,allenla01,81.0,260.0,8.9,0.994,0.357,0.256,0.06,0.321,0.006,21,0.256098,0.9996,0.7168,0.168,0.8988
7,Tony Allen,SG,35,MEM,25.924342,71,66,27.0,3.9,8.4,0.461,0.2,0.8,0.278,3.6,7.6,0.479,0.473,1.1,1.8,0.615,2.3,3.2,5.5,1.4,1.6,0.4,1.4,2.5,9.1,13.3,0.493,0.091,0.218,9.6,13.8,11.7,8.4,3.1,1.4,13.3,17.9,0.2,2.9,3.1,0.077,-2.0,1.1,-1.0,0.5,allento01,76.0,213.0,5.6,0.909,0.652,0.166,0.03,0.061,0.091,11,0.134146,5.4768,1.3944,0.252,0.5124
8,Al-Farouq Aminu,PF,26,POR,24.165625,61,25,29.1,3.0,7.6,0.393,1.1,3.5,0.33,1.9,4.2,0.445,0.468,1.6,2.2,0.706,1.3,6.1,7.4,1.6,1.0,0.7,1.5,1.7,8.7,11.3,0.506,0.455,0.292,4.8,23.5,14.1,7.9,1.7,2.0,15.2,15.4,-0.1,2.0,1.9,0.051,-1.4,0.4,-1.1,0.4,aminual01,80.0,220.0,14.0,0.545,0.32,0.133,0.03,0.06,0.455,21,0.256098,2.432,1.0108,0.228,0.456
9,Chris Andersen,C,38,CLE,25.614961,12,0,9.5,0.8,1.8,0.409,0.0,0.3,0.0,0.8,1.6,0.474,0.409,0.8,1.2,0.714,0.8,1.8,2.6,0.4,0.4,0.6,0.4,1.7,2.3,11.6,0.497,0.136,0.636,9.0,20.6,15.0,5.9,2.2,5.0,15.1,13.0,0.1,0.2,0.2,0.102,-4.7,2.2,-2.5,0.0,anderch01,82.0,245.0,7.4,0.864,0.636,0.045,0.045,0.136,0.136,70,0.853659,1.1448,0.081,0.081,0.2448


In [29]:
pd.set_option("display.max_rows", None, "display.max_columns", None)
NBA_final_df

Unnamed: 0,Player,Pos,Age,Tm,BMI,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,player_id,Height,Weight,Dist.,%2P,%0-3,%3-10,%10-16,%16-3P,%3P,Games_Missed,Pct_Games_Missed,0-3_A,3-10_A,10-16_A,16-3P_A
0,Álex Abrines,SG,23,OKC,23.109796,68,6,15.5,2.0,5.0,0.393,1.4,3.6,0.381,0.6,1.4,0.426,0.531,0.6,0.7,0.898,0.3,1.0,1.3,0.6,0.5,0.1,0.5,1.7,6.0,10.1,0.56,0.724,0.144,1.9,7.1,4.5,5.5,1.7,0.6,8.3,15.9,1.2,0.9,2.1,0.096,-1.3,-0.4,-1.6,0.1,abrinal01,78.0,200.0,20.6,0.276,0.12,0.047,0.044,0.065,0.724,14,0.170732,0.6,0.235,0.22,0.325
1,Steven Adams,C,23,OKC,27.042386,80,80,29.9,4.7,8.2,0.571,0.0,0.0,0.0,4.7,8.2,0.572,0.571,2.0,3.2,0.611,3.5,4.2,7.7,1.1,1.1,1.0,1.8,2.4,11.3,16.5,0.589,0.002,0.392,13.0,15.4,14.2,5.4,1.8,2.6,16.0,16.2,3.3,3.1,6.5,0.13,-0.2,0.0,-0.2,1.1,adamsst01,83.0,265.0,3.7,0.998,0.557,0.388,0.046,0.005,0.002,2,0.02439,4.5674,3.1816,0.3772,0.041
2,Arron Afflalo,SG,31,SAC,24.899646,61,45,25.9,3.0,6.9,0.44,1.0,2.5,0.411,2.0,4.4,0.457,0.514,1.4,1.5,0.892,0.1,1.9,2.0,1.3,0.3,0.1,0.7,1.7,8.4,8.9,0.559,0.36,0.221,0.7,8.4,4.6,7.4,0.7,0.3,8.4,14.4,1.2,0.2,1.4,0.043,-2.1,-1.5,-3.6,-0.7,afflaar01,77.0,210.0,16.8,0.64,0.1,0.117,0.186,0.236,0.36,21,0.256098,0.69,0.8073,1.2834,1.6284
3,Alexis Ajinça,C,28,NOP,23.572742,39,15,15.0,2.3,4.6,0.5,0.0,0.1,0.0,2.3,4.5,0.511,0.5,0.7,1.0,0.725,1.2,3.4,4.5,0.3,0.5,0.6,0.8,2.0,5.3,12.9,0.529,0.022,0.225,8.3,23.8,16.0,3.1,1.7,3.1,13.7,17.2,0.0,0.9,1.0,0.08,-4.0,0.7,-3.3,-0.2,ajincal01,86.0,248.0,9.0,0.978,0.348,0.253,0.107,0.27,0.022,43,0.52439,1.6008,1.1638,0.4922,1.242
4,Cole Aldrich,C,28,MIN,25.511685,62,0,8.6,0.7,1.4,0.523,0.0,0.0,,0.7,1.4,0.523,0.523,0.2,0.4,0.682,0.8,1.7,2.5,0.4,0.4,0.4,0.3,1.4,1.7,12.7,0.549,0.0,0.256,11.0,23.9,17.4,6.4,2.4,3.7,15.1,9.4,0.6,0.7,1.3,0.116,-2.3,1.5,-0.8,0.2,aldrico01,83.0,250.0,3.0,1.0,0.581,0.407,0.012,0.0,0.0,20,0.243902,0.8134,0.5698,0.0168,0.0
5,LaMarcus Aldridge,PF,31,SAS,25.511685,72,72,32.4,6.9,14.6,0.477,0.3,0.8,0.411,6.6,13.8,0.48,0.488,3.1,3.8,0.812,2.4,4.9,7.3,1.9,0.6,1.2,1.4,2.2,17.3,18.6,0.532,0.053,0.258,8.5,16.6,12.7,9.9,1.0,3.0,7.7,24.5,3.5,3.7,7.2,0.149,0.7,0.1,0.8,1.7,aldrila01,83.0,250.0,11.8,0.947,0.222,0.194,0.202,0.328,0.053,10,0.121951,3.2412,2.8324,2.9492,4.7888
6,Lavoy Allen,PF,27,IND,27.858558,61,5,14.3,1.3,2.8,0.458,0.0,0.0,0.0,1.3,2.7,0.461,0.458,0.4,0.5,0.697,1.7,1.9,3.6,0.9,0.3,0.4,0.5,1.3,2.9,11.6,0.485,0.006,0.196,13.7,14.5,14.1,9.1,1.0,2.4,13.7,10.9,0.9,0.8,1.7,0.093,-1.5,-0.2,-1.7,0.1,allenla01,81.0,260.0,8.9,0.994,0.357,0.256,0.06,0.321,0.006,21,0.256098,0.9996,0.7168,0.168,0.8988
7,Tony Allen,SG,35,MEM,25.924342,71,66,27.0,3.9,8.4,0.461,0.2,0.8,0.278,3.6,7.6,0.479,0.473,1.1,1.8,0.615,2.3,3.2,5.5,1.4,1.6,0.4,1.4,2.5,9.1,13.3,0.493,0.091,0.218,9.6,13.8,11.7,8.4,3.1,1.4,13.3,17.9,0.2,2.9,3.1,0.077,-2.0,1.1,-1.0,0.5,allento01,76.0,213.0,5.6,0.909,0.652,0.166,0.03,0.061,0.091,11,0.134146,5.4768,1.3944,0.252,0.5124
8,Al-Farouq Aminu,PF,26,POR,24.165625,61,25,29.1,3.0,7.6,0.393,1.1,3.5,0.33,1.9,4.2,0.445,0.468,1.6,2.2,0.706,1.3,6.1,7.4,1.6,1.0,0.7,1.5,1.7,8.7,11.3,0.506,0.455,0.292,4.8,23.5,14.1,7.9,1.7,2.0,15.2,15.4,-0.1,2.0,1.9,0.051,-1.4,0.4,-1.1,0.4,aminual01,80.0,220.0,14.0,0.545,0.32,0.133,0.03,0.06,0.455,21,0.256098,2.432,1.0108,0.228,0.456
9,Chris Andersen,C,38,CLE,25.614961,12,0,9.5,0.8,1.8,0.409,0.0,0.3,0.0,0.8,1.6,0.474,0.409,0.8,1.2,0.714,0.8,1.8,2.6,0.4,0.4,0.6,0.4,1.7,2.3,11.6,0.497,0.136,0.636,9.0,20.6,15.0,5.9,2.2,5.0,15.1,13.0,0.1,0.2,0.2,0.102,-4.7,2.2,-2.5,0.0,anderch01,82.0,245.0,7.4,0.864,0.636,0.045,0.045,0.136,0.136,70,0.853659,1.1448,0.081,0.081,0.2448


In [None]:
# df_cleaner already removes the 'index' column, so tolerate its absence here
# (an unconditional drop raises KeyError) and keep the cell safe to re-run
NBA_final_df = NBA_final_df.drop(columns=['index'], errors='ignore')

In [None]:
NBA_final_df.dtypes

In [None]:
sns.set(style='white')
sns.pairplot(NBA_final_df)

In [None]:
NBA_final_df.info()

In [19]:
# import pickle
# df_pickle = '../data/NBA_stats_df.pk'
# var_to_pickle(NBA_final_df, df_pickle)

NameError: name 'var_to_pickle' is not defined

In [33]:
NBA_final_df.to_csv('player_data.csv',index=False)