## 2017-2018 and 2018-2019 Seasons NBA Player Total Stats + Advanced Stats

In [1]:
# Import packages / libraries
import pandas as pd
import numpy as np

In [2]:
# Season 17-18 Player Data
player_17_18_stat = pd.read_csv("NBA_17_18_season_player_stat.csv", encoding = 'unicode_escape')
player_17_18_adv_stat = pd.read_csv("NBA_17_18_season_player_adv_stat.csv", encoding = 'unicode_escape')

In [3]:
# Merge basic stats and advanced stats sideways
player_17_18_df = pd.concat([player_17_18_stat,player_17_18_adv_stat], axis=1).dropna(axis=1, how="all")

In [4]:
# Drop duplicate columns (e.g. Rk) (Due to trade, team change within the same season)
player_17_18_df = player_17_18_df.loc[:,~player_17_18_df.columns.duplicated()]

In [5]:
# Season 18-19 Player Data
player_18_19_stat = pd.read_csv("NBA_18_19_season_player_stat.csv")
player_18_19_adv_stat = pd.read_csv("NBA_18_19_season_player_adv_stat.csv")

In [6]:
# Merge basic stats and advanced stats sideways
player_18_19_df = pd.concat([player_18_19_stat,player_18_19_adv_stat], axis=1).dropna(axis=1, how="all")

In [7]:
# Drop duplicate columns (e.g. Rk) (Due to trade, team change within the same season)
player_18_19_df = player_18_19_df.loc[:,~player_18_19_df.columns.duplicated()]

In [8]:
# 17-18 Season all star players
all_star = ['LeBron James','Kevin Durant', 'Russell Westbrook', 'Kyrie Irving', 'Anthony Davis', 'Paul George', 'Andre Drummond', 
            'Bradley Beal', 'Victor Oladipo', 'Kemba Walker', 'Goran Dragić', 'LaMarcus Aldrige', 'James Harden', 'DeMar DeRozan', 
            'Stephen Curry','Giannis Antetokounmpo', 'Joel Emblid', 'Kyle Lowry', 'Klay Thompson', 'Damian Lillard', 
            'Draymond Green', 'Karl-Anthony Towns','Al Horford']

In [9]:
# Making a new indicator column that tells you if the player was all star player or not in 17-18 season (1: yes, 0: N0)
player_17_18_df['all_star_bool'] = np.where(player_17_18_df.Player.isin(all_star), 1, 0)

In [10]:
player_17_18_df_cleaned = \
pd.merge(player_17_18_df[['Rk','Player']], player_17_18_df.groupby(['Rk']).mean(), how='right', on='Rk')

In [11]:
player_18_19_df_cleaned = \
pd.merge(player_18_19_df[['Rk','Player']], player_18_19_df.groupby(['Rk']).mean(), how='right', on='Rk')

In [12]:
# player_17_18_df_cleaned.to_csv("player_17_18_stats.csv")

In [13]:
# player_18_19_df_cleaned.to_csv("player_18_19_stats.csv")

In [14]:
player_18_19_df_cleaned.head()

Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,1,Alex Abrines,25.0,31.0,2.0,588.0,56.0,157.0,0.357,41.0,...,7.9,12.2,0.1,0.6,0.6,0.053,-2.4,-0.9,-3.4,-0.2
1,2,Quincy Acy,28.0,10.0,0.0,123.0,4.0,18.0,0.222,2.0,...,15.2,9.2,-0.1,0.0,-0.1,-0.022,-5.7,-0.3,-5.9,-0.1
2,3,Jaylen Adams,22.0,34.0,1.0,428.0,38.0,110.0,0.345,25.0,...,19.7,13.5,-0.1,0.2,0.1,0.011,-3.1,-1.3,-4.4,-0.3
3,4,Steven Adams,25.0,80.0,80.0,2669.0,481.0,809.0,0.595,0.0,...,12.6,16.4,5.1,4.0,9.1,0.163,0.6,2.1,2.7,3.2
4,5,Bam Adebayo,21.0,82.0,28.0,1913.0,280.0,486.0,0.576,3.0,...,17.1,15.8,3.4,3.4,6.8,0.171,-0.6,3.6,3.0,2.4


#### Checking Normality of conditional distributions with Shapiro-Wilk Test

In [43]:
from scipy.stats import shapiro

not_gaussian_one = []
not_gaussian_zero = []

for i in player_17_18_df_cleaned.select_dtypes(include=['int','float']).columns.tolist():
    stat, p = shapiro(player_17_18_df_cleaned[player_17_18_df_cleaned.all_star_bool==1][i])
    stat2, p2 = shapiro(player_17_18_df_cleaned[player_17_18_df_cleaned.all_star_bool==0][i])
    if p <= 0.05:
        not_gaussian_one.append(i)
    if p2 <= 0.05:
        not_gaussian_zero.append(i)



In [44]:
not_gaussian_one

['G', 'GS', '3P%', 'ORB', 'TRB', 'BLK', 'TOV', 'ORB%', 'TRB%', 'BLK%']

In [45]:
not_gaussian_zero

['Age',
 'G',
 'GS',
 'MP',
 'FG',
 'FGA',
 '3P',
 '3PA',
 '2P',
 '2PA',
 'FT',
 'FTA',
 'ORB',
 'DRB',
 'TRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS',
 'PER',
 'ORB%',
 'DRB%',
 'TRB%',
 'AST%',
 'STL%',
 'BLK%',
 'USG%',
 'OWS',
 'DWS',
 'WS',
 'WS/48',
 'OBPM',
 'DBPM',
 'BPM',
 'VORP']

## Instagram Follower Numbers for Major NBA Players

In [15]:
NBA_insta = ['giannis_an34','stephencurry30', 'kingjames', 'jharden13', 'antdavis23',
 'joelembiid', 'ygtrece', 'damianlillard','karltowns', 'jimmybutler',
'kyrieirving', 'rudygobert27','russwest44',
 'bensimmons', 'blakegriffin23', '_kw15', 'bradbeal3', 'money23green', 'alhorford',
'lukadoncic', 'swipathefox','cp3', 'pskills43', 'jrue_holiday11', 'dbook', 'mac10', 'kyle_lowry7',
'k_mid22','spidadmitchell','aldridge_121', '3jmccollum','jaytatum0','jmglitxh27','kporzee','easymoneysniper', 
'demar_derozan', 'andredrummondd', 'kevinlove', 'tobiasharris', 'klaythompson', 'vicoladipo', 'turner_myles',
'danilogallogallinari', 'dloading', 'traeyoung','paulmilsap4', 'jcollins20_', 'stevenadams',  'marcgasol',
'malcolmbrogdon', 'ottodayporter22','buddylove242', 'joeingles7', 'thats_g_', 'thebledshow', 'carislavert',
'jrich_0', 'fchwpo', 'louwillville', 'youngamechanger', 'shai', 'zachlavine8', 'ccapela15', 'dsabonis11', 
'gordonhayward', '22wiggins', '1ngram4', 'ruuufio', 'fredvanvleet', 'dtaegraham', 'zo',  'deandreayton',
'zionwilliamson', 'jamorant', 'rjbarrett', 'nolimitherro', 'nunnbetter_', 'bam1of1', '23.savage23', 'hassanwhiteside','carmeloanthony']

In [16]:
import requests

NBA_insta_follower_num = []

for player in NBA_insta:

    user = player
    url = 'https://www.instagram.com/' + user
    r = requests.get(url).text

    start = '"edge_followed_by":{"count":'
    end = '},"followed_by_viewer"'

    NBA_insta_follower_num.append(r[r.find(start)+len(start):r.rfind(end)])

In [17]:
insta_df = pd.concat([pd.Series(NBA_insta), pd.Series(NBA_insta_follower_num)], axis=1)

In [18]:
insta_df.columns = ['player_ig_handle', 'follower_num']

In [19]:
guards = ['stephencurry30', 'jharden13', 'damianlillard', 'jimmybutler', 'kyrieirving', 'russwest44', 'bradbeal3', 'lukadoncic', 
'swipathefox', 'cp3', 'jrue_holiday11', 'dbook', 'mac10', 'kyle_lowry7', 'k_mid22', 'spidadmitchell', '3jmccollum', 
'jmglitxh27', 'demar_derozan', 'klaythompson', 'vicoladipo', 'dloading', 'traeyoung', 'malcolmbrogdon', 'buddylove242', 
'joeingles7', 'thebledshow', 'thats_g_', 'jrich_0', 'louwillville', 'shai', 'fchwpo', 'zachlavine8', 'youngamechanger', 
'1ngram4', 'zo', 'fredvanvleet', 'ruuufio', 'dtaegraham', 'jamorant', 'rjbarrett', 'nolimitherro', 'nunnbetter_', 'bensimmons']

In [20]:
forwards = ['giannis_an34', 'kingjames', 'antdavis23', 'blakegriffin23', '_kw15', 'ygtrece', 'money23green', 
            'alhorford', 'pskills43', 'aldridge_121', 'jaytatum0', 'kporzee', 'easymoneysniper', 'kevinlove', 'tobiasharris', 
            'danilogallogallinari', 'paulmilsap4', 'ottodayporter22', 'carislavert', 
            'zionwilliamson', 'carmeloanthony', 'gordonhayward', '22wiggins']

In [21]:
center = ['joelembiid', 'karltowns', 'rudygobert27', 'andredrummondd', 'turner_myles', 'jcollins20_', 'stevenadams', 'marcgasol', 
          'ccapela15', 'dsabonis11', 'deandreayton', '23.savage23', 'hassanwhiteside','bam1of1']

In [22]:
# Encoding "guard" players
insta_df['position'] = np.where(insta_df.player_ig_handle.isin(guards), 'guard', 'forward or center')

In [23]:
# Encoding "forward" players
insta_df['position'] = np.where(insta_df.player_ig_handle.isin(forwards), 'forward', insta_df.position)

In [24]:
# Encoding "center" players
insta_df['position'] = np.where(insta_df.player_ig_handle.isin(center), 'center', insta_df.position)

In [26]:
insta_df.head()

Unnamed: 0,player_ig_handle,follower_num,position
0,giannis_an34,6389945,forward
1,stephencurry30,27753089,guard
2,kingjames,53436120,forward
3,jharden13,10983329,guard
4,antdavis23,4845788,forward


In [25]:
# insta_df.to_csv("major_player_insta_follow_num.csv")