In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

In [2]:
# Read the work-in-progress workbook into a DataFrame and preview its first rows.
workbook_path = 'NBA Highschool & NBA_WIP.xlsx'
nba = pd.read_excel(workbook_path)
nba.head()

Unnamed: 0,Year,RK,PLAYER,POS_HS,HOMETOWN,HOMETOWN_,HT,WT,GRADE,SCHOOL,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,2007,1,Kevin Love,C,"Lake Oswego, OR",Lake Oswego High School,6'9'',255.0,99,UCLA,...,10.3,28.8,10.6,3.7,14.3,0.245,7.2,1.2,8.4,7.3
1,2007,2,Eric Gordon,SG,"Indianapolis, IN",North Central High School,6'5'',205.0,98,INDIANA,...,12.8,23.2,2.0,0.5,2.5,0.058,1.2,-2.5,-1.2,0.4
2,2007,3,O.J. Mayo,SG,"Huntington, WV",Huntington High School,6'4'',195.0,98,USC,...,15.7,20.9,2.3,1.8,4.2,0.069,1.1,-0.7,0.4,1.7
3,2007,4,Kyle Singler,SF,"Medford, OR",Camden Catholic High School,6'8'',215.0,98,DUKE,...,10.4,14.0,3.6,0.7,4.4,0.09,0.7,-0.8,-0.1,1.1
4,2007,5,Derrick Rose,PG,"Chicago, IL",Simeon Career Academy,6'4'',195.0,98,MEMPHIS,...,16.0,31.5,-0.6,0.4,-0.2,-0.036,-2.8,-2.3,-5.1,-0.2


In [3]:
# List every column name to see which fields the workbook provides.
nba.columns

Index(['Year', 'RK', 'PLAYER', 'POS_HS', 'HOMETOWN', 'HOMETOWN_', 'HT', 'WT',
       'GRADE', 'SCHOOL', 'STATUS', 'Final Height', 'Pos', 'Age', 'Tm', 'G',
       'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%',
       'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'PER', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%',
       'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48',
       'OBPM', 'DBPM', 'BPM', 'VORP'],
      dtype='object')

In [4]:
# Inspect the raw HT values (feet'inches'' strings) before converting them.
nba['HT']

0       6'9''
1       6'5''
2       6'4''
3       6'8''
4       6'4''
        ...  
395     6'6''
396     6'4''
397    6'11''
398     6'9''
399     6'6''
Name: HT, Length: 400, dtype: object

In [5]:
def Imperial_to_Metric(value):
    """Convert a feet-and-inches height string to centimetres.

    The text before the first apostrophe is read as feet and the text after
    it as inches, so both ``6'9''`` and ``6'9"`` are accepted.

    Parameters
    ----------
    value : str
        Height in feet'inches notation. A missing inches part (``6'`` or
        ``6``) counts as zero inches. Missing values (``None`` or NaN) are
        passed through as NaN instead of raising.

    Returns
    -------
    float
        Height in centimetres, or NaN when ``value`` is missing.

    Raises
    ------
    ValueError
        If the feet or inches part is not an integer.
    """
    # Blank spreadsheet cells arrive as NaN (a float), which has no string methods.
    if value is None or (isinstance(value, float) and value != value):
        return float('nan')

    feet_text, _, inches_text = value.partition("'")
    # Drop the closing inch mark, whether it was typed as '' or as ".
    inches_text = inches_text.strip().strip("'\"").strip()

    feet = int(feet_text)
    inches = int(inches_text) if inches_text else 0
    total_inches = feet * 12 + inches
    return total_inches * 2.54  # 1 inch = 2.54 cm

In [6]:
# Express both height columns in centimetres, then take their difference.
for raw_col, cm_col in (('HT', 'HT_cm'), ('Final Height', 'Final Height_cm')):
    nba[cm_col] = nba[raw_col].apply(Imperial_to_Metric)
height_change_cm = nba['Final Height_cm'] - nba['HT_cm']
nba['Height Diff_cm'] = round(height_change_cm, 2)  # keep two decimals

In [7]:
# Frequency of each distinct height difference (in cm) across all players.
nba['Height Diff_cm'].value_counts()

0.00    267
2.54    108
5.08     25
Name: Height Diff_cm, dtype: int64

In [8]:
def Growth_Checker(value):
    """Flag whether a height difference is positive.

    Returns 'Yes' when ``value`` is greater than zero and 'No' otherwise
    (zero, negative values, and NaN all give 'No').
    """
    return 'Yes' if value > 0 else 'No'

In [9]:
# Label every player 'Yes'/'No' according to whether the height difference is positive.
height_diff_cm = nba['Height Diff_cm']
nba['Growth'] = height_diff_cm.map(Growth_Checker)

In [10]:
# Count players per raw position code, including the hyphenated combinations.
nba['Pos'].value_counts()

SG       96
PF       83
PG       74
C        72
SF       70
SF-PF     2
SF-SG     1
PF-C      1
SG-PG     1
Name: Pos, dtype: int64

In [11]:
def Position_Main(value):
    """Translate a position code into a single full position name.

    Plain codes (PG, SG, SF, PF, C) map to their full names; each hyphenated
    combination is assigned to one of the five main positions. Any code not
    listed below yields None.
    """
    full_name_by_code = {
        'PG': 'Point Guard',
        'SG': 'Shooting Guard',
        'SF': 'Small Forward',
        'PF': 'Power Forward',
        'C': 'Center',
        # Combined codes are folded into one main position.
        'SF-PF': 'Small Forward',
        'SF-SG': 'Small Forward',
        'PF-C': 'Center',
        'SG-PG': 'Point Guard',
    }
    return full_name_by_code.get(value)

In [12]:
# Collapse the raw position codes into the five main position names.
raw_position_codes = nba['Pos']
nba['Position'] = raw_position_codes.map(Position_Main)

In [13]:
# Check the player count per mapped position name.
nba['Position'].value_counts()

Shooting Guard    96
Power Forward     83
Point Guard       75
Small Forward     73
Center            73
Name: Position, dtype: int64

In [14]:
def Size_Checker(value):
    """Assign a coarse size label to a full position name.

    Both guard positions give 'Short', 'Center' gives 'Big', and every other
    value gives 'Medium'.
    """
    if value in ('Point Guard', 'Shooting Guard'):
        return 'Short'
    return 'Big' if value == 'Center' else 'Medium'

In [15]:
# Derive the coarse size label from each player's main position.
main_positions = nba['Position']
nba['Size'] = main_positions.map(Size_Checker)

In [16]:
def HT_group(value):
    """Place a height into one of three labelled ranges.

    Values below 190 give '[1.75 - 1.90]', values below 205 give
    '[1.90 - 2.05]', and everything else (including NaN, which is not less
    than either bound) gives '[2.05 - 2.30]'.
    """
    upper_bounds = ((190, '[1.75 - 1.90]'), (205, '[1.90 - 2.05]'))
    for upper_bound, label in upper_bounds:
        if value < upper_bound:
            return label
    return '[2.05 - 2.30]'

In [17]:
# Bucket each player's final height (in cm) into a labelled height range.
final_height_cm = nba['Final Height_cm']
nba['Height Group'] = final_height_cm.map(HT_group)

In [18]:
# Count how many players fall into each height range.
nba['Height Group'].value_counts()

[1.90 - 2.05]    227
[2.05 - 2.30]    132
[1.75 - 1.90]     41
Name: Height Group, dtype: int64

In [19]:
# Confirm the dtypes of the original and the newly derived columns.
nba.dtypes

Year                int64
RK                  int64
PLAYER             object
POS_HS             object
HOMETOWN           object
                   ...   
Height Diff_cm    float64
Growth             object
Position           object
Size               object
Height Group       object
Length: 67, dtype: object

In [20]:
col = '3PAr'
# Mean of `col` among players whose final height is under 185 cm: first all of
# them, then those whose height did not change, then those whose height did.
under_185 = nba['Final Height_cm'] < 185
height_unchanged = nba['Height Diff_cm'] == 0
for selection in (under_185,
                  height_unchanged & under_185,
                  ~height_unchanged & under_185):
    print(np.mean(nba.loc[selection, col]))

0.37163636363636365
0.42724999999999996
0.22333333333333336


In [21]:
# Split players by the Growth flag. NOTE: the names refer to growth, not to
# height itself: nba_tall holds Growth == 'No' and nba_short holds Growth == 'Yes'.
did_not_grow = nba['Growth'] == 'No'
did_grow = nba['Growth'] == 'Yes'
nba_tall = nba.loc[did_not_grow]
nba_short = nba.loc[did_grow]

In [22]:
# Two-sample t-test on 3PAr between the Growth == 'Yes' and Growth == 'No' groups.
three_point_rate_short = nba_short['3PAr']
three_point_rate_tall = nba_tall['3PAr']
statistic, pvalue = ttest_ind(three_point_rate_short, three_point_rate_tall)
print(statistic, pvalue)

-1.3064616261374633 0.19215007775513718


In [23]:
# Distinct position names, in order of first appearance in the table.
distinct_positions = nba['Position'].unique()
Positions = [*distinct_positions]
Positions

['Power Forward', 'Shooting Guard', 'Small Forward', 'Point Guard', 'Center']

In [24]:
# Replace the order-of-appearance list with a fixed guard-to-center ordering
# so the per-position report below prints in that order.
Positions = ['Point Guard', 'Shooting Guard', 'Small Forward', 'Power Forward', 'Center']

In [25]:
nba_cols = ['3P%','2P%','eFG%','3PAr','AST%','STL%','BLK%','TRB%']

# For every metric and position, compare the Growth == 'Yes' group (nba_short)
# with the Growth == 'No' group (nba_tall): t-test p-value, medians and
# sample standard deviations.
for col in nba_cols:
    print('-----',col)
    for P in Positions:
        # Select each group's values once per position instead of re-filtering
        # the frames for every statistic.
        values_short = nba_short.loc[nba_short['Position'] == P, col]
        values_tall = nba_tall.loc[nba_tall['Position'] == P, col]

        statistic, pvalue = ttest_ind(values_short, values_tall)
        median_short = values_short.median()
        median_tall = values_tall.median()
        print(P)
        print('pvalue: ',pvalue)
        print('median tall vs short: ',median_tall,' vs ',median_short)

        # Series.std() gives the sample standard deviation (ddof=1) and leaves
        # NaN out of both the squared deviations and the count, so missing
        # values cannot skew the denominator.
        std_dev_short = values_short.std()
        std_dev_tall = values_tall.std()
        print('std_dev tall vs short: ', std_dev_tall,' vs ',std_dev_short)
        print("")

----- 3P%
Point Guard
pvalue:  0.032494131243918284
median tall vs short:  0.344  vs  0.25
std_dev tall vs short:  0.15715952357388255  vs  0.1500720905196605

Shooting Guard
pvalue:  0.11623558447665336
median tall vs short:  0.347  vs  0.347
std_dev tall vs short:  0.10678249519505491  vs  0.15332996949538125

Small Forward
pvalue:  0.126608485829272
median tall vs short:  0.332  vs  0.312
std_dev tall vs short:  0.15308354395091403  vs  0.1729022091040004

Power Forward
pvalue:  0.039016989133049594
median tall vs short:  0.3175  vs  0.2
std_dev tall vs short:  0.18118699442303232  vs  0.16644552850741878

Center
pvalue:  0.6043910732434864
median tall vs short:  0.066  vs  0.0
std_dev tall vs short:  0.18520421853754457  vs  0.1812064576747482

----- 2P%
Point Guard
pvalue:  0.32411139433892844
median tall vs short:  0.4475  vs  0.441
std_dev tall vs short:  0.14201955443353675  vs  0.16728196959906103

Shooting Guard
pvalue:  0.22321427051126816
median tall vs short:  0.474  vs  0

In [26]:
# Save the enriched table. index=False keeps the default 0..n-1 row index out
# of the sheet, so the file has the same layout as the input workbook and does
# not gain an extra "Unnamed: 0" column when it is read back.
nba.to_excel('NBA Highschool & NBA_final.xlsx', index=False)