In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import ensemble, model_selection, preprocessing, tree

In [2]:
# Dataset contains NBA players stats from 1950 - 2017 (67 seasons).
# The CSV location can be overridden by setting the NBA_SEASONS_STATS_CSV
# environment variable; when it is unset, the original local path is used.

SEASONS_STATS_CSV = os.environ.get(
    "NBA_SEASONS_STATS_CSV",
    "/Users/ericleung/Desktop/Comp/Project/NBA (Data Science)/nba-players-stats/Seasons_Stats.csv",
)

nbaStats = pd.read_csv(SEASONS_STATS_CSV)

In [3]:
# Let's take a look at the first rows of our raw data
# (a bounded preview rather than the whole frame).

nbaStats.head(10)

Unnamed: 0.1,Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,0,1950.0,Curly Armstrong,G-F,31.0,FTW,63.0,,,,...,0.705,,,,176.0,,,,217.0,458.0
1,1,1950.0,Cliff Barker,SG,29.0,INO,49.0,,,,...,0.708,,,,109.0,,,,99.0,279.0
2,2,1950.0,Leo Barnhorst,SF,25.0,CHS,67.0,,,,...,0.698,,,,140.0,,,,192.0,438.0
3,3,1950.0,Ed Bartels,F,24.0,TOT,15.0,,,,...,0.559,,,,20.0,,,,29.0,63.0
4,4,1950.0,Ed Bartels,F,24.0,DNN,13.0,,,,...,0.548,,,,20.0,,,,27.0,59.0
5,5,1950.0,Ed Bartels,F,24.0,NYK,2.0,,,,...,0.667,,,,0.0,,,,2.0,4.0
6,6,1950.0,Ralph Beard,G,22.0,INO,60.0,,,,...,0.762,,,,233.0,,,,132.0,895.0
7,7,1950.0,Gene Berce,G-F,23.0,TRI,3.0,,,,...,0.000,,,,2.0,,,,6.0,10.0
8,8,1950.0,Charlie Black,F-C,28.0,TOT,65.0,,,,...,0.651,,,,163.0,,,,273.0,661.0
9,9,1950.0,Charlie Black,F-C,28.0,FTW,36.0,,,,...,0.632,,,,75.0,,,,140.0,382.0


In [4]:
# The NBA first adopted the 3-point line in 1979, so rows whose 'Year' is
# earlier than 1979 are dropped.
# NOTE(review): if 'Year' holds the season-ending year, the 1979 rows may
# predate the 3-point line — confirm the cutoff against the dataset docs.

FIRST_3PT_YEAR = 1979
in_3pt_era = nbaStats["Year"].ge(FIRST_3PT_YEAR)
nbaStats_3ptAdded = nbaStats.loc[in_3pt_era]

In [5]:
# -------------------- Data Wrangling -------------------- #
# 1. Fill every NaN in the frame with 0 (this applies to all columns).
# 2. Convert the columns listed in INT_COLUMNS to int.

# Single source of truth for the columns that are cast to int.
INT_COLUMNS = [
    'Year', 'Age', 'G', 'GS', 'MP',
    'FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA',
    'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
]

# fillna must run before the cast: NaN cannot be converted to int.
nbaStats_3ptAdded = (
    nbaStats_3ptAdded
    .fillna(0)
    .astype({column: int for column in INT_COLUMNS})
)


In [6]:
# Now we are done preprocessing our dataset; preview the first rows of the
# cleaned frame (a bounded preview rather than the whole frame).

nbaStats_3ptAdded.head(10)

Unnamed: 0.1,Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
5382,5382,1979,Kareem Abdul-Jabbar*,C,31,LAL,80,0,3157,25.5,...,0.736,207,818,1025,431,76,316,282,230,1903
5383,5383,1979,Tom Abernethy,PF,24,GSW,70,0,1219,13.6,...,0.745,74,142,216,79,39,13,32,133,422
5384,5384,1979,Alvan Adams,C,24,PHO,77,0,2364,20.4,...,0.799,220,485,705,360,110,63,279,246,1369
5385,5385,1979,Lucius Allen,PG,31,KCK,31,0,413,8.7,...,0.576,14,32,46,44,21,6,30,52,157
5386,5386,1979,Kim Anderson,SF,23,POR,21,0,224,3.0,...,0.536,17,28,45,15,4,5,22,42,63
5387,5387,1979,Tiny Archibald*,PG,30,BOS,69,0,1662,13.1,...,0.788,25,78,103,324,55,6,197,132,760
5388,5388,1979,Tate Armstrong,SG,23,CHI,26,0,259,6.5,...,0.769,7,13,20,31,10,0,21,22,66
5389,5389,1979,Dennis Awtrey,C,30,TOT,63,0,746,6.7,...,0.732,42,109,151,69,16,13,52,106,129
5390,5390,1979,Dennis Awtrey,C,30,BOS,23,0,247,5.4,...,0.800,13,34,47,20,3,6,21,37,50
5391,5391,1979,Dennis Awtrey,C,30,SEA,40,0,499,7.3,...,0.694,29,75,104,49,13,7,31,69,79


In [16]:
# Preview the data indexed by player name. set_index returns a new frame and
# the result is not assigned, so nbaStats_3ptAdded keeps its original index.
nbaStats_3ptAdded.set_index('Player').head(10)

Unnamed: 0_level_0,Unnamed: 0,Year,Pos,Age,Tm,G,GS,MP,PER,TS%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kareem Abdul-Jabbar*,5382,1979,C,31,LAL,80,0,3157,25.5,0.612,...,0.736,207,818,1025,431,76,316,282,230,1903
Tom Abernethy,5383,1979,PF,24,GSW,70,0,1219,13.6,0.550,...,0.745,74,142,216,79,39,13,32,133,422
Alvan Adams,5384,1979,C,24,PHO,77,0,2364,20.4,0.570,...,0.799,220,485,705,360,110,63,279,246,1369
Lucius Allen,5385,1979,PG,31,KCK,31,0,413,8.7,0.416,...,0.576,14,32,46,44,21,6,30,52,157
Kim Anderson,5386,1979,SF,23,POR,21,0,224,3.0,0.353,...,0.536,17,28,45,15,4,5,22,42,63
Tiny Archibald*,5387,1979,PG,30,BOS,69,0,1662,13.1,0.537,...,0.788,25,78,103,324,55,6,197,132,760
Tate Armstrong,5388,1979,SG,23,CHI,26,0,259,6.5,0.436,...,0.769,7,13,20,31,10,0,21,22,66
Dennis Awtrey,5389,1979,C,30,TOT,63,0,746,6.7,0.490,...,0.732,42,109,151,69,16,13,52,106,129
Dennis Awtrey,5390,1979,C,30,BOS,23,0,247,5.4,0.473,...,0.800,13,34,47,20,3,6,21,37,50
Dennis Awtrey,5391,1979,C,30,SEA,40,0,499,7.3,0.501,...,0.694,29,75,104,49,13,7,31,69,79


In [15]:
# Player(s) on the row(s) with the highest BLK value in the filtered data.
most_blocks = nbaStats_3ptAdded['BLK'].max()
nbaStats_3ptAdded.loc[nbaStats_3ptAdded['BLK'] == most_blocks, 'Player']

7638    Mark Eaton
Name: Player, dtype: object