In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import ensemble, model_selection, preprocessing, tree

In [2]:
# Load the per-season NBA player statistics (the file covers the 1950 - 2017 seasons).
# The path is relative to the current working directory.

nbaStats = pd.read_csv(filepath_or_buffer="./nba-players-stats/Seasons_Stats.csv")

In [3]:
# Preview the raw data. Only the first rows are displayed: rendering the whole
# frame adds nothing for the reader and bloats the saved notebook.

nbaStats.head(10)

Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,TS%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1950.0,Curly Armstrong,G-F,31.0,FTW,63.0,,,,0.368,...,0.705,,,,176.0,,,,217.0,458.0
1,1950.0,Cliff Barker,SG,29.0,INO,49.0,,,,0.435,...,0.708,,,,109.0,,,,99.0,279.0
2,1950.0,Leo Barnhorst,SF,25.0,CHS,67.0,,,,0.394,...,0.698,,,,140.0,,,,192.0,438.0
3,1950.0,Ed Bartels,F,24.0,TOT,15.0,,,,0.312,...,0.559,,,,20.0,,,,29.0,63.0
4,1950.0,Ed Bartels,F,24.0,DNN,13.0,,,,0.308,...,0.548,,,,20.0,,,,27.0,59.0
5,1950.0,Ed Bartels,F,24.0,NYK,2.0,,,,0.376,...,0.667,,,,0.0,,,,2.0,4.0
6,1950.0,Ralph Beard,G,22.0,INO,60.0,,,,0.422,...,0.762,,,,233.0,,,,132.0,895.0
7,1950.0,Gene Berce,G-F,23.0,TRI,3.0,,,,0.275,...,0.000,,,,2.0,,,,6.0,10.0
8,1950.0,Charlie Black,F-C,28.0,TOT,65.0,,,,0.346,...,0.651,,,,163.0,,,,273.0,661.0
9,1950.0,Charlie Black,F-C,28.0,FTW,36.0,,,,0.362,...,0.632,,,,75.0,,,,140.0,382.0


In [4]:
# The analysis is restricted to the three-point era, so only rows whose 'Year'
# is 1979 or later are kept (a row with a missing 'Year' fails the comparison
# and is dropped as well).
# NOTE(review): every 1979 row displayed further down has 3PAr == 0.000 once the
# NaNs are filled, which suggests 1979 may predate the three-point line in this
# dataset -- confirm whether the cutoff should be 1980 instead.

nbaStats_3ptAdded = nbaStats.loc[nbaStats["Year"].ge(1979)]

In [5]:
# -------------------- Data Wrangling -------------------- #
# 1. Replace every NaN with 0.
# 2. Cast the counting columns (whole numbers that were read in as floats,
#    e.g. 1950.0 / 31.0 in the raw preview) to int.
# The column list is written once and handed to astype() as a mapping, so the
# set of columns being converted cannot drift between a selection and an assignment.
# NOTE(review): fillna(0) is applied to every column, so a missing rate stat
# (e.g. a shooting percentage) also becomes 0 -- confirm that is acceptable
# for the later analysis.

nbaStats_3ptAdded = (
    nbaStats_3ptAdded
    .fillna(0)
    .astype(dict.fromkeys(
        [
            'Year', 'Age', 'G', 'GS', 'MP',
            'FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA',
            'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
        ],
        int,
    ))
)

In [6]:
# Preprocessing is done. Show the first rows of the cleaned frame as a sanity
# check (the integer columns should no longer carry a trailing ".0").

nbaStats_3ptAdded.head(10)

Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,TS%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
5382,1979,Kareem Abdul-Jabbar*,C,31,LAL,80,0,3157,25.5,0.612,...,0.736,207,818,1025,431,76,316,282,230,1903
5383,1979,Tom Abernethy,PF,24,GSW,70,0,1219,13.6,0.550,...,0.745,74,142,216,79,39,13,32,133,422
5384,1979,Alvan Adams,C,24,PHO,77,0,2364,20.4,0.570,...,0.799,220,485,705,360,110,63,279,246,1369
5385,1979,Lucius Allen,PG,31,KCK,31,0,413,8.7,0.416,...,0.576,14,32,46,44,21,6,30,52,157
5386,1979,Kim Anderson,SF,23,POR,21,0,224,3.0,0.353,...,0.536,17,28,45,15,4,5,22,42,63
5387,1979,Tiny Archibald*,PG,30,BOS,69,0,1662,13.1,0.537,...,0.788,25,78,103,324,55,6,197,132,760
5388,1979,Tate Armstrong,SG,23,CHI,26,0,259,6.5,0.436,...,0.769,7,13,20,31,10,0,21,22,66
5389,1979,Dennis Awtrey,C,30,TOT,63,0,746,6.7,0.490,...,0.732,42,109,151,69,16,13,52,106,129
5390,1979,Dennis Awtrey,C,30,BOS,23,0,247,5.4,0.473,...,0.800,13,34,47,20,3,6,21,37,50
5391,1979,Dennis Awtrey,C,30,SEA,40,0,499,7.3,0.501,...,0.694,29,75,104,49,13,7,31,69,79


In [7]:
# Example: Finding the player who has the most blocks in one season (From 1979 - 2017)
# nbaStats_3ptAdded['Player'][nbaStats_3ptAdded.BLK == nbaStats_3ptAdded.BLK.max()]

In [8]:
# Use 'Player' as the row index so that a player's rows can be looked up by name.
# The index is not unique: a player has one row per season and team.
# The frame is re-assigned instead of mutated with inplace=True, and only while
# 'Player' is not the index yet, so this cell can be re-run safely (a second
# set_index('Player') would raise KeyError because the column has already been
# moved into the index).

if nbaStats_3ptAdded.index.name != 'Player':
    nbaStats_3ptAdded = nbaStats_3ptAdded.set_index('Player')
nbaStats_3ptAdded.head(10)

Unnamed: 0_level_0,Year,Pos,Age,Tm,G,GS,MP,PER,TS%,3PAr,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kareem Abdul-Jabbar*,1979,C,31,LAL,80,0,3157,25.5,0.612,0.000,...,0.736,207,818,1025,431,76,316,282,230,1903
Tom Abernethy,1979,PF,24,GSW,70,0,1219,13.6,0.550,0.000,...,0.745,74,142,216,79,39,13,32,133,422
Alvan Adams,1979,C,24,PHO,77,0,2364,20.4,0.570,0.000,...,0.799,220,485,705,360,110,63,279,246,1369
Lucius Allen,1979,PG,31,KCK,31,0,413,8.7,0.416,0.000,...,0.576,14,32,46,44,21,6,30,52,157
Kim Anderson,1979,SF,23,POR,21,0,224,3.0,0.353,0.000,...,0.536,17,28,45,15,4,5,22,42,63
Tiny Archibald*,1979,PG,30,BOS,69,0,1662,13.1,0.537,0.000,...,0.788,25,78,103,324,55,6,197,132,760
Tate Armstrong,1979,SG,23,CHI,26,0,259,6.5,0.436,0.000,...,0.769,7,13,20,31,10,0,21,22,66
Dennis Awtrey,1979,C,30,TOT,63,0,746,6.7,0.490,0.000,...,0.732,42,109,151,69,16,13,52,106,129
Dennis Awtrey,1979,C,30,BOS,23,0,247,5.4,0.473,0.000,...,0.800,13,34,47,20,3,6,21,37,50
Dennis Awtrey,1979,C,30,SEA,40,0,499,7.3,0.501,0.000,...,0.694,29,75,104,49,13,7,31,69,79


In [9]:
# Look at one player's career: select every row labelled 'Michael Jordan*'
# in the Player index (the trailing '*' is part of the name as stored in the data).

nbaStats_3ptAdded.loc['Michael Jordan*', :]

Unnamed: 0_level_0,Year,Pos,Age,Tm,G,GS,MP,PER,TS%,3PAr,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Michael Jordan*,1985,SG,21,CHI,82,82,3144,25.8,0.592,0.032,...,0.845,167,367,534,481,196,69,291,285,2313
Michael Jordan*,1986,SG,22,CHI,18,7,451,27.5,0.533,0.055,...,0.84,23,41,64,53,37,21,45,46,408
Michael Jordan*,1987,SG,23,CHI,82,82,3281,29.8,0.562,0.029,...,0.857,166,264,430,377,236,125,272,237,3041
Michael Jordan*,1988,SG,24,CHI,82,82,3311,31.7,0.603,0.027,...,0.841,139,310,449,485,259,131,252,270,2868
Michael Jordan*,1989,SG,25,CHI,81,81,3255,31.1,0.614,0.055,...,0.85,149,503,652,650,234,65,290,247,2633
Michael Jordan*,1990,SG,26,CHI,82,82,3197,31.2,0.606,0.125,...,0.848,143,422,565,519,227,54,247,241,2753
Michael Jordan*,1991,SG,27,CHI,82,82,3034,31.6,0.605,0.051,...,0.851,118,374,492,453,223,83,202,229,2580
Michael Jordan*,1992,SG,28,CHI,80,80,3102,27.7,0.579,0.055,...,0.832,91,420,511,489,182,75,200,201,2404
Michael Jordan*,1993,SG,29,CHI,78,78,3067,29.7,0.564,0.115,...,0.837,135,387,522,428,221,61,207,188,2541
Michael Jordan*,1995,SG,31,CHI,17,17,668,22.1,0.493,0.079,...,0.801,25,92,117,90,30,13,35,47,457


In [10]:
# Bucket the rows by team code ('Tm') so each team's players can be inspected together.
# NOTE(review): 'Tm' also holds 'TOT' rows, which look like combined totals for a
# player who appeared for several teams in one season (e.g. Dennis Awtrey 1979:
# TOT 63 G = BOS 23 + SEA 40), so 'TOT' becomes a group of its own -- confirm
# that this is intended.

groupBy_Team = nbaStats_3ptAdded.groupby(by='Tm')

In [11]:
# Evaluating groupBy_Team on its own does not display the grouped rows, so walk
# over the groups instead: each iteration yields the team code (p) followed by
# the DataFrame holding that team's rows (tm).

for p, tm in groupBy_Team:
    print(p, tm, sep="\n")

ATL
                       Year Pos  Age   Tm   G  GS    MP   PER    TS%   3PAr  \
Player                                                                        
Charlie Criss          1979  PG   30  ATL  54   0   879   9.1  0.436  0.000   
John Drew              1979  SF   24  ATL  79   0  2410  21.7  0.536  0.000   
Terry Furlow           1979  SG   24  ATL  29   0   576  17.1  0.538  0.000   
Jack Givens            1979  SF   22  ATL  74   0  1347  13.0  0.457  0.000   
Steve Hawes            1979  PF   28  ATL  81   0  2205  15.0  0.523  0.000   
Keith Herron           1979  SG   22  ATL  14   0    81   9.3  0.372  0.000   
Armond Hill            1979  PG   25  ATL  82   0  2527  11.8  0.518  0.000   
Eddie Johnson          1979  SG   23  ATL  78   0  2413  16.7  0.561  0.000   
Butch Lee              1979  PG   22  ATL  49   0   997  12.2  0.516  0.000   
Tom McMillen           1979   C   26  ATL  82   0  1392  12.6  0.518  0.000   
Tree Rollins           1979   C   23  ATL  81   

                     Year Pos  Age   Tm   G  GS    MP   PER    TS%   3PAr  \
Player                                                                      
Darrell Allums       1981  PF   22  DAL  22   0   276   5.3  0.385  0.015   
Winford Boynes       1981  SG   23  DAL  44   0   757   6.1  0.426  0.000   
Marty Byrnes         1981  SF   24  DAL  72   0  1360  13.6  0.539  0.044   
Austin Carr          1981  SG   32  DAL   8   0    77  -1.9  0.269  0.000   
Brad Davis           1981  PG   25  DAL  56   0  1686  16.4  0.626  0.041   
Monti Davis          1981  PF   22  DAL   1   0     8  -5.4  0.081  0.000   
Ralph Drollinger     1981   C   26  DAL   6   0    67   5.3  0.476  0.000   
Terry Duerod         1981  SF   24  DAL  18   0   337  13.7  0.486  0.037   
Joe Hassett          1981  SG   25  DAL  17   0   280  11.6  0.467  0.282   
Geoff Huston         1981  PG   23  DAL  56   0  1892  13.4  0.516  0.005   
Abdul Jeelani        1981  SF   26  DAL  66   0  1108  15.5  0.515  0.002   

                   Year Pos  Age   Tm   G  GS    MP   PER    TS%   3PAr  ...   \
Player                                                                   ...    
Nick Anderson      2002  SG   34  MEM  15   0   219   4.9  0.375  0.632  ...    
Isaac Austin       2002   C   32  MEM  21   8   307   7.8  0.405  0.033  ...    
Shane Battier      2002  SF   23  MEM  78  78  3097  13.9  0.518  0.287  ...    
Rodney Buford      2002  SG   24  MEM  63  21  1769  10.1  0.470  0.118  ...    
Michael Dickerson  2002  SG   26  MEM   4   4   124   6.4  0.425  0.438  ...    
Isaac Fontaine     2002  SG   26  MEM   6   0    75  -0.9  0.331  0.357  ...    
Antonis Fotsis     2002  PF   20  MEM  28   1   320  11.3  0.479  0.221  ...    
Pau Gasol          2002   C   21  MEM  82  79  3007  19.5  0.566  0.005  ...    
Eddie Gill         2002  PG   23  MEM  23   5   384   9.2  0.531  0.239  ...    
Brevin Knight      2002  PG   26  MEM  53  11  1151  14.9  0.482  0.024  ...    
Grant Long         2002  PF 

                     Year Pos  Age   Tm   G  GS    MP   PER    TS%   3PAr  \
Player                                                                      
Chris Andersen       2006   C   27  NOK  32   2   570  14.1  0.562  0.000   
Brandon Bass         2006  PF   20  NOK  29   1   268   9.1  0.434  0.000   
P.J. Brown           2006   C   36  NOK  75  73  2380  12.7  0.515  0.000   
Rasual Butler        2006  SF   26  NOK  79  20  1875  10.8  0.501  0.382   
Speedy Claxton       2006  PG   27  NOK  71   3  2019  16.1  0.497  0.086   
Marcus Fizer         2006  PF   27  NOK   3   0    39  14.1  0.559  0.059   
Marc Jackson         2006   C   31  NOK  27  10   594  13.9  0.519  0.009   
Linton Johnson       2006  SF   25  NOK  27   7   489  10.5  0.476  0.259   
Maciej Lampe         2006   C   20  NOK   2   0    16  -6.1  0.000  0.000   
Arvydas Macijauskas  2006  SG   26  NOK  19   0   135   8.3  0.462  0.293   
Desmond Mason        2006  SF   28  NOK  70  55  2102   9.2  0.460  0.009   

                   Year Pos  Age   Tm   G  GS    MP   PER    TS%   3PAr  ...   \
Player                                                                   ...    
Kim Anderson       1979  SF   23  POR  21   0   224   3.0  0.353  0.000  ...    
Ron Brewer         1979  SG   23  POR  81   0  2454  14.2  0.544  0.000  ...    
T.R. Dunn          1979  SG   23  POR  80   0  1828  12.3  0.496  0.000  ...    
Bob Gross          1979  SF   25  POR  53   0  1441  14.0  0.519  0.000  ...    
Lionel Hollins     1979  PG   25  POR  64   0  1967  14.0  0.496  0.000  ...    
Clemon Johnson     1979   C   22  POR  74   0   794  12.1  0.481  0.000  ...    
Maurice Lucas      1979  PF   26  POR  69   0  2462  18.4  0.517  0.000  ...    
Jim McMillian      1979  SF   30  POR  23   0   278  11.9  0.499  0.000  ...    
Lloyd Neal         1979  PF   28  POR   4   0    48  -0.6  0.393  0.000  ...    
Tom Owens          1979   C   29  POR  82   0  2791  19.9  0.597  0.000  ...    
Willie Smith       1979  PG 

                   Year Pos  Age   Tm   G  GS    MP   PER    TS%   3PAr  ...   \
Player                                                                   ...    
Calbert Cheaney    1998  SG   26  WAS  82  82  2841  11.7  0.488  0.054  ...    
Terry Davis        1998  PF   30  WAS  74  66  1705  10.5  0.524  0.004  ...    
Ledell Eackles     1998  SG   31  WAS  42   0   547  12.2  0.542  0.263  ...    
Harvey Grant       1998  SF   32  WAS  65   8   895   7.5  0.406  0.031  ...    
Darvin Ham         1998  SF   24  WAS  71   3   635   9.2  0.531  0.000  ...    
Juwan Howard       1998  PF   24  WAS  64  64  2559  15.2  0.515  0.002  ...    
Tim Legler         1998  SG   31  WAS   8   0    76  -4.5  0.217  0.316  ...    
Lawrence Moten     1998  SG   25  WAS   8   0    27  -0.2  0.305  0.077  ...    
Tracy Murray       1998  SF   26  WAS  82  12  2227  17.3  0.563  0.400  ...    
Jimmy Oliver       1998  SG   28  WAS   1   0    10  19.7  0.625  0.500  ...    
God Shammgod       1998  PG 

In [13]:
# Pull a single group out of the GroupBy object: all rows whose 'Tm' is 'ATL'.

groupBy_Team.get_group(name='ATL')

Unnamed: 0_level_0,Year,Pos,Age,Tm,G,GS,MP,PER,TS%,3PAr,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Charlie Criss,1979,PG,30,ATL,54,0,879,9.1,0.436,0.000,...,0.779,19,41,60,138,41,3,79,70,285
John Drew,1979,SF,24,ATL,79,0,2410,21.7,0.536,0.000,...,0.731,225,297,522,119,128,16,211,332,1795
Terry Furlow,1979,SG,24,ATL,29,0,576,17.1,0.538,0.000,...,0.857,32,39,71,81,18,13,47,42,286
Jack Givens,1979,SF,22,ATL,74,0,1347,13.0,0.457,0.000,...,0.756,98,116,214,83,72,17,75,121,570
Steve Hawes,1979,PF,28,ATL,81,0,2205,15.0,0.523,0.000,...,0.818,190,401,591,184,79,47,145,264,852
Keith Herron,1979,SG,22,ATL,14,0,81,9.3,0.372,0.000,...,0.923,4,6,10,3,6,2,5,11,40
Armond Hill,1979,PG,25,ATL,82,0,2527,11.8,0.518,0.000,...,0.854,41,123,164,480,102,16,202,292,838
Eddie Johnson,1979,SG,23,ATL,78,0,2413,16.7,0.561,0.000,...,0.832,65,105,170,360,121,11,213,241,1245
Butch Lee,1979,PG,22,ATL,49,0,997,12.2,0.516,0.000,...,0.752,11,48,59,169,56,1,96,88,376
Tom McMillen,1979,C,26,ATL,82,0,1392,12.6,0.518,0.000,...,0.891,131,201,332,69,15,32,87,211,570
