In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import os

In [49]:
# Directory holding the pickled frames, resolved against the kernel's current
# working directory -- the notebook must be started from the folder that
# contains 'dataframes' for the load below to find its file.
dataframes_path = os.path.join(os.getcwd(), 'dataframes')

In [50]:
# Load the stats table from 'complete_stats.pkl'.
# NOTE(review): unpickling can execute arbitrary code, so only load a pickle
# that was produced by a trusted step of this project.
df = pd.read_pickle(os.path.join(dataframes_path, 'complete_stats.pkl'))

In [51]:
# verbose=True asks pandas for the full per-column listing instead of the
# shortened summary it may choose for a wide frame.
df.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21655 entries, 0 to 21654
Data columns (total 138 columns):
 #    Column        Dtype  
---   ------        -----  
 0    Rk            int64  
 1    Player        object 
 2    Pos           object 
 3    Age           int64  
 4    Tm            object 
 5    G             int64  
 6    GS            int64  
 7    MP_pg         float64
 8    FG_pg         float64
 9    FGA_pg        float64
 10   FG%           float64
 11   3P_pg         float64
 12   3PA_pg        float64
 13   3P%           float64
 14   2P_pg         float64
 15   2PA_pg        float64
 16   2P%           float64
 17   eFG%          float64
 18   FT_pg         float64
 19   FTA_pg        float64
 20   FT%           float64
 21   ORB_pg        float64
 22   DRB_pg        float64
 23   TRB_pg        float64
 24   AST_pg        float64
 25   STL_pg        float64
 26   BLK_pg        float64
 27   TOV_pg        float64
 28   PF_pg         float64
 29   PTS_pg        fl

In [52]:
def check_rk_season(df, rk_season_pairs):
    """Flag the rows of ``df`` whose ``(Rk, Season)`` pair is in ``rk_season_pairs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'Rk'`` and ``'Season'`` columns.
    rk_season_pairs : iterable of tuple
        ``(Rk, Season)`` pairs to look for. The values must be hashable.

    Returns
    -------
    pandas.Series
        Boolean Series on ``df.index``; ``True`` where the row's
        ``(Rk, Season)`` pair is one of ``rk_season_pairs``.
    """
    # A set gives constant-time lookups; scanning a list once per row made the
    # check cost rows * pairs comparisons.
    wanted = set(rk_season_pairs)
    # Walk the two key columns directly instead of df.apply(axis=1), which
    # materialises a Series for every row and does not produce a boolean mask
    # when the frame has no rows.
    hits = [pair in wanted for pair in zip(df['Rk'], df['Season'])]
    return pd.Series(hits, index=df.index, dtype=bool)

In [53]:
def drop_players_multiteams(df):
    """Remove the non-'TOT' rows of every (Rk, Season) pair that has a 'TOT' row.

    Rows whose ``'Tm'`` is ``'TOT'`` are kept. Any other row sharing a
    ``(Rk, Season)`` pair with a ``'TOT'`` row is dropped. Presumably ``'TOT'``
    is the combined line of a player who appeared for several teams in one
    season -- confirm against the data source.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'Tm'``, ``'Rk'`` and ``'Season'`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame without the dropped rows, with the index reset and the
        old index discarded.
    """
    totals = df[df['Tm'] == 'TOT']
    # (Rk, Season) keys that own a 'TOT' row.
    keys_with_total = list(zip(totals['Rk'], totals['Season']))

    # Every row (the 'TOT' one included) belonging to one of those keys.
    rows_for_keys = df[check_rk_season(df, keys_with_total)]

    # Index labels of the rows that are not the 'TOT' row itself.
    per_team_labels = rows_for_keys[rows_for_keys['Tm'] != 'TOT'].index

    remaining = df.drop(per_team_labels)
    return remaining.reset_index(drop = True)

In [54]:
# Rebinds df to the filtered frame, so the unfiltered table loaded from the
# pickle is no longer reachable under this name; reload it to get it back.
df = drop_players_multiteams(df)

In [55]:
# Summary statistics of the numeric columns after the multi-team filter.
# NOTE(review): the recorded output shows a minimum of -10 for both 'GS' and
# '%GS' -- this looks like a missing-value sentinel rather than a real count;
# confirm and mask it before these columns feed any analysis.
df.describe()

Unnamed: 0,Rk,Age,G,GS,MP_pg,FG_pg,FGA_pg,FG%,3P_pg,3PA_pg,...,WS/48_rank,OBPM_rank,DBPM_rank,BPM_rank,VORP_rank,%W_rank,%GS,Votes,Share,MaxVotes
count,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,...,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0
mean,216.510321,26.659221,54.10004,25.148448,20.510897,3.210355,7.039173,0.44175,0.448504,1.299434,...,109.805293,64.566194,39.448001,71.441215,42.7199,28.826613,0.369406,6.415597,0.006084,1070.038851
std,130.32204,4.07873,25.217314,29.720575,10.104615,2.284105,4.709932,0.094254,0.636592,1.682514,...,56.590689,28.557129,14.085333,32.326389,12.282598,22.198109,0.454062,64.30399,0.059236,208.777846
min,1.0,18.0,1.0,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,-10.0,0.0,0.0,221.0
25%,106.0,23.0,35.0,0.0,12.1,1.4,3.3,0.405,0.0,0.0,...,68.0,45.0,31.0,50.0,37.0,10.0,0.0,0.0,0.0,980.0
50%,211.0,26.0,62.0,10.0,20.0,2.7,5.9,0.446,0.1,0.5,...,106.0,63.0,39.0,70.0,46.0,24.0,0.195,0.0,0.0,1150.0
75%,317.5,29.0,77.0,51.0,29.0,4.6,9.9,0.488,0.7,2.1,...,148.0,82.0,48.0,91.0,51.0,44.0,0.8395,0.0,0.0,1230.0
max,540.0,44.0,85.0,83.0,43.7,13.4,27.8,1.0,5.3,13.2,...,287.0,164.0,98.0,183.0,69.0,98.0,1.0,1310.0,1.0,1310.0


In [56]:
# Keep only the rows with a strictly positive 'Share'
# (presumably the player-seasons that received votes -- confirm).
df_with_votes = df[df['Share'] > 0]

In [57]:
# Summary statistics of the positive-'Share' subset.
# NOTE(review): 'GS' still has a minimum of -10 in the recorded output, so the
# suspicious sentinel value is present in this subset as well.
df_with_votes.describe()

Unnamed: 0,Rk,Age,G,GS,MP_pg,FG_pg,FGA_pg,FG%,3P_pg,3PA_pg,...,WS/48_rank,OBPM_rank,DBPM_rank,BPM_rank,VORP_rank,%W_rank,%GS,Votes,Share,MaxVotes
count,686.0,686.0,686.0,686.0,686.0,686.0,686.0,686.0,686.0,686.0,...,686.0,686.0,686.0,686.0,686.0,686.0,686.0,686.0,686.0,686.0
mean,202.419825,27.488338,74.517493,68.970845,36.200729,8.15,16.559475,0.494496,0.774052,2.184694,...,25.498542,16.482507,28.794461,17.657434,11.59621,10.373178,0.925109,165.374636,0.156824,1026.98105
std,116.055933,3.734903,9.645489,21.422255,3.098102,1.892527,3.890346,0.045793,0.946402,2.446809,...,23.039652,12.784188,12.87934,13.216363,9.298391,10.753133,0.258376,283.568755,0.258654,222.056667
min,1.0,19.0,17.0,-10.0,21.6,2.0,3.7,0.384,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,-0.27,1.0,0.001,221.0
25%,114.5,25.0,72.0,68.0,34.5,6.9,13.9,0.463,0.0,0.1,...,9.0,7.0,19.0,8.0,4.0,3.0,1.0,3.0,0.004,800.0
50%,200.5,27.0,78.0,77.0,36.6,8.3,16.8,0.492,0.3,1.2,...,18.0,13.0,28.5,14.0,9.0,7.0,1.0,22.0,0.021,1030.0
75%,294.75,30.0,81.0,81.0,38.2,9.5,19.2,0.52275,1.3,3.775,...,34.0,24.0,38.0,25.0,17.0,14.0,1.0,182.25,0.17675,1210.0
max,517.0,38.0,82.0,82.0,43.7,13.4,27.8,0.675,5.3,13.2,...,139.0,73.0,65.0,64.0,46.0,73.0,1.0,1310.0,1.0,1310.0


In [58]:
# Inspect the positive-'Share' rows with fewer than 50 games ('G' < 50).
# NOTE(review): the threshold 50 is not explained anywhere visible; consider a
# named constant stating why it was chosen.
df_with_votes[df_with_votes['G'] < 50]

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP_pg,FG_pg,FGA_pg,...,WS/48_rank,OBPM_rank,DBPM_rank,BPM_rank,VORP_rank,%W_rank,%GS,Votes,Share,MaxVotes
420,134,George Johnson,PF,24,IND,43,-10,21.6,4.2,9.2,...,59,40,30,41,39,14,-0.233,3,0.004,690
423,137,Magic Johnson,SG,21,LAL,37,-10,37.1,8.4,15.9,...,5,2,3,1,9,4,-0.27,18,0.026,690
3909,331,Isiah Thomas,PG,29,DET,48,46,34.5,6.0,13.9,...,87,31,33,29,33,11,0.958,11,0.011,960
3998,33,Larry Bird,SF,35,BOS,45,45,36.9,7.8,16.8,...,30,12,23,12,21,9,1.0,3,0.003,960
4448,97,Clyde Drexler,SG,30,POR,49,49,34.1,7.1,16.7,...,20,12,18,9,19,8,1.0,1,0.001,980
5330,186,Michael Jordan,SG,31,CHI,17,17,39.3,9.8,23.8,...,29,16,33,21,41,14,1.0,12,0.011,1050
5453,309,Dennis Rodman,PF,33,SAS,49,26,32.0,2.8,4.9,...,22,34,35,38,37,1,0.531,9,0.009,1050
5737,186,Magic Johnson,PF,36,LAL,32,9,29.9,4.3,9.2,...,22,12,33,17,36,10,0.281,8,0.007,1130
6728,307,Scottie Pippen,SF,32,CHI,44,44,37.5,7.2,16.0,...,15,16,22,14,23,1,1.0,14,0.012,1160
6991,131,Kevin Garnett,PF,22,MIN,47,47,37.9,8.8,19.1,...,59,22,25,19,6,17,1.0,9,0.008,1180
