# Capstone Project 1
## Predicting NBA Scores (and Winners)

#### By: 		Justin Huang
#### Date: 		Sept 15, 2020

# Notebook Description

The purpose of this notebook was to collect and process data for the "missing" / NaN values:
- 2002 season data needed for initial 2003 games (i.e. home/away records)
- 2003 season was missing CHA (league only had 29 teams, not 30) so used the league median for that season as a substitute.



In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the game-level frame from disk; the CSV's first column becomes the row index.
df = pd.read_csv(filepath_or_buffer='_data_out/df.csv', index_col=0)

In [3]:
# Preview the first 67 column labels; the following cell keeps only these columns.
df.columns[:67]

Index(['GAME_ID', 'GAME_TYPE', 'GAME_TYPE_CODE', 'GAME_DATE_EST', 'SEASON',
       'dt_MONTH', 'dt_YEAR', 'TEAM_ID', 'TEAM_ABBR', 'TEAM_CITY',
       'TEAM_NICKNAME', 'PTS', 'AST', 'REB', 'OREB', 'DREB', 'FGM', 'FGA',
       'FG3M', 'FG3A', 'FTM', 'FTA', 'TO', 'PF', 'FG_PCT', 'FT_PCT', 'FG3_PCT',
       'TEAM_ID_opp', 'TEAM_ABBR_opp', 'TEAM_CITY_opp', 'TEAM_NICKNAME_opp',
       'PTS_opp', 'AST_opp', 'REB_opp', 'OREB_opp', 'DREB_opp', 'FGM_opp',
       'FGA_opp', 'FG3M_opp', 'FG3A_opp', 'FTM_opp', 'FTA_opp', 'TO_opp',
       'PF_opp', 'FG_PCT_opp', 'FT_PCT_opp', 'FG3_PCT_opp', 'HOME_TEAM_WINS',
       'HomeAway', 'IsHomeTeam', 'WINS', 'd_PTS', 'd_AST', 'd_REB', 'd_OREB',
       'd_DREB', 'd_FGM', 'd_FGA', 'd_FG3M', 'd_FG3A', 'd_FTM', 'd_FTA',
       'd_TO', 'd_PF', 'GAME_ID_opp', 'GAMES', 'WINS_HOME'],
      dtype='object')

In [4]:
# Restrict the frame to the first 67 column labels (everything up to WINS_HOME
# in the preview above). Note: this rebinds `df`, so the cell is not idempotent
# if more columns are expected later.
df = df.loc[:, df.columns[:67]]

In [5]:
# Season totals per (season, game type, team).
# numeric_only=True makes the column selection explicit: text columns (dates,
# city, nickname, ...) are excluded from the sum. Older pandas dropped them
# silently; pandas >= 2.0 no longer does, and would try to sum the strings.
# NOTE: summed identifier / percentage columns (GAME_ID, TEAM_ID, FG_PCT, ...)
# are not meaningful as totals; only the counting stats are used downstream.
df_grp = (
    df.groupby(['SEASON', 'GAME_TYPE_CODE', 'TEAM_ABBR'])
    .sum(numeric_only=True)
    .reset_index()
)

In [6]:
# Key each season's totals to the FOLLOWING season (SEASON + 1), so they can
# later be looked up as "previous season" values.
df_grp['SEASON_AND1'] = df_grp['SEASON'].add(1)

In [7]:
# Inspect the per-team season totals, including the new SEASON_AND1 key.
df_grp

Unnamed: 0,SEASON,GAME_TYPE_CODE,TEAM_ABBR,GAME_ID,dt_MONTH,dt_YEAR,TEAM_ID,PTS,AST,REB,...,d_FG3M,d_FG3A,d_FTM,d_FTA,d_TO,d_PF,GAME_ID_opp,GAMES,WINS_HOME,SEASON_AND1
0,2003,2,ATL,1664647661,489,164295,132070244434,7611.0,1648.0,3503.0,...,-63.0,-96.0,-40.0,-96.0,192.0,127.0,1664647661,82,18,2004
1,2003,2,BKN,1664649681,474,164297,132070245582,7401.0,2009.0,3335.0,...,-40.0,-74.0,13.0,35.0,-90.0,-21.0,1664649681,82,28,2004
2,2003,2,BOS,1664647465,488,164295,132070244516,7811.0,1683.0,3291.0,...,-2.0,65.0,-15.0,-61.0,-45.0,18.0,1664647465,82,19,2004
3,2003,2,CHI,1664648632,469,164297,132070244762,7355.0,1793.0,3567.0,...,-9.0,24.0,-347.0,-410.0,58.0,225.0,1664648632,82,14,2004
4,2003,2,CLE,1664648536,482,164296,132070244598,7619.0,1808.0,3737.0,...,-184.0,-376.0,37.0,40.0,149.0,87.0,1664648536,82,23,2004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
504,2019,2,SAC,1314027424,424,121166,96636765480,6498.0,1397.0,2544.0,...,32.0,41.0,-202.0,-207.0,-55.0,150.0,1314027424,60,13,2020
505,2019,2,SAS,1270225843,397,117128,93415540022,6546.0,1423.0,2611.0,...,-146.0,-370.0,59.0,20.0,-22.0,-30.0,1270225843,58,15,2020
506,2019,2,TOR,1314027188,423,121166,96636765660,6768.0,1530.0,2718.0,...,43.0,-88.0,5.0,-48.0,-142.0,101.0,1314027188,60,23,2020
507,2019,2,UTA,1292126393,409,119147,95026152958,6566.0,1299.0,2688.0,...,117.0,145.0,95.0,119.0,176.0,-46.0,1292126393,59,21,2020


In [8]:
# Overall and home winning percentages from the season totals.
# The summed IsHomeTeam column serves as the number of home games
# (presumably a 0/1 flag per game -- verify against the upstream notebook).
df_grp['WIN_pct'] = df_grp['WINS'].div(df_grp['GAMES'])
df_grp['WIN_pct_at_HOME'] = df_grp['WINS_HOME'].div(df_grp['IsHomeTeam'])

# Away figures are what remains after removing the home share.
df_grp['WINS_at_AWAY'] = df_grp['WINS'].sub(df_grp['WINS_HOME'])
df_grp['GAMES_at_AWAY'] = df_grp['GAMES'].sub(df_grp['IsHomeTeam'])
df_grp['WIN_pct_at_AWAY'] = df_grp['WINS_at_AWAY'].div(df_grp['GAMES_at_AWAY'])


In [9]:
# Average scoring margin: (points scored - points allowed) / games played.
df_grp['d_PTS_per_GAME'] = df_grp['PTS'].sub(df_grp['PTS_opp']).div(df_grp['GAMES'])

In [10]:
# Field goals made / attempted, averaged over games played.
df_grp['FGM_per_GAME'] = df_grp['FGM'].div(df_grp['GAMES'])
df_grp['FGA_per_GAME'] = df_grp['FGA'].div(df_grp['GAMES'])

In [11]:
# Points scored and points allowed, averaged over games played.
df_grp['PTS_per_GAME'] = df_grp['PTS'].div(df_grp['GAMES'])
df_grp['PTS_opp_per_GAME'] = df_grp['PTS_opp'].div(df_grp['GAMES'])


In [12]:
# Inspect the frame again now that the win-percentage and per-game columns exist.
df_grp

Unnamed: 0,SEASON,GAME_TYPE_CODE,TEAM_ABBR,GAME_ID,dt_MONTH,dt_YEAR,TEAM_ID,PTS,AST,REB,...,WIN_pct,WIN_pct_at_HOME,WINS_at_AWAY,GAMES_at_AWAY,WIN_pct_at_AWAY,d_PTS_per_GAME,FGM_per_GAME,FGA_per_GAME,PTS_per_GAME,PTS_opp_per_GAME
0,2003,2,ATL,1664647661,489,164295,132070244434,7611.0,1648.0,3503.0,...,0.341463,0.439024,10,41,0.243902,-4.646341,34.500000,79.621951,92.817073,97.463415
1,2003,2,BKN,1664649681,474,164297,132070245582,7401.0,2009.0,3335.0,...,0.573171,0.682927,19,41,0.463415,2.500000,34.231707,77.487805,90.256098,87.756098
2,2003,2,BOS,1664647465,488,164295,132070244516,7811.0,1683.0,3291.0,...,0.439024,0.463415,17,41,0.414634,-1.451220,34.670732,78.231707,95.256098,96.707317
3,2003,2,CHI,1664648632,469,164297,132070244762,7355.0,1793.0,3567.0,...,0.280488,0.341463,9,41,0.219512,-6.353659,34.121951,82.353659,89.695122,96.048780
4,2003,2,CLE,1664648536,482,164296,132070244598,7619.0,1808.0,3737.0,...,0.426829,0.560976,12,41,0.292683,-2.621951,35.634146,82.353659,92.914634,95.536585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
504,2019,2,SAC,1314027424,424,121166,96636765480,6498.0,1397.0,2544.0,...,0.433333,0.464286,13,32,0.406250,-2.000000,40.133333,87.983333,108.300000,110.300000
505,2019,2,SAS,1270225843,397,117128,93415540022,6546.0,1423.0,2611.0,...,0.431034,0.535714,10,30,0.333333,-1.637931,42.051724,89.689655,112.862069,114.500000
506,2019,2,TOR,1314027188,423,121166,96636765660,6768.0,1530.0,2718.0,...,0.700000,0.718750,19,28,0.678571,6.366667,40.683333,88.833333,112.800000,106.433333
507,2019,2,UTA,1292126393,409,119147,95026152958,6566.0,1299.0,2688.0,...,0.627119,0.700000,16,29,0.551724,3.050847,40.203390,84.898305,111.288136,108.237288


In [13]:
# Estimated possessions for the team and its opponents:
#     0.96 * (FGA + TO + 0.44 * FTA - OREB)
# df_grp holds season totals, so these are season-long possession counts,
# not per-game values.
# NOTE(review): 0.96 and 0.44 look like the usual box-score possession-estimate
# coefficients -- confirm the source.
df_grp['POS'] = (
    df_grp['FGA']
    .add(df_grp['TO'])
    .add(df_grp['FTA'].mul(0.44))
    .sub(df_grp['OREB'])
    .mul(0.96)
)
df_grp['POS_opp'] = (
    df_grp['FGA_opp']
    .add(df_grp['TO_opp'])
    .add(df_grp['FTA_opp'].mul(0.44))
    .sub(df_grp['OREB_opp'])
    .mul(0.96)
)

In [14]:
# Offensive efficiency: points per 100 of the scoring side's own possessions.
df_grp['OFF_EFF'] = df_grp['PTS'].div(df_grp['POS']).mul(100)
df_grp['OFF_EFF_opp'] = df_grp['PTS_opp'].div(df_grp['POS_opp']).mul(100)

# Defensive efficiency: points conceded per 100 possessions.
# NOTE(review): the denominators are the conceding side's OWN possessions
# (POS for DEF_EFF, POS_opp for DEF_EFF_opp), not the scorer's possessions --
# confirm this is the intended definition.
df_grp['DEF_EFF'] = df_grp['PTS_opp'].div(df_grp['POS']).mul(100)
df_grp['DEF_EFF_opp'] = df_grp['PTS'].div(df_grp['POS_opp']).mul(100)

In [15]:
# List every column now available, to pick the subset kept in the next cell.
df_grp.columns

Index(['SEASON', 'GAME_TYPE_CODE', 'TEAM_ABBR', 'GAME_ID', 'dt_MONTH',
       'dt_YEAR', 'TEAM_ID', 'PTS', 'AST', 'REB', 'OREB', 'DREB', 'FGM', 'FGA',
       'FG3M', 'FG3A', 'FTM', 'FTA', 'TO', 'PF', 'FG_PCT', 'FT_PCT', 'FG3_PCT',
       'TEAM_ID_opp', 'PTS_opp', 'AST_opp', 'REB_opp', 'OREB_opp', 'DREB_opp',
       'FGM_opp', 'FGA_opp', 'FG3M_opp', 'FG3A_opp', 'FTM_opp', 'FTA_opp',
       'TO_opp', 'PF_opp', 'FG_PCT_opp', 'FT_PCT_opp', 'FG3_PCT_opp',
       'HOME_TEAM_WINS', 'IsHomeTeam', 'WINS', 'd_PTS', 'd_AST', 'd_REB',
       'd_OREB', 'd_DREB', 'd_FGM', 'd_FGA', 'd_FG3M', 'd_FG3A', 'd_FTM',
       'd_FTA', 'd_TO', 'd_PF', 'GAME_ID_opp', 'GAMES', 'WINS_HOME',
       'SEASON_AND1', 'WIN_pct', 'WIN_pct_at_HOME', 'WINS_at_AWAY',
       'GAMES_at_AWAY', 'WIN_pct_at_AWAY', 'd_PTS_per_GAME', 'FGM_per_GAME',
       'FGA_per_GAME', 'PTS_per_GAME', 'PTS_opp_per_GAME', 'POS', 'POS_opp',
       'OFF_EFF', 'OFF_EFF_opp', 'DEF_EFF', 'DEF_EFF_opp'],
      dtype='object')

In [16]:
# Keep only the identifying keys and the derived rate/efficiency columns.
# .copy() detaches the result so later column assignments do not touch df_grp.
df_grp2 = df_grp.loc[:, [
    'SEASON', 'SEASON_AND1', 'GAME_TYPE_CODE', 'TEAM_ABBR',
    'WIN_pct', 'WIN_pct_at_HOME', 'WINS_at_AWAY', 'GAMES_at_AWAY',
    'WIN_pct_at_AWAY', 'd_PTS_per_GAME', 'FGM_per_GAME', 'FGA_per_GAME',
    'PTS_per_GAME', 'PTS_opp_per_GAME', 'POS', 'POS_opp',
    'OFF_EFF', 'OFF_EFF_opp', 'DEF_EFF', 'DEF_EFF_opp',
]].copy()

In [17]:
# Build a "<following season>_<team>" lookup key, e.g. 2004_ATL.
df_grp2['SEASON_AND1_TEAM'] = (
    df_grp2['SEASON_AND1'].astype(str).add('_').add(df_grp2['TEAM_ABBR'])
)
df_grp2

Unnamed: 0,SEASON,SEASON_AND1,GAME_TYPE_CODE,TEAM_ABBR,WIN_pct,WIN_pct_at_HOME,WINS_at_AWAY,GAMES_at_AWAY,WIN_pct_at_AWAY,d_PTS_per_GAME,...,FGA_per_GAME,PTS_per_GAME,PTS_opp_per_GAME,POS,POS_opp,OFF_EFF,OFF_EFF_opp,DEF_EFF,DEF_EFF_opp,SEASON_AND1_TEAM
0,2003,2004,2,ATL,0.341463,0.439024,10,41,0.243902,-4.646341,...,79.621951,92.817073,97.463415,7373.2224,7382.0928,103.224880,108.261982,108.392228,103.100844,2004_ATL
1,2003,2004,2,BKN,0.573171,0.682927,19,41,0.463415,2.500000,...,77.487805,90.256098,87.756098,7163.7120,7155.6480,103.312361,100.563918,100.450716,103.428788,2004_BKN
2,2003,2004,2,BOS,0.439024,0.463415,17,41,0.414634,-1.451220,...,78.231707,95.256098,96.707317,7463.8080,7526.0544,104.651674,105.367296,106.246034,103.786122,2004_BOS
3,2003,2004,2,CHI,0.280488,0.341463,9,41,0.219512,-6.353659,...,82.353659,89.695122,96.048780,7449.9072,7460.8512,98.726062,105.564362,105.719438,98.581245,2004_CHI
4,2003,2004,2,CLE,0.426829,0.560976,12,41,0.292683,-2.621951,...,82.353659,92.914634,95.536585,7387.3920,7374.3360,103.135180,106.233293,106.045544,103.317777,2004_CLE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
504,2019,2020,2,SAC,0.433333,0.464286,13,32,0.406250,-2.000000,...,87.983333,108.300000,110.300000,5808.1536,5819.7504,111.877207,113.716217,113.943268,111.654273,2020_SAC
505,2019,2020,2,SAS,0.431034,0.535714,10,30,0.333333,-1.637931,...,89.689655,112.862069,114.500000,5711.5008,5732.8128,114.610857,115.841913,116.274167,114.184786,2020_SAS
506,2019,2020,2,TOR,0.700000,0.718750,19,28,0.678571,6.366667,...,88.833333,112.800000,106.433333,5904.2688,5952.3840,114.628927,107.284745,108.159032,113.702342,2020_TOR
507,2019,2020,2,UTA,0.627119,0.700000,16,29,0.551724,3.050847,...,84.898305,111.288136,108.237288,5694.8736,5702.2080,115.296677,111.991706,112.135939,115.148378,2020_UTA


In [18]:
# List the distinct team abbreviations present across all seasons.
df_grp2.TEAM_ABBR.unique()

array(['ATL', 'BKN', 'BOS', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW',
       'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP',
       'NYK', 'OKC', 'ORL', 'PHI', 'PHX', 'POR', 'SAC', 'SAS', 'TOR',
       'UTA', 'WAS', 'CHA'], dtype=object)

# =========== 
# Bringing in manually compiled data:

## 1: All teams, 2002 season

In [19]:
# Load the manually compiled 2002-season team stats; the CSV's first column
# becomes the row index.
df_2002 = pd.read_csv(filepath_or_buffer='_data/nba 2002.csv', index_col=0)

In [20]:
# Stack the manually compiled 2002 rows on top of the computed 2003+ rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; ignore_index=True renumbers the rows 0..n-1 as before.
df_grp3 = pd.concat([df_2002, df_grp2], ignore_index=True)

## 2: 2003 substitute for CHA (I will use the median of the other 29 NBA teams in 2003)

### CHA did not exist as a franchise until 2004, so it has no "previous season" stats for its first year. The median of all other teams' 2003 stats is used in their place.

In [21]:
# All team rows for the 2003 season (CHA has no row in that season).
df_2003 = df_grp2.query('SEASON == 2003')

# Column-wise median over those teams, used as a stand-in for CHA.
# numeric_only=True skips the text columns (TEAM_ABBR, SEASON_AND1_TEAM);
# without it, pandas >= 2.0 raises a TypeError instead of dropping them.
# Each column's median is taken independently, so the resulting row is not
# any single team's record.
df_2003_CHA = df_2003.median(numeric_only=True)

In [22]:
# Turn the Series of medians into a one-column DataFrame (stat names as the index).
# Note: this rebinds df_2003_CHA, so re-running the cell on its own will fail
# because the name then already refers to a DataFrame.
df_2003_CHA = df_2003_CHA.to_frame()

In [23]:
# Inspect the medians: one row per statistic, single value column.
df_2003_CHA

Unnamed: 0,0
SEASON,2003.0
SEASON_AND1,2004.0
GAME_TYPE_CODE,2.0
WIN_pct,0.5
WIN_pct_at_HOME,0.658537
WINS_at_AWAY,16.0
GAMES_at_AWAY,41.0
WIN_pct_at_AWAY,0.390244
d_PTS_per_GAME,-0.634146
FGM_per_GAME,34.670732


In [24]:
# Flip the one-column frame so the medians form a single row with one column
# per statistic, matching the layout of the team-season rows.
df_2003_CHA = df_2003_CHA.T
df_2003_CHA

Unnamed: 0,SEASON,SEASON_AND1,GAME_TYPE_CODE,WIN_pct,WIN_pct_at_HOME,WINS_at_AWAY,GAMES_at_AWAY,WIN_pct_at_AWAY,d_PTS_per_GAME,FGM_per_GAME,FGA_per_GAME,PTS_per_GAME,PTS_opp_per_GAME,POS,POS_opp,OFF_EFF,OFF_EFF_opp,DEF_EFF,DEF_EFF_opp
0,2003.0,2004.0,2.0,0.5,0.658537,16.0,41.0,0.390244,-0.634146,34.670732,79.926829,92.817073,94.268293,7274.9568,7299.5328,104.273586,105.359541,105.391154,104.543239


In [25]:
# The median dropped the text columns, so restore the identifiers: this row
# stands for CHA's "previous season" entering 2004.
df_2003_CHA = df_2003_CHA.assign(SEASON_AND1_TEAM='2004_CHA', TEAM_ABBR='CHA')

In [26]:
# The medians come back as floats (2003.0, 2004.0, 2.0); turn the key columns
# back into integers so they match the integer keys of the main dataframe.
# downcast='integer' picks the smallest integer dtype that fits each column.
for _key_col in ('SEASON', 'SEASON_AND1', 'GAME_TYPE_CODE'):
    df_2003_CHA[_key_col] = pd.to_numeric(df_2003_CHA[_key_col], downcast='integer')

In [27]:
# Show the current column order; the two label columns were appended at the
# end, so the next cell reorders them.
list(df_2003_CHA.columns.values)

['SEASON',
 'SEASON_AND1',
 'GAME_TYPE_CODE',
 'WIN_pct',
 'WIN_pct_at_HOME',
 'WINS_at_AWAY',
 'GAMES_at_AWAY',
 'WIN_pct_at_AWAY',
 'd_PTS_per_GAME',
 'FGM_per_GAME',
 'FGA_per_GAME',
 'PTS_per_GAME',
 'PTS_opp_per_GAME',
 'POS',
 'POS_opp',
 'OFF_EFF',
 'OFF_EFF_opp',
 'DEF_EFF',
 'DEF_EFF_opp',
 'SEASON_AND1_TEAM',
 'TEAM_ABBR']

In [28]:
# Put the CHA row's columns in the same order as df_grp2: keys first, then
# the derived stats, with the SEASON_AND1_TEAM key last.
df_2003_CHA = df_2003_CHA.loc[:, [
    # identifying keys
    'SEASON', 'SEASON_AND1', 'GAME_TYPE_CODE', 'TEAM_ABBR',
    # win rates
    'WIN_pct', 'WIN_pct_at_HOME', 'WINS_at_AWAY', 'GAMES_at_AWAY', 'WIN_pct_at_AWAY',
    # per-game scoring and shooting
    'd_PTS_per_GAME', 'FGM_per_GAME', 'FGA_per_GAME', 'PTS_per_GAME', 'PTS_opp_per_GAME',
    # possessions and efficiencies
    'POS', 'POS_opp', 'OFF_EFF', 'OFF_EFF_opp', 'DEF_EFF', 'DEF_EFF_opp',
    # lookup key
    'SEASON_AND1_TEAM',
]]

In [29]:
# Final table: the synthetic 2003 CHA row, then the manually compiled 2002
# rows, then the computed 2003+ rows.
# Built directly from the three source frames with pd.concat because:
#   * DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
#   * prepending to df_grp3 itself added another CHA row on every re-run of
#     this cell, whereas rebuilding from the sources is idempotent.
# Row and column order are the same as the original two-step append.
df_grp3 = pd.concat([df_2003_CHA, df_2002, df_grp2], ignore_index=True)

In [30]:
# Inspect the finished single-row CHA substitute.
df_2003_CHA

Unnamed: 0,SEASON,SEASON_AND1,GAME_TYPE_CODE,TEAM_ABBR,WIN_pct,WIN_pct_at_HOME,WINS_at_AWAY,GAMES_at_AWAY,WIN_pct_at_AWAY,d_PTS_per_GAME,...,FGA_per_GAME,PTS_per_GAME,PTS_opp_per_GAME,POS,POS_opp,OFF_EFF,OFF_EFF_opp,DEF_EFF,DEF_EFF_opp,SEASON_AND1_TEAM
0,2003,2004,2,CHA,0.5,0.658537,16.0,41.0,0.390244,-0.634146,...,79.926829,92.817073,94.268293,7274.9568,7299.5328,104.273586,105.359541,105.391154,104.543239,2004_CHA


# =========== 
# Final CSV Output

In [31]:
# Confirm the combined frame has the expected 21 columns in df_grp2's order.
df_grp3.columns

Index(['SEASON', 'SEASON_AND1', 'GAME_TYPE_CODE', 'TEAM_ABBR', 'WIN_pct',
       'WIN_pct_at_HOME', 'WINS_at_AWAY', 'GAMES_at_AWAY', 'WIN_pct_at_AWAY',
       'd_PTS_per_GAME', 'FGM_per_GAME', 'FGA_per_GAME', 'PTS_per_GAME',
       'PTS_opp_per_GAME', 'POS', 'POS_opp', 'OFF_EFF', 'OFF_EFF_opp',
       'DEF_EFF', 'DEF_EFF_opp', 'SEASON_AND1_TEAM'],
      dtype='object')

In [32]:
# Check dtypes and null counts. Nulls are expected only in WINS_at_AWAY and
# GAMES_at_AWAY, which the manually compiled 2002 rows leave empty.
df_grp3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 539 entries, 0 to 538
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   SEASON            539 non-null    int64  
 1   SEASON_AND1       539 non-null    int64  
 2   GAME_TYPE_CODE    539 non-null    int64  
 3   TEAM_ABBR         539 non-null    object 
 4   WIN_pct           539 non-null    float64
 5   WIN_pct_at_HOME   539 non-null    float64
 6   WINS_at_AWAY      510 non-null    float64
 7   GAMES_at_AWAY     510 non-null    float64
 8   WIN_pct_at_AWAY   539 non-null    float64
 9   d_PTS_per_GAME    539 non-null    float64
 10  FGM_per_GAME      539 non-null    float64
 11  FGA_per_GAME      539 non-null    float64
 12  PTS_per_GAME      539 non-null    float64
 13  PTS_opp_per_GAME  539 non-null    float64
 14  POS               539 non-null    float64
 15  POS_opp           539 non-null    float64
 16  OFF_EFF           539 non-null    float64
 1

In [33]:
# Spot-check the manually compiled 2002 rows (keyed to the 2003 season).
df_grp3.query("SEASON == 2002")

Unnamed: 0,SEASON,SEASON_AND1,GAME_TYPE_CODE,TEAM_ABBR,WIN_pct,WIN_pct_at_HOME,WINS_at_AWAY,GAMES_at_AWAY,WIN_pct_at_AWAY,d_PTS_per_GAME,...,FGA_per_GAME,PTS_per_GAME,PTS_opp_per_GAME,POS,POS_opp,OFF_EFF,OFF_EFF_opp,DEF_EFF,DEF_EFF_opp,SEASON_AND1_TEAM
1,2002,2003,2,ATL,0.427,0.634,,,0.415,-3.561,...,78.463,94.073,97.634,7438.886,7476.211,103.698,107.086,107.624,103.181,2003_ATL
2,2002,2003,2,BOS,0.537,0.61,,,0.146,-0.39,...,79.378,92.671,93.061,7376.986,7435.162,103.01,102.634,103.443,102.204,2003_BOS
3,2002,2003,2,CHI,0.366,0.659,,,0.585,-5.134,...,81.878,94.951,100.085,7683.034,7642.906,101.34,107.381,106.82,101.872,2003_CHI
4,2002,2003,2,CLE,0.207,0.341,,,0.268,-9.622,...,82.268,91.402,101.024,7693.709,7672.666,97.417,107.968,107.672,97.684,2003_CLE
5,2002,2003,2,DAL,0.732,0.805,,,0.146,7.78,...,85.146,102.976,95.195,7495.603,7537.997,112.653,103.555,104.141,112.019,2003_DAL
6,2002,2003,2,DEN,0.207,0.317,,,0.22,-8.28,...,79.805,84.159,92.439,7449.6,7352.909,92.636,103.088,101.75,93.854,2003_DEN
7,2002,2003,2,DET,0.61,0.732,,,0.244,3.683,...,76.549,91.366,87.683,7091.75,7097.088,105.644,101.309,101.385,105.564,2003_DET
8,2002,2003,2,GSW,0.463,0.585,,,0.244,-1.134,...,84.646,102.439,103.573,7680.077,7688.602,109.374,110.462,110.585,109.253,2003_GSW
9,2002,2003,2,HOU,0.524,0.683,,,0.317,1.476,...,78.793,93.756,92.28,7307.866,7275.072,105.202,104.013,103.546,105.676,2003_HOU
10,2002,2003,2,IND,0.585,0.78,,,0.39,3.488,...,80.585,96.829,93.341,7500.71,7505.741,105.857,101.975,102.044,105.786,2003_IND


In [36]:
# Spot-check CHA: the first row (SEASON 2003) is the league-median substitute,
# the remaining rows are computed from actual games.
df_grp3.query("TEAM_ABBR == 'CHA'")

Unnamed: 0,SEASON,SEASON_AND1,GAME_TYPE_CODE,TEAM_ABBR,WIN_pct,WIN_pct_at_HOME,WINS_at_AWAY,GAMES_at_AWAY,WIN_pct_at_AWAY,d_PTS_per_GAME,...,FGA_per_GAME,PTS_per_GAME,PTS_opp_per_GAME,POS,POS_opp,OFF_EFF,OFF_EFF_opp,DEF_EFF,DEF_EFF_opp,SEASON_AND1_TEAM
0,2003,2004,2,CHA,0.5,0.658537,16.0,41.0,0.390244,-0.634146,...,79.926829,92.817073,94.268293,7274.9568,7299.5328,104.273586,105.359541,105.391154,104.543239,2004_CHA
62,2004,2005,2,CHA,0.219512,0.341463,4.0,41.0,0.097561,-5.987805,...,83.378049,94.256098,100.243902,7495.5264,7468.6848,103.114839,110.059538,109.665413,103.485422,2005_CHA
92,2005,2006,2,CHA,0.317073,0.414634,9.0,41.0,0.219512,-4.0,...,83.45122,96.865854,100.865854,7612.6464,7581.6192,104.339537,109.092791,108.648157,104.766539,2006_CHA
122,2006,2007,2,CHA,0.402439,0.487805,13.0,41.0,0.317073,-3.743902,...,81.012195,96.890244,100.634146,7522.4064,7515.6096,105.617798,109.798146,109.698939,105.713314,2007_CHA
152,2007,2008,2,CHA,0.390244,0.512195,11.0,41.0,0.268293,-4.378049,...,79.926829,97.060976,101.439024,7418.496,7430.4,107.285897,111.945521,112.125153,107.114018,2008_CHA
182,2008,2009,2,CHA,0.426829,0.560976,12.0,41.0,0.292683,-1.268293,...,76.817073,93.621951,94.890244,7198.656,7175.3472,106.644907,108.440746,108.089621,106.991338,2009_CHA
212,2009,2010,2,CHA,0.536585,0.756098,13.0,41.0,0.317073,1.463415,...,76.902439,95.280488,93.817073,7322.112,7312.704,106.704186,105.200484,105.065314,106.841464,2010_CHA
242,2010,2011,2,CHA,0.414634,0.512195,13.0,41.0,0.317073,-4.0,...,77.621951,93.292683,97.292683,7218.7776,7253.2992,105.973621,109.991326,110.517326,105.469246,2011_CHA
272,2011,2012,2,CHA,0.106061,0.121212,3.0,33.0,0.090909,-13.909091,...,80.19697,86.954545,100.863636,5910.9888,5900.9664,97.090355,112.81203,112.620751,97.255256,2012_CHA
302,2012,2013,2,CHA,0.256098,0.365854,6.0,41.0,0.146341,-9.231707,...,81.085366,93.426829,102.658537,7403.904,7382.3616,103.472438,114.028552,113.696774,103.77438,2013_CHA


In [37]:
# Persist the team-season table (row index included as the first CSV column).
df_grp3.to_csv(path_or_buf='_data_out/df_grp_team_season_stats.csv')