In [438]:
# Imports
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
%matplotlib inline

# Win Loss Record

This data set contains the historical win-loss records for each NBA team, beginning with the 1999-2000 season.

In [439]:
# Location of the historical win/loss CSV
nba_records_csv = Path('../Kevin_Files/Win_Loss.csv')
# Load it into a DataFrame and append two empty placeholder columns for the
# season boundary years (they are filled in from 'Season' in the next cell)
nba_records = pd.read_csv(nba_records_csv).assign(
    **{'Season Start': '', 'Season End': ''}
)
nba_records.head()

Unnamed: 0,Season,Team,GP,W,L,WIN%,Season Start,Season End
0,2019-20,Atlanta Hawks,67,20,47,0.299,,
1,2019-20,Boston Celtics,72,48,24,0.667,,
2,2019-20,Brooklyn Nets,72,35,37,0.486,,
3,2019-20,Charlotte Hornets,65,23,42,0.354,,
4,2019-20,Chicago Bulls,65,22,43,0.338,,


In [440]:
# Populate the season boundary columns from the 'Season' label.
# The first four characters of the label are the starting year; store them as integers.
nba_records['Season Start'] = nba_records['Season'].str.slice(0, 4).astype(int)
# The ending year is the year after the starting year
nba_records['Season End'] = nba_records['Season Start'].add(1)
nba_records.head()

Unnamed: 0,Season,Team,GP,W,L,WIN%,Season Start,Season End
0,2019-20,Atlanta Hawks,67,20,47,0.299,2019,2020
1,2019-20,Boston Celtics,72,48,24,0.667,2019,2020
2,2019-20,Brooklyn Nets,72,35,37,0.486,2019,2020
3,2019-20,Charlotte Hornets,65,23,42,0.354,2019,2020
4,2019-20,Chicago Bulls,65,22,43,0.338,2019,2020


In [441]:
# Remove the 'Season' label (its years now live in Season Start / Season End)
# and order the rows by team name
nba_records = (
    nba_records
    .drop(columns=['Season'])
    .sort_values(by='Team')
)
nba_records.head()

Unnamed: 0,Team,GP,W,L,WIN%,Season Start,Season End
0,Atlanta Hawks,67,20,47,0.299,2019,2020
119,Atlanta Hawks,82,43,39,0.524,2016,2017
120,Atlanta Hawks,82,48,34,0.585,2015,2016
479,Atlanta Hawks,82,13,69,0.159,2004,2005
150,Atlanta Hawks,82,60,22,0.732,2014,2015


In [442]:
# Put the season years first, followed by the team and its record columns
nba_records = nba_records.loc[
    :, ['Season Start', 'Season End', 'Team', 'GP', 'W', 'L', 'WIN%']
]
nba_records.head()

Unnamed: 0,Season Start,Season End,Team,GP,W,L,WIN%
0,2019,2020,Atlanta Hawks,67,20,47,0.299
119,2016,2017,Atlanta Hawks,82,43,39,0.524
120,2015,2016,Atlanta Hawks,82,48,34,0.585
479,2004,2005,Atlanta Hawks,82,13,69,0.159
150,2014,2015,Atlanta Hawks,82,60,22,0.732


In [443]:
# Number of missing values in each column
nba_records.isna().sum()

Season Start    0
Season End      0
Team            0
GP              0
W               0
L               0
WIN%            0
dtype: int64

In [444]:
# Keep only the seasons from 2008-09 through 2017-18 (both ends inclusive),
# then renumber the remaining rows from zero
nba_records = nba_records.loc[
    nba_records['Season Start'].ge(2008) & nba_records['Season End'].le(2018)
].reset_index(drop=True)
nba_records.head()

Unnamed: 0,Season Start,Season End,Team,GP,W,L,WIN%
0,2016,2017,Atlanta Hawks,82,43,39,0.524
1,2015,2016,Atlanta Hawks,82,48,34,0.585
2,2014,2015,Atlanta Hawks,82,60,22,0.732
3,2008,2009,Atlanta Hawks,82,47,35,0.573
4,2013,2014,Atlanta Hawks,82,38,44,0.463


# NBA Salaries
This CSV file contains the historical salary spend for each NBA team, beginning with the 2008-09 season. Each salary is reported both as a nominal amount and adjusted for inflation.

In [445]:
# Location of the team salary CSV
nba_salary_csv = Path('../nba_salaries.csv')
# Load it into a DataFrame, expose the 'Year' column under the name 'Season',
# and append two empty placeholder columns for the season boundary years
nba_salaries = (
    pd.read_csv(nba_salary_csv)
    .rename(columns={'Year': 'Season'})
    .assign(**{'Season Start': '', 'Season End': ''})
)
nba_salaries.head()

Unnamed: 0,Season,Team,Salary,Salary w/ Inflation,Season Start,Season End
0,2008/2009,New York,"$96,643,646","$113,130,233",,
1,2008/2009,Dallas,"$95,045,559","$111,259,524",,
2,2008/2009,Cleveland,"$91,298,233","$106,872,938",,
3,2008/2009,Portland,"$80,260,059","$93,951,741",,
4,2008/2009,Boston,"$79,188,973","$92,697,938",,


In [446]:
# Rewrite the season labels from 'YYYY/YYYY' to the 'YYYY-YY' form used by the
# win/loss data (e.g. '2008/2009' -> '2008-09').
# A single anchored pattern replaces the two parallel hard-coded season lists, so
# every season present in the file is converted without the lists having to be kept
# in sync; labels that do not match 'YYYY/YYYY' exactly are left unchanged.
nba_salaries['Season'] = nba_salaries['Season'].str.replace(
    r'^(\d{4})/\d{2}(\d{2})$',  # group 1: start year, group 2: last two digits of end year
    r'\1-\2',
    regex=True,
)
nba_salaries.head()

Unnamed: 0,Season,Team,Salary,Salary w/ Inflation,Season Start,Season End
0,2008-09,New York,"$96,643,646","$113,130,233",,
1,2008-09,Dallas,"$95,045,559","$111,259,524",,
2,2008-09,Cleveland,"$91,298,233","$106,872,938",,
3,2008-09,Portland,"$80,260,059","$93,951,741",,
4,2008-09,Boston,"$79,188,973","$92,697,938",,


In [447]:
# Populate the season boundary columns from the 'Season' label.
# The first four characters of the label are the starting year; store them as integers.
nba_salaries['Season Start'] = nba_salaries['Season'].str.slice(0, 4).astype(int)
# The ending year is the year after the starting year
nba_salaries['Season End'] = nba_salaries['Season Start'].add(1)
nba_salaries.tail()

Unnamed: 0,Season,Team,Salary,Salary w/ Inflation,Season Start,Season End
295,2017-18,Brooklyn,"$95,475,397","$99,836,100",2017,2018
296,2017-18,Indiana,"$95,271,736","$99,623,136",2017,2018
297,2017-18,Phoenix,"$92,684,083","$96,917,295",2017,2018
298,2017-18,Chicago,"$90,466,801","$94,598,740",2017,2018
299,2017-18,Dallas,"$85,440,245","$89,342,606",2017,2018


In [448]:
# Remove the 'Season' label (its years now live in Season Start / Season End)
# and order the rows by team name
nba_salaries = (
    nba_salaries
    .drop(columns=['Season'])
    .sort_values(by='Team')
)
nba_salaries.head()

Unnamed: 0,Team,Salary,Salary w/ Inflation,Season Start,Season End
173,Atlanta,"$58,998,677","$64,718,790",2013,2014
99,Atlanta,"$73,669,912","$83,598,545",2011,2012
255,Atlanta,"$96,315,163","$102,359,377",2016,2017
292,Atlanta,"$99,992,696","$104,559,718",2017,2018
19,Atlanta,"$68,168,841","$79,797,863",2008,2009


In [449]:
# Put the season years first, followed by the team and its salary columns,
# then renumber the rows from zero
nba_salaries = nba_salaries.loc[
    :, ['Season Start', 'Season End', 'Team', 'Salary', 'Salary w/ Inflation']
].reset_index(drop=True)
nba_salaries.head()

Unnamed: 0,Season Start,Season End,Team,Salary,Salary w/ Inflation
0,2013,2014,Atlanta,"$58,998,677","$64,718,790"
1,2011,2012,Atlanta,"$73,669,912","$83,598,545"
2,2016,2017,Atlanta,"$96,315,163","$102,359,377"
3,2017,2018,Atlanta,"$99,992,696","$104,559,718"
4,2008,2009,Atlanta,"$68,168,841","$79,797,863"


# Standardizing the DataFrames
In order to join the two dataframes above, we will have to start by standardizing the team attribute in each.

In [450]:
# Distinct team labels per starting year, as they appear in the salary table
salary_team = (
    nba_salaries
    .groupby('Season Start')['Team']
    .unique()
)
# Distinct team labels per starting year, as they appear in the win/loss table
record_team = (
    nba_records
    .groupby('Season Start')['Team']
    .unique()
)
# Show the 2008 labels from both tables, salary table first, for comparison
print(salary_team[2008], record_team[2008], sep='\n')

['Atlanta' 'Boston' 'Brooklyn' 'Charlotte' 'Chicago' 'Cleveland' 'Dallas'
 'Denver' 'Detroit' 'Golden State' 'Houston' 'Indiana' 'LA Clippers'
 'LA Lakers' 'Memphis' 'Miami' 'Milwaukee' 'Minnesota' 'New Orleans'
 'New York' 'Oklahoma City' 'Orlando' 'Philadelphia' 'Phoenix' 'Portland'
 'Sacramento' 'San Antonio' 'Toronto' 'Utah' 'Washington']
['Atlanta Hawks' 'Boston Celtics' 'Charlotte Bobcats' 'Chicago Bulls'
 'Cleveland Cavaliers' 'Dallas Mavericks' 'Denver Nuggets'
 'Detroit Pistons' 'Golden State Warriors' 'Houston Rockets'
 'Indiana Pacers' 'Los Angeles Clippers' 'Los Angeles Lakers'
 'Memphis Grizzlies' 'Miami Heat' 'Milwaukee Bucks'
 'Minnesota Timberwolves' 'New Jersey Nets' 'New Orleans Hornets'
 'New York Knicks' 'Oklahoma City Thunder' 'Orlando Magic'
 'Philadelphia 76ers' 'Phoenix Suns' 'Portland Trail Blazers'
 'Sacramento Kings' 'San Antonio Spurs' 'Toronto Raptors' 'Utah Jazz'
 'Washington Wizards']


In [451]:
# Rename every 'New Jersey Nets' entry in nba_records to 'Brooklyn Nets',
# then re-sort by team name and renumber the rows from zero
nba_records = (
    nba_records
    .replace(to_replace='New Jersey Nets', value='Brooklyn Nets')
    .sort_values(by='Team')
    .reset_index(drop=True)
)
nba_records.head()

Unnamed: 0,Season Start,Season End,Team,GP,W,L,WIN%
0,2016,2017,Atlanta Hawks,82,43,39,0.524
1,2015,2016,Atlanta Hawks,82,48,34,0.585
2,2014,2015,Atlanta Hawks,82,60,22,0.732
3,2008,2009,Atlanta Hawks,82,47,35,0.573
4,2013,2014,Atlanta Hawks,82,38,44,0.463


In [452]:
# Recompute the distinct team labels per starting year for the salary table
salary_team = (
    nba_salaries
    .groupby('Season Start')['Team']
    .unique()
)
# Recompute the distinct team labels per starting year for the win/loss table
record_team = (
    nba_records
    .groupby('Season Start')['Team']
    .unique()
)
# Show the 2008 labels from both tables, salary table first, for comparison
print(salary_team[2008], record_team[2008], sep='\n')

['Atlanta' 'Boston' 'Brooklyn' 'Charlotte' 'Chicago' 'Cleveland' 'Dallas'
 'Denver' 'Detroit' 'Golden State' 'Houston' 'Indiana' 'LA Clippers'
 'LA Lakers' 'Memphis' 'Miami' 'Milwaukee' 'Minnesota' 'New Orleans'
 'New York' 'Oklahoma City' 'Orlando' 'Philadelphia' 'Phoenix' 'Portland'
 'Sacramento' 'San Antonio' 'Toronto' 'Utah' 'Washington']
['Atlanta Hawks' 'Boston Celtics' 'Brooklyn Nets' 'Charlotte Bobcats'
 'Chicago Bulls' 'Cleveland Cavaliers' 'Dallas Mavericks' 'Denver Nuggets'
 'Detroit Pistons' 'Golden State Warriors' 'Houston Rockets'
 'Indiana Pacers' 'Los Angeles Clippers' 'Los Angeles Lakers'
 'Memphis Grizzlies' 'Miami Heat' 'Milwaukee Bucks'
 'Minnesota Timberwolves' 'New Orleans Hornets' 'New York Knicks'
 'Oklahoma City Thunder' 'Orlando Magic' 'Philadelphia 76ers'
 'Phoenix Suns' 'Portland Trail Blazers' 'Sacramento Kings'
 'San Antonio Spurs' 'Toronto Raptors' 'Utah Jazz' 'Washington Wizards']


In [453]:
# Replace the short team labels in nba_salaries with the full team names used by nba_records.
# The two 2008 label arrays are paired purely by position, which is only correct while both
# arrays happen to be in the same order. Build the mapping explicitly and verify every pair
# before applying it, so a mis-ordered pairing raises instead of silently mislabelling teams.
short_names = list(salary_team[2008])
full_names = list(record_team[2008])
if len(short_names) != len(full_names):
    raise ValueError(
        f"Cannot pair team labels: {len(short_names)} salary labels "
        f"vs {len(full_names)} record labels for 2008"
    )
team_name_map = dict(zip(short_names, full_names))
# A pairing is accepted when the full name begins with the short label; the salary
# table abbreviates 'Los Angeles' as 'LA', so that prefix is expanded before comparing.
bad_pairs = {
    short: full
    for short, full in team_name_map.items()
    if not (
        full.startswith(short)
        or (short.startswith('LA ') and full.startswith('Los Angeles ' + short[3:]))
    )
}
if bad_pairs:
    raise ValueError(f"Team labels are not aligned by position: {bad_pairs}")
# NOTE(review): the map uses the 2008 full names for every season, so franchises that
# changed name after 2008 keep their 2008 label in nba_salaries - confirm this is intended.
nba_salaries['Team'] = nba_salaries['Team'].replace(team_name_map)
nba_salaries.head()

Unnamed: 0,Season Start,Season End,Team,Salary,Salary w/ Inflation
0,2013,2014,Atlanta Hawks,"$58,998,677","$64,718,790"
1,2011,2012,Atlanta Hawks,"$73,669,912","$83,598,545"
2,2016,2017,Atlanta Hawks,"$96,315,163","$102,359,377"
3,2017,2018,Atlanta Hawks,"$99,992,696","$104,559,718"
4,2008,2009,Atlanta Hawks,"$68,168,841","$79,797,863"


# Joining the DataFrames
Now that the team attributes of the two dataframes have been standardized, we can join them.

In [454]:
# Order nba_records by team name, then by starting year within each team,
# and renumber the rows from zero
nba_records = (
    nba_records
    .sort_values(by=['Team', 'Season Start'])
    .reset_index(drop=True)
)
nba_records

Unnamed: 0,Season Start,Season End,Team,GP,W,L,WIN%
0,2008,2009,Atlanta Hawks,82,47,35,0.573
1,2009,2010,Atlanta Hawks,82,53,29,0.646
2,2010,2011,Atlanta Hawks,82,44,38,0.537
3,2011,2012,Atlanta Hawks,66,40,26,0.606
4,2012,2013,Atlanta Hawks,82,44,38,0.537
...,...,...,...,...,...,...,...
295,2013,2014,Washington Wizards,82,44,38,0.537
296,2014,2015,Washington Wizards,82,46,36,0.561
297,2015,2016,Washington Wizards,82,41,41,0.500
298,2016,2017,Washington Wizards,82,49,33,0.598


In [455]:
# Order nba_salaries by team name, then by starting year within each team,
# and renumber the rows from zero
nba_salaries = (
    nba_salaries
    .sort_values(by=['Team', 'Season Start'])
    .reset_index(drop=True)
)
nba_salaries

Unnamed: 0,Season Start,Season End,Team,Salary,Salary w/ Inflation
0,2008,2009,Atlanta Hawks,"$68,168,841","$79,797,863"
1,2009,2010,Atlanta Hawks,"$65,883,642","$78,239,128"
2,2010,2011,Atlanta Hawks,"$71,469,843","$83,988,250"
3,2011,2012,Atlanta Hawks,"$73,669,912","$83,598,545"
4,2012,2013,Atlanta Hawks,"$66,710,178","$74,461,791"
...,...,...,...,...,...
295,2013,2014,Washington Wizards,"$64,698,822","$70,971,580"
296,2014,2015,Washington Wizards,"$73,372,974","$78,852,628"
297,2015,2016,Washington Wizards,"$85,055,155","$91,294,262"
298,2016,2017,Washington Wizards,"$104,016,580","$110,544,093"


In [456]:
# Attach the two salary columns to the win/loss table.
# pd.concat(axis="columns") matches rows by index label only - it does not look at
# team or season - so the join is correct only if both frames are sorted identically.
# Check that before concatenating, so a misalignment raises instead of silently
# attaching salaries to the wrong team-season.
if not nba_records.index.equals(nba_salaries.index):
    raise ValueError(
        "nba_records and nba_salaries do not share the same row index "
        f"({len(nba_records)} vs {len(nba_salaries)} rows); cannot join by position"
    )
season_mismatch = (
    nba_records['Season Start'].to_numpy() != nba_salaries['Season Start'].to_numpy()
)
if season_mismatch.any():
    raise ValueError(
        f"{int(season_mismatch.sum())} rows have different 'Season Start' values in "
        "nba_records and nba_salaries; the frames are not aligned row-for-row"
    )
# NOTE(review): 'Team' is deliberately not compared - nba_salaries carries the 2008 team
# names for every season, so its labels may differ from nba_records for renamed
# franchises. Confirm that matching on season order alone is sufficient here.
combined_df = pd.concat(
    [nba_records, nba_salaries[['Salary', 'Salary w/ Inflation']]],
    axis="columns",
)
combined_df.head()


Unnamed: 0,Season Start,Season End,Team,GP,W,L,WIN%,Salary,Salary w/ Inflation
0,2008,2009,Atlanta Hawks,82,47,35,0.573,"$68,168,841","$79,797,863"
1,2009,2010,Atlanta Hawks,82,53,29,0.646,"$65,883,642","$78,239,128"
2,2010,2011,Atlanta Hawks,82,44,38,0.537,"$71,469,843","$83,988,250"
3,2011,2012,Atlanta Hawks,66,40,26,0.606,"$73,669,912","$83,598,545"
4,2012,2013,Atlanta Hawks,82,44,38,0.537,"$66,710,178","$74,461,791"
