In [13]:
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO

In [ ]:
# Assign years

In [14]:
years = list(range(1997,2024))

In [ ]:
# Save webpages with sata as individual html Files

In [6]:
url_1 = "https://www.basketball-reference.com/awards/awards_{}.html"

In [7]:
import requests

for year in years:
    url = url_1.format(year)
    response = requests.get(url)
    
    with open("shares/{}.html".format(year),"w+") as f:
        f.write(response.text)

In [ ]:
# Saving MVP voting data

In [8]:
dfs = []
for year in years:
    with open("shares/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")
    mvp = soup.find(id="mvp")
    seasons = pd.read_html(StringIO(str(mvp)))[0]

    dfs.append(seasons)

In [10]:
mvp_shares = pd.concat(dfs)
mvp_shares.to_csv("mvp.csv")

In [ ]:
# Import manually edited csv file as new dataframe (removed columns and added season list)

In [15]:
df = pd.read_csv("mvp.csv")

In [16]:
print(list(df.columns))

['Rank', 'Player', 'Age', 'Tm', 'First', 'Pts Won', 'Pts Max', 'Share', 'Season']


In [ ]:
# Calculating MVP Share metric by totalling Share value for each team per season

In [17]:
mvp_shares_per_team_season =df.groupby(['Season', 'Tm'])['Share'].sum().reset_index()
mvp_shares_per_team_season.columns = ['Season', 'Team', 'MVP_Shares']
mvp_shares_per_team_season 

Unnamed: 0,Season,Team,MVP_Shares
0,2003,BOS,0.001
1,2003,DAL,0.037
2,2003,DET,0.028
3,2003,LAL,0.523
4,2003,MIN,0.732
...,...,...,...
239,2023,MIL,0.606
240,2023,NYK,0.001
241,2023,OKC,0.046
242,2023,PHI,0.915


In [ ]:
# Library of team names based on abbreviations 

In [19]:
team_names = {
    'ATL': 'Atlanta Hawks',
    'BOS': 'Boston Celtics',
    'BKN': 'Brooklyn Nets',
    'BRK': 'Brooklyn Nets',
    'CHB': 'Charlotte Bobcats',
    'CHH': 'Charlotte Hornets',
    'CHA': 'Charlotte Hornets',
    'CHI': 'Chicago Bulls',
    'CLE': 'Cleveland Cavaliers',
    'DAL': 'Dallas Mavericks',
    'DEN': 'Denver Nuggets',
    'DET': 'Detroit Pistons',
    'GSW': 'Golden State Warriors',
    'HOU': 'Houston Rockets',
    'IND': 'Indiana Pacers',
    'LAC': 'Los Angeles Clippers',
    'LAL': 'Los Angeles Lakers',
    'MEM': 'Memphis Grizzlies',
    'MIA': 'Miami Heat',
    'MIL': 'Milwaukee Bucks',
    'MIN': 'Minnesota Timberwolves',
    'NOP': 'New Orleans Pelicans',
    'NYK': 'New York Knicks',
    'OKC': 'Oklahoma City Thunder',
    'ORL': 'Orlando Magic',
    'PHI': 'Philadelphia 76ers',
    'PHX': 'Phoenix Suns',
    'PHO': 'Phoenix Suns',  
    'POR': 'Portland Trail Blazers',
    'SAC': 'Sacramento Kings',
    'SAS': 'San Antonio Spurs',
    'SEA': 'Seattle SuperSonics',
    'TOR': 'Toronto Raptors',
    'UTA': 'Utah Jazz',
    'WAS': 'Washington Wizards',
    'NJN': 'New Jersey Nets',  
    'NOH': 'New Orleans Hornets',  
    'NOK': 'New Orleans/Oklahoma City Hornets' 
}

In [ ]:
# Replacing abbreviated team names with full names

In [20]:
mvp_shares_per_team_season['Team'] = mvp_shares_per_team_season['Team'].replace(team_names)
mvp_shares_per_team_season

Unnamed: 0,Season,Team,MVP_Shares
0,2003,Boston Celtics,0.001
1,2003,Dallas Mavericks,0.037
2,2003,Detroit Pistons,0.028
3,2003,Los Angeles Lakers,0.523
4,2003,Minnesota Timberwolves,0.732
...,...,...,...
239,2023,Milwaukee Bucks,0.606
240,2023,New York Knicks,0.001
241,2023,Oklahoma City Thunder,0.046
242,2023,Philadelphia 76ers,0.915


In [ ]:
# Drop teams that have the name TOT (This results from players changing teams mid-season, meaning they will have very few MVP votes)

In [92]:
mvp_shares_per_team_season.drop(mvp_shares_per_team_season.index[(mvp_shares_per_team_season["Team"] == "TOT")], axis=0, inplace=True)
mvp_shares_per_team_season

In [ ]:
# Saving as csv and combining with complete dataset 

In [94]:
df_new = pd.read_csv("final_regular_season_stats.csv")
combine = df_new.merge(mvp_shares_per_team_season,how='outer', on=["Team", "Season"])
combine

In [ ]:
# Saving All NBA Voting Data 

In [101]:
df_1 = []
for year in years:
    with open("shares/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")
    all_nba = soup.find(id="leading_all_nba")
    all_nba_votes = pd.read_html(StringIO(str(all_nba)))[0]

    df_1.append(all_nba_votes)

In [103]:
all_nba_teams = pd.concat(df_1)
all_nba_teams.to_csv("all_nba_teams.csv")

In [ ]:
# Import manually edited csv file as new dataframe (removed columns and added season list)

In [21]:
all_nba_shares = pd.read_csv("all_nba_teams.csv")

In [22]:
all_nba_shares

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,Season
0,1st,Tim Duncan,SAS,603.0,610.0,0.989,2003.0
1,1st,Kobe Bryant,LAL,599.0,610.0,0.982,2003.0
2,1st,Kevin Garnett,MIN,596.0,610.0,0.977,2003.0
3,1st,Shaquille O'Neal,LAL,593.0,610.0,0.972,2003.0
4,1st,Tracy McGrady,ORL,578.0,610.0,0.948,2003.0
...,...,...,...,...,...,...,...
877,ORV,Paul George,LAC,1.0,500.0,0.002,2023.0
878,ORV,Tyrese Haliburton,IND,1.0,500.0,0.002,2023.0
879,ORV,Zach LaVine,CHI,1.0,500.0,0.002,2023.0
880,ORV,Brook Lopez,MIL,1.0,500.0,0.002,2023.0


In [ ]:
# Replace abbreviated team names with full names

In [155]:
all_nba_shares['Team'] = all_nba_shares['Team'].replace(team_names)

In [ ]:
# Drop rows that have null values (rows acting as dividers between tiers of voting) and save as dataframe

In [23]:
new = all_nba_shares.dropna(how='all')
new_new =pd.DataFrame(new)
new_new

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,Season
0,1st,Tim Duncan,SAS,603.0,610.0,0.989,2003.0
1,1st,Kobe Bryant,LAL,599.0,610.0,0.982,2003.0
2,1st,Kevin Garnett,MIN,596.0,610.0,0.977,2003.0
3,1st,Shaquille O'Neal,LAL,593.0,610.0,0.972,2003.0
4,1st,Tracy McGrady,ORL,578.0,610.0,0.948,2003.0
...,...,...,...,...,...,...,...
877,ORV,Paul George,LAC,1.0,500.0,0.002,2023.0
878,ORV,Tyrese Haliburton,IND,1.0,500.0,0.002,2023.0
879,ORV,Zach LaVine,CHI,1.0,500.0,0.002,2023.0
880,ORV,Brook Lopez,MIL,1.0,500.0,0.002,2023.0


In [ ]:
# Drop teams that have the name TOT (This results from players changing teams mid-season, meaning they will have very few All NBA votes)

In [160]:
new_new.drop(new_new.index[(new_new["Team"] == "TOT")], axis=0, inplace=True)

In [161]:
new_new

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,Season
0,1st,Tim Duncan,San Antonio Spurs,603.0,610.0,0.989,2003.0
1,1st,Kobe Bryant,Los Angeles Lakers,599.0,610.0,0.982,2003.0
2,1st,Kevin Garnett,Minnesota Timberwolves,596.0,610.0,0.977,2003.0
3,1st,Shaquille O'Neal,Los Angeles Lakers,593.0,610.0,0.972,2003.0
4,1st,Tracy McGrady,Orlando Magic,578.0,610.0,0.948,2003.0
...,...,...,...,...,...,...,...
877,ORV,Paul George,Los Angeles Clippers,1.0,500.0,0.002,2023.0
878,ORV,Tyrese Haliburton,Indiana Pacers,1.0,500.0,0.002,2023.0
879,ORV,Zach LaVine,Chicago Bulls,1.0,500.0,0.002,2023.0
880,ORV,Brook Lopez,Milwaukee Bucks,1.0,500.0,0.002,2023.0


In [ ]:
# Assigning a weighting system for All NBA votes (Players that are 1st team All NBA usually have a better season and greater impact on team success compared to 2nd team, 3rd team and vote getters that did not win an award (ORV))

In [55]:
rank_weights = {
    '1st': 2,
    '1T': 2,
    '2nd': 1.5,
    '2T': 1.5,
    '3rd': 1,
    '3T': 1,
    'ORV': 0.5
}

In [163]:
new_new['Weighted_Share'] = new_new.apply(lambda row: row['Share'] * rank_weights[row['# Tm']], axis=1)
weighted_all_nba_share_per_team_season = new_new.groupby(['Team', 'Season'])['Weighted_Share'].sum().reset_index()
weighted_all_nba_share_per_team_season

In [ ]:
# Replace abbreviations that were missed in team_name library

In [166]:
weighted_all_nba_share_per_team_season["Team"] = weighted_all_nba_share_per_team_season["Team"].str.replace("CHO", "Charlotte Hornets", regex=False)
weighted_all_nba_share_per_team_season["Team"] = weighted_all_nba_share_per_team_season["Team"].str.replace("NOK", "New Orleans/Oklahoma City Hornets", regex=False)
weighted_all_nba_share_per_team_season

In [ ]:
# Combine dataframes

In [170]:
combine_1 = combine.merge(weighted_all_nba_share_per_team_season,how='outer', on=["Team", "Season"])

In [171]:
combine_1

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,3P Per Game,3PA Per Game,2P Per Game,2PA Per Game,FT Per Game,...,% of 3P's that were assisted,% of FGA that are dunks,Dunks Made,% of FGA that are layups,# of layups made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares,Weighted_Share
0,Atlanta Hawks,82.0,242.7,34.9,78.5,4.9,13.9,30.0,64.5,19.4,...,0.856,0.048,297.0,0.202,647.0,0.276,0.397,0.0,,0.0015
1,Atlanta Hawks,82.0,242.7,34.5,79.6,5.1,15.2,29.4,64.4,18.7,...,0.845,0.045,289.0,0.232,767.0,0.292,0.370,0.0,,
2,Atlanta Hawks,82.0,242.1,35.9,81.4,3.7,11.9,32.2,69.5,17.3,...,0.872,0.056,354.0,0.289,985.0,0.265,0.295,0.0,,
3,Atlanta Hawks,82.0,242.4,35.9,79.2,5.2,14.1,30.8,65.1,20.2,...,0.837,0.047,290.0,0.272,922.0,0.278,0.383,0.0,,0.0015
4,Atlanta Hawks,82.0,242.4,34.5,77.7,4.2,12.7,30.4,65.0,20.5,...,0.833,0.047,273.0,0.284,940.0,0.253,0.373,0.0,,0.0025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,,,,,,,,,,,...,,,,,,,,0.0,,
656,,,,,,,,,,,...,,,,,,,,0.0,,
657,,,,,,,,,,,...,,,,,,,,0.0,,
658,,,,,,,,,,,...,,,,,,,,0.0,,


In [172]:
combine_1.to_csv("combined_stats.csv")

In [ ]:
# Saving Defensive Player of the Year

In [24]:
df_2 = []
for year in years:
    with open("shares/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")
    defense = soup.find(id="leading_all_defense")
    all_defense_votes = pd.read_html(StringIO(str(defense)))[0]

    df_2.append(all_defense_votes)

In [223]:
all_team_defense = pd.concat(df_2)
all_team_defense.to_csv("all_team_defense.csv")

In [ ]:
# Import manually edited csv file as new dataframe (removed columns and added season list)

In [227]:
atd = pd.read_csv("all_nba_defense.csv")
atd

In [ ]:
# Replace abbreviated team names with full names, drop teams that have the name TOT and null values and save as dataframe

In [229]:
atd['Team'] = atd['Team'].replace(team_names)
atd.drop(atd.index[(atd["Team"] == "TOT")], axis=0, inplace=True)
atd1 = atd.dropna(how='all')

In [235]:
atd_new = pd.DataFrame(atd1)
atd_new

In [ ]:
# Replace abbreviations that were missed in team_name library

In [241]:
atd_new["Team"] = atd_new["Team"].str.replace("NOK", "New Orleans/Oklahoma City Hornets", regex=False)
atd_new["Team"] = atd_new["Team"].str.replace("Charlotte Hornets", "Charlotte Bobcats", regex=False)

In [ ]:
# Assign weighting system for votes (similar idea as All Team NBA)

In [242]:
rank_weights = {
    '1st': 1,
    '2nd': 0.75,
    'ORV': 0.5
}
atd_new['DPOY_Share'] = atd_new.apply(lambda row: row['Share'] * rank_weights[row['#Tm']], axis=1)
dpoy_shares = atd_new.groupby(['Team', 'Season'])['DPOY_Share'].sum().reset_index()
dpoy_shares

In [ ]:
# Combine dataframes and save as csv file

In [244]:
combine_2 = pd.read_csv("combined_stats.csv")

In [245]:
combine_3 = combine_2.merge(dpoy_shares, how='outer',on=["Team", "Season"])

In [246]:
combine_3

Unnamed: 0.1,Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,3P Per Game,3PA Per Game,2P Per Game,2PA Per Game,...,% of FGA that are dunks,Dunks Made,% of FGA that are layups,# of layups made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares,Weighted_Share,DPOY_Share
0,0.0,Atlanta Hawks,82.0,242.7,34.9,78.5,4.9,13.9,30.0,64.5,...,0.048,297.0,0.202,647.0,0.276,0.397,0.0000,,0.0015,0.0690
1,1.0,Atlanta Hawks,82.0,242.7,34.5,79.6,5.1,15.2,29.4,64.4,...,0.045,289.0,0.232,767.0,0.292,0.370,0.0000,,,
2,2.0,Atlanta Hawks,82.0,242.1,35.9,81.4,3.7,11.9,32.2,69.5,...,0.056,354.0,0.289,985.0,0.265,0.295,0.0000,,,
3,3.0,Atlanta Hawks,82.0,242.4,35.9,79.2,5.2,14.1,30.8,65.1,...,0.047,290.0,0.272,922.0,0.278,0.383,0.0000,,0.0015,
4,4.0,Atlanta Hawks,82.0,242.4,34.5,77.7,4.2,12.7,30.4,65.0,...,0.047,273.0,0.284,940.0,0.253,0.373,0.0000,,0.0025,0.0335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
626,625.0,Washington Wizards,82.0,243.0,42.1,90.1,11.3,33.3,30.8,56.8,...,0.067,454.0,0.282,1210.0,0.213,0.360,0.0000,,0.0340,0.0025
627,626.0,Washington Wizards,72.0,241.0,41.5,90.9,12.0,32.6,29.5,58.3,...,0.047,276.0,0.285,1029.0,0.167,0.388,0.0000,,0.0320,
628,627.0,Washington Wizards,72.0,241.7,43.2,90.9,10.2,29.0,33.0,61.9,...,0.055,320.0,0.299,1074.0,0.231,0.369,0.0625,0.005,0.1860,0.0025
629,628.0,Washington Wizards,82.0,241.8,40.6,86.0,10.5,30.6,30.1,55.4,...,0.065,408.0,0.280,1135.0,0.246,0.397,0.0000,,,


In [247]:
combine_3.to_csv("combined_stats_1.csv")

In [1]:
years1 = list(range(1997,2003))

In [2]:
years1

[1997, 1998, 1999, 2000, 2001, 2002]

In [11]:
dfs1 = []
for year in years1:
    with open("shares/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")
    mvp = soup.find(id="mvp")
    seasons = pd.read_html(StringIO(str(mvp)))[0]

    dfs1.append(seasons)

In [12]:
dfs1

[   Unnamed: 0_level_0 Unnamed: 1_level_0 Unnamed: 2_level_0  \
                  Rank             Player                Age   
 0                   1        Karl Malone                 33   
 1                   2     Michael Jordan                 33   
 2                   3         Grant Hill                 24   
 3                   4       Tim Hardaway                 30   
 4                   5          Glen Rice                 29   
 5                   6        Gary Payton                 28   
 6                   7    Hakeem Olajuwon                 34   
 7                   8      Patrick Ewing                 34   
 8                  9T      Anthony Mason                 30   
 9                  9T   Shaquille O'Neal                 24   
 10                 11     Scottie Pippen                 31   
 11                 12    Alonzo Mourning                 26   
 12                13T    Dikembe Mutombo                 30   
 13                13T     Mitch Richmon

In [13]:
mvp_shares_new = pd.concat(dfs1)

In [18]:
mvp_shares_new.to_csv("mvp_shares_new.csv")

In [30]:
df = pd.read_csv("mvp_shares_new_1.csv")

In [31]:
mvp_shares_per_season = df.groupby(['Season', 'Team'])['Share'].sum().reset_index()
mvp_shares_per_season.columns = ['Season', 'Team', 'MVP_Shares']

In [32]:
mvp_shares_per_season

Unnamed: 0,Season,Team,MVP_Shares
0,1997,ATL,0.004
1,1997,CHH,0.123
2,1997,CHI,0.837
3,1997,DET,0.327
4,1997,HOU,0.085
...,...,...,...
82,2002,PHI,0.023
83,2002,SAC,0.031
84,2002,SAS,0.757
85,2002,SEA,0.043


In [33]:
mvp_shares_per_season['Team'] = mvp_shares_per_season['Team'].replace(team_names)

In [34]:
mvp_shares_per_season

Unnamed: 0,Season,Team,MVP_Shares
0,1997,Atlanta Hawks,0.004
1,1997,Charlotte Hornets,0.123
2,1997,Chicago Bulls,0.837
3,1997,Detroit Pistons,0.327
4,1997,Houston Rockets,0.085
...,...,...,...
82,2002,Philadelphia 76ers,0.023
83,2002,Sacramento Kings,0.031
84,2002,San Antonio Spurs,0.757
85,2002,Seattle SuperSonics,0.043


In [35]:
df1 = pd.read_csv("97-02.csv")

In [36]:
combine1 = df1.merge(mvp_shares_per_season,how='outer', on=["Team", "Season"])

In [37]:
combine1

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,FG%,3P Per Game,3PA Per Game,3P%,2P Per Game,...,10-16 feet FG%,16 feet - 3P FG%,% of 2P's that were assisted,% of 3P's that were assisted,% of FGA that are dunks,Dunks Made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares
0,Atlanta Hawks,82,241.5,34.3,76.9,0.446,8.0,22.4,0.359,26.3,...,0.344,0.470,0.505,0.712,0.043,259,0.146,0.410,0.266667,0.004
1,Atlanta Hawks,82,242.4,35.2,77.5,0.455,4.1,12.4,0.332,31.1,...,0.361,0.459,0.516,0.748,0.054,304,0.102,0.288,0.066667,
2,Atlanta Hawks,50,241.5,30.8,75.2,0.409,3.9,12.9,0.306,26.8,...,0.330,0.392,0.477,0.716,0.037,126,0.092,0.322,0.200000,0.001
3,Atlanta Hawks,82,241.8,36.6,83.0,0.441,3.1,9.9,0.317,33.4,...,0.361,0.442,0.489,0.806,0.038,236,0.128,0.452,0.000000,
4,Atlanta Hawks,82,240.6,35.1,81.3,0.431,4.1,11.4,0.357,31.0,...,0.387,0.403,0.507,0.811,0.037,224,0.228,0.371,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Washington Wizards,82,241.5,37.6,83.1,0.452,3.9,11.5,0.339,33.7,...,0.353,0.454,0.589,0.853,0.061,394,0.230,0.364,0.000000,0.001
170,Washington Wizards,50,241.0,35.4,79.4,0.445,3.6,11.6,0.309,31.8,...,0.392,0.435,0.578,0.810,0.036,132,0.198,0.322,0.000000,
171,Washington Wizards,82,241.5,36.7,81.5,0.451,4.1,10.9,0.376,32.6,...,0.375,0.445,0.558,0.833,0.048,300,0.204,0.462,0.000000,
172,Washington Wizards,82,240.0,34.5,78.7,0.439,3.4,10.3,0.324,31.2,...,0.397,0.415,0.563,0.753,0.030,177,0.216,0.361,0.000000,


In [38]:
dfs2 = []
for year in years1:
    with open("shares/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")
    all_nba = soup.find(id="leading_all_nba")
    all_nba_votes = pd.read_html(StringIO(str(all_nba)))[0]

    dfs2.append(all_nba_votes)

In [39]:
dfs2

[   Unnamed: 0_level_0 Unnamed: 1_level_0  Unnamed: 2_level_0  \
                  # Tm                Pos              Player   
 0                 1st                  G      Michael Jordan   
 1                 1st                  F         Karl Malone   
 2                 1st                  C     Hakeem Olajuwon   
 3                 1st                  F          Grant Hill   
 4                 1st                  G        Tim Hardaway   
 5                 NaN                NaN                 NaN   
 6                 2nd                  G         Gary Payton   
 7                 2nd                  F      Scottie Pippen   
 8                 2nd                  C       Patrick Ewing   
 9                 2nd                  F           Glen Rice   
 10                2nd                  G      Mitch Richmond   
 11                NaN                NaN                 NaN   
 12                3rd                  G       John Stockton   
 13                3rd   

In [40]:
all_nba_new = pd.concat(dfs2)

In [41]:
all_nba_new

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Voting,Voting,Voting,Voting,Voting,...,Per Game,Per Game,Per Game,Per Game,Per Game,Shooting,Shooting,Shooting,Advanced,Advanced
Unnamed: 0_level_1,# Tm,Pos,Player,Age,Tm,Pts Won,Pts Max,Share,1st Tm,2nd Tm,...,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48
0,1st,G,Michael Jordan,33.0,CHI,575.0,575.0,1.000,115.0,0.0,...,29.6,5.9,4.3,1.7,0.5,0.486,0.374,0.833,18.3,0.283
1,1st,F,Karl Malone,33.0,UTA,575.0,575.0,1.000,115.0,0.0,...,27.4,9.9,4.5,1.4,0.6,0.550,0.000,0.755,16.7,0.268
2,1st,C,Hakeem Olajuwon,34.0,HOU,514.0,575.0,0.894,89.0,,...,23.2,9.2,3.0,1.5,2.2,0.510,0.313,0.787,9.1,0.154
3,1st,F,Grant Hill,24.0,DET,483.0,575.0,0.840,76.0,,...,21.4,9.0,7.3,1.8,0.6,0.496,0.303,0.711,14.6,0.223
4,1st,G,Tim Hardaway,30.0,MIA,435.0,575.0,0.757,62.0,,...,20.3,3.4,8.6,1.9,0.1,0.415,0.344,0.799,12.9,0.198
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,ORV,C,Elden Campbell,33.0,CHH,1.0,630.0,0.002,0.0,0.0,...,13.9,6.9,1.3,0.8,1.8,0.484,0.000,0.797,7.6,0.168
45,ORV,G,Sam Cassell,32.0,MIL,1.0,630.0,0.002,0.0,0.0,...,19.7,4.2,6.7,1.2,0.2,0.463,0.348,0.860,8.6,0.158
46,ORV,G,Reggie Miller,36.0,IND,1.0,630.0,0.002,0.0,0.0,...,16.5,2.8,3.2,1.1,0.1,0.453,0.406,0.911,9.5,0.159
47,ORV,G,Cuttino Mobley,26.0,HOU,1.0,630.0,0.002,0.0,0.0,...,21.7,4.1,2.5,1.5,0.5,0.438,0.395,0.850,5.7,0.087


In [42]:
all_nba_new.to_csv("all_nba_new.csv")

In [43]:
df2 = pd.read_csv("all_nba_new.csv")

In [44]:
df2

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,1st Tm,Season
0,1st,Michael Jordan,CHI,575,575,1.000,115,1997
1,1st,Karl Malone,UTA,575,575,1.000,115,1997
2,1st,Hakeem Olajuwon,HOU,514,575,0.894,89,1997
3,1st,Grant Hill,DET,483,575,0.840,76,1997
4,1st,Tim Hardaway,MIA,435,575,0.757,62,1997
...,...,...,...,...,...,...,...,...
261,ORV,Elden Campbell,CHH,1,630,0.002,0,2002
262,ORV,Sam Cassell,MIL,1,630,0.002,0,2002
263,ORV,Reggie Miller,IND,1,630,0.002,0,2002
264,ORV,Cuttino Mobley,HOU,1,630,0.002,0,2002


In [47]:
df2.drop(df2.index[(df2["Team"] == "TOT")], axis=0, inplace=True)

In [49]:
df2['Team'] = df2['Team'].replace(team_names)

In [50]:
df2

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,1st Tm,Season
0,1st,Michael Jordan,Chicago Bulls,575,575,1.000,115,1997
1,1st,Karl Malone,Utah Jazz,575,575,1.000,115,1997
2,1st,Hakeem Olajuwon,Houston Rockets,514,575,0.894,89,1997
3,1st,Grant Hill,Detroit Pistons,483,575,0.840,76,1997
4,1st,Tim Hardaway,Miami Heat,435,575,0.757,62,1997
...,...,...,...,...,...,...,...,...
261,ORV,Elden Campbell,Charlotte Hornets,1,630,0.002,0,2002
262,ORV,Sam Cassell,Milwaukee Bucks,1,630,0.002,0,2002
263,ORV,Reggie Miller,Indiana Pacers,1,630,0.002,0,2002
264,ORV,Cuttino Mobley,Houston Rockets,1,630,0.002,0,2002


In [53]:
df2['Team'] = df2['Team'].replace('WSB','Washington Bullets', regex=True)
df2['Team'] = df2['Team'].replace('VAN','Vancouver Grizzlies', regex=True)

In [54]:
df2

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,1st Tm,Season
0,1st,Michael Jordan,Chicago Bulls,575,575,1.000,115,1997
1,1st,Karl Malone,Utah Jazz,575,575,1.000,115,1997
2,1st,Hakeem Olajuwon,Houston Rockets,514,575,0.894,89,1997
3,1st,Grant Hill,Detroit Pistons,483,575,0.840,76,1997
4,1st,Tim Hardaway,Miami Heat,435,575,0.757,62,1997
...,...,...,...,...,...,...,...,...
261,ORV,Elden Campbell,Charlotte Hornets,1,630,0.002,0,2002
262,ORV,Sam Cassell,Milwaukee Bucks,1,630,0.002,0,2002
263,ORV,Reggie Miller,Indiana Pacers,1,630,0.002,0,2002
264,ORV,Cuttino Mobley,Houston Rockets,1,630,0.002,0,2002


In [56]:
df2['Weighted_Share'] = df2.apply(lambda row: row['Share'] * rank_weights[row['# Tm']], axis=1)
all_nba_voting = df2.groupby(['Team', 'Season'])['Weighted_Share'].sum().reset_index()

In [57]:
all_nba_voting

Unnamed: 0,Team,Season,Weighted_Share
0,Atlanta Hawks,1997,0.1245
1,Atlanta Hawks,1998,0.1950
2,Atlanta Hawks,1999,0.0815
3,Atlanta Hawks,2000,0.0215
4,Atlanta Hawks,2002,0.0040
...,...,...,...
139,Vancouver Grizzlies,2001,0.0025
140,Washington Bullets,1997,0.0310
141,Washington Wizards,1998,0.4550
142,Washington Wizards,1999,0.0240


In [58]:
combine2 = combine1.merge(all_nba_voting, how='outer', on=["Team", "Season"])

In [60]:
combine2

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,FG%,3P Per Game,3PA Per Game,3P%,2P Per Game,...,16 feet - 3P FG%,% of 2P's that were assisted,% of 3P's that were assisted,% of FGA that are dunks,Dunks Made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares,Weighted_Share
0,Atlanta Hawks,82,241.5,34.3,76.9,0.446,8.0,22.4,0.359,26.3,...,0.470,0.505,0.712,0.043,259,0.146,0.410,0.266667,0.004,0.1245
1,Atlanta Hawks,82,242.4,35.2,77.5,0.455,4.1,12.4,0.332,31.1,...,0.459,0.516,0.748,0.054,304,0.102,0.288,0.066667,,0.1950
2,Atlanta Hawks,50,241.5,30.8,75.2,0.409,3.9,12.9,0.306,26.8,...,0.392,0.477,0.716,0.037,126,0.092,0.322,0.200000,0.001,0.0815
3,Atlanta Hawks,82,241.8,36.6,83.0,0.441,3.1,9.9,0.317,33.4,...,0.442,0.489,0.806,0.038,236,0.128,0.452,0.000000,,0.0215
4,Atlanta Hawks,82,240.6,35.1,81.3,0.431,4.1,11.4,0.357,31.0,...,0.403,0.507,0.811,0.037,224,0.228,0.371,0.000000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Washington Wizards,82,241.5,37.6,83.1,0.452,3.9,11.5,0.339,33.7,...,0.454,0.589,0.853,0.061,394,0.230,0.364,0.000000,0.001,0.4550
170,Washington Wizards,50,241.0,35.4,79.4,0.445,3.6,11.6,0.309,31.8,...,0.435,0.578,0.810,0.036,132,0.198,0.322,0.000000,,0.0240
171,Washington Wizards,82,241.5,36.7,81.5,0.451,4.1,10.9,0.376,32.6,...,0.445,0.558,0.833,0.048,300,0.204,0.462,0.000000,,
172,Washington Wizards,82,240.0,34.5,78.7,0.439,3.4,10.3,0.324,31.2,...,0.415,0.563,0.753,0.030,177,0.216,0.361,0.000000,,


In [61]:
dfs3 = []
for year in years1:
    with open("shares/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")
    all_defense = soup.find(id="leading_all_defense")
    all_defense_1 = pd.read_html(StringIO(str(all_defense)))[0]

    dfs3.append(all_defense_1)

In [62]:
dfs3

[   Unnamed: 0_level_0 Unnamed: 1_level_0 Unnamed: 2_level_0  \
                  # Tm                Pos             Player   
 0                 1st                  F     Scottie Pippen   
 1                 1st                  G     Michael Jordan   
 2                 1st                  C    Dikembe Mutombo   
 3                 1st                  G        Gary Payton   
 4                 1st                  F        Karl Malone   
 5                 NaN                NaN                NaN   
 6                 2nd                  G    Mookie Blaylock   
 7                 2nd                  C    Hakeem Olajuwon   
 8                 2nd                  F      Anthony Mason   
 9                 2nd                  F         P.J. Brown   
 10                2nd                  G      John Stockton   
 11                NaN                NaN                NaN   
 12                ORV                  F     Charles Oakley   
 13                ORV                  

In [63]:
df_3 = pd.concat(dfs3)

In [64]:
df_3

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Voting,Voting,Voting,Unnamed: 8_level_0,Per Game,Per Game,Per Game,Per Game,Shooting,Shooting,Shooting,Advanced,Advanced,Advanced,Advanced,Advanced
Unnamed: 0_level_1,# Tm,Pos,Player,Age,Tm,Pts Won,Pts Max,Share,G,MP,...,STL,BLK,FG%,3P%,FT%,WS,WS/48,DWS,DBPM,DRtg
0,1st,F,Scottie Pippen,31.0,CHI,53.0,58.0,0.914,82.0,37.7,...,1.9,0.5,0.474,0.368,0.701,13.1,0.203,5.2,1.4,101.0
1,1st,G,Michael Jordan,33.0,CHI,51.0,58.0,0.879,82.0,37.9,...,1.7,0.5,0.486,0.374,0.833,18.3,0.283,5.0,1.1,102.0
2,1st,C,Dikembe Mutombo,30.0,ATL,50.0,58.0,0.862,80.0,37.2,...,0.6,3.3,0.527,,0.705,11.3,0.183,6.6,2.4,97.0
3,1st,G,Gary Payton,28.0,SEA,49.0,58.0,0.845,82.0,39.2,...,2.4,0.2,0.476,0.313,0.715,12.9,0.193,4.7,1.1,103.0
4,1st,F,Karl Malone,33.0,UTA,25.0,58.0,0.431,82.0,36.6,...,1.4,0.6,0.550,0.000,0.755,16.7,0.268,5.1,1.2,101.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42,ORV,G,Paul Pierce,24.0,BOS,1.0,58.0,0.017,82.0,40.3,...,1.9,1.0,0.442,0.404,0.809,12.9,0.187,5.6,1.3,99.0
43,ORV,F,Malik Rose,27.0,SAS,1.0,58.0,0.017,82.0,21.0,...,0.9,0.5,0.463,0.083,0.720,4.4,0.123,3.4,0.3,97.0
44,ORV,G,Latrell Sprewell,31.0,NYK,1.0,58.0,0.017,81.0,41.1,...,1.2,0.2,0.404,0.360,0.821,2.8,0.041,1.7,-1.1,109.0
45,ORV,G,John Stockton,39.0,UTA,1.0,58.0,0.017,82.0,31.3,...,1.9,0.3,0.517,0.321,0.857,10.7,0.200,2.6,1.3,105.0


In [65]:
df_3.to_csv("all_defense_new.csv")

In [66]:
defense1 = pd.read_csv("all_defense_new.csv")

In [67]:
defense1

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,Season
0,1st,Scottie Pippen,CHI,53,58,0.914,1997
1,1st,Michael Jordan,CHI,51,58,0.879,1997
2,1st,Dikembe Mutombo,ATL,50,58,0.862,1997
3,1st,Gary Payton,SEA,49,58,0.845,1997
4,1st,Karl Malone,UTA,25,58,0.431,1997
...,...,...,...,...,...,...,...
248,ORV,Paul Pierce,BOS,1,58,0.017,2002
249,ORV,Malik Rose,SAS,1,58,0.017,2002
250,ORV,Latrell Sprewell,NYK,1,58,0.017,2002
251,ORV,John Stockton,UTA,1,58,0.017,2002


In [68]:
defense1['Team'] = defense1['Team'].replace(team_names)

In [69]:
defense1

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,Season
0,1st,Scottie Pippen,Chicago Bulls,53,58,0.914,1997
1,1st,Michael Jordan,Chicago Bulls,51,58,0.879,1997
2,1st,Dikembe Mutombo,Atlanta Hawks,50,58,0.862,1997
3,1st,Gary Payton,Seattle SuperSonics,49,58,0.845,1997
4,1st,Karl Malone,Utah Jazz,25,58,0.431,1997
...,...,...,...,...,...,...,...
248,ORV,Paul Pierce,Boston Celtics,1,58,0.017,2002
249,ORV,Malik Rose,San Antonio Spurs,1,58,0.017,2002
250,ORV,Latrell Sprewell,New York Knicks,1,58,0.017,2002
251,ORV,John Stockton,Utah Jazz,1,58,0.017,2002


In [70]:
defense1.drop(defense1.index[(defense1['Team'] == "TOT")], axis=0, inplace=True)

In [73]:
defense1

Unnamed: 0,# Tm,Player,Team,Pts Won,Pts Max,Share,Season
0,1st,Scottie Pippen,Chicago Bulls,53,58,0.914,1997
1,1st,Michael Jordan,Chicago Bulls,51,58,0.879,1997
2,1st,Dikembe Mutombo,Atlanta Hawks,50,58,0.862,1997
3,1st,Gary Payton,Seattle SuperSonics,49,58,0.845,1997
4,1st,Karl Malone,Utah Jazz,25,58,0.431,1997
...,...,...,...,...,...,...,...
248,ORV,Paul Pierce,Boston Celtics,1,58,0.017,2002
249,ORV,Malik Rose,San Antonio Spurs,1,58,0.017,2002
250,ORV,Latrell Sprewell,New York Knicks,1,58,0.017,2002
251,ORV,John Stockton,Utah Jazz,1,58,0.017,2002


In [75]:
ranked_weights = {
    '1st': 1,
    '2nd': 0.75,
    'ORV': 0.5
}
defense1['DPOY_Share'] = defense1.apply(lambda row: row['Share'] * ranked_weights[row['# Tm']], axis=1)
dpoy_shares_new = defense1.groupby(['Team', 'Season'])['DPOY_Share'].sum().reset_index()

In [76]:
dpoy_shares_new

Unnamed: 0,Team,Season,DPOY_Share
0,Atlanta Hawks,1997,1.31425
1,Atlanta Hawks,1998,1.12575
2,Atlanta Hawks,1999,0.79325
3,Atlanta Hawks,2000,0.07750
4,Boston Celtics,1997,0.00850
...,...,...,...
119,Utah Jazz,1999,0.41350
120,Utah Jazz,2000,0.13800
121,Utah Jazz,2001,0.08600
122,Utah Jazz,2002,0.05150


In [77]:
combine3 = combine2.merge(dpoy_shares_new, how='outer', on=['Team', 'Season'])

In [78]:
combine3

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,FG%,3P Per Game,3PA Per Game,3P%,2P Per Game,...,% of 2P's that were assisted,% of 3P's that were assisted,% of FGA that are dunks,Dunks Made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares,Weighted_Share,DPOY_Share
0,Atlanta Hawks,82,241.5,34.3,76.9,0.446,8.0,22.4,0.359,26.3,...,0.505,0.712,0.043,259,0.146,0.410,0.266667,0.004,0.1245,1.31425
1,Atlanta Hawks,82,242.4,35.2,77.5,0.455,4.1,12.4,0.332,31.1,...,0.516,0.748,0.054,304,0.102,0.288,0.066667,,0.1950,1.12575
2,Atlanta Hawks,50,241.5,30.8,75.2,0.409,3.9,12.9,0.306,26.8,...,0.477,0.716,0.037,126,0.092,0.322,0.200000,0.001,0.0815,0.79325
3,Atlanta Hawks,82,241.8,36.6,83.0,0.441,3.1,9.9,0.317,33.4,...,0.489,0.806,0.038,236,0.128,0.452,0.000000,,0.0215,0.07750
4,Atlanta Hawks,82,240.6,35.1,81.3,0.431,4.1,11.4,0.357,31.0,...,0.507,0.811,0.037,224,0.228,0.371,0.000000,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Washington Wizards,82,241.5,37.6,83.1,0.452,3.9,11.5,0.339,33.7,...,0.589,0.853,0.061,394,0.230,0.364,0.000000,0.001,0.4550,
170,Washington Wizards,50,241.0,35.4,79.4,0.445,3.6,11.6,0.309,31.8,...,0.578,0.810,0.036,132,0.198,0.322,0.000000,,0.0240,0.03450
171,Washington Wizards,82,241.5,36.7,81.5,0.451,4.1,10.9,0.376,32.6,...,0.558,0.833,0.048,300,0.204,0.462,0.000000,,,
172,Washington Wizards,82,240.0,34.5,78.7,0.439,3.4,10.3,0.324,31.2,...,0.563,0.753,0.030,177,0.216,0.361,0.000000,,,


In [81]:
adv = pd.read_csv("advanced.csv")

In [82]:
adv

Unnamed: 0,Team,Age,W,L,PW,PL,MOV,SOS,SRS,ORtg,...,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Season
0,Chicago Bulls*,30.7,69,13,68,14,10.80,-0.11,10.70,114.4,...,0.547,0.511,12.5,35.9,0.199,0.471,14.8,69.3,0.196,1997
1,Utah Jazz*,29.6,64,18,64,18,8.79,-0.82,7.97,113.6,...,0.581,0.530,14.7,29.7,0.299,0.480,15.5,71.6,0.290,1997
2,Seattle SuperSonics*,30.0,57,25,62,20,7.68,-0.77,6.91,111.2,...,0.557,0.510,14.2,30.9,0.269,0.490,17.7,69.1,0.242,1997
3,Atlanta Hawks*,28.8,56,26,57,25,5.44,0.08,5.52,108.5,...,0.542,0.498,14.6,31.5,0.236,0.472,15.0,69.2,0.191,1997
4,Miami Heat*,27.2,61,21,57,25,5.49,0.07,5.56,106.8,...,0.546,0.507,15.5,29.2,0.233,0.465,15.6,70.6,0.248,1997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Houston Rockets,25.9,28,54,27,55,-4.89,0.58,-4.31,103.1,...,0.507,0.465,13.4,29.0,0.211,0.491,11.5,69.8,0.178,2002
170,Golden State Warriors,24.8,21,61,26,56,-5.40,0.61,-4.80,102.8,...,0.499,0.452,14.7,35.3,0.242,0.488,12.9,69.4,0.232,2002
171,Denver Nuggets,28.2,27,55,24,58,-5.82,0.63,-5.19,101.4,...,0.495,0.455,13.6,30.6,0.190,0.492,13.6,69.9,0.245,2002
172,Memphis Grizzlies,25.5,23,59,20,62,-7.44,0.70,-6.74,98.2,...,0.501,0.462,15.4,27.8,0.204,0.486,13.7,68.0,0.197,2002


In [83]:
adv["Team"] = adv["Team"].str.replace("*", "", regex=False)

In [84]:
adv

Unnamed: 0,Team,Age,W,L,PW,PL,MOV,SOS,SRS,ORtg,...,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Season
0,Chicago Bulls,30.7,69,13,68,14,10.80,-0.11,10.70,114.4,...,0.547,0.511,12.5,35.9,0.199,0.471,14.8,69.3,0.196,1997
1,Utah Jazz,29.6,64,18,64,18,8.79,-0.82,7.97,113.6,...,0.581,0.530,14.7,29.7,0.299,0.480,15.5,71.6,0.290,1997
2,Seattle SuperSonics,30.0,57,25,62,20,7.68,-0.77,6.91,111.2,...,0.557,0.510,14.2,30.9,0.269,0.490,17.7,69.1,0.242,1997
3,Atlanta Hawks,28.8,56,26,57,25,5.44,0.08,5.52,108.5,...,0.542,0.498,14.6,31.5,0.236,0.472,15.0,69.2,0.191,1997
4,Miami Heat,27.2,61,21,57,25,5.49,0.07,5.56,106.8,...,0.546,0.507,15.5,29.2,0.233,0.465,15.6,70.6,0.248,1997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Houston Rockets,25.9,28,54,27,55,-4.89,0.58,-4.31,103.1,...,0.507,0.465,13.4,29.0,0.211,0.491,11.5,69.8,0.178,2002
170,Golden State Warriors,24.8,21,61,26,56,-5.40,0.61,-4.80,102.8,...,0.499,0.452,14.7,35.3,0.242,0.488,12.9,69.4,0.232,2002
171,Denver Nuggets,28.2,27,55,24,58,-5.82,0.63,-5.19,101.4,...,0.495,0.455,13.6,30.6,0.190,0.492,13.6,69.9,0.245,2002
172,Memphis Grizzlies,25.5,23,59,20,62,-7.44,0.70,-6.74,98.2,...,0.501,0.462,15.4,27.8,0.204,0.486,13.7,68.0,0.197,2002


In [85]:
combine4 = combine3.merge(adv, how='outer', on=['Team', 'Season'])

In [86]:
combine4

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,FG%,3P Per Game,3PA Per Game,3P%,2P Per Game,...,3PAr,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1
0,Atlanta Hawks,82,241.5,34.3,76.9,0.446,8.0,22.4,0.359,26.3,...,0.291,0.542,0.498,14.6,31.5,0.236,0.472,15.0,69.2,0.191
1,Atlanta Hawks,82,242.4,35.2,77.5,0.455,4.1,12.4,0.332,31.1,...,0.160,0.533,0.481,14.1,33.9,0.275,0.468,13.0,69.1,0.194
2,Atlanta Hawks,50,241.5,30.8,75.2,0.409,3.9,12.9,0.306,26.8,...,0.171,0.492,0.436,14.5,33.1,0.277,0.437,13.4,72.1,0.202
3,Atlanta Hawks,82,241.8,36.6,83.0,0.441,3.1,9.9,0.317,33.4,...,0.120,0.503,0.460,14.1,31.7,0.217,0.481,11.3,71.0,0.196
4,Atlanta Hawks,82,240.6,35.1,81.3,0.431,4.1,11.4,0.357,31.0,...,0.140,0.500,0.456,15.5,28.9,0.206,0.472,13.8,70.8,0.249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Washington Wizards,82,241.5,37.6,83.1,0.452,3.9,11.5,0.339,33.7,...,0.139,0.513,0.476,13.0,31.2,0.219,0.481,15.1,69.5,0.248
170,Washington Wizards,50,241.0,35.4,79.4,0.445,3.6,11.6,0.309,31.8,...,0.146,0.507,0.468,14.1,28.2,0.213,0.474,15.1,69.5,0.273
171,Washington Wizards,82,241.5,36.7,81.5,0.451,4.1,10.9,0.376,32.6,...,0.133,0.521,0.476,14.8,30.6,0.234,0.489,13.9,71.7,0.273
172,Washington Wizards,82,240.0,34.5,78.7,0.439,3.4,10.3,0.324,31.2,...,0.131,0.514,0.460,15.7,29.7,0.264,0.503,14.1,71.8,0.236


In [97]:
combine4.to_csv("combined_really_new.csv")

In [98]:
combine5 = pd.read_csv("combined_really_new.csv")

In [99]:
stats = pd.read_csv("combined_stats_1.csv")

In [100]:
stats

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,3P Per Game,3PA Per Game,2P Per Game,2PA Per Game,FT Per Game,...,% of 2P's that were assisted,% of 3P's that were assisted,% of FGA that are dunks,Dunks Made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares,Weighted_Share,DPOY_Share
0,Atlanta Hawks,82,242.7,34.9,78.5,4.9,13.9,30.0,64.5,19.4,...,0.543,0.856,0.048,297,0.276,0.397,0.0000,,0.0015,0.0690
1,Atlanta Hawks,82,242.7,34.5,79.6,5.1,15.2,29.4,64.4,18.7,...,0.537,0.845,0.045,289,0.292,0.370,0.0000,,,
2,Atlanta Hawks,82,242.1,35.9,81.4,3.7,11.9,32.2,69.5,17.3,...,0.511,0.872,0.056,354,0.265,0.295,0.0000,,,
3,Atlanta Hawks,82,242.4,35.9,79.2,5.2,14.1,30.8,65.1,20.2,...,0.504,0.837,0.047,290,0.278,0.383,0.0000,,0.0015,
4,Atlanta Hawks,82,242.4,34.5,77.7,4.2,12.7,30.4,65.0,20.5,...,0.518,0.833,0.047,273,0.253,0.373,0.0000,,0.0025,0.0335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
623,Washington Wizards,82,243.0,42.1,90.1,11.3,33.3,30.8,56.8,18.4,...,0.533,0.868,0.067,454,0.213,0.360,0.0000,,0.0340,0.0025
624,Washington Wizards,72,241.0,41.5,90.9,12.0,32.6,29.5,58.3,19.4,...,0.509,0.832,0.047,276,0.167,0.388,0.0000,,0.0320,
625,Washington Wizards,72,241.7,43.2,90.9,10.2,29.0,33.0,61.9,20.1,...,0.501,0.880,0.055,320,0.231,0.369,0.0625,0.005,0.1860,0.0025
626,Washington Wizards,82,241.8,40.6,86.0,10.5,30.6,30.1,55.4,17.0,...,0.532,0.860,0.065,408,0.246,0.397,0.0000,,,


In [103]:
column_order = stats.columns

In [105]:
combine5 = combine5[column_order]

In [106]:
merged_df = pd.concat([combine5, stats], ignore_index=True)

In [109]:
merged_df

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,3P Per Game,3PA Per Game,2P Per Game,2PA Per Game,FT Per Game,...,% of 2P's that were assisted,% of 3P's that were assisted,% of FGA that are dunks,Dunks Made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares,Weighted_Share,DPOY_Share
0,Atlanta Hawks,82,241.5,34.3,76.9,8.0,22.4,26.3,54.5,18.2,...,0.505,0.712,0.043,259,0.146,0.410,0.266667,0.004,0.1245,1.31425
1,Atlanta Hawks,82,242.4,35.2,77.5,4.1,12.4,31.1,65.1,21.3,...,0.516,0.748,0.054,304,0.102,0.288,0.066667,,0.1950,1.12575
2,Atlanta Hawks,50,241.5,30.8,75.2,3.9,12.9,26.8,62.3,20.8,...,0.477,0.716,0.037,126,0.092,0.322,0.200000,0.001,0.0815,0.79325
3,Atlanta Hawks,82,241.8,36.6,83.0,3.1,9.9,33.4,73.1,18.0,...,0.489,0.806,0.038,236,0.128,0.452,0.000000,,0.0215,0.07750
4,Atlanta Hawks,82,240.6,35.1,81.3,4.1,11.4,31.0,69.9,16.8,...,0.507,0.811,0.037,224,0.228,0.371,0.000000,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
797,Washington Wizards,82,243.0,42.1,90.1,11.3,33.3,30.8,56.8,18.4,...,0.533,0.868,0.067,454,0.213,0.360,0.000000,,0.0340,0.00250
798,Washington Wizards,72,241.0,41.5,90.9,12.0,32.6,29.5,58.3,19.4,...,0.509,0.832,0.047,276,0.167,0.388,0.000000,,0.0320,
799,Washington Wizards,72,241.7,43.2,90.9,10.2,29.0,33.0,61.9,20.1,...,0.501,0.880,0.055,320,0.231,0.369,0.062500,0.005,0.1860,0.00250
800,Washington Wizards,82,241.8,40.6,86.0,10.5,30.6,30.1,55.4,17.0,...,0.532,0.860,0.065,408,0.246,0.397,0.000000,,,


In [112]:
merged_df.fillna(0, inplace=True)

In [116]:
merged_df

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,3P Per Game,3PA Per Game,2P Per Game,2PA Per Game,FT Per Game,...,% of 2P's that were assisted,% of 3P's that were assisted,% of FGA that are dunks,Dunks Made,% of corner 3PA,3P% from the corner,champion share,MVP_Shares,Weighted_Share,DPOY_Share
0,Atlanta Hawks,82,241.5,34.3,76.9,8.0,22.4,26.3,54.5,18.2,...,0.505,0.712,0.043,259,0.146,0.410,0.266667,0.004,0.1245,1.31425
1,Atlanta Hawks,82,242.4,35.2,77.5,4.1,12.4,31.1,65.1,21.3,...,0.516,0.748,0.054,304,0.102,0.288,0.066667,0.000,0.1950,1.12575
2,Atlanta Hawks,50,241.5,30.8,75.2,3.9,12.9,26.8,62.3,20.8,...,0.477,0.716,0.037,126,0.092,0.322,0.200000,0.001,0.0815,0.79325
3,Atlanta Hawks,82,241.8,36.6,83.0,3.1,9.9,33.4,73.1,18.0,...,0.489,0.806,0.038,236,0.128,0.452,0.000000,0.000,0.0215,0.07750
4,Atlanta Hawks,82,240.6,35.1,81.3,4.1,11.4,31.0,69.9,16.8,...,0.507,0.811,0.037,224,0.228,0.371,0.000000,0.000,0.0000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
797,Washington Wizards,82,243.0,42.1,90.1,11.3,33.3,30.8,56.8,18.4,...,0.533,0.868,0.067,454,0.213,0.360,0.000000,0.000,0.0340,0.00250
798,Washington Wizards,72,241.0,41.5,90.9,12.0,32.6,29.5,58.3,19.4,...,0.509,0.832,0.047,276,0.167,0.388,0.000000,0.000,0.0320,0.00000
799,Washington Wizards,72,241.7,43.2,90.9,10.2,29.0,33.0,61.9,20.1,...,0.501,0.880,0.055,320,0.231,0.369,0.062500,0.005,0.1860,0.00250
800,Washington Wizards,82,241.8,40.6,86.0,10.5,30.6,30.1,55.4,17.0,...,0.532,0.860,0.065,408,0.246,0.397,0.000000,0.000,0.0000,0.00000


In [115]:
merged_df.to_csv("combined_stats_2.csv")

In [152]:
coaches = pd.read_csv("coaches.csv")

In [153]:
coaches

Unnamed: 0,Coach,Team,Seasons With Franchise,Seasons Coaching,G (CS),W (CS),L (CS),G (F),W (F),L (F),...,G (P),W (P),L (P),G (PF),W (PF),L (PF),G (P Car),W (P Car),L (P Car),Season
0,Lenny Wilkens,ATL,4,24,82,56,26,328,201,127,...,10.0,4.0,6.0,34.0,13.0,21.0,144.0,68.0,76.0,1997
1,M.L. Carr,BOS,2,2,82,15,67,164,48,116,...,,,,,,,,,,1997
2,Dave Cowens,CHH,1,2,82,54,28,82,54,28,...,3.0,0.0,3.0,3.0,0.0,3.0,3.0,0.0,3.0,1997
3,Phil Jackson,CHI,8,8,82,69,13,656,483,173,...,19.0,15.0,4.0,131.0,96.0,35.0,131.0,96.0,35.0,1997
4,Mike Fratello,CLE,4,12,82,42,40,328,179,149,...,,,,10.0,1.0,9.0,50.0,19.0,31.0,1997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,Mike Brown,SAC,1,9,82,48,34,82,48,34,...,7.0,3.0,4.0,7.0,3.0,4.0,90.0,50.0,40.0,2023
802,Gregg Popovich,SAS,27,27,82,22,60,2127,1366,761,...,,,,284.0,170.0,114.0,284.0,170.0,114.0,2023
803,Nick Nurse,TOR,5,5,82,41,41,390,227,163,...,,,,41.0,25.0,16.0,41.0,25.0,16.0,2023
804,Will Hardy,UTA,1,1,82,37,45,82,37,45,...,,,,,,,,,,2023


In [154]:
coaches["Team"] = coaches["Team"].replace(team_names)

In [155]:
coaches

Unnamed: 0,Coach,Team,Seasons With Franchise,Seasons Coaching,G (CS),W (CS),L (CS),G (F),W (F),L (F),...,G (P),W (P),L (P),G (PF),W (PF),L (PF),G (P Car),W (P Car),L (P Car),Season
0,Lenny Wilkens,Atlanta Hawks,4,24,82,56,26,328,201,127,...,10.0,4.0,6.0,34.0,13.0,21.0,144.0,68.0,76.0,1997
1,M.L. Carr,Boston Celtics,2,2,82,15,67,164,48,116,...,,,,,,,,,,1997
2,Dave Cowens,Charlotte Hornets,1,2,82,54,28,82,54,28,...,3.0,0.0,3.0,3.0,0.0,3.0,3.0,0.0,3.0,1997
3,Phil Jackson,Chicago Bulls,8,8,82,69,13,656,483,173,...,19.0,15.0,4.0,131.0,96.0,35.0,131.0,96.0,35.0,1997
4,Mike Fratello,Cleveland Cavaliers,4,12,82,42,40,328,179,149,...,,,,10.0,1.0,9.0,50.0,19.0,31.0,1997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,Mike Brown,Sacramento Kings,1,9,82,48,34,82,48,34,...,7.0,3.0,4.0,7.0,3.0,4.0,90.0,50.0,40.0,2023
802,Gregg Popovich,San Antonio Spurs,27,27,82,22,60,2127,1366,761,...,,,,284.0,170.0,114.0,284.0,170.0,114.0,2023
803,Nick Nurse,Toronto Raptors,5,5,82,41,41,390,227,163,...,,,,41.0,25.0,16.0,41.0,25.0,16.0,2023
804,Will Hardy,Utah Jazz,1,1,82,37,45,82,37,45,...,,,,,,,,,,2023


In [156]:
coaches['Team'] = coaches['Team'].replace('WSB','Washington Bullets', regex=True)
coaches['Team'] = coaches['Team'].replace('VAN','Vancouver Grizzlies', regex=True)
coaches['Team'] = coaches['Team'].replace('CHO', 'Charlotte Hornets', regex=True)

In [157]:
coaches

Unnamed: 0,Coach,Team,Seasons With Franchise,Seasons Coaching,G (CS),W (CS),L (CS),G (F),W (F),L (F),...,G (P),W (P),L (P),G (PF),W (PF),L (PF),G (P Car),W (P Car),L (P Car),Season
0,Lenny Wilkens,Atlanta Hawks,4,24,82,56,26,328,201,127,...,10.0,4.0,6.0,34.0,13.0,21.0,144.0,68.0,76.0,1997
1,M.L. Carr,Boston Celtics,2,2,82,15,67,164,48,116,...,,,,,,,,,,1997
2,Dave Cowens,Charlotte Hornets,1,2,82,54,28,82,54,28,...,3.0,0.0,3.0,3.0,0.0,3.0,3.0,0.0,3.0,1997
3,Phil Jackson,Chicago Bulls,8,8,82,69,13,656,483,173,...,19.0,15.0,4.0,131.0,96.0,35.0,131.0,96.0,35.0,1997
4,Mike Fratello,Cleveland Cavaliers,4,12,82,42,40,328,179,149,...,,,,10.0,1.0,9.0,50.0,19.0,31.0,1997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,Mike Brown,Sacramento Kings,1,9,82,48,34,82,48,34,...,7.0,3.0,4.0,7.0,3.0,4.0,90.0,50.0,40.0,2023
802,Gregg Popovich,San Antonio Spurs,27,27,82,22,60,2127,1366,761,...,,,,284.0,170.0,114.0,284.0,170.0,114.0,2023
803,Nick Nurse,Toronto Raptors,5,5,82,41,41,390,227,163,...,,,,41.0,25.0,16.0,41.0,25.0,16.0,2023
804,Will Hardy,Utah Jazz,1,1,82,37,45,82,37,45,...,,,,,,,,,,2023


In [158]:
del coaches['Coach']

In [159]:
del coaches['G (CS)']
del coaches['W (CS)']
del coaches['L (CS)']
del coaches['G (P)']

In [160]:
del coaches['L (P)']
del coaches['W (P)']

In [161]:
coaches

Unnamed: 0,Team,Seasons With Franchise,Seasons Coaching,G (F),W (F),L (F),G (Car),W (Car),L (Car),W% (Car),G (PF),W (PF),L (PF),G (P Car),W (P Car),L (P Car),Season
0,Atlanta Hawks,4,24,328,201,127,1946,1070,876,0.550,34.0,13.0,21.0,144.0,68.0,76.0,1997
1,Boston Celtics,2,2,164,48,116,164,48,116,0.293,,,,,,,1997
2,Charlotte Hornets,1,2,82,54,28,150,81,69,0.540,3.0,0.0,3.0,3.0,0.0,3.0,1997
3,Chicago Bulls,8,8,656,483,173,656,483,173,0.736,131.0,96.0,35.0,131.0,96.0,35.0,1997
4,Cleveland Cavaliers,4,12,328,179,149,905,503,402,0.556,10.0,1.0,9.0,50.0,19.0,31.0,1997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,Sacramento Kings,1,9,82,48,34,645,395,250,0.612,7.0,3.0,4.0,90.0,50.0,40.0,2023
802,San Antonio Spurs,27,27,2127,1366,761,2127,1366,761,0.642,284.0,170.0,114.0,284.0,170.0,114.0,2023
803,Toronto Raptors,5,5,390,227,163,390,227,163,0.582,41.0,25.0,16.0,41.0,25.0,16.0,2023
804,Utah Jazz,1,1,82,37,45,82,37,45,0.451,,,,,,,2023


In [162]:
combine5 = merged_df.merge(coaches, how='outer', on=['Team', 'Season'])

In [163]:
combine5 

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,3P Per Game,3PA Per Game,2P Per Game,2PA Per Game,FT Per Game,...,G (Car),W (Car),L (Car),W% (Car),G (PF),W (PF),L (PF),G (P Car),W (P Car),L (P Car)
0,Atlanta Hawks,82,241.5,34.3,76.9,8.0,22.4,26.3,54.5,18.2,...,1946,1070,876,0.550,34.0,13.0,21.0,144.0,68.0,76.0
1,Atlanta Hawks,82,242.4,35.2,77.5,4.1,12.4,31.1,65.1,21.3,...,2028,1120,908,0.552,38.0,14.0,24.0,148.0,69.0,79.0
2,Atlanta Hawks,50,241.5,30.8,75.2,3.9,12.9,26.8,62.3,20.8,...,2078,1151,927,0.554,47.0,17.0,30.0,157.0,72.0,85.0
3,Atlanta Hawks,82,241.8,36.6,83.0,3.1,9.9,33.4,73.1,18.0,...,2160,1179,981,0.546,47.0,17.0,30.0,157.0,72.0,85.0
4,Atlanta Hawks,82,240.6,35.1,81.3,4.1,11.4,31.0,69.9,16.8,...,82,25,57,0.305,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,Washington Wizards,82,243.0,42.1,90.1,11.3,33.3,30.8,56.8,18.4,...,791,462,329,0.584,19.0,9.0,10.0,92.0,48.0,44.0
802,Washington Wizards,72,241.0,41.5,90.9,12.0,32.6,29.5,58.3,19.4,...,863,487,376,0.564,19.0,9.0,10.0,92.0,48.0,44.0
803,Washington Wizards,72,241.7,43.2,90.9,10.2,29.0,33.0,61.9,20.1,...,935,521,414,0.557,24.0,10.0,14.0,97.0,49.0,48.0
804,Washington Wizards,82,241.8,40.6,86.0,10.5,30.6,30.1,55.4,17.0,...,82,35,47,0.427,,,,,,


In [165]:
combine5.fillna(0, inplace=True)

In [167]:
combine5.to_csv("final_stats_NBA.csv")