### Importing necessary packages and libraries

In [60]:
import time
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from bs4 import Comment
from selenium import webdriver



### Using Requests and BeautifulSoup to scrape NBA data for players

In [2]:
# Seasons to process: 2000 through 2023 inclusive (the range end is exclusive)
years = [*range(2000, 2024)]


In [47]:
# Scrape the awards page for each season in `years` and save it as an HTML file
for year in years:
    base_url = f"https://www.basketball-reference.com/awards/awards_{year}.html"
    data = requests.get(base_url, timeout=30)
    # Fail fast on an HTTP error (e.g. a rate-limit response) instead of
    # silently saving an error page that the parsing cell would choke on later.
    data.raise_for_status()

    # The directory name must match the 'MVP_data' path that the later cells
    # read from; 'MVP_Data' only works on case-insensitive file systems.
    with open(f'MVP_data/{year}.html', 'w+', encoding="utf-8") as f:
        f.write(data.text)

    # Pause between requests so the site is not hit with a burst of traffic.
    time.sleep(3)

In [13]:
# Open each saved HTML file, parse out the MVP table, and collect one
# pandas dataframe per year in the list `df`
df = []
for year in years:
    # Read with the same encoding the scraper wrote (UTF-8) so that non-ASCII
    # characters in the page are decoded correctly on every platform.
    with open(f"MVP_data/{year}.html", encoding="utf-8", errors="ignore") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    mvp_table = soup.find(id='mvp')
    if mvp_table is None:
        raise ValueError(f"No table with id 'mvp' found in MVP_data/{year}.html")

    # Drop the grouping header row of the MVP table itself (not whichever
    # 'over_header' row happens to come first in the document) so that
    # read_html sees a single header row.
    over_header = mvp_table.find("tr", class_='over_header')
    if over_header is not None:
        over_header.decompose()

    # read_html expects a file-like object; passing a literal HTML string is
    # deprecated in recent pandas releases.
    mvp_df = pd.read_html(StringIO(str(mvp_table)))[0]
    mvp_df['Year'] = year
    df.append(mvp_df)



In [4]:
# Stack the per-year tables into a single frame. ignore_index=True gives every
# row a unique label instead of repeating each yearly frame's own index.
mvp_data = pd.concat(df, ignore_index=True)

In [5]:
# Persist the combined MVP table to disk as CSV
mvp_data.to_csv(path_or_buf='MVP_data/mvp_awards.csv')

In [55]:
# Load the saved 2000 awards page to explore the other award tables.
# Decode as UTF-8 to match how the scraper wrote the file.
with open('MVP_data/2000.html', encoding="utf-8", errors="ignore") as f:
    page = f.read()

# Parsing does not need the file handle, so do it after the file is closed.
soup = BeautifulSoup(page, "html.parser")

In [58]:
# Look up the element with id "roy" (the Rookie of the Year table) in the page
soup.find(attrs={"id": "roy"})

<table class="sortable stats_table" data-cols-to-freeze=",2" id="roy">
<caption>Rookie of the Year (Wilt Chamberlain Trophy) Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr class="over_header">
<th aria-label="" class="over_header center" colspan="4" data-stat=""></th>
<th aria-label="" class="over_header center" colspan="4" data-stat="header_award_voting">Voting</th><th></th>
<th aria-label="" class="over_header center" colspan="6" data-stat="header_per_g">Per Game</th>
<th aria-label="" class="over_header center" colspan="3" data-stat="header_shooting">Shooting</th>
<th aria-label="" class="over_header center" colspan="2" data-stat="header_advanced">Advanced</th>
</tr>
<tr>
<th aria-label="rank" class="poptip center" data-stat="rank" scope="col">Rank</th>
<th aria-label="Player" class="poptip sort_default_asc left" data-stat="player" scope="col">Player</th>
<th ar

In [61]:
# Some tables on the page live inside HTML comments, so the main soup cannot
# find them as elements. Collect every comment that contains a <table> and
# parse the combined markup as its own document.
commented_tables = soup.find_all(
    string=lambda text: isinstance(text, Comment) and '<table' in text
)
# Name the parser explicitly (the same one used for the page itself) so the
# result does not depend on which parsers happen to be installed and no
# "no parser was explicitly specified" warning is raised.
soupTables = BeautifulSoup(''.join(commented_tables), "html.parser")


In [67]:
# Pull the <table> element with id "dpoy" out of the comment-derived soup
dpoy_table = soupTables.find(name='table', attrs={"id": "dpoy"})

In [73]:
# Remove the grouping header row if it is still present. decompose() mutates
# dpoy_table in place, so on a re-run the row is already gone and find()
# returns None; the guard keeps this cell re-runnable.
over_header = dpoy_table.find("tr", class_="over_header")
if over_header is not None:
    over_header.decompose()

# read_html expects a file-like object; passing a literal HTML string is
# deprecated in recent pandas releases.
dpoy_df = pd.read_html(StringIO(str(dpoy_table)))[0]


In [74]:
# Display the dataframe parsed from the "dpoy" table
dpoy_df

Unnamed: 0,Rank,Player,Age,Tm,First,Pts Won,Pts Max,Share,G,MP,...,STL,BLK,FG%,3P%,FT%,WS,WS/48,DWS,DBPM,DRtg
0,1,Alonzo Mourning,29,MIA,62.0,62.0,121,0.512,79,34.8,...,0.5,3.7,0.551,0.0,0.711,12.9,0.226,5.6,2.2,96
1,2,Shaquille O'Neal,27,LAL,21.0,21.0,121,0.174,79,40.0,...,0.5,3.0,0.574,0.0,0.524,18.6,0.283,7.0,2.0,95
2,3T,Eddie Jones,28,CHH,11.0,11.0,121,0.091,72,39.0,...,2.7,0.7,0.427,0.375,0.864,10.1,0.172,4.4,2.0,100
3,3T,Dikembe Mutombo,33,ATL,11.0,11.0,121,0.091,82,36.4,...,0.3,3.3,0.562,,0.708,9.9,0.16,4.3,1.5,101
4,5T,Kobe Bryant,21,LAL,4.0,4.0,121,0.033,66,38.2,...,1.6,0.9,0.468,0.319,0.821,10.6,0.202,4.5,1.6,98
5,5T,Gary Payton,31,SEA,4.0,4.0,121,0.033,82,41.8,...,1.9,0.2,0.448,0.34,0.735,13.9,0.195,3.5,0.7,104
6,7,Kevin Garnett,23,MIN,2.0,2.0,121,0.017,81,40.0,...,1.5,1.6,0.497,0.37,0.765,11.6,0.172,5.6,1.7,99
7,8T,Kendall Gill,31,NJN,1.0,1.0,121,0.008,76,31.0,...,1.8,0.5,0.414,0.256,0.71,3.7,0.076,2.1,0.7,105
8,8T,Jason Kidd,26,PHO,1.0,1.0,121,0.008,67,39.0,...,2.0,0.4,0.409,0.337,0.829,8.5,0.156,4.8,2.0,98
9,8T,Anthony Mason,33,CHH,1.0,1.0,121,0.008,82,38.2,...,0.9,0.4,0.48,0.0,0.746,8.9,0.136,3.9,0.7,102


In [79]:
# Look up the element with id "mip" (the Most Improved Player table) among the
# tables recovered from HTML comments
soupTables.find(attrs={"id": "mip"})

<table class="sortable stats_table" data-cols-to-freeze=",2" id="mip">
<caption>Most Improved Player (George Mikan Trophy) Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr class="over_header">
<th aria-label="" class="over_header center" colspan="4" data-stat=""></th>
<th aria-label="" class="over_header center" colspan="4" data-stat="header_award_voting">Voting</th><th></th>
<th aria-label="" class="over_header center" colspan="6" data-stat="header_per_g">Per Game</th>
<th aria-label="" class="over_header center" colspan="3" data-stat="header_shooting">Shooting</th>
<th aria-label="" class="over_header center" colspan="2" data-stat="header_advanced">Advanced</th>
</tr>
<tr>
<th aria-label="rank" class="poptip center" data-stat="rank" scope="col">Rank</th>
<th aria-label="Player" class="poptip sort_default_asc left" data-stat="player" scope="col">Player</th>
<th aria