### Importing necessary packages and libraries

In [1]:
from selenium import webdriver 
from bs4 import BeautifulSoup
from bs4 import Comment
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time
import requests



### Using Requests and BeautifulSoup to scrape NBA data for players

In [2]:
# Seasons to process: 2000 through 2023 inclusive (range's stop is exclusive).
years = [*range(2000, 2024)]


In [47]:
# Download each season's awards page (2000-2023) and save the raw HTML as
# MVP_data/<year>.html. The MVP, ROY, DPOY and SMOY cells below all parse their
# tables out of these same saved pages, so one download per year is enough.
for year in years:
    base_url = f"https://www.basketball-reference.com/awards/awards_{year}.html"
    data = requests.get(base_url, timeout=30)
    # Fail loudly on 4xx/5xx responses (e.g. when rate-limited) instead of
    # saving an error page that the parsing cells would later choke on.
    data.raise_for_status()

    # The directory name must match the 'MVP_data/' path the parsing cells read
    # from; the previous 'MVP_Data/' spelling breaks on case-sensitive filesystems.
    with open(f'MVP_data/{year}.html', 'w', encoding="utf-8") as f:
        f.write(data.text)

    # Be polite to the server: pause between consecutive requests.
    # NOTE(review): Sports Reference reportedly limits bots to roughly
    # 20 requests/minute - confirm against their current policy.
    time.sleep(4)

### Extracting MVP candidates from 2000-2023

In [44]:
# For every season, read the saved awards page, pull out the MVP voting table
# (id="mvp"), tag it with its year, and finally write all seasons to one CSV.
mvp_list = []
for year in years:
    # The pages were written as UTF-8, so read them back as UTF-8 rather than
    # with the platform default encoding.
    with open(f"MVP_data/{year}.html", encoding="utf-8", errors="ignore") as f:
        page = f.read()

    # Parse after the file is closed; nothing below needs the file handle.
    soup = BeautifulSoup(page, "html.parser")
    # Drop the first `over_header` row on the page before handing the table to
    # pandas (presumably the grouped-column banner above the real header row).
    soup.find("tr", class_='over_header').decompose()
    mvp_table = soup.find(id='mvp')
    mvp_df = pd.read_html(str(mvp_table))[0]
    mvp_df['Year'] = year
    mvp_list.append(mvp_df)

mvp_data = pd.concat(mvp_list)
# reset_index returns a new frame; the result must be assigned, otherwise the
# repeated per-season row numbers are what ends up in the CSV index column.
mvp_data = mvp_data.reset_index(drop=True)
mvp_data.to_csv('MVP_data/mvp_awards.csv')

### Extracting ROY candidates from 2000-2023

In [26]:
# For every season, read the saved awards page, pull out the Rookie of the Year
# voting table (id="roy"), tag it with its year, and write all seasons to one CSV.
roy_list = []
for year in years:
    # The pages were written as UTF-8, so read them back as UTF-8 rather than
    # with the platform default encoding.
    with open(f"MVP_data/{year}.html", encoding="utf-8", errors="ignore") as f:
        page = f.read()

    # Parse after the file is closed; nothing below needs the file handle.
    soup = BeautifulSoup(page, "html.parser")
    # Kept from the original cell: removes the first `over_header` row found
    # anywhere on the page before the ROY table is looked up.
    soup.find("tr", class_='over_header').decompose()
    roy_table = soup.find(id='roy')
    # header=1: use the table's second row as the column names.
    roy_df = pd.read_html(str(roy_table), header=1)[0]
    roy_df['Year'] = year
    roy_list.append(roy_df)

roy_data = pd.concat(roy_list)
# reset_index returns a new frame; the result must be assigned, otherwise the
# repeated per-season row numbers are what ends up in the CSV index column.
roy_data = roy_data.reset_index(drop=True)
roy_data.to_csv('MVP_data/roy_awards.csv')

### Extracting DPOY candidates from 2000-2023

In [10]:
# For every season, read the saved awards page, pull out the Defensive Player of
# the Year voting table (id="dpoy"), tag it with its year, and write one CSV.
dpoy_list = []
for year in years:
    # The pages were written as UTF-8, so read them back as UTF-8 rather than
    # with the platform default encoding.
    with open(f'MVP_data/{year}.html', encoding="utf-8", errors="ignore") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    # This table is looked up inside HTML comments: gather every comment that
    # contains a <table>, join them, and parse that markup as its own document.
    commented_tables = soup.find_all(
        string=lambda text: isinstance(text, Comment) and '<table' in text
    )
    # Name the parser explicitly so results do not depend on which parsers are
    # installed (and to avoid bs4's GuessedAtParserWarning).
    soupTables = BeautifulSoup(''.join(commented_tables), "html.parser")
    # Drop the first `over_header` row among the commented-out tables before
    # handing the DPOY table to pandas.
    soupTables.find("tr", class_="over_header").decompose()
    dpoy_table = soupTables.find('table', id="dpoy")
    dpoy_df = pd.read_html(str(dpoy_table))[0]
    dpoy_df['Year'] = year
    dpoy_list.append(dpoy_df)

dpoy_data = pd.concat(dpoy_list)
# reset_index returns a new frame; the result must be assigned, otherwise the
# repeated per-season row numbers are what ends up in the CSV index column.
dpoy_data = dpoy_data.reset_index(drop=True)
dpoy_data.to_csv('MVP_data/dpoy_awards.csv')

### Extracting SMOY candidates from 2000-2023

In [43]:
# For every season, read the saved awards page, pull out the Sixth Man of the
# Year voting table (id="smoy"), tag it with its year, and write one CSV.
smoy_list = []
for year in years:
    # The pages were written as UTF-8, so read them back as UTF-8 rather than
    # with the platform default encoding.
    with open(f'MVP_data/{year}.html', encoding="utf-8", errors="ignore") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    # This table is looked up inside HTML comments: gather every comment that
    # contains a <table>, join them, and parse that markup as its own document.
    commented_tables = soup.find_all(
        string=lambda text: isinstance(text, Comment) and '<table' in text
    )
    # Name the parser explicitly so results do not depend on which parsers are
    # installed (and to avoid bs4's GuessedAtParserWarning).
    soupTables = BeautifulSoup(''.join(commented_tables), "html.parser")
    smoy_table = soupTables.find('table', id="smoy")
    # header=1: use the table's second row as the column names.
    smoy_df = pd.read_html(str(smoy_table), header=1)[0]
    smoy_df['Year'] = year
    smoy_list.append(smoy_df)

smoy_data = pd.concat(smoy_list)
# reset_index returns a new frame; the result must be assigned, otherwise the
# repeated per-season row numbers are what ends up in the CSV index column.
smoy_data = smoy_data.reset_index(drop=True)
smoy_data.to_csv('MVP_data/smoy_awards.csv')

### Using Selenium to extract player PPG, Advanced, and Team Record stats