# NBA Data Web Scraping using Beautiful Soup

In [2]:
import os
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

## MVP

In [99]:
# URL template for the yearly awards page; the "{}" placeholder is filled in
# with a year via url.format(year) in the download loop.
url = "https://www.basketball-reference.com/awards/awards_{}.html"

In [101]:
# Create the output folder for the MVP pages. exist_ok=True lets this cell be
# re-run without raising FileExistsError when the folder is already present.
os.makedirs("mvp", exist_ok=True)

In [102]:
# Seasons to download; range() excludes its end value, so this is 2012-2015.
years = [*range(2012, 2016)]

In [103]:
# Download the awards page of every season, keep a copy of the raw HTML on
# disk, and collect the MVP voting table of each season into one CSV file.
df = []

for year in years:
    link = url.format(year)
    # A timeout keeps a stalled connection from hanging the loop forever.
    response = requests.get(link, timeout=30)
    # Fail on HTTP errors instead of saving and parsing an error page.
    response.raise_for_status()
    page = response.text  # raw HTML of the page

    # Keep a local copy of the downloaded page.
    with open("mvp/{}.html".format(year), "w", encoding="utf-8") as f:
        f.write(page)

    html = BeautifulSoup(page, "html.parser")
    table = html.find(id="mvp")  # the MVP voting table
    if table is None:
        raise ValueError("no element with id 'mvp' found in {}".format(link))

    # Drop the grouping header row(s) inside the table so that read_html
    # yields flat, single-level column names.
    for header_row in table.find_all("tr", class_="over_header"):
        header_row.decompose()

    # read_html returns a list of DataFrames; wrapping the markup in StringIO
    # avoids passing literal HTML as a string, which pandas has deprecated.
    mvp = pd.read_html(StringIO(str(table)))[0]
    mvp["Year"] = year  # remember which season each row belongs to
    df.append(mvp)

# ignore_index=True renumbers the rows 0..n-1 across all seasons.
mvps = pd.concat(df, ignore_index=True)
mvps.to_csv("mvp/mvps.csv", index=False)
        


## Players

In [104]:
# URL template for the yearly per-game statistics page; the "{}" placeholder
# is filled in with a year via url.format(year) in the download loop.
url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html"

In [106]:
# Create the output folder for the player pages. exist_ok=True lets this cell
# be re-run without raising FileExistsError when the folder already exists.
os.makedirs("players", exist_ok=True)

In [107]:
# Seasons to download; range() excludes its end value, so this is 2012-2015.
years = [*range(2012, 2016)]

In [108]:
# Download the per-game statistics page of every season, keep a copy of the
# raw HTML on disk, and collect the per-season tables into one CSV file.
df = []

for year in years:
    link = url.format(year)
    # A timeout keeps a stalled connection from hanging the loop forever.
    response = requests.get(link, timeout=30)
    # Fail on HTTP errors instead of saving and parsing an error page.
    response.raise_for_status()
    page = response.text  # raw HTML of the page

    # Keep a local copy of the downloaded page.
    with open("players/{}.html".format(year), "w", encoding="utf-8") as f:
        f.write(page)

    html = BeautifulSoup(page, "html.parser")
    table = html.find(id="all_per_game_stats")
    if table is None:
        raise ValueError(
            "no element with id 'all_per_game_stats' found in {}".format(link)
        )

    # read_html returns a list of DataFrames; wrapping the markup in StringIO
    # avoids passing literal HTML as a string, which pandas has deprecated.
    player = pd.read_html(StringIO(str(table)))[0]
    player["Year"] = year  # remember which season each row belongs to
    df.append(player)

# ignore_index=True renumbers the rows 0..n-1 across all seasons.
players = pd.concat(df, ignore_index=True)
players.to_csv("players/players.csv", index=False)

## Division Standings

In [128]:
# URL template for the yearly standings page; the "{}" placeholder is filled
# in with a year via url.format(year) in the download loop.
url = "https://www.basketball-reference.com/leagues/NBA_{}_standings.html"

In [129]:
# Create the output folder for the standings pages. exist_ok=True lets this
# cell be re-run without raising FileExistsError when the folder exists.
os.makedirs("divisions", exist_ok=True)

In [130]:
# Seasons to download; range() excludes its end value, so this is 2012-2015.
years = [*range(2012, 2016)]

In [131]:
# Download the standings page of every season, keep a copy of the raw HTML on
# disk, and combine the Eastern and Western standings of all seasons into one
# CSV file.
#
# The per-season frames are gathered in lists and concatenated once at the
# end: DataFrame.append no longer exists in pandas 2.x, and appending inside
# the loop copies the accumulated data on every iteration.
east_frames = []
west_frames = []

for year in years:
    link = url.format(year)
    # A timeout keeps a stalled connection from hanging the loop forever.
    response = requests.get(link, timeout=30)
    # Fail on HTTP errors instead of saving and parsing an error page.
    response.raise_for_status()
    page = response.text  # raw HTML of the page

    # Keep a local copy of the downloaded page.
    with open("divisions/{}.html".format(year), "w", encoding="utf-8") as f:
        f.write(page)

    html = BeautifulSoup(page, "html.parser")

    e_table = html.find(id="all_divs_standings_E")
    w_table = html.find(id="all_divs_standings_W")
    if e_table is None or w_table is None:
        raise ValueError(
            "standings elements 'all_divs_standings_E' / "
            "'all_divs_standings_W' not found in {}".format(link)
        )

    # read_html returns a list of DataFrames; wrapping the markup in StringIO
    # avoids passing literal HTML as a string, which pandas has deprecated.
    est = pd.read_html(StringIO(str(e_table)))[0]
    wst = pd.read_html(StringIO(str(w_table)))[0]

    est["Year"] = year  # remember which season each row belongs to
    wst["Year"] = year

    east_frames.append(est)
    west_frames.append(wst)

df1 = pd.concat(east_frames)
df2 = pd.concat(west_frames)

# Give both conferences the same name for the team column so that the rows
# line up in one column when the two frames are stacked.
df1.rename(columns={"Eastern Conference": "Team"}, inplace=True)
df2.rename(columns={"Western Conference": "Team"}, inplace=True)

# ignore_index=True renumbers the rows 0..n-1 across both conferences.
divisions = pd.concat([df1, df2], ignore_index=True)
divisions.to_csv("divisions/divisions.csv", index=False)