In [None]:
# Imports
import pandas as pd
import numpy as np
import requests
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from bs4 import BeautifulSoup

# Install a global filter that ignores every warning raised from this point on
# (no category or message is given, so nothing is exempt).
warnings.filterwarnings('ignore')

## Web Scrape NBA Standings from CBS Sports

In [None]:
# Standings page to scrape
url = 'https://www.cbssports.com/nba/standings/'

# Browser-like User-Agent header sent along with the request
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0'}

# Fetch the page. The timeout (in seconds) keeps this cell from hanging forever on a
# stalled connection, and raise_for_status() stops right here on a 4xx/5xx reply
# instead of handing an error page to the parser below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# Parse the returned HTML with Python's built-in html.parser backend
soup = BeautifulSoup(response.content, 'html.parser')

## Eastern Conference

In [None]:
# Eastern Conference rows: take the first <tbody> element in the parsed page (index 0)
eastern_conferece = soup.find_all('tbody')[0]

In [None]:
# Team names: for every table row, read the text of its second <td> cell and trim
# the surrounding whitespace
team = [
    tr.find_all('td')[1].get_text().strip()
    for tr in eastern_conferece.select('tr')
]

In [None]:
# Stat values for the Eastern Conference: one list per table row, built from the
# trimmed text of the <td> cells at positions 2 through 17
data = [
    [cell.get_text().strip() for cell in tr.find_all('td')[2:18]]
    for tr in eastern_conferece.select('tr')
]

In [None]:
# Single-column DataFrame ('Team') holding the Eastern Conference team names
team_df = pd.DataFrame(data=team, columns=['Team'])
team_df

Unnamed: 0,Team
0,Cleveland - x
1,Boston
2,New York
3,Milwaukee
4,Indiana
5,Detroit
6,Atlanta
7,Orlando
8,Miami
9,Chicago


In [None]:
# Eastern Conference stats table: one row per team, one column per scraped stat cell.
# Note that the labels 'W' and 'DIV' each appear twice in this column list.
team_data = pd.DataFrame(
    data=data,
    columns=[
        'W', 'L', 'PCT', 'GB',
        'PPG', 'OPPG', 'DIFF',
        'HOME', 'ROAD', 'DIV', 'CONF',
        'STRK', 'L10',
        'W', 'DIV', 'POST',
    ],
)
team_data

Unnamed: 0,W,L,PCT,GB,PPG,OPPG,DIFF,HOME,ROAD,DIV,CONF,STRK,L10,W.1,DIV.1,POST
0,54,10,0.844,—,122.9,111.5,11.4,29-4,25-6,11-1,37-7,W14,10-0,62.9,100.0%,100.0%
1,47,18,0.723,7.5,116.9,108.2,8.7,23-11,24-7,11-2,32-11,W5,8-2,59.7,98.5%,100.0%
2,41,23,0.641,13.0,117.1,112.7,4.4,21-11,20-12,10-3,28-13,W1,5-5,52.2,1.5%,100.0%
3,36,27,0.571,17.5,114.6,112.5,2.1,21-11,14-16,6-6,26-18,L2,7-3,43.9,0.0%,88.5%
4,35,28,0.556,18.5,116.4,115.4,1.0,19-9,15-18,7-5,20-20,L3,5-5,46.0,0.0%,98.0%
5,36,29,0.554,18.5,114.7,113.1,1.6,17-14,19-15,4-9,24-20,W1,7-3,42.8,0.0%,88.6%
6,31,34,0.477,23.5,117.3,119.7,-2.4,16-16,15-17,7-4,23-18,W3,5-5,39.0,45.7%,73.4%
7,30,36,0.455,25.0,104.3,106.1,-1.8,18-16,12-20,8-3,24-20,L1,3-7,38.2,24.5%,60.9%
8,29,35,0.453,25.0,110.0,110.5,-0.5,15-15,13-20,7-5,18-22,L4,3-7,38.5,29.8%,60.6%
9,27,38,0.415,27.5,116.6,120.3,-3.7,12-22,15-16,4-11,22-23,W3,5-5,31.7,0.0%,3.1%


In [None]:
# Eastern Conference: put the team names and their stats side by side (column-wise concat)
df_final = pd.concat((team_df, team_data), axis='columns')
df_final

Unnamed: 0,Team,W,L,PCT,GB,PPG,OPPG,DIFF,HOME,ROAD,DIV,CONF,STRK,L10,W.1,DIV.1,POST
0,Cleveland - x,54,10,0.844,—,122.9,111.5,11.4,29-4,25-6,11-1,37-7,W14,10-0,62.9,100.0%,100.0%
1,Boston,47,18,0.723,7.5,116.9,108.2,8.7,23-11,24-7,11-2,32-11,W5,8-2,59.7,98.5%,100.0%
2,New York,41,23,0.641,13.0,117.1,112.7,4.4,21-11,20-12,10-3,28-13,W1,5-5,52.2,1.5%,100.0%
3,Milwaukee,36,27,0.571,17.5,114.6,112.5,2.1,21-11,14-16,6-6,26-18,L2,7-3,43.9,0.0%,88.5%
4,Indiana,35,28,0.556,18.5,116.4,115.4,1.0,19-9,15-18,7-5,20-20,L3,5-5,46.0,0.0%,98.0%
5,Detroit,36,29,0.554,18.5,114.7,113.1,1.6,17-14,19-15,4-9,24-20,W1,7-3,42.8,0.0%,88.6%
6,Atlanta,31,34,0.477,23.5,117.3,119.7,-2.4,16-16,15-17,7-4,23-18,W3,5-5,39.0,45.7%,73.4%
7,Orlando,30,36,0.455,25.0,104.3,106.1,-1.8,18-16,12-20,8-3,24-20,L1,3-7,38.2,24.5%,60.9%
8,Miami,29,35,0.453,25.0,110.0,110.5,-0.5,15-15,13-20,7-5,18-22,L4,3-7,38.5,29.8%,60.6%
9,Chicago,27,38,0.415,27.5,116.6,120.3,-3.7,12-22,15-16,4-11,22-23,W3,5-5,31.7,0.0%,3.1%


## Western Conference

In [None]:
# Western Conference rows: take the second <tbody> element in the parsed page (index 1)
western_conferece = soup.find_all('tbody')[1]

In [None]:
# Western Conference team names: trimmed text of the second <td> cell in each row
team = [
    tr.find_all('td')[1].get_text().strip()
    for tr in western_conferece.select('tr')
]

In [None]:
# Stat values for the Western Conference: one list per table row, built from the
# trimmed text of the <td> cells at positions 2 through 17
data = [
    [cell.get_text().strip() for cell in tr.find_all('td')[2:18]]
    for tr in western_conferece.select('tr')
]

In [None]:
# Single-column DataFrame ('Team') holding the Western Conference team names
team_df2 = pd.DataFrame(data=team, columns=['Team'])
team_df2

Unnamed: 0,Team
0,Oklahoma City
1,Denver
2,L.A. Lakers
3,Memphis
4,Houston
5,Golden St.
6,Minnesota
7,L.A. Clippers
8,Sacramento
9,Dallas


In [None]:
# Western Conference stats table: one row per team, one column per scraped stat cell.
# Note that the labels 'W' and 'DIV' each appear twice in this column list.
team_data2 = pd.DataFrame(
    data=data,
    columns=[
        'W', 'L', 'PCT', 'GB',
        'PPG', 'OPPG', 'DIFF',
        'HOME', 'ROAD', 'DIV', 'CONF',
        'STRK', 'L10',
        'W', 'DIV', 'POST',
    ],
)
team_data2

Unnamed: 0,W,L,PCT,GB,PPG,OPPG,DIFF,HOME,ROAD,DIV,CONF,STRK,L10,W.1,DIV.1,POST
0,53,12,0.815,—,119.7,107.1,12.6,28-5,24-7,11-4,32-11,L1,8-2,66.1,100.0%,100.0%
1,42,23,0.646,11.0,121.5,116.6,4.9,22-9,20-14,7-5,25-14,W1,6-4,52.8,0.0%,100.0%
2,40,23,0.635,12.0,112.9,111.1,1.8,25-7,15-16,11-3,27-12,L2,8-2,46.1,38.6%,78.4%
3,41,24,0.631,12.0,122.6,116.8,5.8,23-10,18-14,10-5,24-16,W3,5-5,52.0,78.1%,99.8%
4,40,25,0.615,13.0,112.9,108.9,4.0,22-10,18-14,12-3,24-16,W3,6-4,48.7,21.5%,94.4%
5,37,28,0.569,16.0,113.6,111.3,2.3,19-13,18-15,2-10,21-19,W5,9-1,44.6,11.8%,65.0%
6,37,29,0.561,16.5,112.8,109.2,3.6,18-14,19-15,7-5,26-18,W5,6-4,45.8,0.0%,78.9%
7,35,29,0.547,17.5,110.8,108.7,2.1,22-10,13-19,7-7,21-21,W3,4-6,46.8,47.9%,85.7%
8,33,31,0.516,19.5,116.6,115.2,1.4,16-15,17-16,4-8,23-21,L2,5-5,41.7,1.7%,31.5%
9,33,33,0.5,20.5,114.8,114.5,0.3,19-15,14-18,8-5,23-23,W1,3-7,43.8,0.0%,61.3%


In [None]:
# Western Conference: put the team names and their stats side by side (column-wise concat)
df_final2 = pd.concat((team_df2, team_data2), axis='columns')
df_final2

Unnamed: 0,Team,W,L,PCT,GB,PPG,OPPG,DIFF,HOME,ROAD,DIV,CONF,STRK,L10,W.1,DIV.1,POST
0,Oklahoma City,53,12,0.815,—,119.7,107.1,12.6,28-5,24-7,11-4,32-11,L1,8-2,66.1,100.0%,100.0%
1,Denver,42,23,0.646,11.0,121.5,116.6,4.9,22-9,20-14,7-5,25-14,W1,6-4,52.8,0.0%,100.0%
2,L.A. Lakers,40,23,0.635,12.0,112.9,111.1,1.8,25-7,15-16,11-3,27-12,L2,8-2,46.1,38.6%,78.4%
3,Memphis,41,24,0.631,12.0,122.6,116.8,5.8,23-10,18-14,10-5,24-16,W3,5-5,52.0,78.1%,99.8%
4,Houston,40,25,0.615,13.0,112.9,108.9,4.0,22-10,18-14,12-3,24-16,W3,6-4,48.7,21.5%,94.4%
5,Golden St.,37,28,0.569,16.0,113.6,111.3,2.3,19-13,18-15,2-10,21-19,W5,9-1,44.6,11.8%,65.0%
6,Minnesota,37,29,0.561,16.5,112.8,109.2,3.6,18-14,19-15,7-5,26-18,W5,6-4,45.8,0.0%,78.9%
7,L.A. Clippers,35,29,0.547,17.5,110.8,108.7,2.1,22-10,13-19,7-7,21-21,W3,4-6,46.8,47.9%,85.7%
8,Sacramento,33,31,0.516,19.5,116.6,115.2,1.4,16-15,17-16,4-8,23-21,L2,5-5,41.7,1.7%,31.5%
9,Dallas,33,33,0.5,20.5,114.8,114.5,0.3,19-15,14-18,8-5,23-23,W1,3-7,43.8,0.0%,61.3%


## Combine Eastern Conference and Western Conference Data

In [None]:
# Stack the Eastern and Western Conference frames into one table (row-wise concat).
df_combined = pd.concat([df_final, df_final2], axis = 0)

# The stat column lists use 'W' and 'DIV' twice, so the combined frame carries duplicate
# column labels and df_combined['W'] would return two columns instead of one.
# Suffix every repeated label with its occurrence number ('W.1', 'DIV.1') so that each
# column can be addressed on its own; labels that are already unique are left untouched.
labels = pd.Series(df_combined.columns)
repeat_no = labels.groupby(labels).cumcount()
df_combined.columns = [
    label if n == 0 else f'{label}.{n}'
    for label, n in zip(labels, repeat_no)
]
df_combined

Unnamed: 0,Team,W,L,PCT,GB,PPG,OPPG,DIFF,HOME,ROAD,DIV,CONF,STRK,L10,W.1,DIV.1,POST
0,Cleveland - x,54,10,0.844,—,122.9,111.5,11.4,29-4,25-6,11-1,37-7,W14,10-0,62.9,100.0%,100.0%
1,Boston,47,18,0.723,7.5,116.9,108.2,8.7,23-11,24-7,11-2,32-11,W5,8-2,59.7,98.5%,100.0%
2,New York,41,23,0.641,13.0,117.1,112.7,4.4,21-11,20-12,10-3,28-13,W1,5-5,52.2,1.5%,100.0%
3,Milwaukee,36,27,0.571,17.5,114.6,112.5,2.1,21-11,14-16,6-6,26-18,L2,7-3,43.9,0.0%,88.5%
4,Indiana,35,28,0.556,18.5,116.4,115.4,1.0,19-9,15-18,7-5,20-20,L3,5-5,46.0,0.0%,98.0%
5,Detroit,36,29,0.554,18.5,114.7,113.1,1.6,17-14,19-15,4-9,24-20,W1,7-3,42.8,0.0%,88.6%
6,Atlanta,31,34,0.477,23.5,117.3,119.7,-2.4,16-16,15-17,7-4,23-18,W3,5-5,39.0,45.7%,73.4%
7,Orlando,30,36,0.455,25.0,104.3,106.1,-1.8,18-16,12-20,8-3,24-20,L1,3-7,38.2,24.5%,60.9%
8,Miami,29,35,0.453,25.0,110.0,110.5,-0.5,15-15,13-20,7-5,18-22,L4,3-7,38.5,29.8%,60.6%
9,Chicago,27,38,0.415,27.5,116.6,120.3,-3.7,12-22,15-16,4-11,22-23,W3,5-5,31.7,0.0%,3.1%


In [None]:
# Show the combined standings ordered by win percentage, best first.
# PCT was scraped as text, so it is converted to numbers for the comparison; without
# the key the rows would be ordered character by character rather than by value.
# Values that cannot be parsed become NaN and are placed last.
# The sorted frame is only displayed here -- df_combined itself is not modified.
df_combined.sort_values(
    by='PCT',
    ascending=False,
    key=lambda pct: pd.to_numeric(pct, errors='coerce'),
)

Unnamed: 0,Team,W,L,PCT,GB,PPG,OPPG,DIFF,HOME,ROAD,DIV,CONF,STRK,L10,W.1,DIV.1,POST
0,Cleveland - x,54,10,0.844,—,122.9,111.5,11.4,29-4,25-6,11-1,37-7,W14,10-0,62.9,100.0%,100.0%
0,Oklahoma City,53,12,0.815,—,119.7,107.1,12.6,28-5,24-7,11-4,32-11,L1,8-2,66.1,100.0%,100.0%
1,Boston,47,18,0.723,7.5,116.9,108.2,8.7,23-11,24-7,11-2,32-11,W5,8-2,59.7,98.5%,100.0%
1,Denver,42,23,0.646,11.0,121.5,116.6,4.9,22-9,20-14,7-5,25-14,W1,6-4,52.8,0.0%,100.0%
2,New York,41,23,0.641,13.0,117.1,112.7,4.4,21-11,20-12,10-3,28-13,W1,5-5,52.2,1.5%,100.0%
2,L.A. Lakers,40,23,0.635,12.0,112.9,111.1,1.8,25-7,15-16,11-3,27-12,L2,8-2,46.1,38.6%,78.4%
3,Memphis,41,24,0.631,12.0,122.6,116.8,5.8,23-10,18-14,10-5,24-16,W3,5-5,52.0,78.1%,99.8%
4,Houston,40,25,0.615,13.0,112.9,108.9,4.0,22-10,18-14,12-3,24-16,W3,6-4,48.7,21.5%,94.4%
3,Milwaukee,36,27,0.571,17.5,114.6,112.5,2.1,21-11,14-16,6-6,26-18,L2,7-3,43.9,0.0%,88.5%
5,Golden St.,37,28,0.569,16.0,113.6,111.3,2.3,19-13,18-15,2-10,21-19,W5,9-1,44.6,11.8%,65.0%


In [None]:
# Discard the current row labels and renumber the rows from 0
df_combined = df_combined.reset_index(drop=True)
df_combined

Unnamed: 0,Team,W,L,PCT,GB,PPG,OPPG,DIFF,HOME,ROAD,DIV,CONF,STRK,L10,W.1,DIV.1,POST
0,Cleveland - x,54,10,0.844,—,122.9,111.5,11.4,29-4,25-6,11-1,37-7,W14,10-0,62.9,100.0%,100.0%
1,Boston,47,18,0.723,7.5,116.9,108.2,8.7,23-11,24-7,11-2,32-11,W5,8-2,59.7,98.5%,100.0%
2,New York,41,23,0.641,13.0,117.1,112.7,4.4,21-11,20-12,10-3,28-13,W1,5-5,52.2,1.5%,100.0%
3,Milwaukee,36,27,0.571,17.5,114.6,112.5,2.1,21-11,14-16,6-6,26-18,L2,7-3,43.9,0.0%,88.5%
4,Indiana,35,28,0.556,18.5,116.4,115.4,1.0,19-9,15-18,7-5,20-20,L3,5-5,46.0,0.0%,98.0%
5,Detroit,36,29,0.554,18.5,114.7,113.1,1.6,17-14,19-15,4-9,24-20,W1,7-3,42.8,0.0%,88.6%
6,Atlanta,31,34,0.477,23.5,117.3,119.7,-2.4,16-16,15-17,7-4,23-18,W3,5-5,39.0,45.7%,73.4%
7,Orlando,30,36,0.455,25.0,104.3,106.1,-1.8,18-16,12-20,8-3,24-20,L1,3-7,38.2,24.5%,60.9%
8,Miami,29,35,0.453,25.0,110.0,110.5,-0.5,15-15,13-20,7-5,18-22,L4,3-7,38.5,29.8%,60.6%
9,Chicago,27,38,0.415,27.5,116.6,120.3,-3.7,12-22,15-16,4-11,22-23,W3,5-5,31.7,0.0%,3.1%
