# Scraping the NBA schedule from the Basketball Reference website

# Setup

## Imports

In [1]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import time

# Scraping
Use BeautifulSoup to scrape the 2022–23 NBA schedule from
www.basketball-reference.com, one monthly page at a time.

In [2]:
from bs4 import BeautifulSoup
import requests

def scrape_month(month, season=2023):
    """Scrape one monthly schedule page from Basketball Reference.

    Parameters
    ----------
    month : str
        Month name exactly as it appears in the page URL (e.g. 'october').
    season : int or str, optional
        Season identifier inserted into the URL (``NBA_<season>_games-...``).
        Defaults to 2023, the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per game with the columns 'Date', 'Away team' and
        'Home team' (all scraped text). The columns are present even when
        no game rows were found.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the downloaded page contains no ``<table>`` element.
    """
    # Pause before every request; presumably this keeps the scraper under
    # the site's rate limit -- confirm before lowering the delay.
    time.sleep(2.5)

    month_url = ('https://www.basketball-reference.com/leagues/NBA_'
                 + str(season) + '_games-' + month + '.html')
    month_req = requests.get(month_url, timeout=30)
    # Fail here with a clear HTTP error instead of parsing an error page.
    month_req.raise_for_status()

    month_soup = BeautifulSoup(month_req.text, 'lxml')
    month_games = month_soup.find(name='table')
    if month_games is None:
        raise ValueError('No schedule table found at ' + month_url)

    games = []
    # The first <tr> is the column-header row, so it is skipped.
    for row in month_games.find_all('tr')[1:]:
        date_link = row.find('a')
        away_cell = row.find('td', {'data-stat': 'visitor_team_name'})
        home_cell = row.find('td', {'data-stat': 'home_team_name'})
        # Rows without a date link or team cells are not games; skip them
        # rather than crashing on a missing element.
        if date_link is None or away_cell is None or home_cell is None:
            continue
        games.append({
            'Date': date_link.text.strip(),
            'Away team': away_cell.text,
            'Home team': home_cell.text,
        })

    return pd.DataFrame(games, columns=['Date', 'Away team', 'Home team'])

In [3]:
months = ['october', 'november', 'december', 'january', 'february', 'march', 'april']

# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration. Scrape each month once and concatenate
# the pieces in a single call instead.
# Row labels are left exactly as scrape_month returns them, so they restart
# for every month (the final rows of the recorded output are labelled 65-68).
monthly_games = [scrape_month(month) for month in months]
games_df = pd.concat(monthly_games)

games_df

Unnamed: 0,Date,Away team,Home team
0,"Tue, Oct 18, 2022",Philadelphia 76ers,Boston Celtics
1,"Tue, Oct 18, 2022",Los Angeles Lakers,Golden State Warriors
2,"Wed, Oct 19, 2022",Orlando Magic,Detroit Pistons
3,"Wed, Oct 19, 2022",Washington Wizards,Indiana Pacers
4,"Wed, Oct 19, 2022",Houston Rockets,Atlanta Hawks
...,...,...,...
65,"Sun, Apr 9, 2023",Utah Jazz,Los Angeles Lakers
66,"Sun, Apr 9, 2023",New Orleans Pelicans,Minnesota Timberwolves
67,"Sun, Apr 9, 2023",Memphis Grizzlies,Oklahoma City Thunder
68,"Sun, Apr 9, 2023",Los Angeles Clippers,Phoenix Suns


In [4]:
# Parse the scraped strings (e.g. 'Tue, Oct 18, 2022') with an explicit
# format, so a value that does not match the expected layout raises an error
# instead of being guessed by format inference.
games_df['Date'] = pd.to_datetime(games_df['Date'], format='%a, %b %d, %Y')

## Save to a local Excel file
I want to reuse this schedule in other projects.

In [5]:
# Write the schedule one directory above the notebook; the row index is
# left out of the spreadsheet.
games_df.to_excel(excel_writer='../NBA_schedule.xlsx', index=False)