# Webscrape the 2024 MLB Regular Season Schedule

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Get schedule information

In [208]:
response = requests.get("https://www.baseball-reference.com/leagues/majors/2024-schedule.shtml")
if response.status_code == 200:
    soup = BeautifulSoup(response.text, "html5lib")
    days = soup.find("div", {"class": "section_content"}).find_all("div")
    games = []
    for day in days:

        date = day.find("h3")
        todays_games = day.find_all("p", {"class": "game"})

        for game in todays_games:

            current_game = {}
            current_game["date"] = date.text

            teams = game.find_all("a")
            time =  game.find("strong")
            away_team = teams[0].text
            home_team = teams[1].text

            current_game["away"] = away_team
            current_game["home"] = home_team
            current_game["time"] = time.text

            games.append(current_game)

In [210]:
# save schedule information to a dataframe
data = pd.DataFrame(games)
data

Unnamed: 0,date,away,home,time
0,"Wednesday, March 20, 2024",Los Angeles Dodgers,San Diego Padres,Los Angeles Dodgers\n (5)
1,"Thursday, March 21, 2024",San Diego Padres,Los Angeles Dodgers,San Diego Padres\n (15)
2,"Friday, March 22, 2024",Minnesota Twins,Tampa Bay Rays,1:05 pm
3,"Friday, March 22, 2024",Detroit Tigers,Philadelphia Phillies,1:05 pm
4,"Friday, March 22, 2024",New York Mets,New York Yankees,1:05 pm
...,...,...,...,...
2494,"Sunday, September 29, 2024",New York Mets,Milwaukee Brewers,3:10 pm
2495,"Sunday, September 29, 2024",Baltimore Orioles,Minnesota Twins,3:10 pm
2496,"Sunday, September 29, 2024",Oakland Athletics,Seattle Mariners,3:10 pm
2497,"Sunday, September 29, 2024",Kansas City Royals,Atlanta Braves,3:20 pm


In [211]:
data.drop(index=[0, 1], axis=0, inplace=True)  # remove spring training games
data = data.reset_index()
data = data[["date", "away", "home", "time"]]
data

Unnamed: 0,date,away,home,time
0,"Friday, March 22, 2024",Minnesota Twins,Tampa Bay Rays,1:05 pm
1,"Friday, March 22, 2024",Detroit Tigers,Philadelphia Phillies,1:05 pm
2,"Friday, March 22, 2024",New York Mets,New York Yankees,1:05 pm
3,"Friday, March 22, 2024",Atlanta Braves,Minnesota Twins,1:05 pm
4,"Friday, March 22, 2024",Boston Red Sox,Toronto Blue Jays,1:07 pm
...,...,...,...,...
2492,"Sunday, September 29, 2024",New York Mets,Milwaukee Brewers,3:10 pm
2493,"Sunday, September 29, 2024",Baltimore Orioles,Minnesota Twins,3:10 pm
2494,"Sunday, September 29, 2024",Oakland Athletics,Seattle Mariners,3:10 pm
2495,"Sunday, September 29, 2024",Kansas City Royals,Atlanta Braves,3:20 pm


## Get stadium information

In [199]:
response = requests.get("https://www.mlb.com/team")
if response.status_code == 200:
    addresses_home = BeautifulSoup(response.text, "html5lib")
    stadiums = addresses_home.find_all("div", {"class": "p-forge-list-item l-grid__content l-grid__content--card t-none u-app-hide"})
    team_stadiums = []
    for stadium in stadiums:
        team_name = stadium.find("h2", {"class": "p-heading__text p-heading__text--lined p-heading__text--center p-heading__text--h5 styles-sc-zrz8sa-0 bFNgUm"}).text
        raw_address = str(stadium.find("p")).split("<br/>Phone")[0]  # .get_text().split("Phone:")[0]
        stadium_name = raw_address.split("<br/>")[1]
        street = raw_address.split("<br/>")[1]
        city = raw_address.split("<br/>")[-1]
        address = street + ", " + city
        city = city.rsplit(" ", 1)[0]

        team_stadiums.append({"team": team_name, "stadium": stadium_name, "address": address, "city": city, "street": street})

In [200]:
stadium_data = pd.DataFrame(team_stadiums)
stadium_data

Unnamed: 0,team,stadium,address,city,street
0,Arizona Diamondbacks,401 East Jefferson Street,"401 East Jefferson Street, Phoenix, AZ 85004","Phoenix, AZ",401 East Jefferson Street
1,Atlanta Braves,755 Battery Avenue,"755 Battery Avenue, Atlanta, GA 30339","Atlanta, GA",755 Battery Avenue
2,Baltimore Orioles,333 West Camden Street,"333 West Camden Street, Baltimore, MD 21201","Baltimore, MD",333 West Camden Street
3,Boston Red Sox,4 Jersey Street,"4 Jersey Street, Boston, MA 02215","Boston, MA",4 Jersey Street
4,Chicago Cubs,1060 West Addison,"1060 West Addison, Chicago, IL 60613-4397","Chicago, IL",1060 West Addison
5,Chicago White Sox,333 West 35th Street,"333 West 35th Street, Chicago, IL 60616","Chicago, IL",333 West 35th Street
6,Cincinnati Reds,100 Main Street,"100 Main Street, Cincinnati, OH 45202-4109","Cincinnati, OH",100 Main Street
7,Cleveland Guardians,2401 Ontario Street,"2401 Ontario Street, Cleveland, OH 44115","Cleveland, OH",2401 Ontario Street
8,Colorado Rockies,2001 Blake Street,"2001 Blake Street, Denver, CO 80205-2000","Denver, CO",2001 Blake Street
9,Detroit Tigers,2100 Woodward Avenue,"2100 Woodward Avenue, Detroit, MI 48201","Detroit, MI",2100 Woodward Avenue


## Combine schedule and stadium information

In [203]:
merged = data.merge(stadium_data, how = "left", left_on = "home", right_on = "team").drop("team", axis=1)
merged

Unnamed: 0,date,away,home,time,stadium,address,city,street
0,"Friday, March 22, 2024",Minnesota Twins,Tampa Bay Rays,1:05 pm,One Tropicana Drive,"One Tropicana Drive, St. Petersburg, FL 33705","St. Petersburg, FL",One Tropicana Drive
1,"Friday, March 22, 2024",Detroit Tigers,Philadelphia Phillies,1:05 pm,One Citizens Bank Way,"One Citizens Bank Way, Philadelphia, PA 19148","Philadelphia, PA",One Citizens Bank Way
2,"Friday, March 22, 2024",New York Mets,New York Yankees,1:05 pm,One East 161st Street,"One East 161st Street, Bronx, NY 10451","Bronx, NY",One East 161st Street
3,"Friday, March 22, 2024",Atlanta Braves,Minnesota Twins,1:05 pm,1 Twins Way,"1 Twins Way, Minneapolis, MN 55403","Minneapolis, MN",1 Twins Way
4,"Friday, March 22, 2024",Boston Red Sox,Toronto Blue Jays,1:07 pm,"1 Blue Jays Way, Suite 3200","1 Blue Jays Way, Suite 3200, Toronto, Ontario,...","Toronto, Ontario, Canada","1 Blue Jays Way, Suite 3200"
...,...,...,...,...,...,...,...,...
2492,"Sunday, September 29, 2024",New York Mets,Milwaukee Brewers,3:10 pm,One Brewers Way,"One Brewers Way, Milwaukee, WI 53214","Milwaukee, WI",One Brewers Way
2493,"Sunday, September 29, 2024",Baltimore Orioles,Minnesota Twins,3:10 pm,1 Twins Way,"1 Twins Way, Minneapolis, MN 55403","Minneapolis, MN",1 Twins Way
2494,"Sunday, September 29, 2024",Oakland Athletics,Seattle Mariners,3:10 pm,P.O. Box 4100,"P.O. Box 4100, Seattle, WA 98104","Seattle, WA",P.O. Box 4100
2495,"Sunday, September 29, 2024",Kansas City Royals,Atlanta Braves,3:20 pm,755 Battery Avenue,"755 Battery Avenue, Atlanta, GA 30339","Atlanta, GA",755 Battery Avenue


In [207]:
# save the data to a CSV
merged.to_csv("MLB_Schedule_2024.csv", columns=list(merged.columns))