In [1]:
import requests
import pandas as pd

from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from dateutil import parser

In [2]:
# Scrape daily Wordle answers
url = "https://wordfinder.yourdictionary.com/wordle/answers/"

# Fetch site html. Without a timeout, requests waits indefinitely on a
# stalled connection, which would hang the kernel during Run All.
r = requests.get(url, timeout=30)

# Fail loudly on 4xx/5xx responses instead of parsing an error page
r.raise_for_status()

# Parse html
soup = BeautifulSoup(r.content, "html5lib")

In [3]:
# Find the page section that carries today's date
today = soup.find("div", {"class": "wordle-answer-section"})

# soup.find returns None when nothing matches (e.g. the site layout changed);
# stop here with a clear message rather than an AttributeError on `.text`.
if today is None:
    raise RuntimeError(
        "Could not find <div class='wordle-answer-section'> in the scraped page"
    )
today_string = today.text

# Parse date from string; fuzzy=True lets the parser skip the non-date text
today_date = parser.parse(today_string, fuzzy=True)

# Step back one day so the range used later ends on yesterday, not today
end_date = today_date - timedelta(days=1)

In [4]:
# Daily timestamps running from the first Wordle date through end_date
wordle_dates = pd.date_range("2021-06-19", end_date, freq="D")
print(wordle_dates)

DatetimeIndex(['2021-06-19', '2021-06-20', '2021-06-21', '2021-06-22',
               '2021-06-23', '2021-06-24', '2021-06-25', '2021-06-26',
               '2021-06-27', '2021-06-28',
               ...
               '2022-11-25', '2022-11-26', '2022-11-27', '2022-11-28',
               '2022-11-29', '2022-11-30', '2022-12-01', '2022-12-02',
               '2022-12-03', '2022-12-04'],
              dtype='datetime64[ns]', length=534, freq='D')


In [5]:
# Read every HTML table on the answers page (first column as the index),
# stack them into one frame, then move that index back into a regular column
df = pd.concat(pd.read_html(url, index_col=0)).reset_index()

df.head()

Unnamed: 0,Date,Wordle #,Answer
0,Dec. 04,533,ADORE
1,Dec. 03,532,TORSO
2,Dec. 02,531,CHAFE
3,Dec. 01,530,EJECT
4,Nov. 30,529,STUDY


In [6]:
# Overwrite the Date column with the full timestamps from wordle_dates.
# The range is flipped first so it runs newest -> oldest.
df["Date"] = wordle_dates[::-1].values

df.head()

Unnamed: 0,Date,Wordle #,Answer
0,2022-12-04,533,ADORE
1,2022-12-03,532,TORSO
2,2022-12-02,531,CHAFE
3,2022-12-01,530,EJECT
4,2022-11-30,529,STUDY
