# Wordle Source Parsing

Digging into the JavaScript bundle behind the Wordle web page to find the latest lists of solutions and acceptable plays.

In [3]:
import pandas as pd

In [4]:
import requests
from bs4 import BeautifulSoup

# Download the Wordle JavaScript bundle and keep its full text in html_text
# (the name is historical: the content is JavaScript, not HTML).
wordle_url = 'https://www.nytimes.com/games-assets/v2/wordle.6b88b1c8c6541b07820ab0896f6bc19be0ae34b2.js'
# A timeout stops the cell from hanging indefinitely on a stalled connection.
response = requests.get(wordle_url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page as if it
# were the bundle. NOTE(review): the URL embeds what looks like a build hash,
# so it presumably goes stale when the site is redeployed -- confirm.
response.raise_for_status()
html_text = response.text

In [5]:
# html_text

## List of Solutions

In [6]:
# Locate the solutions array, which the bundle assigns to a variable named "J".
# Anchoring on "J=[" (rather than just "J=") avoids matching an unrelated
# assignment that happens to end in "J=".
soln_marker_loc = html_text.find("J=[")
# str.find returns -1 when the marker is missing; without this check the
# offset arithmetic below would silently slice the wrong part of the text.
if soln_marker_loc == -1:
    raise ValueError('could not find the solutions list marker "J=[" in the downloaded text')

# the list starts at the opening bracket, i.e. just past "J="
soln_list_s_loc = soln_marker_loc + 2

# the list ends just past the first closing bracket after the start
soln_close_loc = html_text.find("]", soln_list_s_loc)
if soln_close_loc == -1:
    raise ValueError('the solutions list that starts at "J=[" has no closing "]"')
soln_list_e_loc = soln_close_loc + 1

# subset the overall file based on the start and end locations
soln_string = html_text[soln_list_s_loc:soln_list_e_loc]
# examine the first 1000 characters of this new string variable
soln_string[:1000]

'["cigar","rebut","sissy","humph","awake","blush","focal","evade","naval","serve","heath","dwarf","model","karma","stink","grade","quiet","bench","abate","feign","major","death","fresh","crust","stool","colon","abase","marry","react","batty","pride","floss","helix","croak","staff","paper","unfed","whelp","trawl","outdo","adobe","crazy","sower","repay","digit","crate","cluck","spike","mimic","pound","maxim","linen","unmet","flesh","booby","forth","first","stand","belly","ivory","seedy","print","yearn","drain","bribe","stout","panel","crass","flume","offal","agree","error","swirl","argue","bleed","delta","flick","totem","wooer","front","shrub","parry","biome","lapel","start","greet","goner","golem","lusty","loopy","round","audit","lying","gamma","labor","islet","civic","forge","corny","moult","basic","salad","agate","spicy","spray","essay","fjord","spend","kebab","guild","aback","motor","alone","hatch","hyper","thumb","dowry","ought","belch","dutch","pilot","tweed","comet","jaunt","enema

In [7]:
# peek at the tail of the extracted string to confirm it ends with the
# closing bracket of the list
tail_chars = 100
soln_string[-tail_chars:]

're","payer","sooth","unset","unlit","vomit","fanny","fetus","butch","stalk","flack","widow","augur"]'

In [8]:
# parse the bracketed string literal into a real Python list
import ast

soln_list = ast.literal_eval(soln_string)
# report the container type and the number of words, one per line
print(type(soln_list), len(soln_list), sep="\n")
# show the final ten entries of the parsed list;
#   they should match the end of the raw string shown in the output above this cell
soln_list[-10:]

<class 'list'>
2309


['unset',
 'unlit',
 'vomit',
 'fanny',
 'fetus',
 'butch',
 'stalk',
 'flack',
 'widow',
 'augur']

In [6]:
# save to a file

# Sort into a new list instead of calling soln_list.sort(): sorting in place
# would reorder soln_list itself, so the earlier cell that displays its last
# ten entries would show different words if it were re-run afterwards.
soln_sorted = sorted(soln_list)

# Build a one-column DataFrame with the column named at construction time;
# unlike assigning .columns afterwards, this also works for an empty list.
# (the variable name hr_df is kept as it was in the original cell)
hr_df = pd.DataFrame({'word': soln_sorted})

# export to csv format, leaving out the index column
hr_df.to_csv('wordle_solutions.csv', index = False)

## List of Plays

In [9]:
# Locate the list of acceptable plays, which the bundle assigns to a variable
# named "Q". Anchoring on "Q=[" (rather than just "Q=") avoids matching an
# unrelated assignment that happens to end in "Q=".
play_marker_loc = html_text.find("Q=[")
# str.find returns -1 when the marker is missing; without this check the
# offset arithmetic below would silently slice the wrong part of the text.
if play_marker_loc == -1:
    raise ValueError('could not find the plays list marker "Q=[" in the downloaded text')

# the list starts at the opening bracket, i.e. just past "Q="
play_list_s_loc = play_marker_loc + 2

# the list ends just past the first closing bracket after the start
play_close_loc = html_text.find("]", play_list_s_loc)
if play_close_loc == -1:
    raise ValueError('the plays list that starts at "Q=[" has no closing "]"')
play_list_e_loc = play_close_loc + 1

# subset the overall file based on the start and end locations
play_string = html_text[play_list_s_loc:play_list_e_loc]
# examine the first 1000 characters of this new string variable
play_string[:1000]

'["aahed","aalii","aargh","aarti","abaca","abaci","abacs","abaft","abaka","abamp","aband","abash","abask","abaya","abbas","abbed","abbes","abcee","abeam","abear","abele","abers","abets","abies","abler","ables","ablet","ablow","abmho","abohm","aboil","aboma","aboon","abord","abore","abram","abray","abrim","abrin","abris","absey","absit","abuna","abune","abuts","abuzz","abyes","abysm","acais","acari","accas","accoy","acerb","acers","aceta","achar","ached","aches","achoo","acids","acidy","acing","acini","ackee","acker","acmes","acmic","acned","acnes","acock","acold","acred","acres","acros","acted","actin","acton","acyls","adaws","adays","adbot","addax","added","adder","addio","addle","adeem","adhan","adieu","adios","adits","adman","admen","admix","adobo","adown","adoze","adrad","adred","adsum","aduki","adunc","adust","advew","adyta","adzed","adzes","aecia","aedes","aegis","aeons","aerie","aeros","aesir","afald","afara","afars","afear","aflaj","afore","afrit","afros","agama","agami","agars

In [10]:
# parse the bracketed string literal into a Python list, the same way the
# solutions string was parsed earlier in the notebook

play_list = ast.literal_eval(play_string)
# report the container type and the number of words, one per line
print(type(play_list), len(play_list), sep="\n")
# show the first fifteen entries
play_list[:15]

<class 'list'>
10665


['aahed',
 'aalii',
 'aargh',
 'aarti',
 'abaca',
 'abaci',
 'abacs',
 'abaft',
 'abaka',
 'abamp',
 'aband',
 'abash',
 'abask',
 'abaya',
 'abbas']

In [9]:
# save to a file

# Sort into a new list instead of calling play_list.sort(): sorting in place
# would reorder play_list itself, so an earlier cell that displays part of it
# would no longer reflect the order found in the source file if re-run.
play_sorted = sorted(play_list)

# Build a one-column DataFrame with the column named at construction time;
# unlike assigning .columns afterwards, this also works for an empty list.
# (the variable name hr_df is kept as it was in the original cell)
hr_df = pd.DataFrame({'word': play_sorted})

# export to csv format, leaving out the index column
hr_df.to_csv('wordle_plays.csv', index = False)

### References

https://www.geeksforgeeks.org/python-convert-a-string-representation-of-list-into-list/