# Notebook with functions to solve a Wordle puzzle

In [14]:
import csv
from collections import defaultdict
from math import log2

## Get the set of possible guess and solution words

In [70]:
# The solutions file is read as tab-delimited; the word is taken from the second column as-is
# (no case conversion here -- presumably the file already stores upper-case words; TODO confirm)
with open("solutions_raw.txt", newline='') as file:
    reader = csv.reader(file, delimiter="\t")
    soln_set = {row[1] for row in reader}
# The guesses file is read as tab-delimited; the word is taken from the first column and upper-cased.
# Guesses are meant to also include the possible solutions -- this relies on the file contents,
# since the two sets are not merged here
with open("guesses_raw.txt", newline='') as file:
    reader = csv.reader(file, delimiter="\t")
    guess_set = {row[0].upper() for row in reader}

## Create the entropy tree

In [83]:
WORD_LEN = 5  # Length of a standard Wordle word; get_tile_overlap sizes its result from the guess itself
def get_tile_overlap(guess: str, soln: str) -> str:
    """Return the tiles result of guessing 'guess' when the solution is 'soln'.

    The result has one character per letter of 'guess': 'g' (green) for the right letter in
    the right position, 'y' (yellow) for a letter that is in the solution but somewhere else,
    and 'b' (black) for a miss. All misses would be 'bbbbb', all correct would be 'ggggg',
    all correct but wrong order would be 'yyyyy'.

    Each solution letter can account for at most one tile: greens are claimed first, then
    yellows are handed out left to right, so repeated letters in the guess are only marked
    as many times as they occur in the solution.

    The two words are expected to have the same length. Any length is accepted, not just
    WORD_LEN."""
    guess_len = len(guess)
    # Start with all misses
    tiles = ["b"] * guess_len
    # Set all the correct letters green, and collect the solution letters that were not
    # matched in place (these are the only ones that can still justify a yellow tile)
    soln_rest = []
    for i, soln_letter in enumerate(soln):
        if i < guess_len and guess[i] == soln_letter:
            tiles[i] = "g"
        else:
            soln_rest.append(soln_letter)
    # All the remaining guess letters are in the wrong place; mark the first ones yellow
    for i, letter in enumerate(guess):
        if tiles[i] != "g" and letter in soln_rest:
            tiles[i] = "y"
            # Use up this solution letter so a repeated guess letter cannot claim it again
            soln_rest.remove(letter)
    return "".join(tiles)

def split_tree(guess: str, soln_subset: set[str]) -> defaultdict[str, set[str]]:
    """Partition 'soln_subset' by the tile result each word would give for 'guess'.

    The keys of the returned dictionary are tile results (bbbbb, ybbbg, etc) and each value is
    the set of words from 'soln_subset' that could still be the solution after seeing that result."""
    buckets: defaultdict[str, set[str]] = defaultdict(set)
    for candidate in soln_subset:
        # Words that produce the same pattern cannot be told apart by this guess
        pattern = get_tile_overlap(guess, candidate)
        buckets[pattern].add(candidate)
    return buckets

def calc_entropy(proportions: list[float]) -> float:
    """Calculates the entropy, in bits, of the proportions given.

    Computes -sum(p * log2(p)) over 'proportions'. A proportion of exactly 0 contributes
    nothing (the usual 0*log(0) = 0 convention) instead of raising a math domain error.
    Negative proportions are invalid and still raise ValueError from log2."""
    return -1*sum(p*log2(p) for p in proportions if p != 0)

def split_entropy(guess: str, soln_subset: set[str]):
    """Return the entropy of the split that 'guess' induces on 'soln_subset'.

    Each group of words sharing a tile result is weighted by its share of 'soln_subset';
    the resulting entropy measures how much information the guess gives about the solution."""
    total = len(soln_subset)
    groups = split_tree(guess, soln_subset).values()
    return calc_entropy([len(group)/total for group in groups])

def get_best_guess(soln_subset: set[str]) -> str:
    """Find the guess that creates the highest entropy split in the given solution subset.

    'soln_subset' is the set of words that could still be the solution; it is not modified.

    Returns the only remaining word when exactly one is left. Otherwise returns the word from
    the module-level 'guess_set' with the highest split entropy. Ties are broken in favour of
    words that are themselves in 'soln_subset' (such a guess might also be the answer), and
    then by the alphabetically last word so the choice is deterministic.

    Raises ValueError if 'soln_subset' is empty, since no word is consistent with the results."""
    if not soln_subset:
        raise ValueError("No possible solutions remain; check the guess history for mistakes")
    if len(soln_subset) == 1:
        # Read the single word without pop(), which would empty the caller's set
        return next(iter(soln_subset))
    # Note that we check every possible guess, including words that might not be the answer
    return max(
        guess_set,
        key=lambda guess: (split_entropy(guess, soln_subset), guess in soln_subset, guess),
    )

## Get the best first guess

In [72]:
# Scores every word in guess_set against the full solution set, so this can be slow for large word lists
first_guess = get_best_guess(soln_set)
print(first_guess)

SOARE


## Get the best possible move given the previous guesses and results
This lets the user get the highest-entropy next guess from any point in the game, given the previous guesses and their results.

In [73]:
def get_cur_set(history: list[tuple[str, str]]) -> set[str]:
    """Get the current set of possible solutions by splitting using the given history.

    'history' is a list of (guess, result) pairs in the order they were played, where 'result'
    is the tile string for that guess (b=black, y=yellow, g=green). Guesses are upper-cased and
    results are lower-cased before use, so either case can be typed.

    Returns a new set of the words from the module-level 'soln_set' that are consistent with
    every pair; it is empty if no word matches. The returned set never aliases 'soln_set',
    so callers may modify it freely."""
    # Copy so that an empty history does not hand back the shared soln_set itself
    current_set = set(soln_set)
    for guess, result in history:
        # Keep only the words that would have produced exactly this tile result
        current_set = split_tree(guess.upper(), current_set).get(result.lower(), set())
    return current_set

To use this, add one tuple to the list below for each guess you have made, in the format ("GUESS", "xxxxx"). The first item is the word you guessed, and the second item is a string describing the result of that guess, where b = black tile, y = yellow tile, and g = green tile. The best first guess is SOARE.

In [92]:
# Example history: SOARE was guessed and every tile came back black
get_best_guess(get_cur_set(
    [("SOARE", "bbbbb")]
    ))

'CLINT'