In [192]:
url = 'https://raw.githubusercontent.com/tabatkins/wordle-list/main/words'
import requests
import numpy as np
from numba import jit

def download_words(url, file_name, timeout=30):
    """
    Download the file at `url` (a newline-separated list of Wordle words) and save it locally under `file_name`.
    :param url: address of the word list
    :param file_name: local path the downloaded bytes are written to
    :param timeout: seconds to wait for the server before giving up
    :raises requests.HTTPError: if the server answers with an error status
    """
    # Fetch and validate first, so a failed request never truncates or
    # overwrites an existing local copy with an empty file or an error page.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open(file_name, 'wb') as f:
        f.write(response.content)

download_words(url, 'words.txt')

# Read through a context manager so the file handle is closed promptly.
# splitlines() avoids a spurious empty last entry if the file ends with a newline.
with open('words.txt') as f:
    words = f.read().splitlines()
# Sanity check: every entry must be a five-letter word.
assert all(len(word)==5 for word in words)

# Wordle gives players six chances to guess a randomly selected five-letter word. As shown above, if you have the right letter in the right spot, it shows up green. A correct letter in the wrong spot shows up yellow. A letter that isn't in the word in any spot shows up gray. 

# You can enter a total of six words, meaning you can enter five burner words from which you can learn hints about the letters and their placements. Then you get one chance to put those hints to use. Or you can try for performance and guess the word of the day in three, two or even one go.

def conditional_entropy(words, condition=None, verbose=False):
    """
    Return the entropy (in bits) of a uniform distribution over the words that satisfy a condition, i.e. log2 of the number of compatible words.
    :param words: list of words
    :param condition: dictionary of the form {letter: {position: is_present}}; True means the word must have the letter at that position, False means the word must not have the letter at that position. None is treated as an empty condition.
    :param verbose: if True, print the compatible words (or only their count when there are 20 or more)
    :return: float
    """
    def violates(word, letter, position, is_present):
        """
        Tell whether a single (letter, position, is_present) constraint rules the word out.
        """
        if is_present:
            # The membership test comes first so the position is only indexed when the letter occurs in the word.
            return letter not in word or word[position] != letter
        return word[position] == letter

    def word_is_compatible(word, condition=None):
        """
        Check that the word breaks none of the constraints in the condition.
        :param word: string
        :param condition: dictionary of the form {letter: {position: is_present}}
        :return: bool
        """
        return not any(
            violates(word, letter, position, is_present)
            for letter, positions in condition.items()
            for position, is_present in positions.items()
        )

    active_condition = condition or {}
    survivors = []
    for candidate in words:
        if word_is_compatible(candidate, active_condition):
            survivors.append(candidate)

    if verbose:
        if len(survivors) >= 20:
            print(f"Number of compatible words: {len(survivors)} (too many to print)")
        else:
            print(f"Compatible words: {survivors}")

    # Every remaining word is assumed equally likely to be the word of the day.
    return np.log2(len(survivors))

def information_gain(words, condition, condition_initial=None, verbose=False):
    """
    Return how many bits of entropy are removed when going from an initial condition to a new condition on the same word list.
    :param words: list of words
    :param condition: dictionary of the form {letter: {position: is_present}} describing the knowledge after the update
    :param condition_initial: condition describing the knowledge before the update; None means no prior knowledge
    :param verbose: forwarded to conditional_entropy for both evaluations
    :return: float
    """
    # The initial entropy is evaluated first so verbose output appears in before/after order.
    entropy_before = conditional_entropy(words, condition_initial, verbose=verbose)
    entropy_after = conditional_entropy(words, condition, verbose=verbose)
    return entropy_before - entropy_after

# Calculate the base entropy of the list of words ahead of time to save compute.
# With no condition every word is compatible, so this is log2(len(words)) bits.
base_entropy = conditional_entropy(words)

def evaluate_guess_to_condition(ground_truth, guess):
    """
    Compare a guess with the ground truth position by position and record the outcome as a condition.
    :param ground_truth: string of length 5
    :param guess: string of length 5
    :return: dictionary of the form {letter: {position: is_present}} with one entry per guessed position; is_present is True exactly when the guessed letter equals the ground-truth letter at that position.
    """
    assert len(ground_truth) == 5 and len(guess) == 5
    condition = {}
    for index, (guess_letter, true_letter) in enumerate(zip(guess, ground_truth)):
        # A repeated letter in the guess collects several positions under one key.
        condition.setdefault(guess_letter, {})[index] = guess_letter == true_letter
    return condition

def simplify_condition(condition):
    """
    Drop redundant negative entries from a condition: once any letter is marked True at a position, False entries for that position are removed. Letters left without entries are removed too. Does not yet do process of elimination.
    :param condition: dictionary of the form {letter: {position: is_present}}
    :return: a new, simplified condition; the input is not modified
    """
    # Positions that already have a confirmed letter.
    fixed_positions = set()
    for positions in condition.values():
        for position, is_present in positions.items():
            if is_present:
                fixed_positions.add(position)

    simplified = {}
    for letter, positions in condition.items():
        kept = {
            position: is_present
            for position, is_present in positions.items()
            if is_present or position not in fixed_positions
        }
        # Skip letters that have no positions left.
        if kept:
            simplified[letter] = kept
    return simplified

def combine_conditions(*conditions):
    """
    Merge any number of conditions into one and simplify the result.
    :param conditions: conditions of the form {letter: {position: is_present}}
    :return: dictionary of the form {letter: {position: is_present}} holding the union of all entries, passed through simplify_condition
    :raises AssertionError: if two conditions disagree about the same letter at the same position
    """
    merged = {}
    for single_condition in conditions:
        for letter, positions in single_condition.items():
            for position, is_present in positions.items():
                known = merged.setdefault(letter, {})
                if position in known:
                    assert known[position] == is_present, f"Contradictory information about {letter} at position {position}."
                else:
                    known[position] = is_present
    return simplify_condition(merged)

def evaluate_guesses_to_condition(ground_truth, guesses):
    """
    Evaluate several guesses against the same ground truth and merge what they reveal into a single condition.
    :param ground_truth: string
    :param guesses: list of strings
    :return: dictionary of the form {letter: {position: is_present}} combining the per-guess conditions
    """
    per_guess_conditions = []
    for guess in guesses:
        per_guess_conditions.append(evaluate_guess_to_condition(ground_truth, guess))
    return combine_conditions(*per_guess_conditions)

def expected_information_gain(words, guess, n_gt_samples=None, n_wordlist_samples=None):
    """
    Calculate the expected information gain (in bits) of a guess, assuming every word in the list is equally likely to be the ground truth.
    :param words: list of words
    :param guess: string
    :param n_gt_samples: number of ground-truth words drawn in the Monte Carlo simulation. If None, every word of the (possibly sub-sampled) word list is used as ground truth exactly once, which gives the exact expectation over that list but costs a quadratic number of compatibility checks.
    :param n_wordlist_samples: if set, entropies are estimated on a random subset of at most this many words (drawn without replacement) instead of the full list; a fresh subset is drawn for every Monte Carlo sample.
    :return: float
    """
    def draw_wordlist():
        # Optionally shrink the candidate list to keep each entropy evaluation cheap.
        if n_wordlist_samples:
            return np.random.choice(words, size=min(n_wordlist_samples, len(words)), replace=False)
        return words

    def gain_for(ground_truth, wordlist):
        condition = evaluate_guess_to_condition(ground_truth, guess)
        # The prior entropy is taken from the list actually being filtered, so the
        # result is correct for any `words`, not only the module-level word list.
        return np.log2(len(wordlist)) - conditional_entropy(wordlist, condition)

    if n_gt_samples is None:
        # Exact expectation: enumerate every candidate as the ground truth.
        wordlist = draw_wordlist()
        if len(wordlist) == 0:
            raise ValueError("words must not be empty")
        return sum(gain_for(ground_truth, wordlist) for ground_truth in wordlist) / len(wordlist)

    average_information_gain = 0
    for _ in range(n_gt_samples):
        wordlist_sample = draw_wordlist()
        # Drawing the ground truth from the sample guarantees at least one compatible word.
        ground_truth = np.random.choice(wordlist_sample)
        average_information_gain += gain_for(ground_truth, wordlist_sample) / n_gt_samples
    return average_information_gain

if __name__ == "__main__":
    # Monte Carlo estimate for the guess 'adeiu': 1000 sampled ground truths, evaluated against the full word list (no sub-sampling).
    print(expected_information_gain(words, 'adeiu', n_gt_samples=1000, n_wordlist_samples=None))

In [205]:
base_entropy

13.660330223675741

In [203]:
expected_information_gain(words, 'adeiu', n_gt_samples=1000, n_wordlist_samples=1000)

1.1407539295777844

In [172]:
expected_information_gain(words, 'aaaaa', n_gt_samples=1000, n_wordlist_samples=None)

2.127028995649937

In [None]:
# condition_initial is left at its default (None) in all three calls, so each gain is measured against the full word list.
# 1. Information gain if we know all the letters in the word "awake".
gain = information_gain(words, {'a': {0: True, 2: True}, 'w': {1: True}, 'k': {3: True}, 'e': {4: True}}, verbose=True)
print(f"1. Information gain: {gain}")
# 2. Information gain if we know all the letters in the word "awake" except the "k".
gain = information_gain(words, {'a': {0: True, 2: True}, 'w': {1: True}, 'e': {4: True}}, verbose=True)
print(f"2. Information gain: {gain}")
# 3. Information gain if we know all the letters in the word "awake" except the "k", and we also know that the fourth letter (at index 3) is not an "r".
gain = information_gain(words, {'a': {0: True, 2: True}, 'w': {1: True}, 'e': {4: True}, 'r': {3: False}}, verbose=True)
print(f"3. Information gain: {gain}")