In [1]:
import numpy as np
import pandas as pd
import collections
import time

In [2]:
# Words that are answers
word_answers = list(np.loadtxt("wordle-answers-alphabetical.txt", comments="#", delimiter="\n", unpack=False, dtype=str))
# Words that are accepted, not including answers
word_allowed = list(np.loadtxt("wordle-allowed-guesses.txt", comments="#", delimiter="\n", unpack=False, dtype=str))

num_answers = len(word_answers)
num_allowed = len(word_allowed)

# Separate into letters
# E.g. letter[0] is the list of all first letters of the words
letter_answers = list(zip(*[list(_) for _ in word_answers]))
letter_allowed = list(zip(*[list(_) for _ in word_allowed]))

# Flattened list of letters (all letters regardless of position)
all_letter_answers = [item for sublist in letter_answers for item in sublist]
all_letter_allowed = [item for sublist in letter_allowed for item in sublist]

all_letters = list(all_letter_answers) + list(all_letter_allowed)
all_words = word_answers + word_allowed

## Wordle game as a python function

In [3]:
def wordle(target_word_idx: int, guess: str) -> dict:
    # For a chosen target word (identified by index), make a guess
    # This function will output in a dictionary:
    #   - "correct_guess" [boolean]: Whether the guess was correct or not,
    #   - "correct_letters" [set]: The letters in the guess that were correct
    #   - "incorrect_letters" [set]: The letters in the guess that were incorrect
    #   - "correct_locations" [list]: The locations of letters in the guess that were correct and in the right place

    if not (len(guess) == 5):
        raise AssertionError("Guess needs to have 5 letters")
    if not (guess in (word_answers + word_allowed)):
        raise AssertionError("Guess is not in the word list")

    target_word = word_answers[target_word_idx]
    target_letters = list(target_word)

    guess_letters = list(guess)

    correct_locations = [target_letters[j] ==
                         guess_letters[j] for j in range(5)]
    correct_letters = set(target_letters).intersection(set(guess_letters))

    incorrect_letters = set(guess_letters) - correct_letters

    result = {"correct_guess": np.prod(correct_locations) == 1,
              "correct_letters": correct_letters,
              "incorrect_letters": incorrect_letters,
              "correct_locations": correct_locations}

    return result


wordle(0, "abate")


{'correct_guess': False,
 'correct_letters': {'a', 'b'},
 'incorrect_letters': {'e', 't'},
 'correct_locations': [True, True, True, False, False]}

### Summary statistics

In [4]:
# Most common letters in a certain position
letter_idx = 0
letters_in_pos = list(letter_answers[letter_idx]) + list(letter_allowed[letter_idx])
c = collections.Counter(letters_in_pos)

dict(c.most_common())

{'s': 1565,
 'c': 922,
 'b': 909,
 'p': 859,
 't': 815,
 'a': 737,
 'm': 693,
 'd': 685,
 'g': 638,
 'r': 628,
 'f': 598,
 'l': 577,
 'h': 489,
 'w': 413,
 'k': 376,
 'n': 325,
 'e': 303,
 'o': 262,
 'v': 242,
 'j': 202,
 'u': 189,
 'y': 181,
 'i': 165,
 'z': 105,
 'q': 78,
 'x': 16}

In [5]:
# Most common letters in general
letters = list(all_letter_answers) + list(all_letter_allowed)
c = collections.Counter(letters)

dict(c.most_common())

{'s': 6665,
 'e': 6662,
 'a': 5990,
 'o': 4438,
 'r': 4158,
 'i': 3759,
 'l': 3371,
 't': 3295,
 'n': 2952,
 'u': 2511,
 'd': 2453,
 'y': 2074,
 'c': 2028,
 'p': 2019,
 'm': 1976,
 'h': 1760,
 'g': 1644,
 'b': 1627,
 'k': 1505,
 'f': 1115,
 'w': 1039,
 'v': 694,
 'z': 434,
 'j': 291,
 'x': 288,
 'q': 112}

## Strategy 1
- Use all answers and allowed words
- Guess words based on incorrect letters only
    

In [10]:

count_letters = collections.Counter(all_letters)
# Create a scoring system for letters based on their popularity
letter_score = dict(count_letters.most_common())
# Divide through by the sum of all scores, so they all add to 1
total = sum(letter_score.values())
letter_score = {k: v / total for k, v in letter_score.items()}

# For each word, get the list of unique letters in it
word_letter_set = [list(set(list(_))) for _ in all_words]

def bestWord1(banned_letters: list = [], banned_words: list = []) -> str:
    # Make a copy of letter scoring without banned letters
    ls = dict(letter_score)
    for b in banned_letters:
        ls[b] = 0
    
    # Calculate the letter score for each word
    word_score = []
    for w in word_letter_set:
        score = sum([ls[_] for _ in w])
        word_score.append(score)

    # Get the word indices of the banned words
    banned_word_idx = [all_words.index(_) for _ in banned_words]
    # Set banned word scores to zero
    for j in banned_word_idx:
        word_score[j] = 0
    

    best_idx = np.argmax(word_score)

    best_word = all_words[best_idx]

    return best_word


# bestWord1(["a", "b", "c"], ["osier"])


Play the game using strategy 1

In [11]:
word_idx = 1
print(f"Target word: {word_answers[word_idx]}\n\n")

banned_letters = set()
banned_words = set()
num_guesses = 0
guesses = []
while True:
    best_word = bestWord1(list(banned_letters), list(banned_words))
    guesses.append(best_word)
    num_guesses = num_guesses + 1

    result = wordle(word_idx, best_word)

    if result["correct_guess"]:
        break
    
    banned_letters = banned_letters.union(result["incorrect_letters"])
    banned_words = banned_words.union(best_word)
    print(best_word)
    print(banned_letters)
    time.sleep(1)

print(f"Number of guesses: {num_guesses}")
print(guesses)


Target word: abase


arose
{'r', 'o'}


ValueError: 'o' is not in list

In [None]:
# Function that takes the bestWord function as an input and plays every game of wordle (for all word_answers)
# def playWordle(bestWord, )