In [1]:
import numpy as np
import pandas as pd
import collections

In [85]:
def _read_word_list(path: str) -> list:
    """Read one word per line from the text file at ``path``.

    Anything after a ``#`` on a line is dropped as a comment, and lines that
    are empty once stripped are skipped. Plain file reading is used instead of
    ``np.loadtxt(..., delimiter="\n")`` because newer NumPy releases reject a
    newline delimiter.
    """
    words = []
    with open(path, encoding="utf-8") as handle:
        for line in handle:
            word = line.split("#", 1)[0].strip()
            if word:
                words.append(word)
    return words


# Words that are answers
word_answers = _read_word_list("wordle-answers-alphabetical.txt")
# Words that are accepted, not including answers
word_allowed = _read_word_list("wordle-allowed-guesses.txt")

num_answers = len(word_answers)
num_allowed = len(word_allowed)

# Separate into letters
# E.g. letter_answers[0] is the tuple of all first letters of the answer words
# (a string is already iterable letter by letter, so it can be zipped directly)
letter_answers = list(zip(*word_answers))
letter_allowed = list(zip(*word_allowed))

# Flattened list of letters (all letters regardless of position)
all_letter_answers = [item for sublist in letter_answers for item in sublist]
all_letter_allowed = [item for sublist in letter_allowed for item in sublist]



53285

## Wordle game as a Python function

In [94]:
def wordle(target_word_idx: int, guess: str):
    """Compare ``guess`` with the answer word at ``target_word_idx``.

    Returns a 4-tuple:
      * set of guessed letters that occur in the target word,
      * set of guessed letters that do not occur in the target word,
      * list of five booleans, True where guess and target share the same
        letter at that position,
      * the target word itself.

    Raises ``AssertionError`` when the guess is not five letters long or is
    in neither ``word_answers`` nor ``word_allowed``.
    """
    if len(guess) != 5:
        raise AssertionError("Guess needs to have 5 letters")
    if guess not in word_answers and guess not in word_allowed:
        raise AssertionError("Guess is not in the word list")

    target_word = word_answers[target_word_idx]

    # Position-by-position comparison of the five letters
    position_hits = [target_word[pos] == guess[pos] for pos in range(5)]

    # Letter membership ignores position and multiplicity
    guessed = set(guess)
    present = set(target_word) & guessed
    absent = guessed - present

    return (present, absent, position_hits, target_word)

    
    
    
    

# Example: score the guess "orate" against the answer word at index 0
wordle(0, "orate")
    


({'a'}, {'e', 'o', 'r', 't'}, [False, False, True, False, False], 'aback')

### Summary statistics

In [None]:
# Tally how often each letter occurs at one position (0 = first letter),
# counting answers and allowed guesses together
letter_idx = 0
letters_in_pos = [*letter_answers[letter_idx], *letter_allowed[letter_idx]]
c = collections.Counter(letters_in_pos)

# Letter -> count, ordered from most to least frequent
dict(c.most_common())

In [None]:
# Tally how often each letter occurs anywhere in a word,
# counting answers and allowed guesses together
letters = [*all_letter_answers, *all_letter_allowed]
c = collections.Counter(letters)

# Letter -> count, ordered from most to least frequent
dict(c.most_common())

## Strategy 1
- Score words from the combined list of answers and allowed guesses
- Choose guesses by overall letter frequency, ignoring letter positions

In [88]:
# Pool the letters and the words of both lists (answers first, then allowed guesses)
all_letters = [*all_letter_answers, *all_letter_allowed]
all_words = word_answers + word_allowed

# How often each letter occurs across the pooled words
count_letters = collections.Counter(all_letters)

# Score each letter by its share of all letter occurrences, so the scores
# add up to 1; keys are ordered from most to least common letter
total = sum(count_letters.values())
letter_score = {letter: count / total for letter, count in count_letters.most_common()}

# The distinct letters of each word, in the same order as all_words
word_letter_set = [list(set(word)) for word in all_words]

def bestWord(banned_letters: "list | None" = None) -> str:
    """Return the word whose distinct letters have the highest total score.

    Parameters
    ----------
    banned_letters:
        Letters whose score is treated as 0 for this call. ``None`` (the
        default) bans nothing.

    Returns
    -------
    str
        The entry of ``all_words`` with the largest summed letter score. On a
        tie the earliest entry wins, because ``np.argmax`` returns the first
        maximum.
    """
    # Work on a copy so the module-level letter_score is never modified
    ls = dict(letter_score)
    if banned_letters is not None:
        for b in banned_letters:
            ls[b] = 0

    # Each word is scored on its unique letters only (see word_letter_set),
    # so repeated letters are not counted twice
    word_score = [sum(ls[letter] for letter in letters) for letters in word_letter_set]

    best_idx = np.argmax(word_score)
    return all_words[best_idx]


# Example: best-scoring word when the letters a, b and c contribute no score
bestWord(["a", "b", "c"])


'osier'

In [67]:
# Scratch calculation: 2 raised to the power 4
2**4

16