In [6]:
import pandas as pd
from collections import defaultdict
from pprint import pprint
from random import choices
from itertools import product
from tqdm import tqdm

In [7]:
# Read the vocabulary file. header=None makes pandas treat the first row as
# data rather than as column names; the cells are then flattened into one list.
words = pd.read_csv("wordle_words.csv", header=None).values
initial_wordset = words.flatten().tolist()

# The list read from the CSV is replaced straight away by this six-word toy
# set, so everything below only ever sees these words.
initial_wordset = ["cat", "bat", "hat", "sat", "tis", "sit"]

# One empty defaultdict(list) per word; the search loop later stores
# longest_paths[start_word][target_word] = <list of guesses>.
longest_paths = dict((word, defaultdict(list)) for word in initial_wordset)


In [8]:
def find_viable_words(yellow, green, gray, wordset=None):
    """Return the candidate words that are consistent with every clue.

    Parameters
    ----------
    yellow : mapping of letter -> list of positions
        The word must contain each letter, but not at any of the listed
        positions.
    green : mapping of letter -> list of positions
        The word must have the letter at every listed position.
    gray : iterable of letters
        The word must contain none of these letters. When a mapping is
        passed only its keys are used.
    wordset : iterable of str, optional
        Candidate words to filter. Defaults to the notebook-level
        ``initial_wordset``.

    Returns
    -------
    list of str
        The surviving words, without duplicates, in the order in which they
        appear in ``wordset``.
    """
    if wordset is None:
        wordset = initial_wordset

    results = []
    # dict.fromkeys drops duplicate words while keeping first-seen order, so
    # the result is reproducible from run to run (iterating a set of strings
    # is not).
    for word in dict.fromkeys(wordset):
        # green criterion: EVERY recorded (letter, position) pair must match.
        # With no green clues all() is True and every word passes.
        greens_match = all(
            word[position] == letter
            for letter, positions in green.items()
            for position in positions
        )
        if not greens_match:
            continue

        # gray criterion: none of the gray letters may appear in the word
        if any(letter in word for letter in gray):
            continue

        # yellow criterion: the letter must be present, but never at a
        # position where it has already been tried
        yellows_match = all(
            letter in word
            and all(word[position] != letter for position in invalid_positions)
            for letter, invalid_positions in yellow.items()
        )
        if yellows_match:
            results.append(word)

    return results
                
def evaluate_guess(guess, target, yellow, green, gray):
    """Score ``guess`` against ``target`` and record the resulting clues.

    Each letter of ``guess`` falls into exactly one category:

    * gray   - the letter does not occur in ``target`` at all
    * green  - ``target`` has the same letter at the same position
    * yellow - the letter occurs in ``target``, but not at this position

    The position of the letter is appended to the list stored under that
    letter in the matching mapping.

    Parameters
    ----------
    guess, target : str
        The guessed word and the word being looked for.
    yellow, green, gray : mutable mapping of letter -> list of positions
        Clues gathered so far. They are updated IN PLACE; both plain dicts
        and ``defaultdict(list)`` are accepted.

    Returns
    -------
    tuple
        ``(yellow, green, gray)`` - the same three objects that were passed
        in, after the update.
    """
    for idx, letter in enumerate(guess):
        if letter not in target:
            # absent from the target altogether -> gray
            bucket = gray
        elif target[idx] == letter:
            # right letter in the right position -> green
            bucket = green
        else:
            # letter is in the target, but at a different position -> yellow
            bucket = yellow

        # setdefault works for plain dicts and defaultdicts alike, and every
        # category accumulates positions the same way.
        bucket.setdefault(letter, []).append(idx)

    return yellow, green, gray

In [15]:
def _clone_clues(clues):
    # Copy a letter -> positions mapping together with each positions list, so
    # the copy can be extended without touching the original.
    cloned = clues.copy()  # keeps the mapping type (dict / defaultdict)
    for letter, positions in clues.items():
        cloned[letter] = list(positions)
    return cloned


def recursion(guess, target, curr_yellow, curr_green, curr_gray, path):
    """Find the longest chain of still-viable guesses that ends at ``target``.

    ``guess`` is scored against ``target``, the words that remain viable
    under the resulting clues are listed, and every one of them is tried as
    the next guess. All orderings are explored, so the cost grows very
    quickly with the size of the word list.

    Parameters
    ----------
    guess : str
        The word being guessed at this step.
    target : str
        The word being looked for.
    curr_yellow, curr_green, curr_gray : mapping of letter -> list of positions
        Clues gathered before this guess. They are NOT modified by this call.
    path : list of str
        Guesses made after the initial one and up to (and including)
        ``guess``; it is used as the prefix of the returned path.

    Returns
    -------
    list of str
        ``path`` extended with the longest sequence of follow-up guesses
        found, ending with ``target``. An empty list when ``guess`` already
        equals ``target`` or when no viable word is left.
    """
    if guess == target:
        return []

    # Results cannot be cached per (guess, target) pair: the answer also
    # depends on the yellow/green/gray clues gathered along the way, and those
    # differ between calls that share the same guess and target.

    # evaluate_guess updates the mappings it receives in place. Hand it
    # private copies so that clues learned while exploring one branch do not
    # leak into the sibling branches (or back into the caller).
    curr_yellow, curr_green, curr_gray = evaluate_guess(guess=guess,
                                                        target=target,
                                                        yellow=_clone_clues(curr_yellow),
                                                        green=_clone_clues(curr_green),
                                                        gray=_clone_clues(curr_gray))

    viable_words = find_viable_words(yellow=curr_yellow,
                                     green=curr_green,
                                     gray=curr_gray)

    longest_path = []
    for next_guess in viable_words:
        if next_guess == target:
            # guessing the target ends the chain here
            candidate = [*path, next_guess]
        else:
            # the longest chain that continues through next_guess
            candidate = recursion(guess=next_guess,
                                  target=target,
                                  curr_yellow=curr_yellow,
                                  curr_green=curr_green,
                                  curr_gray=curr_gray,
                                  path=[*path, next_guess])

        # strict ">" keeps the first path found among equally long ones
        if len(candidate) > len(longest_path):
            longest_path = candidate

    return longest_path

In [16]:
# Run the search for every ordered (start word, target word) pair, including
# the pairs where both words are the same.
for initial_guess, target in tqdm(list(product(initial_wordset, repeat=2)), desc="finding longest path"):
    # every pair starts from a clean slate: no clues yet
    curr_yellow = defaultdict(list)
    curr_green = defaultdict(list)
    curr_gray = defaultdict(list)
    path = []

    result = recursion(
        guess=initial_guess,
        target=target,
        curr_yellow=curr_yellow,
        curr_green=curr_green,
        curr_gray=curr_gray,
        path=[],
    )

    # recursion() returns only the guesses made after the first one, so the
    # starting word is put in front before the path is stored
    longest_paths[initial_guess][target] = [initial_guess, *result]

finding longest path: 100%|██████████| 36/36 [00:00<00:00, 24310.89it/s]
