In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

In [None]:
# Make my dictionary
num_letters = 5
words = pd.read_csv("dict.csv", header=None)
words = words.rename(columns={0:'word'})

for i in range(1,21):
    new_name = "letter"+str(i)
    words[new_name] = words['word'].str[i-1] # -1 because they are indexed from 0, so letter1 is words['word'].str[0]
words = words[words['letter1'].str.lower() == words['letter1']]

masks = dict()
words_subsets = dict()

for i in range(1,21):
    words_subset_name = "words"+str(i)
    words_subsets[words_subset_name] = words.loc[(words['word'].str.len() == i)]

words = words_subsets['words'+str(num_letters)]
words.head(3)

In [270]:
def is_word_possible(guess, outcome, target):
    '''
    Checks if target word is possible given the outcome of the guess
    Parameters
    ----------
    guess : str
        word you entered
    
    outcome : str
        str of strict type like BBYYG
        where 
        B is black (letter not in word)
        Y is yellow (right letter wrong spot)
        G is green (right letter right spot)
    
    target : str
        word to check if it is possible
    
    Returns
    --------
    TRUE is possible
    FALSE if impossible
    
    Examples
    --------
    >>> is_word_possible('whaup', 'GGGBB', 'whack')
    '''

    res = True
    guess_arr = list(guess)
    outcome_arr = list(outcome.lower())
    target_arr = list(target)
    
    yellows, blacks = set(), set()
    
    for i in range(len(guess)):
        if outcome_arr[i] == 'b':
            blacks.add(guess[i])
        elif outcome_arr[i] == 'y':
            yellows.add(guess[i])
    
    #print(blacks)
    #print(yellows)
    # Set outcome to false unless we find the yellow in the word
    if len(yellows) > 0:
        res = False
        
    for i in range(len(target)):
        #print(f"i is {i} and letter is {guess_arr[i]}. outcome letter is {outcome_arr[i]}")
        # if we whiff on green, exit loop and return False
        if outcome_arr[i] == 'g':
            if guess_arr[i] != target_arr[i]:
                #print("failed greens")
                res = False
                break

        
        # if we have a yellow letter, exit loop and return False
        elif target_arr[i] in blacks:
            #print(f"failed blacks at letter#{i+1} for letter {guess_arr[i]}")
            res = False
            break
        
        # if we have a yellow we need, set outcome back to True
        elif len(yellows)>0:
            #print("testing yellows")
            # only if it's not on the same spot, otherwise it would have been green
            if target_arr[i] in yellows and guess_arr[i] != target_arr[i]:
                res = True
        #print(f"success i is {i} and letter is {guess_arr[i]}")
    return res

is_word_possible('whata', 'GGGBB', 'whala')

False

In [248]:
def get_outcome_str(guess, target):
    '''
    Return outcome str given guess and target
    
    Parameters
    ----------
    guess : str
        word you entered
    
    target : str
        correct word

    Returns
    --------
    str of strict type like BBYYG
        where 
        B is black (letter not in word)
        Y is yellow (right letter wrong spot)
        G is green (right letter right spot)
    
    Examples
    --------
    >>> get_outcome_str('whaup', 'whack')
    '''

    guess_arr = list(guess)
    outcome_arr = []
    target_arr = list(target)
    yellows = list(target)
    
    #Problem:
    #guess: whata, target: whack, outcome: GGGBY, guesses left: 0

    
    for i in range(len(guess)):
        if guess_arr[i] == target_arr[i]:
            outcome_arr.append('G')
            yellows.remove(guess_arr[i])
        elif guess_arr[i] in yellows:
            outcome_arr.append('Y')
        else:
            outcome_arr.append('B')
            
    res = ''.join(outcome_arr)
    
    return res

get_outcome_str('whata', 'whaak')

'GGGBY'

In [113]:
guess_arr = list('guess')
guess_arr
'un' in guess_arr
hello = []
hello.append('T')

hello.append('M')
hello

['T', 'M']

In [250]:
def filter_dict(guess, outcome, words):
    
    '''
    Filters possible remaining words based on the one you provided
    Parameters
    ----------
    guess : str
        word you entered
    
    outcome : str
        str of strict type like BBYYG
        where 
        B is black (letter not in word)
        Y is yellow (right letter wrong spot)
        G is green (right letter right spot)
    
    words : dataframe
        all possible words
    
    Returns
    --------
    a dataframe with all possible words given the info provided by word
    
    Examples
    --------
    >>> filter_dict('whaup', 'BYBGG', dict)
    '''
    
    res = words.copy()

    res['possible'] = res.apply(lambda row: is_word_possible(guess, outcome, row['word']), axis = 1)
    res = res.loc[res['possible']==True]
    res = res[['word']]#.values
    
    return(res)

filter_dict('whale', 'GGGBB', words)

Unnamed: 0,word
231506,whack
231536,whamp
231540,whand
231541,whang
231547,whank
231556,wharf
231569,wharp
231571,whart
231576,whata
231585,whats


In [282]:
def wordle_solver(guess, outcome, words):
    '''
    Checks if target word is possible given the outcome of the guess
    Parameters
    ----------
    guess : str
        word you entered
    
    outcome : str
        str of strict type like BBYYG
        where 
        B is black (letter not in word)
        Y is yellow (right letter wrong spot)
        G is green (right letter right spot)
    
    words : dataframe
        dictionary of remaining possible words
    
    Returns
    --------
    String with embedded values to help you decide your next step
    eg
    "There are 78 possible words remaining
    Of them, the best guess for narrowing words down is TUBSY for average remaining number of 15
    Of all possible words, the best guess for narrowing words down is STARE for average remaining number of 3."
    
    Examples
    --------
    >>> wordle_solver('whaup', 'GGGBB', words)
    '''
    
    possible_words = filter_dict(guess, outcome, words)
    possible_words_arr = possible_words.values
    
    ave_yolo_key = optimal_guess(possible_words, possible_words)[0]
    ave_yolo_val = optimal_guess(possible_words, possible_words)[1]
    
    ave_all_key = optimal_guess(possible_words, possible_words)[0]
    ave_yolo_val = optimal_guess(possible_words, possible_words)[1]
    
    #print(averages)
    
    res = f"There are {str(possible_words.shape[0])} possible words remaining\n"
    res = res + f"Of them, the best guess for narrowing words down is {ave_yolo_key.upper()} for average remaining number of {str(ave_yolo_val)}.\n"
    res = res + f"Of all possible words, the best guess for narrowing words down is {ave_all_key.upper()} for average remaining number of {str(ave_all_val)}."
    
    return res

print(wordle_solver('whaup', 'GGGBB', words))



There are 15 possible words remaining
Of them, the best guess for narrowing words down is WHATA for average remaining number of 10.43.
Of all possible words, the best guess for narrowing words down is STARE for average remaining number of 0.


In [279]:
def optimal_guess(guesses, targets):
    averages = dict()
    #print(guesses)
    
    # loop all guesses
    guesses = pd.DataFrame(data={'word': ['whata']})

    for guess in guesses.values:
        
        ave_guess = 0
        rem_list = []
        
        # loop all possible final words
        for target in targets.values:
            if target[0] != guess[0]:
                # given this guess + target, calc outcome
                #print(guess[0])
                #print(target[0])
                outcome = get_outcome_str(guess[0], target[0])
                # how many words are still possible with this outcome?
                rem_poss_guesses = len(filter_dict(guess[0], outcome, targets))
                rem_list.append(rem_poss_guesses)
                #print(f"guess: {guess[0]}, target: {target[0]}, outcome: {outcome}, guesses left: {rem_poss_guesses}")
        
        # get the average
        ave_guess = np.mean(rem_list)
        averages[guess[0]] = round(ave_guess,2)
        
    remaining = possible_words.shape[0]
    
    min_key = min(averages, key=averages.get)
    min_val = averages[min_key]
    res = f"There are {str(remaining)} possible words remaining\n"
    res = res + f"Of them, the best guess for narrowing words down is {min_key.upper()} for average remaining number of {str(min_val)}.\n"
    
    return min_key, min_val
    
possible_words = filter_dict('whaup', 'GGGBB', words)
optimal_guess(possible_words, possible_words)

('whata', 10.43)

In [247]:
possible_words = filter_dict('whaup', 'GGGBB', words)
filter_dict('whale', 'GGGBB', possible_words)
#possible_words

Unnamed: 0,word
231506,whack
231540,whand
231541,whang
231547,whank
231556,wharf
231571,whart
231576,whata
231585,whats


In [245]:
possible_words

Unnamed: 0,word
231506,whack
231511,whale
231529,whalm
231531,whaly
231534,whame
231540,whand
231541,whang
231547,whank
231554,whare
231556,wharf
