In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

In [None]:
# Build the word list: load the dictionary, add per-letter columns, and keep
# only the words of the chosen length.
num_letters = 5
MAX_WORD_LENGTH = 20  # longest word length that gets a letter column / subset

# keep_default_na=False stops pandas from parsing real words such as "null" or
# "nan" as missing values; dtype=str keeps every entry a string so the .str
# accessor below always applies.
words = pd.read_csv("dict.csv", header=None, dtype=str, keep_default_na=False)
words = words.rename(columns={0:'word'})

for i in range(1, MAX_WORD_LENGTH + 1):
    new_name = "letter"+str(i)
    words[new_name] = words['word'].str[i-1] # -1 because they are indexed from 0, so letter1 is words['word'].str[0]
# Keep only entries whose first letter is lowercase
words = words[words['letter1'].str.lower() == words['letter1']]

masks = dict()
words_subsets = dict()

# Word lengths are computed once and reused for every subset
word_lengths = words['word'].str.len()
for i in range(1, MAX_WORD_LENGTH + 1):
    words_subset_name = "words"+str(i)
    words_subsets[words_subset_name] = words.loc[word_lengths == i]

# .copy() makes `words` an independent frame, so later column assignments on it
# do not raise SettingWithCopyWarning or touch the stored subset.
words = words_subsets['words'+str(num_letters)].copy()
words.head(3)

In [72]:
def filter_dict(guess, outcome, words):
    '''
    Filters the remaining candidate words using the feedback from one guess

    Parameters
    ----------
    guess : str
        word you entered

    outcome : str
        feedback string like BBYYG, one character per letter of guess,
        where
        B is black
        Y is yellow (right letter wrong spot)
        G is green (right letter right spot)
        (the string is interpreted by is_word_possible)

    words : dataframe
        candidate words, stored in a 'word' column; it is not modified

    Returns
    --------
    a new dataframe with a single 'word' column holding every candidate for
    which is_word_possible returns True; the original index labels are kept

    Notes
    -----
    Relies on is_word_possible, which must already be defined when this
    function is called.

    Examples
    --------
    >>> filter_dict('whaup', 'GGGBB', words)
    '''

    # Build the mask from the 'word' column only, instead of writing a helper
    # column into the caller's dataframe.
    keep = words['word'].map(lambda word: is_word_possible(guess, outcome, word))

    # astype(bool) keeps the mask boolean even when there are no rows at all
    return words.loc[keep.astype(bool), ['word']]

# Demo: candidates left after guessing 'whaup' with the first three letters green.
# NOTE(review): filter_dict calls is_word_possible, which is defined in a later
# cell -- on a fresh kernel that cell has to be run before this line.
filter_dict('whaup', 'GGGBB', words)

Unnamed: 0,word
231506,whack
231511,whale
231529,whalm
231531,whaly
231534,whame
231540,whand
231541,whang
231547,whank
231554,whare
231556,wharf


In [68]:
def is_word_possible(guess, outcome, target):
    '''
    Checks if target word is possible given the outcome of the guess

    The check scores guess against target the way Wordle does and compares
    the result with the reported outcome. This handles repeated letters:
    a letter marked black only means the target has no *further* copies of
    it beyond those already marked green or yellow.

    Parameters
    ----------
    guess : str
        word you entered

    outcome : str
        str like BBYYG (case-insensitive), one character per letter of guess
        where
        B is black (no unaccounted-for copy of the letter in the word)
        Y is yellow (right letter wrong spot)
        G is green (right letter right spot)
        any other character places no constraint on that position

    target : str
        word to check if it is possible

    Returns
    --------
    True if target is consistent with the outcome
    False if it is not (including when target and guess differ in length)

    Raises
    ------
    ValueError
        if outcome and guess do not have the same length

    Examples
    --------
    >>> is_word_possible('whaup', 'GGGBB', 'whack')
    True
    >>> is_word_possible('speed', 'BBYBY', 'abide')
    True
    '''

    marks = outcome.lower()
    if len(marks) != len(guess):
        raise ValueError("outcome must have one character per letter of guess")

    # A word of a different length can never be the answer
    if len(target) != len(guess):
        return False

    expected = [None] * len(guess)
    unmatched = {}  # target letters not consumed by a green, with their counts

    # First pass: greens, and tally the target letters that are still free
    for i, (guess_letter, target_letter) in enumerate(zip(guess, target)):
        if guess_letter == target_letter:
            expected[i] = 'g'
        else:
            unmatched[target_letter] = unmatched.get(target_letter, 0) + 1

    # Second pass: each remaining guess letter is yellow while a free copy of
    # it is left in the target, otherwise black
    for i, guess_letter in enumerate(guess):
        if expected[i] is not None:
            continue
        if unmatched.get(guess_letter, 0) > 0:
            expected[i] = 'y'
            unmatched[guess_letter] -= 1
        else:
            expected[i] = 'b'

    # Only positions carrying a recognised mark constrain the target
    return all(mark == want for mark, want in zip(marks, expected) if mark in 'byg')

# Sanity check: 't' and 'y' are green and 'u', 'b', 's' do not occur in 'tangy',
# so 'tangy' should be reported as still possible.
is_word_possible('tubsy', 'GBBBG', 'tangy')

True

In [92]:
def wordle_solver(guess, outcome, words):
    '''
    Summarises the state of the game after one guess

    Parameters
    ----------
    guess : str
        word you entered

    outcome : str
        str of strict type like BBYYG
        where
        B is black (letter not in word)
        Y is yellow (right letter wrong spot)
        G is green (right letter right spot)

    words : dataframe
        dictionary of remaining possible words

    Returns
    --------
    Three-line string to help you decide your next step
    eg
    "There are 78 possible words remaining
    Of them, the best guess for narrowing words down is TUBSY for average remaining number of 15.
    Of all possible words, the best guess for narrowing words down is STARE for average remaining number of 3."

    Only the count on the first line is computed (via filter_dict); the
    suggested words TUBSY / STARE and both averages are hard-coded
    placeholders for now.

    Examples
    --------
    >>> wordle_solver('whaup', 'GGGBB', words)
    '''

    candidates = filter_dict(guess, outcome, words)
    n_left = candidates.shape[0]

    # Placeholder scores -- the ranking of guesses is not implemented yet
    score_among_candidates = 0
    score_among_all = 0

    report_lines = (
        f"There are {n_left} possible words remaining",
        f"Of them, the best guess for narrowing words down is TUBSY for average remaining number of {score_among_candidates}.",
        f"Of all possible words, the best guess for narrowing words down is STARE for average remaining number of {score_among_all}.",
    )
    return "\n".join(report_lines)

# Example run; only the remaining-word count is computed, the "best guess"
# words and averages in the printed text are placeholders.
print(wordle_solver('whaup', 'GGGBB', words))

There are 15 possible words remaining
Of them, the best guess for narrowing words down is TUBSY for average remaining number of 0.
Of all possible words, the best guess for narrowing words down is STARE for average remaining number of 0.


- "If the answer is word A, guessing word X will leave n words remaining"   
- "If the answer is word B, guessing word X will leave m words remaining"
- "If the answer is word C, guessing word X will leave k words remaining"
.   
.   
.   

The score for word X is then the average of n, m, k, … over all possible answers.

For a dictionary(targets) of size N and a dictionary(guesses) of size M, that's N×M operations.

Can do:   
**dictionary(targets) = dictionary(guesses)** to find (1) the best possible guess that can also win,   
then   
**dictionary(targets) ≠ dictionary(guesses)** to find (2) the best possible word for narrowing down overall,


then compare how much better (2) is than (1) at narrowing down the dictionary, and decide whether to "yolo" (play a word that could itself be the answer).
