In [144]:
"""
Made by Joe Geoghegan
"""
import pandas as pd
from IPython.display import clear_output

### Helper Class

In [145]:
class wordleBreaker:
    """
    Made by Joe Geoghegan
    -Work in Progress
    When provided a dictionary, provides tools to solve the New York Times game Wordle

    The dictionary is a CSV whose index column is named ``words`` and which has one
    column per letter position (``c1`` .. ``c5``).  A ``score`` column is (re)computed
    by ``update_scores`` every time the pool changes.

    Results are written as five characters: g/G means green, y/Y means yellow,
    b/B means black.
    """
    # Class Variables
    word_pool=None          # DataFrame of the words still consistent with every processed guess
    origin=None             # untouched copy of the dictionary, used by reset()
    play_information=None   # per-game bookkeeping, rebuilt by reset()

    _WORD_LENGTH = 5
    _COLUMNS = ('c1','c2','c3','c4','c5')
    _VALID_MARKS = 'gyb'

    # Class creation and management
    def __init__(self, dict_url):
        """
        Loads the dictionary and prepares a fresh game.
        dict_url is anything pandas.read_csv accepts (path, URL or file-like object)
        """
        self.origin = pd.read_csv(dict_url,index_col='words')
        self.reset()

    def reset(self):
        """
        Restores the full dictionary, forgets everything learned so far and rescores the pool
        """
        self.word_pool = self.origin.copy()
        self.play_information = {
            'guesses':[],
            'results':[],
            'green_letters':[None]*self._WORD_LENGTH,
            'in_word':[],
            'safe':[]  # 1-based positions of the latest guess whose black only rules out that one spot
        }
        self.update_scores()

    # Dictionary Manipulation Functions
    def remove_letter_from_character(self,letter,character_space):
        """
        Drops every word that has the given letter at the given character space (1-5)
        """
        column = "c"+str(character_space)
        keep = self.word_pool[column] != letter
        # .copy() keeps the pool an independent frame so later column writes are safe
        self.word_pool = self.word_pool[keep].copy()

    def not_included_letter(self,letter):
        """
        Runs the remove_letter_from_character operation on all character spaces
        that are not already known to be green
        """
        for char in range(1,self._WORD_LENGTH+1): #for every character
            if self.play_information['green_letters'][char-1] is None: #if that character is not a green
                self.remove_letter_from_character(letter,char)

    def set_green(self,letter,character_space):
        """
        Filters the dictionary to only have words that would fit a GREEN case:
        keeps the words with the given letter at the given character space (1-5)
        and records the letter as green for that space
        """
        column = "c"+str(character_space)
        keep = self.word_pool[column] == letter
        self.word_pool = self.word_pool[keep].copy()
        self.play_information['green_letters'][character_space-1] = letter

    def filter_to_include_letter(self,letter,guess):
        """
        Keeps only the words that contain the given letter in at least one character space
        (guess is accepted for interface compatibility and is not used)
        """
        in_word = (self.word_pool[list(self._COLUMNS)] == letter).any(axis='columns')
        self.word_pool = self.word_pool[in_word].copy()

    def yellow_filter(self, letter, character_space,guess):
        """
        Filters the dictionary to only have words that would fit a YELLOW case
            Uses the filter_to_include_letter() and remove_letter_from_character() functions
        """
        self.filter_to_include_letter(letter,guess)
        self.remove_letter_from_character(letter,character_space)

    # Guess Evaluation Functions
    def update_scores(self,sort=True):
        """
        Populates/updates the score value to a rudimentary point system
        The score is a sum of character scores for each character in a word
        The character score is the count of the letter's occurrences in the
            entire word pool's population at that character spot
        """
        columns = list(self._COLUMNS)
        # Count all occurrences of every letter in each character space
        values = pd.DataFrame(
            {col: self.word_pool[col].value_counts() for col in columns}
        ).fillna(0)
        # Look up, for every word, the count of its letter in each character space
        position_scores = pd.DataFrame(
            {f'{col}_score': self.word_pool[col].map(values[col]) for col in columns}
        )
        self.word_pool['score'] = position_scores.sum(axis='columns')
        if sort: # sorts by default, or if specifically selected
            self.sort_pool()

    def sort_pool(self):
        """
        Orders the word pool from highest to lowest score
        """
        self.word_pool = self.word_pool.sort_values(by='score',ascending=False)

    def unique_letter_words(self):
        """
        Provides the dictionary sliced to only include words with all unique letters
        """
        all_different = pd.Series(True,index=self.word_pool.index)
        for first_idx, first in enumerate(self._COLUMNS):
            for second in self._COLUMNS[first_idx+1:]: # every unordered pair of character spaces
                all_different &= self.word_pool[first] != self.word_pool[second]
        return self.word_pool[all_different]

    #play wordle functions
    def _validated_result(self, result):
        """
        Returns the result lower-cased once it is exactly five g/y/b characters,
        prompting for a replacement for as long as it is not
        """
        while True:
            if len(result) != self._WORD_LENGTH:
                result = input("Whoops! The result must be exactly 5 characters.\nPlease re-input result: ")
                continue
            lowered = result.lower()
            bad_spot = next(
                (spot for spot, mark in enumerate(lowered, start=1) if mark not in self._VALID_MARKS),
                None)
            if bad_spot is None:
                return lowered
            result = input(f"Whoops! g/G means green, y/Y means yellow, b/B means black.\nIssue on character #{bad_spot}.\nPlease re-input result: ")

    def process_guess(self, guess, result):
        """
        Takes a guess and its result and narrows the word pool accordingly
        g/G means green, y/Y means yellow, b/B means black
        An invalid result is re-prompted for; a guess that is not five letters raises ValueError
        """
        if len(guess) != self._WORD_LENGTH:
            raise ValueError(f"guess must be exactly {self._WORD_LENGTH} letters, got {guess!r}")
        result = self._validated_result(result)
        marks = list(zip(range(1,self._WORD_LENGTH+1), guess, result))

        # A black letter that is also yellow elsewhere in this guess is still in the word,
        # so it may only be ruled out of its own space.  Deriving this from the guess itself
        # (instead of relying on compute_results) makes manual 'nyt' results work too and
        # stops positions from earlier turns from leaking into this one.
        yellow_letters = {letter for _, letter, mark in marks if mark == 'y'}
        safe = [spot for spot, letter, mark in marks if mark == 'b' and letter in yellow_letters]
        self.play_information['safe'] = safe

        # To reduce edge cases, have to execute results in order: Greens, Yellows, then Blacks
        # and not in letter order
            # Marking green spaces first removes risk of removing the valid word.
            #   Guessing 'erase' when the word is 'barre' would result in the first e being black.
            #   Finding the greens first allows to avoid removing the correct answer
        for spot, letter, mark in marks:
            if mark == 'g': #green
                self.set_green(letter,spot)
        for spot, letter, mark in marks:
            if mark == 'y': #yellow
                self.yellow_filter(letter,spot,guess)
        for spot, letter, mark in marks:
            if mark == 'b': #black
                if spot in safe:
                    self.remove_letter_from_character(letter,spot) #just not in that space
                else:
                    self.not_included_letter(letter) #nowhere outside the green spaces

    def disp(self):
        """
        Displays only words and the score
        """
        return self.word_pool['score']

    def compute_results(self,answer,guess):
        """
        Returns the five character g/y/b result Wordle would give for guess against answer
        Greens are matched first; every remaining answer letter can then turn at most one
        guess letter yellow (left to right), everything else is black
        Does not touch the word pool or play_information
        Raises ValueError unless both words are five letters long
        """
        if len(answer) != self._WORD_LENGTH or len(guess) != self._WORD_LENGTH:
            raise ValueError(f"answer and guess must both be {self._WORD_LENGTH} letters long")
        marks = ['b'] * self._WORD_LENGTH
        unmatched = {} # answer letters not consumed by a green -> how many are left
        for spot in range(self._WORD_LENGTH):
            if answer[spot] == guess[spot]: # Found a green
                marks[spot] = 'g'
            else:
                unmatched[answer[spot]] = unmatched.get(answer[spot],0) + 1
        for spot in range(self._WORD_LENGTH):
            if marks[spot] != 'g' and unmatched.get(guess[spot],0) > 0:
                marks[spot] = 'y'
                unmatched[guess[spot]] -= 1
        return ''.join(marks)

    def play(self,auto_help=True,num_guesses=5):
        """
        Prompts guess, then result, then displays sorted words and repeats until result is all green
        """
        self.run(Play_Mode='nyt',
                auto_help=auto_help,
                num_guesses=num_guesses)

    def play_word(self,word,show_answer=False,auto_help=True,num_guesses=5):
        """
        Prompts guess, then computes result, then displays sorted words and repeats until result is all green
        """
        self.run(Play_Mode='word',
                solution=word,
                show_answer=show_answer,
                auto_help=auto_help,
                num_guesses=num_guesses)

    def play_random(self,word_dataframe,show_answer=False,auto_help=True,num_guesses=5):
        """
        Plays Wordle with a random word dictionary
        The Word must be the index of the dataframe
        """
        # Samples a random row in the given dictionary
        word = word_dataframe.sample(1).index.to_list()[0]
        # Runs a word Play_Mode on the sampled word
        self.play_word(word,
                show_answer=show_answer,
                auto_help=auto_help,
                num_guesses=num_guesses)

    def run(self,Play_Mode,solution='slate',show_answer=False,auto_help=True,num_guesses=5):
        """
        Selects actions based on Play_Mode
        PlayMode = 
            'word' automatically computes results based on given solution
            'nyt' takes manual guesses and results to help with online wordle game
        Any other Play_Mode raises ValueError
        The current word pool is used as-is, call reset() first to start from the full dictionary
        """
        if Play_Mode not in ('word','nyt'):
            raise ValueError(f"Play_Mode must be 'word' or 'nyt', got {Play_Mode!r}")
        guesses = []
        self.update_scores()

        for turn in range(1,num_guesses+1):
            clear_output(wait=False)
            if turn == 1 : print("Welcome to Wordle Breaker!")

            if auto_help : print("Top ten guesses:\n", self.disp().head(10))
            print(f'You are on Guess #{turn}! {num_guesses+1-turn} turns remaining!')
            if show_answer : print(f'Debug Mode: The answer is "{solution}"')
            if turn != 1 : print(f'This is how the game has gone ([Turn,Guess,Result of Guess]) : {guesses}')
            guess = input("Guess: ").strip().lower()
            while len(guess) != self._WORD_LENGTH: # a wrong length guess cannot be scored
                guess = input("Whoops! The guess must be exactly 5 letters.\nGuess: ").strip().lower()

            if Play_Mode == 'nyt' : #'nyt' Play_Mode only
                print("for Results input:\n",
                    "g/G means green\n",
                    "y/Y means yellow\n",
                    "b/B means black\n")
                # validate before recording so the history and the win check see a clean result
                result = self._validated_result(input("Result: "))
            else: #'word' Play_Mode only
                result = self.compute_results(solution,guess)

            guesses.append([turn,guess,result])
            clear_output(wait=False)
            if result == 'ggggg':
                if auto_help : print("Glad I could help!")
                if not auto_help : print("You did it!")
                return
            self.process_guess(guess,result)
            self.update_scores()
        print("Sorry! Better luck next time!")

    def auto_guess(self,solution):
        """
        Pulls the best guess, computes result, processes the result and updates scores
        Returns [guess,result]
        Raises IndexError when no candidate words are left (e.g. solution is not in the dictionary)
        """
        if self.word_pool.empty:
            raise IndexError(f"word pool is empty, '{solution}' cannot be reached from this dictionary")
        guess = self.word_pool.index[0] # Takes the highest rated guess
        result = self.compute_results(solution,guess)
        self.process_guess(guess,result)
        self.update_scores()
        return [guess,result]

    def auto_solve(self,solution,printable=True,num_turns=0):
        """
        Computes the number of turns it will take until the answer is found for the given solution
        if num_turns=0 or less, will take as many turns as it needs to solve
        otherwise at most num_turns guesses are made
        Starts from the current word pool, call reset() first for a fresh game
        Returns a summary string when printable, else the number of turns taken
        (num_turns when the limit was reached without solving)
        """
        turns = 0
        result = [None,'bbbbb']
        guesses = []
        bailed = False
        while (result[1] != 'ggggg'):
            if (num_turns > 0) and (turns >= num_turns): # limit reached before another guess is spent
                bailed = True
                break
            turns += 1
            result = self.auto_guess(solution)
            guesses.append([turns,result[0],result[1]])
        if printable :
            if bailed : return f"Did not solve the word '{solution}' in {turns} turns. \n Guesses: {guesses}"
            else : return f"Solved the word '{solution}' in {turns} turns. \n Guesses: {guesses}"
        else:
            if bailed : return num_turns
            else : return turns

## Running

In [146]:
# Set dictionary path
# Forward slashes avoid the invalid "\R" / "\d" escape sequences of the old
# backslash paths and resolve on Windows, macOS and Linux alike.
# dict_path = "./Resources/dict_allAccepted.csv"
dict_path = "./Resources/dict_minedAnswers.csv"
# CSV must be formatted as
# words,c1,c2,c3,c4,c5,score
# (the first column has to be named "words": it is read as the index)

In [147]:
# Initialize Wordle Breaker from the dictionary CSV at dict_path
# (the constructor reads the file with pandas and scores the word pool)
wb = wordleBreaker(dict_path)

In [148]:
# Draws one random word from a freshly reset pool and lets the solver work it out
wb.reset()
solution = wb.word_pool.sample(n=1).index[0]
print(f'Solving {solution}')
print(wb.auto_solve(solution, num_turns=5))

Solving retch
Solved the word 'retch' in 4 turns. 
 Guesses: [[1, 'slate', 'bbbyy'], [2, 'tenet', 'ygbbb'], [3, 'retro', 'gggbb'], [4, 'retch', 'ggggg']]


In [149]:
# Benchmarks the solver on a random sample of the dictionary.
# Playing every word instead takes about 2.5-3 minutes on my computer:
# swap in the commented-out Entire_Word_Pool line below to do that.
wb.reset()
df = wb.word_pool.copy()
# Entire_Word_Pool = df.index.to_list()

sample_size = 20
Entire_Word_Pool = df.sample(sample_size).index.to_list() #more managable runtime

all_results = []
count = 0
for count, word in enumerate(Entire_Word_Pool, start=1):
    # print(f'Running word #{count}: {word}')
    all_results.append([word, wb.auto_solve(word, printable=False, num_turns=0)])
    wb.reset() # every word has to start from the full dictionary


In [150]:
# One row per solved word: the word (as index) and the number of turns it took.
# Naming the columns at construction (rather than renaming 0/1 afterwards) also
# keeps this cell working when all_results is empty.
results_df = pd.DataFrame(all_results, columns=['words', 'turns']).set_index('words')
results_df

Unnamed: 0_level_0,turns
words,Unnamed: 1_level_1
snipe,3
treat,3
stoic,6
other,4
haute,2
manly,3
mushy,4
slant,2
basil,3
begun,4
