In [98]:
"""
Made by Joe Geoghegan
-Work in Progress
"""
import pandas as pd

## Helper Class

In [99]:
class wordleBreaker:
    """
    Made by Joe Geoghegan
    -Work in Progress
    When provided a dictionary, provides tools to solve the New York Times game Wordle

    The dictionary is a pandas DataFrame with one row per word. The methods
    below read the columns c1..c5, where cN holds the letter at character
    space N (1-based), and write a 'score' column when scoring is requested.

    All filters replace self.word_pool with a filtered copy selected by a
    boolean mask. Rows are never removed because of NaN values in unrelated
    columns (for example an unscored 'score' column).
    """
    # Class Variables
    word_pool=None
    origin=None
    # Column names of the five character spaces, in word order
    _CHAR_COLUMNS = ("c1", "c2", "c3", "c4", "c5")
    # Letters that always appear as rows of the score table
    _ALPHABET = "abcdefghijklmnopqrstuvwxyz"

    # Class creation and management
    def __init__(self, origin):
        """
        Stores two independent copies of the dictionary: the working pool that
        the filters narrow down, and the pristine origin used by reset()
        """
        self.word_pool = origin.copy()
        self.origin = origin.copy()

    def reset(self):
        """
        Restores the working pool from the stored origin and rescores it
        """
        self.word_pool = self.origin.copy()
        self.update_score_pool()

    # Dictionary Manipulation Functions
    def remove_letter_from_character(self,letter,character_space):
        """
        Keeps only the words that DO NOT have the given letter at the given
        character space (1-5)
        """
        character = "c"+str(character_space)
        keep = self.word_pool[character] != letter
        # .copy() so later column assignments never write into a view
        self.word_pool = self.word_pool[keep].copy()

    def not_included_letter(self,letter):
        """
        Runs the remove_letter_from_character operation on all character spaces
        """
        for char in range(1,6):
            self.remove_letter_from_character(letter,char)

    def set_letter_to_character(self,letter,character_space):
        """
        Filters the dictionary to only have words that would fit a GREEN case
        Keeps only the words that DO have the given letter at the given
        character space (1-5)
        """
        character = "c"+str(character_space)
        keep = self.word_pool[character] == letter
        self.word_pool = self.word_pool[keep].copy()

    def filter_to_include_letter(self,letter):
        """
        Keeps only the words that contain the given letter in at least one
        character space
        """
        columns = list(self._CHAR_COLUMNS)
        in_word = (self.word_pool[columns] == letter).any(axis=1)
        self.word_pool = self.word_pool[in_word].copy()

    def yellow_filter(self, letter, character_space):
        """
        Filters the dictionary to only have words that would fit a YELLOW case
            Uses the filter_to_include_letter() and remove_letter_from_character() functions
        """
        self.filter_to_include_letter(letter)
        self.remove_letter_from_character(letter,character_space)

    # Guess Evaluation Functions
    def update_score_pool(self):
        """
        Populates/updates the score value to a rudimentary point system
        The score is a sum of character scores for each character in a word
        The character score is the count of the letter's occurrences in the
            entire word pool's population at that character spot

        Returns the count table: a DataFrame indexed by the letters a-z with
        one float column per character space (c1..c5).

        Fix note: the previous implementation looked the counts up with
        values[col][pool[col]].reset_index()[col], which renumbers the result
        0..n-1. Assigning that to a filtered pool (whose index keeps its
        original labels) misaligned the rows and produced NaN scores. Mapping
        each column through the count table keeps the pool's own index.
        """
        columns = list(self._CHAR_COLUMNS)
        # Count all occurrences of every letter in each character space;
        # index= forces one row per letter so the table shape is consistent
        values = pd.DataFrame(
            {col: self.word_pool[col].value_counts() for col in columns},
            index=list(self._ALPHABET),
        ).fillna(0).astype(float)

        # For each word's letters add space scores together
        score = pd.Series(0.0, index=self.word_pool.index)
        for col in columns:
            # Characters outside a-z have no row in the table and add 0
            score = score + self.word_pool[col].map(values[col]).fillna(0)
        self.word_pool['score'] = score
        return values

    def unique_letter_words(self):
        """
        Provides the dictionary sliced to only include words with all unique letters
        """
        columns = list(self._CHAR_COLUMNS)
        all_unique = pd.Series(True, index=self.word_pool.index)
        # Compare every pair of character spaces exactly once
        for position, left in enumerate(columns):
            for right in columns[position+1:]:
                all_unique &= self.word_pool[left] != self.word_pool[right]
        return self.word_pool[all_unique]

    #play wordle functions
    def process_guess(self, guess, result):
        """
        Takes a guess and result and calls the matching filter for each character
        g/G means green, y/Y means yellow, b/B means black

        A black tile normally means the letter is absent from the word. When
        the same letter is green or yellow elsewhere in the guess, the black
        tile only rules the letter out at that one character space; removing
        every word containing it would discard the real answer.
        """
        # Letters the result confirms are in the word somewhere
        confirmed = {
            letter for letter, mark in zip(guess, result)
            if mark in ('g', 'G', 'y', 'Y')
        }
        for char in range(1,6):
            g_char = guess[char-1]
            char_result = result[char-1]
            if char_result in ('g', 'G'): #green
                self.set_letter_to_character(g_char,char)
            elif char_result in ('y', 'Y'): #yellow
                self.yellow_filter(g_char,char)
            elif char_result in ('b', 'B'): #black
                if g_char in confirmed:
                    self.remove_letter_from_character(g_char,char)
                else:
                    self.not_included_letter(g_char)
            else:
                print(f"Whoops! g/G means green, y/Y means yellow, b/B means black.\nI did not recognize that letter!\nPlease manually redo character #{char}")


## Running

In [100]:
# Set dictionary path
# Forward slashes avoid the invalid "\R" and "\d" escape sequences of a
# backslash-separated literal and resolve on Windows, macOS and Linux alike
dict_path = "./Resources/dict.csv"
"""
CSV must be formatted as
word,c1,c2,c3,c4,c5,init_score
"""

'\nCSV must be formatted as\nword,c1,c2,c3,c4,c5,init_score\n'

In [101]:
# Initialize Dictionary
# Read the word list CSV at dict_path into a DataFrame
origin = pd.read_csv(dict_path)
# wordleBreaker stores its own copies of the frame, so `origin` is not modified by the solver
wb = wordleBreaker(origin)

In [105]:
# Any pool-limiting operation (anything that slices the word pool) causes the scoring bug to start occurring
# Here: keep only the words whose 2nd letter is "i"
wb.set_letter_to_character("i",2)

In [106]:
# Debug cell: rescore the pool, then redo the per-character score lookup by hand for the first word
test = wb.update_score_pool()
print(wb.word_pool.head(1))
scores = [
        # Each entry looks up the count for every word's letter, then .reset_index() renumbers the result 0..n-1
        # NOTE(review): after filtering, word_pool keeps its original index labels, so these renumbered
        # series presumably no longer line up with the pool's rows - confirm
        test['c1'][wb.word_pool["c1"]].reset_index()["c1"].head(1), # math gets off, likely where the bug exists
        test['c2'][wb.word_pool["c2"]].reset_index()["c2"].head(1),
        test['c3'][wb.word_pool["c3"]].reset_index()["c3"].head(1),
        test['c4'][wb.word_pool["c4"]].reset_index()["c4"].head(1),
        test['c5'][wb.word_pool["c5"]].reset_index()["c5"].head(1)
        # Direct single-letter lookups for comparison:
        # test['c1']['s'], # Correct Math (likely not bugged)
        # test['c2']['i'],
        # test['c3']['r'],
        # test['c4']['e'],
        # test['c5']['e']
]
print(f"scores\nc1 (s): {scores[0]} c2 (i): {scores[1]} c3 (r): {scores[2]} c4 (e): {scores[3]} c5 (e): {scores[4]}")
print(f"Sum: (note this sum is different from the function! Related issue! It is off by 3, same as the index value...) {scores[0]+scores[1]+scores[2]+scores[3]+scores[4]}")
# Display the per-letter count table returned by update_score_pool()
test
# wb.word_pool["c1"].reset_index()['c1'].head(1)
# wb.word_pool

        # self.word_pool['score'] = (
        #     values["c1"][self.word_pool["c1"]].reset_index()["c1"] + 
        #     values["c2"][self.word_pool["c2"]].reset_index()["c2"] + 
        #     values["c3"][self.word_pool["c3"]].reset_index()["c3"] + 
        #     values["c4"][self.word_pool["c4"]].reset_index()["c4"] + 
        #     values["c5"][self.word_pool["c5"]].reset_index()["c5"]
        # )

Empty DataFrame
Columns: [word, c1, c2, c3, c4, c5, score]
Index: []
scores
c1 (s): Series([], Name: c1, dtype: float64) c2 (i): Series([], Name: c2, dtype: float64) c3 (r): Series([], Name: c3, dtype: float64) c4 (e): Series([], Name: c4, dtype: float64) c5 (e): Series([], Name: c5, dtype: float64)
Sum: (note this sum is different from the function! Related issue! It is off by 3, same as the index value...) Series([], dtype: float64)


Unnamed: 0,c1,c2,c3,c4,c5
a,0.0,0.0,0.0,0.0,0.0
b,0.0,0.0,0.0,0.0,0.0
c,0.0,0.0,0.0,0.0,0.0
d,0.0,0.0,0.0,0.0,0.0
e,0.0,0.0,0.0,0.0,0.0
f,0.0,0.0,0.0,0.0,0.0
g,0.0,0.0,0.0,0.0,0.0
h,0.0,0.0,0.0,0.0,0.0
i,0.0,0.0,0.0,0.0,0.0
j,0.0,0.0,0.0,0.0,0.0


In [107]:
# Show the first 10 of the last 863 rows of the current pool
# NOTE(review): 863 looks like a hand-picked offset for this investigation - confirm or replace with a named constant
wb.word_pool.tail(863).head(10)

Unnamed: 0,word,c1,c2,c3,c4,c5,score
