In [96]:
"""
Made by Joe Geoghegan
-Work in Progress
"""
import pandas as pd

## Helper Class

In [97]:
class wordleBreaker:
    """
    Made by Joe Geoghegan
    -Work in Progress
    When provided a dictionary, provides tools to solve the New York Times game Wordle
    """
    # Class Variables
    word_pool=None
    origin=None

    # Class creation and management
    def __init__(self, origin):
        self.word_pool = origin.copy()
        self.origin = origin.copy()
        # self.update_score_pool()
    def reset(self):
        self.word_pool = self.origin.copy()
        self.update_score_pool()

    # Dictionary Manipulation Functions
    def remove_letter_from_character(self,letter,character_space):
        """
        Creates a mask where True if a word DOES NOT contains the given letter at the given character space
        Applies mask to that character row which turns the values we want to remove to NaN
        Then drop the NaN rows
        """
        character = "c"+str(character_space)
        self.word_pool[character] = self.word_pool[character][self.word_pool[character]!=letter]
        self.word_pool.dropna(inplace=True)
    
    def not_included_letter(self,letter):
        """
        Runs the remove_letter_from_character operation on all character spaces
        """
        self.remove_letter_from_character(letter,1)
        self.remove_letter_from_character(letter,2)
        self.remove_letter_from_character(letter,3)
        self.remove_letter_from_character(letter,4)
        self.remove_letter_from_character(letter,5)
    
    def set_letter_to_character(self,letter,character_space):
        """
        Filters the dictionary to only have words that would fit a GREEN case
        Creates a mask where True if a word DOES contains the given letter at the given character space
        Applies mask to that character row which turns the values we want to remove to NaN
        Then drop the NaN rows
        """
        character = "c"+str(character_space)
        self.word_pool[character] = self.word_pool[character][self.word_pool[character]==letter]
        self.word_pool.dropna(inplace=True)
        #@TODO REMOVE
        self.word_pool = self.word_pool.head(20)
    
    def filter_to_include_letter(self,letter):
        """
        Uses a mask for each character space that has True where given letter is present
        Creates a mask which combines all the given letter present masks
        """
        in_word = (
            self.word_pool['c1']==letter |
            self.word_pool['c2']==letter |
            self.word_pool['c3']==letter |
            self.word_pool['c4']==letter |
            self.word_pool['c5']==letter
            )
        self.word_pool = self.word_pool[in_word]
        self.word_pool.dropna(inplace=True)
    
    def yellow_filter(self, letter, character_space):
        """
        Filters the dictionary to only have words that would fit a YELLOW case
            Uses the filter_to_include_letter() and remove_letter_from_character() functions
        """
        self.filter_to_include_letter(letter)
        self.remove_letter_from_character(letter,character_space)

    # Guess Evaluation Functions
    def update_score_pool(self):
        """
        Populates/updates the score value to a rudamentary point system
        The score is a sum of character scores for each character in a word
        The character score is the count of the letter's occurances in the
            entire word pool's population at that character spot

        @TODO: Currently has a bug, NaN values are put in for scores the more the pool is sliced
                NOT an issue with value_counts().
                
        Bugged code:
        # Count all occurances of every letter in each character space
        values = pd.DataFrame({
            "c1":self.word_pool["c1"].value_counts(),
            "c2":self.word_pool["c2"].value_counts(),
            "c3":self.word_pool["c3"].value_counts(),
            "c4":self.word_pool["c4"].value_counts(),
            "c5":self.word_pool["c5"].value_counts()
        })
        # For each word's letters add space scores together
        self.word_pool['score'] = (
            values["c1"][self.word_pool["c1"]].reset_index()["c1"] + 
            values["c2"][self.word_pool["c2"]].reset_index()["c2"] + 
            values["c3"][self.word_pool["c3"]].reset_index()["c3"] + 
            values["c4"][self.word_pool["c4"]].reset_index()["c4"] + 
            values["c5"][self.word_pool["c5"]].reset_index()["c5"]
        )
        """
        # Create baseline values database so number of rows is consistent
        alphabet = {
            "a":0,"b":0,"c":0,"d":0,"e":0,"f":0,"g":0,"h":0,"i":0,
            "j":0,"k":0,"l":0,"m":0,"n":0,"o":0,"p":0,"q":0,"r":0,
            "s":0,"t":0,"u":0,"v":0,"w":0,"x":0,"y":0,"z":0
        }
        values = pd.DataFrame({
            'c1':alphabet.copy(),
            'c2':alphabet.copy(),
            'c3':alphabet.copy(),
            'c4':alphabet.copy(),
            'c5':alphabet.copy()
        })
        # Count all occurances of every letter in each character space
        values['c1'] = (values['c1'] + self.word_pool['c1'].value_counts()).fillna(0)
        values['c2'] = (values['c2'] + self.word_pool['c2'].value_counts()).fillna(0)
        values['c3'] = (values['c3'] + self.word_pool['c3'].value_counts()).fillna(0)
        values['c4'] = (values['c4'] + self.word_pool['c4'].value_counts()).fillna(0)
        values['c5'] = (values['c5'] + self.word_pool['c5'].value_counts()).fillna(0)

        # For each word's letters add space scores together
        self.word_pool['score'] = (
            values['c1'][self.word_pool['c1']].reset_index()['c1'] + 
            values['c2'][self.word_pool['c2']].reset_index()['c2'] + 
            values['c3'][self.word_pool['c3']].reset_index()['c3'] + 
            values['c4'][self.word_pool['c4']].reset_index()['c4'] + 
            values['c5'][self.word_pool['c5']].reset_index()['c5']
        )
        return values
    
    def unique_letter_words(self):
        """
        Provides the dictionary sliced to only include words with all unique letters
        """
        c1Match = ( (self.word_pool['c1']!=self.word_pool['c2']) &
                    (self.word_pool['c1']!=self.word_pool['c3']) &
                    (self.word_pool['c1']!=self.word_pool['c4']) & 
                    (self.word_pool['c1']!=self.word_pool['c5']) )
        c2Match = ( (self.word_pool['c2']!=self.word_pool['c3']) &
                    (self.word_pool['c2']!=self.word_pool['c4']) &
                    (self.word_pool['c2']!=self.word_pool['c5']) )
        c3Match = ( (self.word_pool['c3']!=self.word_pool['c4']) &
                    (self.word_pool['c3']!=self.word_pool['c5']) )
        c4Match = ( (self.word_pool['c4']!=self.word_pool['c5']) )
        return self.word_pool[c1Match & c2Match & c3Match & c4Match]

## Running

In [98]:
# Set dictionary path 
dict_path = ".\Resources\dict.csv"
"""
CSV must be formatted as
word,c1,c2,c3,c4,c5,init_score
"""

'\nCSV must be formatted as\nword,c1,c2,c3,c4,c5,init_score\n'

In [99]:
# Initialize Dictionary
origin = pd.read_csv(dict_path)
wb = wordleBreaker(origin)

In [100]:
wb.set_letter_to_character("i",2)
test = wb.update_score_pool()
print(wb.word_pool.head(1))
scores = [
        test['c1'][wb.word_pool["c1"].head(1)],
        test['c2'][wb.word_pool["c2"].head(1)],
        test['c3'][wb.word_pool["c3"].head(1)],
        test['c4'][wb.word_pool["c4"].head(1)],
        test['c5'][wb.word_pool["c5"].head(1)]
]
print(f"score c1 (s): {scores[0]}")
print(f"score c2 (i): {scores[1]}")
print(f"score c3 (r): {scores[2]}")
print(f"score c4 (e): {scores[3]}")
print(f"score c5 (e): {scores[4]}")
print(f"Sum: {scores[0]+scores[1]+scores[2]+scores[3]+scores[4]}")
test
# wb.word_pool

        # self.word_pool['score'] = (
        #     values["c1"][self.word_pool["c1"]] + 
        #     values["c2"][self.word_pool["c2"]] + 
        #     values["c3"][self.word_pool["c3"]] + 
        #     values["c4"][self.word_pool["c4"]] + 
        #     values["c5"][self.word_pool["c5"]]
        # )

    word c1 c2 c3 c4 c5  score
3  siree  s  i  r  e  e   64.0
score c1 (s): s    11.0
Name: c1, dtype: float64
score c2 (i): i    20.0
Name: c2, dtype: float64
score c3 (r): r    6.0
Name: c3, dtype: float64
score c4 (e): e    20.0
Name: c4, dtype: float64
score c5 (e): e    4.0
Name: c5, dtype: float64
Sum: e   NaN
i   NaN
r   NaN
s   NaN
dtype: float64


Unnamed: 0,c1,c2,c3,c4,c5
a,3.0,0.0,0.0,0.0,0.0
b,0.0,0.0,0.0,0.0,0.0
c,0.0,0.0,0.0,0.0,0.0
d,0.0,0.0,2.0,0.0,8.0
e,0.0,0.0,0.0,20.0,4.0
f,0.0,0.0,0.0,0.0,0.0
g,0.0,0.0,0.0,0.0,0.0
h,1.0,0.0,0.0,0.0,0.0
i,0.0,20.0,0.0,0.0,0.0
j,0.0,0.0,0.0,0.0,0.0


In [101]:
# wb.word_pool.tail(863).head(10)