In [None]:
import pandas as pd
from collections import Counter
import pprint as pp
import datetime as dt


In [None]:
class wordle_game:
    """Interactive helper that narrows a word list down to Wordle candidates.

    Intended flow: ``load_data`` -> ``prep_data`` -> ``recommend_next``, then
    ``new_round`` after every guess.

    Feedback strings carry one character per guessed letter:

    * ``'c'`` - the letter is at the correct position,
    * ``'y'`` - the letter is in the word, but not at this position,
    * anything else - the letter was marked as absent.
    """

    def __init__(self, game_num, filename="wordle_scores.csv"):
        """Create an empty game.

        Args:
            game_num: Identifier of the game; used as the row label when the
                results are written to ``filename``.
            filename: CSV file in which per-game results are tracked.
        """
        self.game_num = game_num
        self.round_num = 0
        # Per-position knowledge: the confirmed letter (if any) and the
        # letters known NOT to be at that position.
        self.word_letters = {
            pos: {'is': None, 'is_not': []} for pos in range(5)
        }
        self.good_letters = []
        self.bad_letters = []
        self.filename = filename
        # Key order defines the column order of the results CSV.
        self.results = {"date": "", "winning_round": 0}
        for n in range(1, 7):
            self.results[f"word{n}"] = ""
            self.results[f"w{n}_score"] = ""

    def load_data(self, folder, filename):
        """Read the word corpus CSV into ``self.words``.

        ``folder`` and ``filename`` are concatenated as-is, so ``folder`` must
        end with a path separator.
        """
        self.words = pd.read_csv(f"{folder}{filename}")
        print(f"Words in corpus {self.words.shape[0]}")

    def prep_data(self):
        """Build ``self.new_words``: the 5-letter words plus a ``prob`` column.

        ``prob`` is each word's ``count`` divided by the total count of all
        5-letter words. The row order of the corpus is kept, so the "highest
        frequency" printout assumes the corpus is already sorted by count.
        """
        is_five_letters = self.words['word'].str.len() == 5
        self.new_words = self.words[is_five_letters].copy().reset_index()

        counts = self.new_words['count']
        self.new_words['prob'] = counts / counts.sum()

        print(f"Count of 5 Letter Words {self.new_words.shape[0]}")
        print("Highest frequency 5 letter words")
        print(self.new_words.head(10))

        self.get_letter_freq()
        print("Highest frequency letters")
        print(self.letter_freq.most_common(10))

    def recommend_next(self, options=50, display=10, allow_dupe_letters=True):
        """Print the best-scoring words among the first ``options`` candidates.

        A word's score is the sum of the frequencies of its letters (among the
        remaining candidates) multiplied by the word's ``prob``, rounded to a
        whole number. The list is printed best score first; ties keep the
        candidate order.

        Args:
            options: How many leading candidate rows to score.
            display: How many scored words to print.
            allow_dupe_letters: When False, words with a repeated letter get a
                score of 0 and therefore sink to the end of the ranking.
        """
        print(f"\nRemaining word count {self.new_words.shape[0]}")

        ranked_words = []
        for _, this_row in self.new_words[:options].iterrows():
            this_word = this_row['word']
            score = 0
            if allow_dupe_letters or len(set(this_word)) == len(this_word):
                score = sum(self.letter_freq[letter] for letter in this_word)
            # float() turns numpy scalars into plain floats so the printout
            # stays readable.
            score = float(round(score * this_row['prob'], 0))
            ranked_words.append([this_word, score])

        # The original list was never ordered; rank it so the best word is
        # shown first (list.sort is stable, so ties keep corpus order).
        ranked_words.sort(key=lambda entry: entry[1], reverse=True)
        pp.pprint(ranked_words[:display])

    def get_letter_freq(self):
        """Recount how often each letter occurs in the remaining candidates."""
        self.letter_freq = Counter("".join(self.new_words['word']))

    def build_rules(self, word, results):
        """Fold the feedback for one guess into the letter constraints.

        Args:
            word: The guessed word (first 5 characters are used).
            results: Feedback string, one character per letter (see the class
                docstring for the codes).
        """
        for pos in range(5):
            letter = word[pos]
            mark = results[pos]
            slot = self.word_letters[pos]

            if mark == 'c':
                slot['is'] = letter
                if letter not in self.good_letters:
                    self.good_letters.append(letter)
            elif mark == 'y':
                if letter not in slot['is_not']:
                    slot['is_not'].append(letter)
                if letter not in self.good_letters:
                    self.good_letters.append(letter)
            else:
                if letter not in self.bad_letters:
                    self.bad_letters.append(letter)
                # Whether the letter is absent altogether or only a surplus
                # copy of a letter matched elsewhere, it is not at this
                # position.
                if letter not in slot['is_not']:
                    slot['is_not'].append(letter)

    def _keep_words(self, predicate):
        """Keep only the candidate rows whose word satisfies ``predicate``."""
        # astype(bool) keeps the mask boolean even when no candidates are
        # left, so the frame keeps its columns instead of raising later.
        mask = self.new_words['word'].map(predicate).astype(bool)
        self.new_words = self.new_words[mask]

    def remove_bad_words(self):
        """Drop candidates that contain a letter known to be absent.

        A letter that is also in ``good_letters`` (a repeated letter that was
        marked absent in one slot but matched in another) is not used for
        exclusion, otherwise the answer itself would be removed. Does nothing
        when there are no letters to exclude.
        """
        confirmed = {letter.lower() for letter in self.good_letters}
        excluded = {letter.lower() for letter in self.bad_letters} - confirmed
        if not excluded:
            return
        self._keep_words(lambda word: excluded.isdisjoint(word.lower()))

    def words_with_good_letters(self):
        """Keep only the candidates containing every letter in good_letters."""
        required = {letter.lower() for letter in self.good_letters}
        if not required:
            return
        self._keep_words(lambda word: required.issubset(word.lower()))

    def letters_in_correct_place(self):
        """Keep only the candidates consistent with the per-position rules.

        For each position: if the letter is confirmed, the word must have it
        there; otherwise the word must not have any of the letters ruled out
        for that position.
        """
        def fits(word):
            for pos in range(5):
                slot = self.word_letters[pos]
                if slot['is'] is not None:
                    if word[pos] != slot['is']:
                        return False
                elif word[pos] in slot['is_not']:
                    return False
            return True

        self._keep_words(fits)

    def show_results(self):
        """Print the round number and the constraints collected so far."""
        print(f"\nRound Number {self.round_num}")
        print("\n")
        pp.pprint(f"Correct= {self.word_letters}")
        pp.pprint(f"Good\t= {self.good_letters}")
        pp.pprint(f"Bad\t= {self.bad_letters}")

    def save_results(self):
        """Write this game's results to ``self.filename``.

        Rows are labelled by ``game_num``. Rows of other games already in the
        file are kept; an existing row for this game is replaced, because the
        method is called after every round of the same game.
        """
        this_results_df = pd.DataFrame(self.results, index=[self.game_num])
        try:
            # keep_default_na=False keeps the not-yet-played fields as empty
            # strings instead of NaN.
            previous = pd.read_csv(
                self.filename, index_col=0, keep_default_na=False
            )
        except (FileNotFoundError, pd.errors.EmptyDataError):
            previous = None

        if previous is not None:
            previous = previous[previous.index != self.game_num]

        if previous is None or previous.empty:
            combined = this_results_df
        else:
            combined = pd.concat([previous, this_results_df])
        combined.to_csv(self.filename)

    def new_round(self, word, results, round_num):
        """Record one guess, update the candidates and save the results.

        Args:
            word: The guessed word.
            results: Feedback string for the guess; ``'ccccc'`` means solved.
            round_num: Number of this round (1-6).
        """
        self.round_num = round_num
        self.results["date"] = dt.date.today()
        self.results[f"word{round_num}"] = word
        self.results[f"w{round_num}_score"] = results

        if results == 'ccccc':
            self.results["winning_round"] = self.round_num
            print(f"\nCongrats!  Victory in round {self.round_num}")
        elif round_num < 6:
            self.build_rules(word, results)
            self.remove_bad_words()
            self.words_with_good_letters()
            self.letters_in_correct_place()
            self.get_letter_freq()
            self.show_results()
        else:
            print(
                f"\nFailed to find word in 6 round limit :(\n"
            )

        self.save_results()


In [None]:
# Location of the word-frequency corpus. load_data() concatenates folder and
# file name directly, so the folder must end with a separator. A forward slash
# is used because it is accepted on Windows as well as on POSIX systems,
# whereas a backslash only works as a separator on Windows.
data_folder = 'data/'
kaggle_file = 'unigram_freq.csv'


In [None]:
# Start game 13: read the corpus, reduce it to 5-letter words and print an
# opening suggestion (words with a repeated letter are scored 0).
game = wordle_game(game_num=13)
game.load_data(folder=data_folder, filename=kaggle_file)
game.prep_data()
game.recommend_next(allow_dupe_letters=False)


In [None]:
# Round 1. Feedback codes: c = correct position, y = in the word but at a
# different position, anything else = letter marked absent.
round_num = 1
game.new_round(word='their', results='yyxxx', round_num=round_num)
game.recommend_next(
    options=20,
    display=20,
    allow_dupe_letters=False,
)


In [None]:
# Round 2: record the guess and list up to 20 of the remaining candidates.
game.new_round(word='south', results='xyxcc', round_num=2)
game.recommend_next(
    options=20,
    display=20,
)


In [None]:
# Round 3: 'ccccc' records the game as won in this round.
game.new_round(word='cloth', results='ccccc', round_num=3)
game.recommend_next(
    options=20,
    display=20,
)


In [None]:
# Round 4.
# NOTE(review): the same `game` object already recorded 'ccccc' for round 3,
# and the letter rules from the earlier guesses are still in effect here -
# confirm this cell belongs to this game and not to a different day's puzzle.
game.new_round(word='arise', results='xccxc', round_num=4)
game.recommend_next(
    options=20,
    display=20,
    allow_dupe_letters=False,
)


In [None]:
# Round 5: record the guess and list up to 20 of the remaining candidates.
game.new_round(word='urine', results='xcccc', round_num=5)
game.recommend_next(
    options=20,
    display=20,
)


In [None]:
# Round 6 (last allowed round): 'ccccc' records the win.
game.new_round(word='brine', results='ccccc', round_num=6)