# Wordle Bot & Virtual Wordle Environment

Eventually, this will be split into two functions. `WordleBot` will take the current state of the board and return its next best guess for the word. `Wordle` will be a virtual environment which takes a 'Target_Word' and a 'Next_Guess'. Greens, Yellows and Greys should be defined as globals which are only appended to in the latter function, not overwritten.

In [1]:
import pandas as pd
import re

# Wordle Bot

In [2]:
def _load_word_list(link):
    """Download the word list at *link* and return it as a flat list of strings."""
    # The list is expected to be bare words, one per line, with no header row.
    # header=None stops pandas from consuming the first word as a column name
    # (which would silently drop it from the candidates).
    frame = pd.read_csv(link, header=None)
    return [item for row in frame.values.tolist() for item in row
            if isinstance(item, str)]


def _matches_clues(word, Greens, Yellows, Repeated, Not_Repeated, Greys):
    """Return True when *word* is consistent with every clue gathered so far."""
    if len(word) != len(Greens):
        return False
    # Greens: the letter must sit exactly where it was confirmed.
    for position, green in enumerate(Greens):
        if green != '_' and word[position] != green:
            return False
    # Yellows: the letter must be present, but not in the slot where it was seen.
    for position, letters in enumerate(Yellows):
        for letter in letters or ():
            if letter not in word:
                return False
            if position < len(word) and word[position] == letter:
                return False
    # Greys: the letter must not appear anywhere ('_' is a placeholder).
    if any(grey != '_' and grey in word for grey in Greys):
        return False
    if any(word.count(letter) >= 2 for letter in Not_Repeated):
        return False
    if any(word.count(letter) < 2 for letter in Repeated):
        return False
    return True


def _score_candidates(candidates):
    """Return one similarity score per candidate, in the same order as *candidates*."""
    from collections import Counter

    # How many candidates contain each letter, and each letter at each position.
    # Summing these counts over a word's own letters/slots equals summing the
    # pairwise set intersections against every candidate, without the O(n^2) loop.
    letter_counts = Counter()
    placed_counts = Counter()
    for word in candidates:
        letter_counts.update(set(word))
        placed_counts.update(enumerate(word))

    # Penalty keyed by the number of ordered same-letter pairs in the word:
    # 0 = no repeats, 2 = one double, 4 = two doubles, 6 = one triple.
    # Anything heavier than a triple falls through to the 0.6 default.
    repeat_multiplier = {0: 1, 2: 0.9, 4: 0.8, 6: 0.7}

    scores = []
    for word in candidates:
        shared_letters = sum(letter_counts[letter] for letter in set(word))
        shared_places = sum(placed_counts[slot] for slot in enumerate(word))
        same_letter_pairs = sum(word.count(letter) - 1 for letter in word)
        multiplier = repeat_multiplier.get(same_letter_pairs, 0.6)
        scores.append((shared_letters + 2 * shared_places) * multiplier)
    return scores


def WordleBot(Greens, Yellows, Repeated, Not_Repeated, Greys, Choices, Words=None):
    """Suggest the next Wordle guess that fits the clues collected so far.

    Candidates are the words consistent with all clues. Each candidate is
    scored by how many letters (1 point) and letters-in-place (2 more points)
    it shares with every candidate, scaled down when it repeats letters; the
    highest-scoring candidate is recommended (first one wins on ties).

    Args:
        Greens: one entry per board position; the confirmed letter, or '_'
            when the position is still unknown.
        Yellows: one list per board position holding the letters that were
            yellow there (in the word, but not at that position).
        Repeated: letters known to occur at least twice.
        Not_Repeated: letters known to occur at most once.
        Greys: letters known to be absent; '_' entries are ignored.
        Choices: falsy to get a single recommendation, truthy to get the
            full list of remaining candidates.
        Words: optional iterable of lowercase candidate words. When None the
            word list is downloaded; pass a list to work offline or to avoid
            re-downloading on every call.

    Returns:
        A human-readable message: the recommendation, the solution when only
        one candidate is left, the candidate list, or a notice that no word
        matches.
    """
    if Words is None:
        link = 'https://raw.githubusercontent.com/tabatkins/wordle-list/main/words'
        Words = _load_word_list(link)

    output = [
        word for word in Words
        if _matches_clues(word, Greens, Yellows, Repeated, Not_Repeated, Greys)
    ]
    # The empty case takes priority over Choices, so it is reported even when
    # the caller asked for the full list.
    if not output:
        return 'No words match this set of conditions'
    if Choices:
        return f'The options are: {output}'
    if len(output) == 1:
        return f'{output[0]} is the solution'

    scores = _score_candidates(output)
    best_guess = output[scores.index(max(scores))]
    return f'Out of {len(output)} options, I think \'{best_guess}\' is the best choice'

The methodology behind this approach is as follows: assuming all the words are equally likely to come up, the best next guess is the word which most reduces the number of possible words that remain afterwards. Following this, I reason that we should choose a word which is most similar to all the other words on the list. 'Similarity' is determined in terms of sharing common letters, with bonus points if they are in the same place.

# Example usage

In [35]:
WordleBot(
    Greens=['b', '_', '_', '_', '_'],    # Green letters, by position ('_' = not known yet)
    Yellows=[['o'], ['k'], [], [], []],  # Yellow letters, listed under the position where they were seen
    Repeated=['b'],                      # Letters known to appear at least twice
    Not_Repeated=['o'],                  # Yellows or greens known to appear only once
    Greys=['t', 'r'],                    # Letters known not to appear
    Choices=False,                       # Choices=True lists every word that still fits the clues
)

'bobak is the solution'

# Misc