## Get all 5 Letter Words

In [36]:
import requests

def fetch_gist_content(gist_url):
    # Extract the gist ID from the URL
    gist_id = gist_url.split('/')[-1].split('.')[0]
    
    # Construct the URL for the raw content
    raw_url = f"https://gist.githubusercontent.com/dracos/{gist_id}/raw/"
    
    response = requests.get(raw_url)
    
    if response.status_code == 200:
        return string_to_list(response.text)
    else:
        return None

def string_to_list(input_string):
    # Split the string by newline to get a list of words and remove any leading/trailing whitespace
    return [word.upper() for word in input_string.strip().split("\n")]

# Example usage
five_letter_words = fetch_gist_content("https://gist.github.com/dracos/dd0668f281e685bad51479e5acaadb93")

first_word = five_letter_words[:2]
last_word = five_letter_words[-1]
print("First word:", first_word)
print("Last word:", last_word)


First word: ['AAHED', 'AALII']
Last word: ZYMIC


## Finds all used words.

In [37]:
import requests
from bs4 import BeautifulSoup

url = "https://www.rockpapershotgun.com/wordle-past-answers"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

OldWords = []
for li in soup.select("ul.inline li"):
  OldWords.append(li.text)

print(OldWords)

['ABACK', 'ABASE', 'ABATE', 'ABBEY', 'ABOUT', 'ABOVE', 'ABYSS', 'ACRID', 'ACUTE', 'ADMIT', 'ADOBE', 'ADOPT', 'ADORE', 'ADULT', 'AGAIN', 'AGAPE', 'AGATE', 'AGENT', 'AGILE', 'AGLOW', 'AGONY', 'AGREE', 'AHEAD', 'ALBUM', 'ALIEN', 'ALIKE', 'ALLOW', 'ALOFT', 'ALONE', 'ALOUD', 'ALPHA', 'ALTAR', 'ALTER', 'AMBER', 'AMISS', 'AMPLE', 'ANGEL', 'ANGER', 'ANGRY', 'ANODE', 'ANTIC', 'AORTA', 'APHID', 'APPLE', 'APPLY', 'APRON', 'APTLY', 'ARBOR', 'ARGUE', 'AROMA', 'ASIDE', 'ASKEW', 'ASSET', 'ATOLL', 'ATONE', 'AUDIO', 'AUDIT', 'AVAIL', 'AVERT', 'AWAIT', 'AWAKE', 'AWFUL', 'AXIOM', 'AZURE', 'BADGE', 'BADLY', 'BAGEL', 'BAKER', 'BALSA', 'BANAL', 'BARGE', 'BASIC', 'BATHE', 'BATON', 'BATTY', 'BAYOU', 'BEACH', 'BEADY', 'BEAST', 'BEEFY', 'BEGET', 'BEGIN', 'BEING', 'BELCH', 'BELIE', 'BELLY', 'BELOW', 'BENCH', 'BERET', 'BERTH', 'BESET', 'BINGE', 'BIOME', 'BIRCH', 'BIRTH', 'BLACK', 'BLAME', 'BLAND', 'BLEED', 'BLEEP', 'BLOKE', 'BLOWN', 'BLUFF', 'BLURB', 'BLURT', 'BLUSH', 'BOOBY', 'BOOST', 'BOOZE', 'BOOZY', 'BORAX', 

## Remove all the used words

In [38]:
def remove_words(five_letter_words, OldWords):
  """Removes all words from `five_letter_words` that are also in `words`."""

  print(f"Length of OldWords: {len(OldWords)}")
  print(f"Length of five_letter_words before update: {len(five_letter_words)}")

  for word in OldWords:
    if word in five_letter_words:
      five_letter_words.remove(word)

  print(f"Length of five_letter_words after update: {len(five_letter_words)}")


remove_words(five_letter_words, OldWords)


Length of OldWords: 856
Length of five_letter_words before update: 14855
Length of five_letter_words after update: 13999


## Find the most common letters.

In [39]:
from collections import Counter

def count_letter_frequency(words_list):
    letter_frequency = Counter("".join(words_list))
    return sorted(letter_frequency.items(), key=lambda x: x[1], reverse=True)

top_common_letters = count_letter_frequency(five_letter_words)

# Print the most common letters
print("Most Common Letters:")
for letter, count in top_common_letters:
    print(f"{letter}: {count}")


Most Common Letters:
S: 7102
E: 7003
A: 6743
O: 4910
R: 4365
I: 4146
L: 3526
T: 3420
N: 3282
U: 2762
D: 2596
P: 2305
M: 2298
Y: 2257
C: 2061
H: 1836
B: 1745
G: 1742
K: 1665
F: 1161
W: 1059
V: 750
Z: 485
J: 333
X: 309
Q: 134


## Keep only word matching top 10 letters.
Return the first non-empty list when exluding word matching the top 10 common letters in order

In [40]:
def filter_and_extract_unique_letters(words_list, top_common_letters):
    # Filtering words
    filtered_words = words_list.copy()  # Make a copy of the original list
    
    for item in top_common_letters:
        letter = item[0] if isinstance(item, tuple) else item  # Extract the actual letter from tuple if necessary
        temp_words = [word for word in filtered_words if letter in word]
        
        if temp_words:  # Check if temp_words is not empty
            filtered_words = temp_words
            print(letter)
            print(filtered_words[:5])

    # Extracting unique letters from the filtered words
    unique_letters = set("".join(filtered_words))
    
    return filtered_words, list(unique_letters)

# Usage
starting_words, starting_letters = filter_and_extract_unique_letters(five_letter_words, top_common_letters)
print(starting_words)
print(starting_letters)



S
['AAPAS', 'ABACS', 'ABASH', 'ABASK', 'ABBAS']
E
['ABBES', 'ABERS', 'ABETS', 'ABEYS', 'ABIES']
A
['ABBES', 'ABERS', 'ABETS', 'ABEYS', 'ABIES']
O
['AEONS', 'AEROS', 'ALOES', 'ALOSE', 'AROSE']
R
['AEROS', 'AROSE', 'SOARE']
['AEROS', 'AROSE', 'SOARE']
['A', 'E', 'O', 'R', 'S']


In [41]:
from collections import Counter, defaultdict

def most_common_combinations_v2(words_list, starting_words, top_n=5):
    # Create all possible combinations of two letters from starting_words
    two_letter_combinations = set()
    for word in starting_words:
        for i in range(len(word)):
            for j in range(i+1, len(word)):
                two_letter_combinations.add((word[i], word[j]))
    
    # Count the occurrences of each combination in words_list
    combination_counter = Counter()
    for word in words_list:
        for combo in two_letter_combinations:
            if combo[0] in word and combo[1] in word:
                combination_counter[combo] += 1
                
    return combination_counter.most_common(top_n)

def most_common_positions_v2(words_list, starting_words):
    # Dictionary to count positions for each letter
    position_counter = defaultdict(lambda: defaultdict(int))
    for word in words_list:
        for start_word in starting_words:
            for index, letter in enumerate(start_word):
                if letter in word:
                    # Adjusting the index to go from 1 to 5
                    position = word.index(letter) + 1
                    position_counter[letter][position] += 1
                    
    # Determine the most common position for each letter
    common_positions = {}
    for letter, positions in position_counter.items():
        common_positions[letter] = max(positions, key=positions.get)
        
    return common_positions

# Results
top_5_combinations = most_common_combinations_v2(five_letter_words, starting_words)
adjusted_positions = most_common_positions_v2(five_letter_words, starting_words)

top_5_combinations, adjusted_positions, starting_words


([(('A', 'S'), 2449),
  (('S', 'A'), 2449),
  (('S', 'E'), 2417),
  (('E', 'S'), 2417),
  (('R', 'E'), 1911)],
 {'A': 2, 'E': 4, 'S': 5, 'R': 3, 'O': 2},
 ['AEROS', 'AROSE', 'SOARE'])

In [42]:
from collections import Counter, defaultdict

def best_matching_words(combinations, positions, words, top_n=5):
    """
    Determine the top matching words based on the provided combinations and positions.
    
    Args:
    - combinations (list of tuples): The top letter combinations.
    - positions (dict): The most common positions for each letter.
    - words (list of str): The words to evaluate.
    - top_n (int): The number of top words to return.

    Returns:
    - list of tuples: The top matching words and their scores.
    """
    scores = {}
    for word in words:
        score = 0
        
        # Score based on two-letter combinations
        for i in range(len(word)):
            for j in range(i+1, len(word)):
                combination = (word[i], word[j])
                if combination in [combo[0] for combo in combinations]:
                    score += 1

        # Score based on letter positions
        for index, letter in enumerate(word, start=1):
            if positions.get(letter) == index:
                score += 1

        scores[word] = score

    # Sort the words based on their scores and return the top_n words
    sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    
    return sorted_words[:top_n]

# Using the provided variables to get the top 5 matching words
top_5_words_scores = best_matching_words([combo[0] for combo in top_5_combinations], adjusted_positions, starting_words)
top_5_words_scores


[('AEROS', 2), ('SOARE', 1), ('AROSE', 0)]

# Play Wordel

In [49]:
def filter_words_by_criteria(words_list, 
                             letters_included=None, 
                             letters_not_included=None, 
                             locked_positions=None, 
                             excluded_positions=None):
    """
    Filter words based on the provided criteria.
    
    Args:
    - words_list (list of str): The list of words to filter.
    - letters_included (list or None): Letters that must be present in the word.
    - letters_not_included (list or None): Letters that should not be present in the word.
    - locked_positions (dict or None): Dictionary specifying which letters must be in certain positions.
    - excluded_positions (dict or None): Dictionary specifying which letters must not be in certain positions.

    Returns:
    - list of str: The filtered list of words.
    """
    filtered_words = words_list.copy()
    
    # Include words that have all the letters from letters_included
    if letters_included:
        filtered_words = [word for word in filtered_words if all(letter in word for letter in letters_included)]
    
    # Exclude words that have any letter from letters_not_included
    if letters_not_included:
        filtered_words = [word for word in filtered_words if not any(letter in word for letter in letters_not_included)]
    
    # Include words that match the locked positions criteria
    if locked_positions:
        filtered_words = [word for word in filtered_words if all(word[position-1] == letter for position, letter in locked_positions.items())]
    
    # Exclude words that match the excluded positions criteria
    if excluded_positions:
        filtered_words = [word for word in filtered_words if not any(word[position-1] == letter for position, letter in excluded_positions.items())]

    return filtered_words

# Example usage
'''
sample_words = ["apple", "alien", "alike", "leaks", "peaks", "lemon", "melon"]
letters_included = ['a', 'e']
letters_not_included = ['p']
locked_positions = {1: 'a', 3: 'i'}
excluded_positions = {5: 'n'}
'''

letters_included = ['E', 'O' , 'T', 'M']
letters_not_included = ['A', 'R', 'S', 'L', 'N', 'H']
locked_positions = {2: 'E' ,5: 'O'}
excluded_positions = {1: 'M', 3: 'T'}


filtered_results = filter_words_by_criteria(five_letter_words, letters_included, letters_not_included, locked_positions, excluded_positions)
filtered_results[:10]


['TEMPO']

In [50]:
top_common_letters = count_letter_frequency(filtered_results)

# Update the most comon letter left
print("Most Common Letters:")
for letter, count in top_common_letters:
    print(f"{letter}: {count}")
    

# Usage
next_words, next_letters = filter_and_extract_unique_letters(five_letter_words, top_common_letters)
print(next_words)
print(next_letters)


top_5_combinations = most_common_combinations_v2(five_letter_words, next_words)
adjusted_positions = most_common_positions_v2(five_letter_words, next_words)

top_5_combinations, adjusted_positions, next_words

# Using the provided variables to get the top 5 matching words
top_5_words_scores = best_matching_words([combo[0] for combo in top_5_combinations], adjusted_positions, next_words)
top_5_words_scores


Most Common Letters:
T: 1
E: 1
M: 1
P: 1
O: 1
T
['AARTI', 'ABAFT', 'ABAHT', 'ABBOT', 'ABEAT']
E
['ABEAT', 'ABETS', 'ABLET', 'ABNET', 'ACETA']
M
['AMATE', 'AMENT', 'ARMET', 'COMTE', 'DEMIT']
P
['DEMPT', 'EMPTS', 'KEMPT', 'MPRET', 'MTEPE']
O
['TEMPO']
['TEMPO']
['T', 'E', 'M', 'P', 'O']


[('TEMPO', 1)]