## Get all 5 Letter Words

In [25]:
import nltk

# Download the English words dataset if you haven't already
nltk.download('words')

# Import the English words
from nltk.corpus import words

# Function to get all 5-letter words
# Här får vi alla 5 bokstäver lpnga ord i listan "five_letter_words"
def get_5_letter_words():
  english_words = set(words.words())
  five_letter_words = [word.upper() for word in english_words if len(word) == 5 and word.isalpha()]


  return five_letter_words


if __name__ == "__main__":
    five_letter_words = get_5_letter_words()
    print(f"Total 5-letter words in English: {len(five_letter_words)}")
    print("Example 5-letter words:")
    print(five_letter_words[:10])  # Print the first 10 words as an example

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\jonca\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


Total 5-letter words in English: 10230
Example 5-letter words:
['KILIM', 'CABAS', 'GROFF', 'CHOIL', 'FLAIL', 'CHEEK', 'SIEVY', 'SHIRL', 'VALVA', 'NARES']


## Finds all used words.

In [26]:
import requests
from bs4 import BeautifulSoup

url = "https://www.rockpapershotgun.com/wordle-past-answers"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

OldWords = []
for li in soup.select("ul.inline li"):
  OldWords.append(li.text)

print(OldWords)

['ABACK', 'ABASE', 'ABATE', 'ABBEY', 'ABOUT', 'ABOVE', 'ABYSS', 'ACRID', 'ACUTE', 'ADMIT', 'ADOBE', 'ADOPT', 'ADORE', 'ADULT', 'AGAIN', 'AGAPE', 'AGATE', 'AGENT', 'AGILE', 'AGLOW', 'AGONY', 'AGREE', 'AHEAD', 'ALBUM', 'ALIEN', 'ALIKE', 'ALLOW', 'ALOFT', 'ALONE', 'ALOUD', 'ALPHA', 'ALTAR', 'ALTER', 'AMBER', 'AMISS', 'AMPLE', 'ANGEL', 'ANGER', 'ANGRY', 'ANODE', 'ANTIC', 'AORTA', 'APHID', 'APPLE', 'APPLY', 'APRON', 'APTLY', 'ARBOR', 'ARGUE', 'AROMA', 'ASIDE', 'ASKEW', 'ASSET', 'ATOLL', 'ATONE', 'AUDIO', 'AUDIT', 'AVAIL', 'AVERT', 'AWAIT', 'AWAKE', 'AWFUL', 'AXIOM', 'AZURE', 'BADGE', 'BADLY', 'BAGEL', 'BAKER', 'BALSA', 'BANAL', 'BARGE', 'BASIC', 'BATHE', 'BATON', 'BATTY', 'BAYOU', 'BEACH', 'BEADY', 'BEAST', 'BEEFY', 'BEGET', 'BEGIN', 'BEING', 'BELCH', 'BELIE', 'BELLY', 'BELOW', 'BENCH', 'BERET', 'BERTH', 'BESET', 'BINGE', 'BIOME', 'BIRCH', 'BIRTH', 'BLACK', 'BLAME', 'BLAND', 'BLEED', 'BLEEP', 'BLOKE', 'BLOWN', 'BLUFF', 'BLURB', 'BLURT', 'BLUSH', 'BOOBY', 'BOOST', 'BOOZE', 'BOOZY', 'BORAX', 

In [27]:
def remove_words(five_letter_words, OldWords):
  """Removes all words from `five_letter_words` that are also in `words`."""

  print(f"Length of OldWords: {len(OldWords)}")
  print(f"Length of five_letter_words before update: {len(five_letter_words)}")

  for word in OldWords:
    if word in five_letter_words:
      five_letter_words.remove(word)

  print(f"Length of five_letter_words after update: {len(five_letter_words)}")


remove_words(five_letter_words, OldWords)


Length of OldWords: 855
Length of five_letter_words before update: 10230
Length of five_letter_words after update: 9399


## Find the most common letters.

In [28]:
from collections import Counter

# Function to count letter frequency in 5-letter words
def count_letter_frequency(words_list):
    letter_frequency = Counter()
    for word in words_list:
        letter_frequency.update(word)
    return letter_frequency

if __name__ == "__main__":
    letter_frequency = count_letter_frequency(five_letter_words)
    
    # Sort the letter frequency by count (highest to lowest)
    sorted_frequency = sorted(letter_frequency.items(), key=lambda x: x[1], reverse=True)
    
    # Print the top 10 most common letters
    print("Top 10 Most Common Letters:")
    for letter, count in sorted_frequency[:10]:
        print(f"{letter}: {count}")

    # Extract the top 10 most common letters
    top_10_common_letters = [letter for letter, _ in sorted_frequency[:10]]


Top 10 Most Common Letters:
A: 5425
E: 4462
R: 3221
I: 3084
O: 3024
S: 2674
N: 2594
L: 2548
T: 2453
U: 2143


## Keep only word matching top 10 letters.
Return the first non-empty list when exluding word matching the top 10 common letters in order

In [29]:
def filter_words_by_top_10_common_letters(words_list, top_10_common_letters):
    filtered_words = words_list.copy()  # Make a copy of the original list
    
    for letter in top_10_common_letters:
        # Keep only words containing the current letter
        filtered_words_ = [word for word in filtered_words if letter in word]
        
        if len(filtered_words_) == 0:  # Check if filtered_words_ is empty
            continue
            ##return filtered_words
        
        filtered_words = filtered_words_
        print(letter)
        print(filtered_words[:5])
    
    return filtered_words


starting_words = filter_words_by_top_10_common_letters(five_letter_words, top_10_common_letters)
print(starting_words)


A
['CABAS', 'VALVA', 'NARES', 'PANOS', 'TUNCA']
E
['NARES', 'FREAM', 'DEOTA', 'PAYEE', 'AULAE']
R
['NARES', 'FREAM', 'FRAZE', 'TAREQ', 'BEARD']
I
['HAIRE', 'AIRER', 'MARIE', 'DEAIR', 'BEIRA']
S
['RAISE', 'ARISE', 'SERAI', 'ARIES']
['RAISE', 'ARISE', 'SERAI', 'ARIES']


In [30]:
def extract_unique_letters(starting_words):
    unique_letters = set()
    for word in starting_words:
        for letter in word:
            unique_letters.add(letter)
    return list(unique_letters)


starting_letters = extract_unique_letters(starting_words)
print(starting_letters)

['I', 'R', 'A', 'E', 'S']


In [31]:
from collections import defaultdict

def find_most_common_positions(unique_letters, five_letter_words):
    letter_positions = defaultdict(list)

    # Initialize the defaultdict with lists for each letter
    for letter in unique_letters:
        letter_positions[letter] = []

    # Iterate through each word in five_letter_words and record positions of letters
    for word in five_letter_words:
        for i, letter in enumerate(word):
            if letter in unique_letters:
                letter_positions[letter].append(i + 1)  # Add 1 to convert to 1-based indexing

    # Find the most common position for each letter
    most_common_positions = {}
    for letter, positions in letter_positions.items():
        if positions:
            most_common_position = max(set(positions), key=positions.count)
            most_common_positions[letter] = most_common_position

    return most_common_positions

# Usage
common_positions = find_most_common_positions(starting_letters, five_letter_words)
print(common_positions)
print(starting_words)


{'I': 2, 'R': 3, 'A': 2, 'E': 4, 'S': 1}
['RAISE', 'ARISE', 'SERAI', 'ARIES']


## Play Wordel

In [32]:
def filter_words_by_conditions(words_list, required_letters=None, position_conditions=None, do_not_use=None):
    filtered_words = []

    for word in words_list:
        # Check if the word is in the do_not_use list, and if so, skip it
        if do_not_use and any(letter in word for letter in do_not_use):
            continue

        # Check condition one: if required_letters is specified, ensure all letters are in the word
        if required_letters and not all(letter in word for letter in required_letters):
            continue
        
        # Check condition two: if position_conditions is specified, ensure letters are in the correct positions
        if position_conditions:
            valid_word = True
            for position, letter in position_conditions.items():
                position = int(position.lstrip("Pos"))  # Convert position to an integer
                if len(word) < position or word[position - 1] != letter:
                    valid_word = False
                    break
            if not valid_word:
                continue
        
        # If the word meets all conditions, add it to the filtered list
        filtered_words.append(word)

    return filtered_words

required_letters = ["S", "A"]
position_conditions = {}# {"Pos1": "S", "Pos2": "A"}
do_not_use = ["R", "I" ,"E"]

possible_words = filter_words_by_conditions(five_letter_words, required_letters, position_conditions, do_not_use)
print(possible_words)
print(len(possible_words))



['CABAS', 'PANOS', 'SATAN', 'KHASS', 'HAMSA', 'SHAWN', 'SHUBA', 'OSSAL', 'SANTA', 'VASAL', 'SNAFF', 'NASUS', 'ALOSA', 'TAPAS', 'PHYSA', 'MANUS', 'BOAST', 'TSUBA', 'SHONA', 'KADOS', 'ATLAS', 'TAMAS', 'TAMUS', 'ASTAY', 'SOMAL', 'PASHM', 'STAWN', 'PANSY', 'VAGAS', 'SAUNA', 'FAGUS', 'SANGA', 'YASHT', 'CLASH', 'SAGGY', 'CASCO', 'VALSA', 'SJAAK', 'SWANG', 'PASSO', 'MASHY', 'CAPSA', 'SHAPY', 'PASMO', 'PHASM', 'MANAS', 'SAWAH', 'PLASM', 'PLASH', 'ASOKA', 'DASYA', 'ASCOT', 'ANASA', 'STASH', 'VAGUS', 'CALAS', 'CHAPS', 'SULLA', 'SULFA', 'CADUS', 'PADUS', 'SHAUP', 'SALPA', 'MALUS', 'JUYAS', 'KUSHA', 'GASAN', 'NASCH', 'SASSY', 'STAAB', 'SHAKU', 'AULOS', 'TSUGA', 'SAMAS', 'QUASH', 'SADLY', 'SCAUL', 'AGSAM', 'SHAKO', 'SHANK', 'STOGA', 'BASAL', 'MASON', 'SCALT', 'PSHAV', 'PASHA', 'SCOAD', 'LOASA', 'ALNUS', 'SHAFT', 'SHANT', 'SHAUL', 'UNSAD', 'SCADS', 'SLANG', 'FOSSA', 'SABLY', 'SHOAL', 'JASON', 'SPALD', 'SNOGA', 'STALL', 'PATSY', 'SUMAC', 'SOMAL', 'SALON', 'ABASH', 'SANTO', 'SHAWY', 'AGUSH', 'KUSAN', 

In [35]:
#Update letter frequency
letter_frequency = count_letter_frequency(possible_words)

# Sort the letter frequency by count (highest to lowest)
sorted_frequency = sorted(letter_frequency.items(), key=lambda x: x[1], reverse=True)
  
# Print the top 10 most common letters
print("Top 10 Most Common Letters:")
for letter, count in sorted_frequency[:10]:
    print(f"{letter}: {count}")

# Extract the top 10 most common letters
top_10_common_letters = [letter for letter, _ in sorted_frequency[:10]]

# Get the word with the most common letters left
possible_words = filter_words_by_top_10_common_letters(five_letter_words, top_10_common_letters)
print(possible_words)


Top 10 Most Common Letters:
A: 1
L: 1
N: 1
U: 1
S: 1
A
['CABAS', 'VALVA', 'NARES', 'PANOS', 'TUNCA']
L
['VALVA', 'MORAL', 'FOLIA', 'VLACH', 'AULAE']
N
['LADIN', 'DUNAL', 'LANAZ', 'LANGO', 'LIMAN']
U
['DUNAL', 'LUCAN', 'ALNUS', 'UNLAP', 'NUCAL']
S
['ALNUS']
['ALNUS']


In [34]:
possible_words = filter_words_by_top_10_common_letters(five_letter_words, top_10_common_letters)
print(possible_words)

    


A
['CABAS', 'VALVA', 'NARES', 'PANOS', 'TUNCA']
S
['CABAS', 'NARES', 'PANOS', 'SATAN', 'SIZAR']
L
['OSSAL', 'VASAL', 'ALOSA', 'SALAR', 'URSAL']
U
['URSAL', 'SULLA', 'SULFA', 'MALUS', 'AULOS']
N
['ALNUS']
['ALNUS']
