## Get all 5 Letter Words

In [30]:
import nltk

# Download the English words dataset if you haven't already
nltk.download('words')

# Import the English words
from nltk.corpus import words

# Function to get all 5-letter words
# Här får vi alla 5 bokstäver lpnga ord i listan "five_letter_words"
def get_5_letter_words():
    english_words = set(words.words())
    five_letter_words = [word.lower() for word in english_words if len(word) == 5 and word.isalpha()]
    return five_letter_words

if __name__ == "__main__":
    five_letter_words = get_5_letter_words()
    print(f"Total 5-letter words in English: {len(five_letter_words)}")
    print("Example 5-letter words:")
    print(five_letter_words[:10])  # Print the first 10 words as an example

Total 5-letter words in English: 10230
Example 5-letter words:
['plebe', 'salic', 'melos', 'bruzz', 'nintu', 'tango', 'doric', 'slane', 'otate', 'rower']


[nltk_data] Downloading package words to
[nltk_data]     C:\Users\jonca\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


## Find the most common letters.

In [31]:
from collections import Counter

# Function to count letter frequency in 5-letter words
def count_letter_frequency(words_list):
    letter_frequency = Counter()
    for word in words_list:
        letter_frequency.update(word)
    return letter_frequency

if __name__ == "__main__":
    letter_frequency = count_letter_frequency(five_letter_words)
    
    # Sort the letter frequency by count (highest to lowest)
    sorted_frequency = sorted(letter_frequency.items(), key=lambda x: x[1], reverse=True)
    
    # Print the top 10 most common letters
    print("Top 10 Most Common Letters:")
    for letter, count in sorted_frequency[:10]:
        print(f"{letter}: {count}")

    # Extract the top 10 most common letters
    top_10_common_letters = [letter for letter, _ in sorted_frequency[:10]]


Top 10 Most Common Letters:
a: 5803
e: 4897
r: 3558
o: 3315
i: 3312
s: 2885
l: 2796
n: 2783
t: 2733
u: 2300


## Keep only word matching the top 10 letters

In [32]:
def filter_words_by_top_10_common_letters(words_list, top_10_common_letters):
    filtered_words = words_list.copy()  # Make a copy of the original list
    
    for letter in top_10_common_letters:
        # Keep only words containing the current letter
        filtered_words_ = [word for word in filtered_words if letter in word]
        
        if len(filtered_words_) == 0:  # Check if filtered_words_ is empty
            return filtered_words
        
        filtered_words = filtered_words_
        print(letter)
        print(filtered_words[:5])
    
    return filtered_words


starting_words = filter_words_by_top_10_common_letters(five_letter_words, top_10_common_letters)
print(starting_words)


a
['salic', 'tango', 'slane', 'otate', 'ectal']
e
['slane', 'otate', 'ectal', 'antes', 'asale']
r
['ceral', 'breba', 'razee', 'grate', 'rearm']
o
['oread', 'orate', 'orage', 'arose', 'opera']
['oread', 'orate', 'orage', 'arose', 'opera', 'adore', 'ocrea', 'oared', 'afore', 'leora', 'oreas']


In [33]:
def filter_words_by_top_10_common_letters(words_list, top_10_common_letters):
    filtered_words = words_list.copy()  # Make a copy of the original list
    
    for letter in top_10_common_letters:
        # Keep only words containing the current letter
        filtered_words_ = [word for word in filtered_words if letter in word]
        
        if len(filtered_words_) == 0:  # Check if filtered_words_ is empty
            continue
            ##return filtered_words
        
        filtered_words = filtered_words_
        print(letter)
        print(filtered_words[:5])
    
    return filtered_words


starting_words = filter_words_by_top_10_common_letters(five_letter_words, top_10_common_letters)
print(starting_words)


a
['salic', 'tango', 'slane', 'otate', 'ectal']
e
['slane', 'otate', 'ectal', 'antes', 'asale']
r
['ceral', 'breba', 'razee', 'grate', 'rearm']
o
['oread', 'orate', 'orage', 'arose', 'opera']
s
['arose', 'oreas']
['arose', 'oreas']


In [34]:
def extract_unique_letters(starting_words):
    unique_letters = set()
    for word in starting_words:
        for letter in word:
            unique_letters.add(letter)
    return list(unique_letters)





starting_letters = extract_unique_letters(starting_words)
print(starting_letters)

['o', 'r', 'e', 'a', 's']


In [35]:
from collections import defaultdict

def find_most_common_positions(unique_letters, five_letter_words):
    letter_positions = defaultdict(list)

    # Initialize the defaultdict with lists for each letter
    for letter in unique_letters:
        letter_positions[letter] = []

    # Iterate through each word in five_letter_words and record positions of letters
    for word in five_letter_words:
        for i, letter in enumerate(word):
            if letter in unique_letters:
                letter_positions[letter].append(i + 1)  # Add 1 to convert to 1-based indexing

    # Find the most common position for each letter
    most_common_positions = {}
    for letter, positions in letter_positions.items():
        if positions:
            most_common_position = max(set(positions), key=positions.count)
            most_common_positions[letter] = most_common_position

    return most_common_positions

# Usage
common_positions = find_most_common_positions(starting_letters, five_letter_words)
print(common_positions)
print(starting_words)


{'o': 2, 'r': 3, 'e': 5, 'a': 2, 's': 1}
['arose', 'oreas']


In [36]:
# Example usage
five_letter_words = ["apple", "banana", "cherry", "date"]
unique_letters = ["a", "b", "c", "d", "e", "h", "l", "n", "p", "r", "t"]
common_positions = find_most_common_positions(unique_letters, five_letter_words)
print(common_positions)


{'a': 2, 'b': 1, 'c': 1, 'd': 1, 'e': 3, 'h': 2, 'l': 4, 'n': 3, 'p': 2, 'r': 4, 't': 3}


In [40]:
def filter_words_by_conditions(words_list, required_letters=None, position_conditions=None):
    filtered_words = []

    for word in words_list:
        # Check condition one: if required_letters is specified, ensure all letters are in the word
        if required_letters and not all(letter in word for letter in required_letters):
            continue
        
        # Check condition two: if position_conditions is specified, ensure letters are in the correct positions
        if position_conditions:
            valid_word = True
            for position, letter in position_conditions.items():
                position = int(position.lstrip("Pos"))  # Convert position to an integer
                if len(word) < position or word[position - 1] != letter:
                    valid_word = False
                    break
            if not valid_word:
                continue
        
        # If the word meets both conditions, add it to the filtered list
        filtered_words.append(word)

    return filtered_words

# Example usage:
five_letter_words = ["apple", "banana", "cherry", "date", "apricot"]
required_letters = ["a"]
position_conditions = {"Pos1": "a"}

filtered_words = filter_words_by_conditions(five_letter_words, required_letters, position_conditions)
print(filtered_words)



['apple', 'apricot']


In [38]:
# Example usage:
five_letter_words = ["apple", "banana", "cherry", "date", "apricot"]
required_letters = ["y", "e"]   #["y", "e"]
position_conditions = {"Pos1": "a", "Pos3": "p"}        #{"Pos1": "a", "Pos3": "p"}

filtered_words = filter_words_by_conditions(five_letter_words, required_letters, position_conditions)
print(filtered_words)

['apple']


In [55]:
#use this logic to create a score for all words remaning after the clues has been added.
# Remeber to use the latest lists 

# Your list of letter-count tuples
#letter_counts = [('a', 5803), ('e', 4897), ('r', 3558), ('o', 3315), ('i', 3312)]

# Create a dictionary with points assigned to letters based on their order in the list
letter_points = {letter: index + 1 for index, (letter, _) in enumerate(sorted_frequency)}

print(letter_points)

{'a': 1, 'e': 2, 'r': 3, 'o': 4, 'i': 5, 's': 6, 'l': 7, 'n': 8, 't': 9, 'u': 10, 'y': 11, 'c': 12, 'd': 13, 'm': 14, 'h': 15, 'p': 16, 'b': 17, 'g': 18, 'k': 19, 'w': 20, 'f': 21, 'v': 22, 'z': 23, 'j': 24, 'x': 25, 'q': 26}
