## Chat GPT 4 Build

In [4]:
from collections import Counter, defaultdict

# Load the content of the provided text files
with open("five_letter_words.txt", "r") as file:
    five_letter_words = [word.strip().upper() for word in file.readlines()]

with open("OldWords.txt", "r") as file:
    OldWords = [word.strip().upper() for word in file.readlines()]

def remove_words(five_letter_words, OldWords):
    """Removes all words from `five_letter_words` that are also in `OldWords`."""
    return [word for word in five_letter_words if word not in OldWords]

five_letter_words = remove_words(five_letter_words, OldWords)

def count_letter_frequency(words_list):
    """Counts the frequency of each letter in the given list of words."""
    letter_frequency = Counter("".join(words_list))
    return sorted(letter_frequency.items(), key=lambda x: x[1], reverse=True)

top_common_letters = count_letter_frequency(five_letter_words)

def filter_words_based_on_letters(words_list, top_common_letters, num_letters=5):
    """Filters the words based on the most common letters and extracts unique letters."""
    filtered_words = words_list.copy()
    for letter, _ in top_common_letters[:num_letters]:
        filtered_words = [word for word in filtered_words if letter in word]
    
    unique_letters = set("".join(filtered_words))
    return filtered_words, list(unique_letters)

filtered_words, unique_letters = filter_words_based_on_letters(five_letter_words, top_common_letters)

def most_common_combinations(words_list, starting_letters, top_n=5):
    """Finds the most common two-letter combinations in the given list of words."""
    two_letter_combinations = Counter()
    for word in words_list:
        for i in range(len(word)):
            for j in range(i+1, len(word)):
                if word[i] in starting_letters and word[j] in starting_letters:
                    combo = tuple(sorted([word[i], word[j]]))
                    two_letter_combinations[combo] += 1
    return two_letter_combinations.most_common(top_n)

def most_common_positions(words_list, starting_letters):
    """Finds the most common position for each starting letter in the given list of words."""
    position_counter = defaultdict(lambda: defaultdict(int))
    for word in words_list:
        for letter in starting_letters:
            if letter in word:
                position = word.index(letter) + 1
                position_counter[letter][position] += 1
    common_positions = {letter: max(positions, key=positions.get) for letter, positions in position_counter.items()}
    return common_positions

def filter_words_by_locked_positions(words_list, locked_positions):
    """Filters words that have the locked letters in the locked positions."""
    for position, letter in locked_positions.items():
        words_list = [word for word in words_list if len(word) >= position and word[position-1] == letter]
    return words_list

def filter_words_by_excluded_positions(words_list, excluded_positions):
    """Filters out words that have the specified letters in the excluded positions."""
    for position, letter in excluded_positions.items():
        words_list = [word for word in words_list if len(word) < position or word[position-1] != letter]
    return words_list

def filter_words_based_on_parameters(words_list, letters_included, letters_not_included, locked_positions, excluded_positions):
    """Filters the word list based on the specified parameters."""
    for letter in letters_included:
        words_list = [word for word in words_list if letter in word]
    for letter in letters_not_included:
        words_list = [word for word in words_list if letter not in word]
    words_list = filter_words_by_locked_positions(words_list, locked_positions)
    words_list = filter_words_by_excluded_positions(words_list, excluded_positions)
    return words_list

def recommend_next_word(words_list, top_common_letters, unique_letters):
    """Recommends the next optimal word based on the filtered list of words."""
    # Use the most common combinations and positions logic from earlier
    filtered_words, _ = filter_words_based_on_letters(words_list, top_common_letters)
    # Return the first word from the filtered list as a recommendation
    return filtered_words[0] if filtered_words else None

def filter_words_based_on_parameters_updated(words_list, letters_included, letters_not_included, locked_positions, excluded_positions):
    """Filters the word list based on the specified parameters."""
    # Filter by letters included
    for letter in letters_included:
        words_list = exclude_words_with_letters(words_list, letters_not_included)
    
    # Filter by letters not included
    words_list = exclude_words_with_letters(words_list, letters_not_included)
    
    # Filter by locked positions
    words_list = filter_words_by_locked_positions(words_list, locked_positions)
    
    # Filter by excluded positions
    words_list = filter_words_by_excluded_positions(words_list, excluded_positions)
    
    return words_list

# Renaming and defining the function for excluding words with certain letters
def exclude_words_with_letters(words_list, excluded_letters):
    """Excludes words that contain any of the letters marked as incorrect by the user."""
    for letter in excluded_letters:
        words_list = [word for word in words_list if letter not in word]
    return words_list

def exclude_words_with_letters(words_list, excluded_letters):
    """Excludes words that contain any of the letters marked as incorrect by the user."""
    for letter in excluded_letters:
        words_list = [word for word in words_list if letter not in word]
    return words_list


## Recomendations for next word

In [5]:
def compute_letter_scores(words_list):
    """Computes the frequency scores for each letter in the words list."""
    letter_frequency = Counter("".join(words_list))
    total_letters = sum(letter_frequency.values())
    
    # Normalize the letter scores
    letter_scores = {letter: freq / total_letters for letter, freq in letter_frequency.items()}
    return letter_scores

def compute_combination_scores(words_list, unique_letters):
    """Computes the scores for two-letter combinations in the words list."""
    two_letter_combinations = most_common_combinations(words_list, unique_letters, top_n=None)
    total_combinations = sum([count for _, count in two_letter_combinations])
    
    # Normalize the combination scores
    combination_scores = {combo: count / total_combinations for combo, count in two_letter_combinations}
    return combination_scores

def recommend_optimized_next_word_no_repeats(words_list, top_common_letters, unique_letters):
    """Recommends the next optimal word based on letter and combination scores, avoiding repeated letters."""
    letter_scores = compute_letter_scores(words_list)
    combination_scores = compute_combination_scores(words_list, unique_letters)
    
    word_scores = {}
    for word in words_list:
        # Check for repeated letters and skip the word if any repeats are found
        if len(word) > len(set(word)):
            continue
        
        # Letter score
        word_score = sum([letter_scores.get(letter, 0) for letter in word])
        
        # Combination score
        for i in range(len(word)):
            for j in range(i+1, len(word)):
                combo = tuple(sorted([word[i], word[j]]))
                word_score += combination_scores.get(combo, 0)
                
        word_scores[word] = word_score
    
    # Return the word with the highest score
    return max(word_scores, key=word_scores.get) if word_scores else None

def top_n_recommended_words_no_repeats(words_list, top_common_letters, unique_letters, n=10):
    """Recommends the top n optimal words based on letter and combination scores, avoiding repeated letters."""
    letter_scores = compute_letter_scores(words_list)
    combination_scores = compute_combination_scores(words_list, unique_letters)
    
    word_scores = {}
    for word in words_list:
        # Check for repeated letters and skip the word if any repeats are found
        if len(word) > len(set(word)):
            continue
        
        # Letter score
        word_score = sum([letter_scores.get(letter, 0) for letter in word])
        
        # Combination score
        for i in range(len(word)):
            for j in range(i+1, len(word)):
                combo = tuple(sorted([word[i], word[j]]))
                word_score += combination_scores.get(combo, 0)
                
        word_scores[word] = word_score
    
    # Return the top n words with the highest scores
    sorted_words = sorted(word_scores, key=word_scores.get, reverse=True)
    return sorted_words[:n], [word_scores[word] for word in sorted_words[:n]]




In [8]:
# Set the variables
letters_included = ['A', 'S', 'E']
letters_not_included = ['R', 'O'] # ['N', 'H' ,'E']
locked_positions = {4: 'S', 5: 'E'}
excluded_positions = {1: 'A'}


# Filter the words based on the specified parameters using the updated function
filtered_words_updated = filter_words_based_on_parameters_updated(five_letter_words, letters_included, letters_not_included, locked_positions, excluded_positions)

# Recommend the next word
recommended_word = top_n_recommended_words_no_repeats(filtered_words_updated, top_common_letters, unique_letters)

# Display the results
print("Filtered words (first 10):", filtered_words_updated[:10])
print("Recommended next word:")
top_words, top_scores = top_n_recommended_words_no_repeats(filtered_words_updated, top_common_letters, unique_letters)
list(zip(top_words, top_scores))


Filtered words (first 10): ['BASSE', 'BLASE', 'BULSE', 'CAESE', 'CAUSE', 'CEASE', 'CENSE', 'CESSE', 'CHASE', 'CHUSE']
Recommended next word:


[('CAUSE', 1.4916535433070865),
 ('HAUSE', 1.4916535433070865),
 ('MANSE', 1.4916535433070865),
 ('HALSE', 1.4885766202301633),
 ('HANSE', 1.4885766202301633),
 ('DANSE', 1.4762689279224712),
 ('FALSE', 1.4762689279224712),
 ('LYASE', 1.4762689279224712),
 ('BLASE', 1.473192004845548),
 ('MAISE', 1.473192004845548)]