## Chat GPT 4 Build

In [13]:
from collections import Counter, defaultdict

# Load the content of the provided text files
with open("five_letter_words.txt", "r") as file:
    five_letter_words = [word.strip().upper() for word in file.readlines()]

with open("OldWords.txt", "r") as file:
    OldWords = [word.strip().upper() for word in file.readlines()]

def remove_words(five_letter_words, OldWords):
    """Removes all words from `five_letter_words` that are also in `OldWords`."""
    return [word for word in five_letter_words if word not in OldWords]

five_letter_words = remove_words(five_letter_words, OldWords)

def count_letter_frequency(words_list):
    """Counts the frequency of each letter in the given list of words."""
    letter_frequency = Counter("".join(words_list))
    return sorted(letter_frequency.items(), key=lambda x: x[1], reverse=True)

top_common_letters = count_letter_frequency(five_letter_words)

def filter_words_based_on_letters(words_list, top_common_letters, num_letters=5):
    """Filters the words based on the most common letters and extracts unique letters."""
    filtered_words = words_list.copy()
    for letter, _ in top_common_letters[:num_letters]:
        filtered_words = [word for word in filtered_words if letter in word]
    
    unique_letters = set("".join(filtered_words))
    return filtered_words, list(unique_letters)

filtered_words, unique_letters = filter_words_based_on_letters(five_letter_words, top_common_letters)

def most_common_combinations(words_list, starting_letters, top_n=5):
    """Finds the most common two-letter combinations in the given list of words."""
    two_letter_combinations = Counter()
    for word in words_list:
        for i in range(len(word)):
            for j in range(i+1, len(word)):
                if word[i] in starting_letters and word[j] in starting_letters:
                    combo = tuple(sorted([word[i], word[j]]))
                    two_letter_combinations[combo] += 1
    return two_letter_combinations.most_common(top_n)


def filter_words_by_locked_positions(words_list, locked_positions):
    """Filters words that have the locked letters in the locked positions."""
    for position, letter in locked_positions.items():
        words_list = [word for word in words_list if len(word) >= position and word[position-1] == letter]
    return words_list

def filter_words_by_excluded_positions(words_list, excluded_positions):
    """Filters out words that have the specified letters in the excluded positions."""
    for position, letter in excluded_positions.items():
        words_list = [word for word in words_list if len(word) < position or word[position-1] != letter]
    return words_list


def filter_words_based_on_parameters_updated(words_list, letters_included, letters_not_included, locked_positions, excluded_positions):
    """Filters the word list based on the specified parameters."""
    # Filter by letters included
    for letter in letters_included:
        words_list = exclude_words_with_letters(words_list, letters_not_included)
    
    # Filter by letters not included
    words_list = exclude_words_with_letters(words_list, letters_not_included)
    
    # Filter by locked positions
    words_list = filter_words_by_locked_positions(words_list, locked_positions)
    
    # Filter by excluded positions
    words_list = filter_words_by_excluded_positions(words_list, excluded_positions)
    
    return words_list

# Renaming and defining the function for excluding words with certain letters
def exclude_words_with_letters(words_list, excluded_letters):
    """Excludes words that contain any of the letters marked as incorrect by the user."""
    for letter in excluded_letters:
        words_list = [word for word in words_list if letter not in word]
    return words_list

def exclude_words_with_letters(words_list, excluded_letters):
    """Excludes words that contain any of the letters marked as incorrect by the user."""
    for letter in excluded_letters:
        words_list = [word for word in words_list if letter not in word]
    return words_list


## Recomendations for next word

In [34]:
def compute_letter_scores(words_list):
    """Computes the frequency scores for each letter in the words list."""
    letter_frequency = Counter("".join(words_list))
    total_letters = sum(letter_frequency.values())
    
    # Normalize the letter scores
    letter_scores = {letter: freq / total_letters for letter, freq in letter_frequency.items()}
    return letter_scores

def compute_combination_scores(words_list, unique_letters):
    """Computes the scores for two-letter combinations in the words list."""
    two_letter_combinations = most_common_combinations(words_list, unique_letters, top_n=None)
    total_combinations = sum([count for _, count in two_letter_combinations])
    
    # Normalize the combination scores
    combination_scores = {combo: count / total_combinations for combo, count in two_letter_combinations}
    return combination_scores

def top_n_recommended_words_with_scores(words_list, top_common_letters, unique_letters, n=10):
    """Recommends the top n optimal words based on letter and combination scores and returns the breakdown."""
    letter_scores = compute_letter_scores(words_list)
    combination_scores = compute_combination_scores(words_list, unique_letters)
    
    word_scores = {}
    word_letter_scores = {}
    word_combination_scores = {}
    for word in words_list:
        # Letter score
        word_letter_score = sum([letter_scores.get(letter, 0) for letter in word])
        word_letter_scores[word] = round(word_letter_score, 3)
        
        # Combination score
        word_combination_score = 0
        for i in range(len(word)):
            for j in range(i+1, len(word)):
                combo = tuple(sorted([word[i], word[j]]))
                word_combination_score += combination_scores.get(combo, 0)
        word_combination_scores[word] = round(word_combination_score, 3)
                
        # Total score
        word_scores[word] = round(word_letter_score + word_combination_score, 3)
    
    # Get the top n words with the highest scores
    sorted_words = sorted(word_scores, key=word_scores.get, reverse=True)[:n]
    return sorted_words, [word_scores[word] for word in sorted_words], [word_letter_scores[word] for word in sorted_words], [word_combination_scores[word] for word in sorted_words]


In [33]:
# Set the variables
letters_included = ['A', 'E']
letters_not_included = ['R', 'O' , 'S', 'L', 'I', 'N', 'M', 'P', 'W', 'C'] # ['N', 'H' ,'E']
locked_positions = {1: 'F', 2: 'A', 4: 'E', 5: 'D'}
excluded_positions = {1: 'E', 3: 'A'}


# Filter the words based on the specified parameters using the updated function
filtered_words_updated = filter_words_based_on_parameters_updated(five_letter_words, letters_included, letters_not_included, locked_positions, excluded_positions)

# Recommend the next word
top_words, top_total_scores, top_letter_scores, top_combination_scores = top_n_recommended_words_with_scores(filtered_words_updated, top_common_letters, unique_letters)

# Display the results
print("Filtered words (first 10):", filtered_words_updated[:10])
print("Recommended next word:")
list(zip(top_words, top_total_scores, top_letter_scores, top_combination_scores))


Filtered words (first 10): ['FADED', 'FAKED', 'FATED', 'FAXED', 'FAYED', 'FAZED']
Recommended next word:


[('FADED', 2.067, 1.067, 1.0),
 ('FAKED', 1.867, 0.867, 1.0),
 ('FATED', 1.867, 0.867, 1.0),
 ('FAXED', 1.867, 0.867, 1.0),
 ('FAYED', 1.867, 0.867, 1.0),
 ('FAZED', 1.867, 0.867, 1.0)]

In [21]:
def breakdown_word_score(word, words_list, top_common_letters, unique_letters):
    """Breaks down the score of a word into its letter score, combination score, and total score."""
    letter_scores = compute_letter_scores(words_list)
    combination_scores = compute_combination_scores(words_list, unique_letters)
    
    # Letter score
    word_letter_score = sum([letter_scores.get(letter, 0) for letter in word])
    
    # Combination score
    word_combination_score = 0
    for i in range(len(word)):
        for j in range(i+1, len(word)):
            combo = tuple(sorted([word[i], word[j]]))
            word_combination_score += combination_scores.get(combo, 0)
    
    # Total score
    total_score = word_letter_score + word_combination_score
    
    return round(word_letter_score, 3), round(word_combination_score, 3), round(total_score, 3)

#Then, run thfollowing code to get the top 10 recommended words:
top_words_rounded, _ = top_n_recommended_words_no_repeats_rounded(filtered_words_updated, top_common_letters, unique_letters)


# Calculate the score breakdown for the top 10 recommended words
score_breakdowns = [breakdown_word_score(word, filtered_words_updated, top_common_letters, unique_letters) for word in top_words_rounded]

# Combine the words and their score breakdowns
score_breakdown_results = list(zip(top_words_rounded, score_breakdowns))

score_breakdown_results




NameError: name 'top_n_recommended_words_no_repeats_rounded' is not defined