## Chat GPT 4 Build

In [23]:
import requests
from bs4 import BeautifulSoup
from collections import Counter, defaultdict


### Import Word Lists

In [24]:
def fetch_gist_content(gist_url):
    # Extract the gist ID from the URL
    gist_id = gist_url.split('/')[-1].split('.')[0]
    
    # Construct the URL for the raw content
    raw_url = f"https://gist.githubusercontent.com/dracos/{gist_id}/raw/"
    
    response = requests.get(raw_url)
    
    if response.status_code == 200:
        return string_to_list(response.text)
    else:
        return None

def string_to_list(input_string):
    # Split the string by newline to get a list of words and remove any leading/trailing whitespace
    return [word.upper() for word in input_string.strip().split("\n")]

def get_old_words():
    url = "https://www.rockpapershotgun.com/wordle-past-answers"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")

    oldwords = []
    for li in soup.select("ul.inline li"):
        oldwords.append(li.text)
    return oldwords

def get_frequency_list():
    # Define the file name
    file_name = "five_letter_frequency_list.txt"

    # Initialize an empty dictionary
    word_frequency_dict = {}

    # Read the contents of the file and parse into a dictionary
    try:
        with open(file_name, "r") as file:
            lines = file.readlines()
            for line in lines:
                word, number = line.strip().split('\t')
                word_frequency_dict[word] = int(number)
    except FileNotFoundError:
        print(f"File '{file_name}' not found.")

    return word_frequency_dict

def remove_words(five_letter_words, OldWords):
    """Removes all words from `five_letter_words` that are also in `OldWords`."""
    return [word for word in five_letter_words if word not in OldWords]


### Starting word

### The program

In [25]:
# Get word_frequency_dict and turn it into a list of words
word_frequency_dict = get_frequency_list()
word_frequency_list = list(word_frequency_dict.keys())

oldwords = get_old_words()
current_word_list = remove_words(word_frequency_list, oldwords)

#Remove Words not accsepted by wordel
temp_words = ['AERIO', 'AROSE', 'CLINT'] # ['AERIO', 'CMNTS']
current_word_list = remove_words(current_word_list, temp_words)

#Calulate the letter frequency
letter_frequency = Counter("".join(current_word_list))


## First Run

In [26]:
# Now we can calculate the scores and proceed with finding the two best words:

def calculate_word_score(word, letter_frequency):
    # The score of a word is the sum of its letters' frequencies
    return sum(letter_frequency[letter] for letter in word)

# Calculate the score for each word
word_scores = {word: calculate_word_score(word, letter_frequency) for word in current_word_list}



# Sort the words by their scores in descending order
sorted_words_by_score = sorted(word_scores, key=word_scores.get, reverse=True)

# Find the top two words with the highest scores that do not share any letters
top_two_words = []
for word in sorted_words_by_score:
    if all(word.count(letter) == 1 for letter in word):  # Ensure the word has all unique letters
        if not top_two_words:  # If this is the first word, just add it
            top_two_words.append(word)
        else:
            first_word_letters = set(top_two_words[0])
            if not any(letter in first_word_letters for letter in word):  # Ensure no shared letters
                top_two_words.append(word)
                break  # We found our two words, so we can stop searching

# Print the top two words and their scores
for word in top_two_words:
    print(f"Word: {word}, Score: {word_scores[word]}")

print(letter_frequency)

Word: SEARO, Score: 79586
Word: LINDT, Score: 51246
Counter({'A': 21541, 'E': 18440, 'O': 14318, 'I': 13504, 'S': 13461, 'R': 11826, 'N': 11240, 'L': 10112, 'T': 9561, 'C': 7234, 'M': 6836, 'D': 6829, 'U': 6638, 'H': 5474, 'G': 5459, 'P': 5377, 'B': 4897, 'K': 4877, 'Y': 4472, 'F': 3128, 'V': 2546, 'W': 2324, 'Z': 1849, 'J': 1475, 'X': 1439, 'Q': 408})


## Second Run

In [31]:
#### Import temp list
# Specify the file path from which to import the words
input_file_path = "temp_word_frequency_dict.txt"

# Initialize an empty list to store the words
imported_words = []

# Open the file for reading
with open(input_file_path, 'r') as file:
    # Read each line (word) from the file and append it to the list
    for line in file:
        word = line.strip()  # Remove leading/trailing whitespace and newline characters
        imported_words.append(word)

####


# Calculate the score for each word
word_scores = {word: calculate_word_score(word, letter_frequency) for word in imported_words}
print('\n')


# Initialize first_word as your desired starting word
first_word = "AROSE"

# Find the best word that does not share any letters with first_word
best_word = None
for word in sorted_words_by_score:
    if all(word.count(letter) == 1 for letter in word):  # Ensure the word has all unique letters
        if best_word is None:
            # If best_word is not set, assign the first word that meets the criteria
            if not any(letter in first_word for letter in word):
                best_word = word
        else:
            second_word_letters = set(word)
            if not any(letter in second_word_letters for letter in first_word):
                best_word = word
                break  # We found the best word, so we can stop searching

# Print first_word followed by the best word and its score (if available)
print(f"First Word: {first_word}")

if best_word:
    if best_word in word_scores:
        print(f"Best Word: {best_word}, Score: {word_scores[best_word]}")
    else:
        print(f"Best Word: {best_word}, Score: N/A (Not in word_scores)")
else:
    print("No suitable word found for the given criteria.")

print(letter_frequency)







First Word: AROSE
Best Word: DLINT, Score: N/A (Not in word_scores)
Counter({'A': 21541, 'E': 18440, 'O': 14318, 'I': 13504, 'S': 13461, 'R': 11826, 'N': 11240, 'L': 10112, 'T': 9561, 'C': 7234, 'M': 6836, 'D': 6829, 'U': 6638, 'H': 5474, 'G': 5459, 'P': 5377, 'B': 4897, 'K': 4877, 'Y': 4472, 'F': 3128, 'V': 2546, 'W': 2324, 'Z': 1849, 'J': 1475, 'X': 1439, 'Q': 408})
