In [24]:
# load up words from disk
with open('words.csv', 'r') as f:
    words = f.readlines()

In [25]:
# strip newline from each line
words = [word.strip('\n') for word in words]

In [26]:
# capture a count of the number of accepted Wordle words
total_wordle_words_count = len(words)
total_wordle_words_count 

12972

In [32]:
# initialize dictionaries tracking the character counts at each position
initial_counts = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0, 'f': 0, 'g': 0, 
                  'h': 0, 'i': 0, 'j': 0, 'k': 0, 'l': 0, 'm': 0, 'n': 0, 
                  'o': 0, 'p': 0, 'q': 0, 'r': 0, 's': 0, 't': 0, 'u': 0,
                  'v': 0, 'w': 0, 'x': 0, 'y': 0, 'z': 0}

first_character_counts = initial_counts.copy()
second_character_counts = initial_counts.copy()
third_character_counts = initial_counts.copy()
fourth_character_counts = initial_counts.copy()
fifth_character_counts = initial_counts.copy()

In [40]:
# build counts
for word in words:
    first_character_counts[word[0]] += 1
    second_character_counts[word[1]] += 1
    third_character_counts[word[2]] += 1    
    fourth_character_counts[word[3]] += 1    
    fifth_character_counts[word[4]] += 1    

In [44]:
# build frequency map
def calculate_frequency(count_map: dict, total_count: int):
    frequency_map = {}
    for character, count in count_map.items():
        frequency_map[character] = count / total_count

    return frequency_map

first_character_frequency = calculate_frequency(first_character_counts, total_wordle_words_count)
second_character_frequency = calculate_frequency(second_character_counts, total_wordle_words_count)
third_character_frequency = calculate_frequency(third_character_counts, total_wordle_words_count)
fourth_character_frequency = calculate_frequency(fourth_character_counts, total_wordle_words_count)
fifth_character_frequency = calculate_frequency(fifth_character_counts, total_wordle_words_count)

In [57]:
# score each word by expected matched character
def score(word, frequency_maps):
    
    score = 1
    
    for i in range(0, 5):
        character = word[i]
        frequency_map = frequency_maps[i]
        frequency = frequency_map[character]
        score *= frequency
        
    return score
        
frequency_maps = [first_character_frequency, 
                  second_character_frequency, 
                  third_character_frequency, 
                  fourth_character_frequency,
                  fifth_character_frequency]
scored_words = {}

for word in words:
    scored_words[word] = score(word, frequency_maps)

In [74]:
# rank words by frequency
ranked = [(k, v) for k, v in sorted(scored_words.items(), key=lambda item: -item[1])]

In [79]:
# show top 10
ranked[0:10]

[('sores', 0.003153188178667754),
 ('sanes', 0.0027394500191896733),
 ('sones', 0.002537289986841165),
 ('seres', 0.0024491366196904125),
 ('sales', 0.002409806655884692),
 ('soles', 0.0022319729344826844),
 ('sires', 0.002080562619798428),
 ('cares', 0.0020056712283722435),
 ('bares', 0.001977391699121876),
 ('senes', 0.0019707576806189966)]

In [65]:
# average number of green tiles - this is slow running (O(n^2))
def calculate_average_number_of_green_tiles(test_word, words):
    number_of_words = len(words)
    
    total_green_tiles = 0
    
    for word in words:
        for i in range(0, 5):
            if test_word[i] == word[i]:
                total_green_tiles += 1
                
    return total_green_tiles / number_of_words

green_tile_scores = {}

for word in words:
    average = calculate_average_number_of_green_tiles(word, words)
    green_tile_scores[word] = average

In [71]:
# rank words by green tiles scores
green_tile_ranked = [(k,v) for k, v in sorted(green_tile_scores.items(), key=lambda item: -item[1])]

In [80]:
# show top 10
green_tile_ranked[0:10]

[('sores', 0.8590810977489979),
 ('sanes', 0.8539161270428616),
 ('sales', 0.8449737897008942),
 ('sones', 0.8410422448350293),
 ('soles', 0.832099907493062),
 ('sates', 0.8270891150169596),
 ('seres', 0.8230033919210608),
 ('cares', 0.8223866790009251),
 ('bares', 0.8213845205057045),
 ('sames', 0.8189947579401788)]