In [24]:
# load up words from disk
with open('words.csv', 'r') as f:
    words = f.readlines()

In [25]:
# strip newline from each line
words = [word.strip('\n') for word in words]

In [26]:
# capture a count of the number of accepted Wordle words
total_wordle_words_count = len(words)
total_wordle_words_count 

12972

In [32]:
# initialize dictionaries tracking the character counts at each position
initial_counts = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0, 'f': 0, 'g': 0, 
                  'h': 0, 'i': 0, 'j': 0, 'k': 0, 'l': 0, 'm': 0, 'n': 0, 
                  'o': 0, 'p': 0, 'q': 0, 'r': 0, 's': 0, 't': 0, 'u': 0,
                  'v': 0, 'w': 0, 'x': 0, 'y': 0, 'z': 0}

first_character_counts = initial_counts.copy()
second_character_counts = initial_counts.copy()
third_character_counts = initial_counts.copy()
fourth_character_counts = initial_counts.copy()
fifth_character_counts = initial_counts.copy()

In [40]:
# build counts
for word in words:
    first_character_counts[word[0]] += 1
    second_character_counts[word[1]] += 1
    third_character_counts[word[2]] += 1    
    fourth_character_counts[word[3]] += 1    
    fifth_character_counts[word[4]] += 1    

In [44]:
# build frequency map
def calculate_frequency(count_map: dict, total_count: int):
    frequency_map = {}
    for character, count in count_map.items():
        frequency_map[character] = count / total_count

    return frequency_map

first_character_frequency = calculate_frequency(first_character_counts, total_wordle_words_count)
second_character_frequency = calculate_frequency(second_character_counts, total_wordle_words_count)
third_character_frequency = calculate_frequency(third_character_counts, total_wordle_words_count)
fourth_character_frequency = calculate_frequency(fourth_character_counts, total_wordle_words_count)
fifth_character_frequency = calculate_frequency(fifth_character_counts, total_wordle_words_count)

In [49]:
# score each word by expected matched character
def score(word, frequency_maps):
    
    score = 1
    
    for i in range(0, 5):
        character = word[i]
        frequency_map = frequency_maps[i]
        frequency = frequency_map[character]
        score *= frequency
        
    return score
        
frequency_maps = [first_character_frequency, 
                  second_character_frequency, 
                  third_character_frequency, 
                  fourth_character_frequency,
                  fifth_character_frequency]
scored_words = {}

for word in words:
    scored_words[word] = score(word, frequency_maps)

In [52]:
# rank words by frequency
ranked = {k: v for k, v in sorted(scored_words.items(), key=lambda item: -item[1])}

In [53]:
ranked

{'sores': 0.003153188178667754,
 'sanes': 0.0027394500191896733,
 'sones': 0.002537289986841165,
 'seres': 0.0024491366196904125,
 'sales': 0.002409806655884692,
 'soles': 0.0022319729344826844,
 'sires': 0.002080562619798428,
 'cares': 0.0020056712283722435,
 'bares': 0.001977391699121876,
 'senes': 0.0019707576806189966,
 'pares': 0.001868624278928153,
 'cores': 0.0018576610228317374,
 'bores': 0.0018314684053731556,
 'sures': 0.0017857034198848403,
 'tares': 0.0017729089491576777,
 'sates': 0.001750519929274729,
 'seles': 0.0017336125655237646,
 'pores': 0.0017307275689939943,
 'sines': 0.00167417559723346,
 'tores': 0.0016420756329803324,
 'canes': 0.0016139124074714883,
 'banes': 0.0015911565926155996,
 'mares': 0.0015075164438849945,
 'panes': 0.0015036342277852585,
 'cones': 0.0014948123756342198,
 'dares': 0.001490113656653999,
 'bones': 0.0014737358453920885,
 'siles': 0.0014727187826286037,
 'soras': 0.0014553176209235789,
 'sames': 0.001452135850421082,
 'ceres': 0.001442877