In [24]:
# load up words from disk
with open('words.csv', 'r') as f:
    words = f.readlines()

In [25]:
# strip newline from each line
words = [word.strip('\n') for word in words]

In [26]:
# capture a count of the number of accepted Wordle words
total_wordle_words_count = len(words)
total_wordle_words_count 

12972

In [32]:
# initialize dictionaries tracking the character counts at each position
initial_counts = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0, 'f': 0, 'g': 0, 
                  'h': 0, 'i': 0, 'j': 0, 'k': 0, 'l': 0, 'm': 0, 'n': 0, 
                  'o': 0, 'p': 0, 'q': 0, 'r': 0, 's': 0, 't': 0, 'u': 0,
                  'v': 0, 'w': 0, 'x': 0, 'y': 0, 'z': 0}

first_character_counts = initial_counts.copy()
second_character_counts = initial_counts.copy()
third_character_counts = initial_counts.copy()
fourth_character_counts = initial_counts.copy()
fifth_character_counts = initial_counts.copy()

In [40]:
# build counts
for word in words:
    first_character_counts[word[0]] += 1
    second_character_counts[word[1]] += 1
    third_character_counts[word[2]] += 1    
    fourth_character_counts[word[3]] += 1    
    fifth_character_counts[word[4]] += 1    

In [44]:
# build frequency map
def calculate_frequency(count_map: dict, total_count: int):
    frequency_map = {}
    for character, count in count_map.items():
        frequency_map[character] = count / total_count

    return frequency_map

first_character_frequency = calculate_frequency(first_character_counts, total_wordle_words_count)
second_character_frequency = calculate_frequency(second_character_counts, total_wordle_words_count)
third_character_frequency = calculate_frequency(third_character_counts, total_wordle_words_count)
fourth_character_frequency = calculate_frequency(fourth_character_counts, total_wordle_words_count)
fifth_character_frequency = calculate_frequency(fifth_character_counts, total_wordle_words_count)

In [57]:
# score each word by expected matched character
def score(word, frequency_maps):
    
    score = 1
    
    for i in range(0, 5):
        character = word[i]
        frequency_map = frequency_maps[i]
        frequency = frequency_map[character]
        score *= frequency
        
    return score
        
frequency_maps = [first_character_frequency, 
                  second_character_frequency, 
                  third_character_frequency, 
                  fourth_character_frequency,
                  fifth_character_frequency]
scored_words = {}

for word in words:
    scored_words[word] = score(word, frequency_maps)

In [58]:
# rank words by frequency
ranked = {k: v for k, v in sorted(scored_words.items(), key=lambda item: -item[1])}

In [59]:
ranked

{'sores': 0.003153188178667754,
 'sanes': 0.0027394500191896733,
 'sones': 0.002537289986841165,
 'seres': 0.0024491366196904125,
 'sales': 0.002409806655884692,
 'soles': 0.0022319729344826844,
 'sires': 0.002080562619798428,
 'cares': 0.0020056712283722435,
 'bares': 0.001977391699121876,
 'senes': 0.0019707576806189966,
 'pares': 0.001868624278928153,
 'cores': 0.0018576610228317374,
 'bores': 0.0018314684053731556,
 'sures': 0.0017857034198848403,
 'tares': 0.0017729089491576777,
 'sates': 0.001750519929274729,
 'seles': 0.0017336125655237646,
 'pores': 0.0017307275689939943,
 'sines': 0.00167417559723346,
 'tores': 0.0016420756329803324,
 'canes': 0.0016139124074714883,
 'banes': 0.0015911565926155996,
 'mares': 0.0015075164438849945,
 'panes': 0.0015036342277852585,
 'cones': 0.0014948123756342198,
 'dares': 0.001490113656653999,
 'bones': 0.0014737358453920885,
 'siles': 0.0014727187826286037,
 'soras': 0.0014553176209235789,
 'sames': 0.001452135850421082,
 'ceres': 0.001442877

In [65]:
# average number of green tiles - this is slow running (O(n^2))
def calculate_average_number_of_green_tiles(test_word, words):
    number_of_words = len(words)
    
    total_green_tiles = 0
    
    for word in words:
        for i in range(0, 5):
            if test_word[i] == word[i]:
                total_green_tiles += 1
                
    return total_green_tiles / number_of_words

green_tile_scores = {}

for word in words:
    average = calculate_average_number_of_green_tiles(word, words)
    green_tile_scores[word] = average

In [66]:
green_tile_scores

{'cigar': 0.3404255319148936,
 'rebut': 0.2866944187480728,
 'sissy': 0.408418131359852,
 'humph': 0.22934011717545483,
 'awake': 0.32076780758556894,
 'blush': 0.2436786925686093,
 'focal': 0.35738513721862475,
 'evade': 0.27628738822078325,
 'naval': 0.33749614554424917,
 'serve': 0.4678538390379278,
 'heath': 0.35622880049337036,
 'dwarf': 0.22240209682392847,
 'model': 0.4611470860314524,
 'karma': 0.3792013567684243,
 'stink': 0.3008017267961764,
 'grade': 0.37056737588652483,
 'quiet': 0.4139685476410731,
 'bench': 0.33009559050262105,
 'abate': 0.3448967005858773,
 'feign': 0.32608695652173914,
 'major': 0.3371106999691644,
 'death': 0.3713382670366944,
 'fresh': 0.2548566142460685,
 'crust': 0.2907801418439716,
 'stool': 0.3061208757323466,
 'colon': 0.3926919518963922,
 'abase': 0.3154486586493987,
 'marry': 0.4759481961147086,
 'react': 0.356922602528523,
 'batty': 0.4615325316065372,
 'pride': 0.3733425840271354,
 'floss': 0.5214307739747148,
 'helix': 0.30180388529139685,
 

In [67]:
# rank words by green tiles scores
green_tile_ranked = {k: v for k, v in sorted(green_tile_scores.items(), key=lambda item: -item[1])}

In [68]:
green_tile_ranked

{'sores': 0.8590810977489979,
 'sanes': 0.8539161270428616,
 'sales': 0.8449737897008942,
 'sones': 0.8410422448350293,
 'soles': 0.832099907493062,
 'sates': 0.8270891150169596,
 'seres': 0.8230033919210608,
 'cares': 0.8223866790009251,
 'bares': 0.8213845205057045,
 'sames': 0.8189947579401788,
 'pares': 0.8175300647548566,
 'tares': 0.8141381436941104,
 'sades': 0.8096669750231268,
 'cores': 0.8095127967930928,
 'bores': 0.8085106382978723,
 'sages': 0.8076626580326858,
 'sabes': 0.8054270736971939,
 'senes': 0.8049645390070922,
 'mares': 0.8047332716620413,
 'pores': 0.8046561825470243,
 'canes': 0.8043478260869565,
 'dares': 0.8041165587419057,
 'sires': 0.8041165587419057,
 'banes': 0.8033456675917361,
 'tores': 0.8012642614862782,
 'sakes': 0.8005704594511255,
 'gares': 0.8004933703361086,
 'rares': 0.799722479185939,
 'panes': 0.7994912118408881,
 'saves': 0.7981036077705828,
 'fares': 0.7974098057354302,
 'soces': 0.7969472710453284,
 'seles': 0.7960222016651248,
 'lares': 0.