In [1]:
#Import Dependencies
import pandas as pd

In [2]:
#Read both word lists (no header row in either file), stack the allowed guesses
#on top of the possible answers under a fresh 0..n-1 index, and label the
#single column "word"
allowed_guesses_df = pd.read_table("wordle-allowed-guesses.txt", sep=" ", header=None)
possible_words_df = pd.read_table("wordle-answers-alphabetical.txt", sep=" ", header=None)
words_df = pd.concat(
    [allowed_guesses_df, possible_words_df],
    ignore_index=True,
).rename(columns={0: "word"})
words_df

Unnamed: 0,word
0,aahed
1,aalii
2,aargh
3,aarti
4,abaca
...,...
12967,young
12968,youth
12969,zebra
12970,zesty


In [3]:
#Pick the winning word for the Wordle game
#NOTE: the row is still hard-coded for testing; change to a random pick once testing is done
#The testing word contains a duplicate letter so that multiple scenarios get exercised
hidden_word = possible_words_df.iloc[485, 0]

#One-row frame holding the word plus one column per letter position (L1..L5) for later use
hidden_word_df = pd.DataFrame({"word": hidden_word}, index=[0])
for position in range(5):
    hidden_word_df[f"L{position + 1}"] = hidden_word_df["word"].astype(str).str[position]
hidden_word_df

Unnamed: 0,word,L1,L2,L3,L4,L5
0,crock,c,r,o,c,k


In [4]:
#Add one column per letter position (L1..L5), each holding the single character
#found at that position of the word
word_text = words_df["word"].astype(str)
for offset, column in enumerate(["L1", "L2", "L3", "L4", "L5"]):
    words_df[column] = word_text.str[offset]
words_df

Unnamed: 0,word,L1,L2,L3,L4,L5
0,aahed,a,a,h,e,d
1,aalii,a,a,l,i,i
2,aargh,a,a,r,g,h
3,aarti,a,a,r,t,i
4,abaca,a,b,a,c,a
...,...,...,...,...,...,...
12967,young,y,o,u,n,g
12968,youth,y,o,u,t,h
12969,zebra,z,e,b,r,a
12970,zesty,z,e,s,t,y


In [5]:
#Count, over the entire word list, how many words have each letter at each of
#the five positions. Every entry is a Series indexed by letter and named
#"count(N)" for position N.
position_counts = [
    words_df.groupby(f"L{position}")["word"]
    .count()
    .rename(f"count({position})")
    .rename_axis("letter")
    for position in range(1, 6)
]

#Per-position tables with columns ["letter", "count(N)"]
letter1_group, letter2_group, letter3_group, letter4_group, letter5_group = (
    counts.reset_index() for counts in position_counts
)

#Join all 5 position counts into a single table.
#The counts are aligned on the letter itself rather than on row position: if a
#letter never occurs at some position, that position's group has fewer rows,
#and a positional side-by-side concat would shift every later count onto the
#wrong letter. A letter missing at a position gets a count of 0 instead.
lettersgroup = (
    pd.concat(position_counts, axis=1)
    .fillna(0)
    .astype("int64")
    .sort_index()
    .rename_axis("letter")
    .reset_index()
)

#Add a sum of all position counts
count_columns = [f"count({position})" for position in range(1, 6)]
lettersgroup["totalcount"] = lettersgroup[count_columns].sum(axis=1)

In [6]:
#Slim the letter counts down to just the letter and its total count, which is
#all the scoring step needs
score_table = lettersgroup.loc[:, ["letter", "totalcount"]]
score_table

Unnamed: 0,letter,totalcount
0,a,5990
1,b,1627
2,c,2028
3,d,2453
4,e,6662
5,f,1115
6,g,1644
7,h,1760
8,i,3759
9,j,291


In [7]:
#Score each word based on letter frequency to choose the best guess.
#A word's score is the sum of `totalcount` (from the score table) over its
#DISTINCT letters. If a letter is a duplicate, its score is only added once,
#since the information added by repeating it is likely to be none or very
#little, and we would gain more from having better variety.
#
#The letter totals are looked up in a plain dict rather than by building and
#merging one-row DataFrames for every letter of every word, which gives the
#same scores without the per-row DataFrame overhead. A letter that is missing
#from the score table raises a KeyError.
letter_totals = dict(zip(score_table["letter"].tolist(), score_table["totalcount"].tolist()))

score_list = [
    sum(letter_totals[letter] for letter in set(word_letters))
    for word_letters in zip(
        words_df["L1"], words_df["L2"], words_df["L3"], words_df["L4"], words_df["L5"]
    )
]

In [8]:
#Attach the previously calculated scores to the word list as a new column,
#matched row-for-row against the existing index
words_df["Frequency_Score"] = pd.Series(score_list, index=words_df.index)
words_df

Unnamed: 0,word,L1,L2,L3,L4,L5,Frequency_Score
0,aahed,a,a,h,e,d,16865
1,aalii,a,a,l,i,i,13120
2,aargh,a,a,r,g,h,13552
3,aarti,a,a,r,t,i,17202
4,abaca,a,b,a,c,a,9645
...,...,...,...,...,...,...,...
12967,young,y,o,u,n,g,13619
12968,youth,y,o,u,t,h,14078
12969,zebra,z,e,b,r,a,18871
12970,zesty,z,e,s,t,y,19130


In [9]:
#Choose the best guess (the word with the highest frequency score) and create
#a dataframe split by letter, similar to the word list.
#idxmax() returns an index LABEL, so the word is fetched with label-based .loc
#(not positional .iloc); this stays correct even if the word list is later
#filtered or re-indexed.
maxid = words_df["Frequency_Score"].idxmax()
bestguess = words_df.loc[maxid, "word"]

#One-row frame holding the guess plus one column per letter position (L1..L5)
bestguess_df = pd.DataFrame({"word": bestguess}, index=[0])
for position in range(5):
    bestguess_df[f"L{position + 1}"] = bestguess_df["word"].astype(str).str[position]
bestguess_df

Unnamed: 0,word,L1,L2,L3,L4,L5
0,aeros,a,e,r,o,s


In [10]:
#Re-display the hidden word's letter breakdown built in an earlier cell
#(presumably for comparison with the best guess shown above)
hidden_word_df

Unnamed: 0,word,L1,L2,L3,L4,L5
0,crock,c,r,o,c,k
