## Imports

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Create a dataframe with the list of words
words_raw = pd.read_csv('5-letter-words.csv')
words = words_raw['words'].apply(list).apply(pd.Series).drop(np.arange(5, 51), axis=1)
words = words.apply(lambda x: x.astype(str).str.upper())

## Functions

In [4]:
def set_secret(secret):
    
    # Set the secret word
    if type(secret) == 'str':
        secret = pd.Series(secret.upper())
    else:
        secret = pd.Series(words.iloc[np.random.randint(0, len(words))])
   
    return(secret)    

In [23]:
# def check_word_against_secret(secret, results, guess): 
def check_word_against_secret(secret, guess): 

    checks = pd.Series(np.zeros(5)).astype(int)

    unmatched_letters_in_secret = secret.copy()

    # Check for correct letter in the correct space. Mark with a 2 in the check row.
    for letter_pos in range(0, 5):
        if guess[letter_pos] == secret[letter_pos]:
            checks[letter_pos] = 2
            unmatched_letters_in_secret[unmatched_letters_in_secret != secret[letter_pos]]
            
    # Iterate over the guess and check if the letters is anywhere else in the secret
    for letter_pos in range(0, 5):
        if checks[letter_pos] == 2:
            continue
        if guess[letter_pos] in unmatched_letters_in_secret:
            checks[letter_pos] = 1
            unmatched_letters_in_secret.remove(guess[letter_pos])

    return(checks)

In [30]:
def determine_remaining(possibles, results):
    guess = results.iloc[-2]
    checks_for_guess = results.iloc[-1]
    
    # Find the positions of letters that are correct
    pos_of_twos = [idx for idx, check in enumerate(results.iloc[-1]) if check == 2]
    
    # Find the positions of letters that are in the wrong position
    pos_of_ones = [idx for idx, check in enumerate(results.iloc[-1]) if check == 1]
    
    # Find the positions of letters that are not in the secret word
    pos_of_zeros = [idx for idx, check in enumerate(results.iloc[-1]) if check == 0]

    # Loop through the letters that are in the correct place (the "twos").
    for pos in pos_of_twos:
        # Keep only those words that have the letter in question in the correct place.
        possibles = possibles[possibles[pos] == guess[pos]]
       
    # Loop through those letters that are not in the word (the "zeros").
    for pos_outer in pos_of_zeros:
        # Check if the letter is unique in the guess. (If not, it may still be in the word.)
        if not(guess.drop([pos_outer]).isin([guess[pos_outer]]).any()):
            
            # The letter is unique in the word. Keep only those words 
            # that don't have the letter in in any position that has a zero or one check.
            # (The positions with a two check have already been taken care of.)
            for pos_inner in (pos_of_zeros + pos_of_ones):
                possibles = possibles[possibles[pos_inner] != guess[pos_outer]]
                
        else:
            # The letter is not unique. Filter out only those words that have the letter in question
            # in the position marked with a 0.
            possibles = possibles[possibles[pos_outer] != guess[pos_outer]]
        
    # Loop through the results and check them with the check function.
    for idx, possible in possibles.iterrows():        
            
        checks_for_possible = check_word_against_secret(possible, guess)
        
        if not((checks_for_possible == checks_for_guess).all()):
            possibles = possibles.drop(idx)
        
    return(possibles)

In [349]:
def determine_remaining_old(possible, results):
    # Find the positions of letters that are correct
    pos_of_twos = [idx for idx, check in enumerate(results.iloc[-1]) if check == 2]
    
    # Find the positions of letters that are in the wrong position
    pos_of_ones = [idx for idx, check in enumerate(results.iloc[-1]) if check == 1]
    
    # Find the positions of letters that are not in the secret word
    pos_of_zeros = [idx for idx, check in enumerate(results.iloc[-1]) if check == 0]

    # If a letter is on both pos_of_ones and pos_of_zeros, remove the respective positions from both.
    

    # Alternative to the above approach
    
    
    
    
    
    
    # Filter based on correct letter in correct position
    for position in pos_of_twos:
        possible = possible[possible[position] == results.iloc[-2, position]]

    
        
        
    # Like the filter below, this one probably gives incorrect results 
    # when there are two of the same letter in the guess.
    # I will probably have to loop over the remaining possible words and check them one by one,
    # using the check function. That will work, but will probably be much, much slower.
    # Filter based on correct letter in wrong position
    for position in pos_of_included_letters:
        possible = possible[possible[position] != results.iloc[-2, position]]
        
        possible_positions_in_secret = pos_of_included_letters + pos_of_excluded_letters
        possible_positions_in_secret.remove(position)
        condition = ''
        for possible_position_in_secret in possible_positions_in_secret:
            condition += "(possible[" + str(possible_position_in_secret) \
                + "] == results.iloc[-2, " + str(position) + "]) | "
        condition = condition.rstrip(" | ")
        possible = possible[eval(condition)]

        
    # This gives the wrong result if the secret is 'chalk' and the guess is 'alley'.
    # It's probably the two l:s that cause the problem.
    # The first l requires there to be an l in the word.
    # The second l is interpreted to forbid there to be an l in the word.
    # Filter based on letters not included in the secret.
    for position in pos_of_excluded_letters:
        possible_positions_in_secret = pos_of_included_letters + pos_of_excluded_letters
        
        condition = ''
        for possible_position_in_secret in possible_positions_in_secret:
            condition += "(possible[" + str(possible_position_in_secret) \
                + "] != '" + results.iloc[-2, position] + "') & "
        condition = condition.rstrip(" & ")
        possible = possible[eval(condition)]

    return(possible)        

## Play manually

In [405]:
# Set set_secret to None to let the secret word be set randomly.
secret = 'chalk'

# Set the secret word.
secret = set_secret(secret)

secret_df = pd.DataFrame(data = pd.Series(secret).values.reshape((1, 5)), \
                   index = pd.MultiIndex.from_tuples([(0, 'secr.')]), columns = [0, 1, 2, 3, 4])

print(secret_df)

# Initiate results with an empty list as a placeholder
results = []

# Keep asking for guesses until the secret word has been found.
while True:

    # Check the length of results to determine the number of the guess
    guess_number = int(1 + (len(results) / 2))        
    
    # Prompt the user for a guess.
    guess = pd.Series(list(input())).str.upper()
    
    # Check the guess
    checks = check_word_against_secret(secret, guess)
    
    # Make a hierchical indx for a new two-row df
    hier_index = pd.MultiIndex.from_tuples([(guess_number, 'guess'), (guess_number, 'check')])
    
    # Make a new two-row df for the new guess and checks
    guess_and_checks = pd.DataFrame(data = [guess, checks], index = hier_index)

    # Add the new guess and checks to the results
    if len(results) == 0:
        results = guess_and_checks.copy()
    else:
        results = pd.concat([results, guess_and_checks])    
    
    print(results)
    print("*" * 50)

    if sum(results.iloc[-1]) == 10:
        break
        
print("You found the secret word: ")
print("".join(secret))

KeyboardInterrupt: Interrupted by user

## Play automatically

In [38]:
# Set the intial list of possible words to the words df
possibles = words

# Set the secret word.
secret = possibles.iloc[np.random.randint(0, len(possibles))]

secret = set_secret(secret)
# print(secret.values.reshape((1, 5)), \
print(pd.DataFrame(data = secret.values.reshape((1, 5)), \
                   index = pd.MultiIndex.from_tuples([(0, 'secr.')]), columns = [0, 1, 2, 3, 4]))

# Initiate results with an empty list as a placeholder
results = []

# Keep asking for guesses until the secret word has been found.
while True:

    # Check the length of results to determine the number of the guess
    guess_number = int(1 + (len(results) / 2))        
    
    print(len(possibles))
    # Chose a word at random from the remaining possibilities.
    guess = possibles.iloc[np.random.randint(0, len(possibles))]
    
    # Check the guess
    checks = check_word_against_secret(secret, guess)
    
    # Make a hierchical indx for a new two-row df
    hier_index = pd.MultiIndex.from_tuples([(guess_number, 'guess'), (guess_number, 'check')])
    
    # Make a new two-row df for the new guess and checks
    guess_and_checks = pd.DataFrame(data = [guess, checks], index = hier_index)

    # Add the new guess and checks to the results
    if len(results) == 0:
        results = guess_and_checks.copy()
    else:
        results = pd.concat([results, guess_and_checks])    
    
    possibles = determine_remaining(possibles, results)
    
    print(results)

    if sum(results.iloc[-1]) == 10:
        break

results


         0  1  2  3  4
0 secr.  S  E  E  D  S
2500
         0  1  2  3  4
1 guess  B  I  N  G  E
  check  0  0  0  0  0
478
         0  1  2  3  4
1 guess  B  I  N  G  E
  check  0  0  0  0  0
2 guess  T  R  A  P  S
  check  0  0  0  0  2
34
         0  1  2  3  4
1 guess  B  I  N  G  E
  check  0  0  0  0  0
2 guess  T  R  A  P  S
  check  0  0  0  0  2
3 guess  C  O  W  L  S
  check  0  0  0  0  2
2
         0  1  2  3  4
1 guess  B  I  N  G  E
  check  0  0  0  0  0
2 guess  T  R  A  P  S
  check  0  0  0  0  2
3 guess  C  O  W  L  S
  check  0  0  0  0  2
4 guess  D  U  F  F  S
  check  0  0  0  0  2
0


ValueError: low >= high

## Find word in online game

## Select the best guess

In [12]:
# Limit the list to the first 100 words
words_limited = words.iloc[:100,:].copy()

words_limited['remaining # if secret'] = 0
words_limited['aver. remaining # for guess'] = 0


possible = words_limited

possible

# Set a random secret word
# result, possible = start_over(words, None)
# result

Unnamed: 0,0,1,2,3,4,remaining # if secret,aver. remaining # for guess
0,A,B,A,C,K,0,0
1,A,B,A,S,E,0,0
2,A,B,A,T,E,0,0
3,A,B,A,Y,A,0,0
4,A,B,B,E,Y,0,0
...,...,...,...,...,...,...,...
95,A,L,L,O,T,0,0
96,A,L,L,O,W,0,0
97,A,L,L,O,Y,0,0
98,A,L,L,Y,L,0,0


In [13]:
# for guess_idx in range(0, len(possible)):
for guess_idx in range(0, 1):
    guess = pd.DataFrame(possible.iloc[guess_idx]).transpose()
    print(guess)
    print("*" * 50)
    
    for secret_idx in range(0, len(possible)):
#         print(secret_idx)
        secret = possible.iloc[secret_idx]
        result = pd.DataFrame(secret).transpose()
        print("-" * 50)
        print(result)
        print("-" * 50)
        result.index = ['secret']
        
        result = check(result, guess)
        possible = determine_remaining(possible, result)
        
        print(len(possible))
        
        
        
    

   0  1  2  3  4 remaining # if secret aver. remaining # for guess
0  A  B  A  C  K                     0                           0
**************************************************
--------------------------------------------------
   0  1  2  3  4 remaining # if secret aver. remaining # for guess
0  A  B  A  C  K                     0                           0
--------------------------------------------------
1


IndexError: single positional indexer is out-of-bounds

In [88]:



for guess_idx in range(0, len(possible)):
    guess = possible.iloc[guess_idx]


    for secret_idx in range(0, 1):
        result, possible = start_over(words, secret_idx)

