In [1]:
## This program is generated to modified haz-wordz game in http://icanhazwordz.appspot.com ##

## Input: string that contains 16 random letters (upper case or lower case)
# suggest that input is always valid (contains only alphabet)
## Output: ALL matched anagrams in the given dictionary
## Output: one of the BEST anagrams (obtain the highest score)

# The rule of scoring is in http://icanhazwordz.appspot.com/help
# Regardless of upper / lower case. For example, apple is an angram for APPLEE

# n: length of input string; m: length of dictionary
# Time complexity: O(2^n * log(m)) 
# Space complexity: O(2^n + 2m)

In [2]:
class anagram_game:
    
    def __init__(self):
        # letters' points
        self.points = {'A':1,'B':1,'D':1,'E':1,'G':1,'I':1,'N':1,'O':1,'R':1,'S':1,'S':1,'T':1,'U':1,
                       'C':2,'F':2,'H':2,'L':2,'M':2,'P':2,'V':2,'W':2,'Y':2,
                       'J':3,'K':3,'Q':3,'X':3,'Z':3}  
        pass
    
    
    ## define pre-processing of a given dictionary ##
    def dictionary_processor(self, dictionary):
        
        sorted_dictionary = []
        
        # process each word in dict: upper(word) and sort(word)
        for word in dictionary:
            new_word = word.upper()
            sorted_word = ''.join(sorted(new_word))
            sorted_dictionary.append([sorted_word,word])
            
        # sort(whole dictionary)
        sorted_dictionary = sorted(sorted_dictionary)
        return sorted_dictionary
        pass      
    
    
    ## generate a set of ALL sub-strings for a given string ##
    def sub_strings_generator(self, word):
        
        substrs_set = []
        
        for i in range(len(word)):
            for c in combinations(word, i+1):
                sub_string = ''.join(sorted(c))
                substrs_set.append(sub_string)
        
        substrs_set = list(set(substrs_set))
        print('There are', len(substrs_set), 'sub-strings.')
        return substrs_set
        pass
    
    
    ## calculate the point for a given anagram ##
    def point_calculator(self, anagram):
        
        if not anagram:
            return 0
        
        else:
            point = 1
            i = 0
            
            while i < len(anagram):
                # upper(word) for convenient accessment to self.points
                ch = anagram[i].upper()
                
                if ch != 'Q':
                    point += self.points[ch]
                    i += 1
                else:
                    # special rule for Q: 'QU' worths 3 points treated as a whole
                    # (There's no word in dict that have a Q that isn't immediately followed by a U.)
                    point += self.points['Q']
                    i += 2
            
        point = point**2
        return point
        pass
    
    
    ## return INDEX of a anagram in the dictionary, if the anagram matches the given word ##
    ## using binary search ##
    def binary_search_dictionary(self, word, dictionary):
        
        left = 0
        right = len(dictionary) - 1  
    
        while left <= right:
            middle = (left + right) // 2  
            key = dictionary[middle][0]
        
            if key > word:
                right = middle - 1
            elif key < word:
                left = middle + 1
            else:
                return middle
        pass
    
    
    def solve(self, random_word, dictionary):
        
        # pre-process given letters
        random_word = random_word.upper()
        candidate_strs = self.sub_strings_generator(random_word)
        # pre-process dictionary
        sorted_dictionary = self.dictionary_processor(dictionary)
        
        anagrams = []
        max_point = 0
        best_anagram = ''
        
        for candidate in candidate_strs:
            anagram_index = self.binary_search_dictionary(candidate, sorted_dictionary)
            
            if anagram_index:
                new_anagram = sorted_dictionary[anagram_index][1]
                anagrams.append(new_anagram)      
                point = self.point_calculator(new_anagram)
                max_point = max(max_point, point)
                best_anagram = new_anagram if max_point == point else best_anagram
        
        return anagrams, best_anagram
        pass

In [3]:
if __name__ == '__main__':
    
    import re
    from itertools import permutations, combinations
    
    path = '/Users/keely/lectures/STEP2020/hw1/'
    dictionary_file = open(path + 'dictionary.txt','r')
    dictionary = dictionary_file.read().split('\n')
    
    ## sorted dictionary: type: List, len: 72413 ##
    #print(dictionary)
    #print(len(dictionary))
    dictionary_file.close

In [4]:
print('Input 16 letters: ')
letters = input()

Input 16 letters: 


 URBDGGGABVFYVMXQU


In [5]:
solution = anagram_game()
all_anagrams, best_anagram = solution.solve(letters, dictionary)

There are 27647 sub-strings.


In [6]:
print('===== Random letters =====\n', letters)
print('===== All mathced anagrams =====\n', all_anagrams)
print('There are', len(all_anagrams), 'anagrams.')
print('===== The BEST anagram =====\n', best_anagram)

===== Random letters =====
 URBDGGGABVFYVMXQU
===== All mathced anagrams =====
 ['dram', 'fax', 'FDR', 'gay', 'Axum', 'quad', 'quay', 'rumba', 'dumb', 'Gray', 'gamy', 'Maud', 'barf', 'Brady', 'dry', 'rub', 'rag', 'drug', 'augury', 'fray', 'guard', 'Ufa', 'bra', 'guy', 'garb', 'gab', 'fad', 'gabby', 'burg', 'yum', 'Bragg', 'vary', 'Guam', 'augur', 'fraud', 'Max', 'gravy', 'gad', 'far', 'Aug', 'Abby', 'bud', 'dug', 'bug', 'bard', 'Baum', 'Mary', 'buggy', 'gaudy', 'daub', 'Day', 'Davy', 'Amur', 'Fay', 'rugby', 'bray', 'ray', 'fag', 'yard', 'Babur', 'Rudy', 'drub', 'Urdu', 'dam', 'rum', 'grubby', 'buy', 'farm', 'Amy', 'bum', 'mud', 'fury', 'Grady', 'fry', 'barb', 'ruby', 'gum', 'muggy', 'dab', 'rug', 'gram', 'Byrd', 'drag', 'gag', 'guru', 'baggy', 'fur', 'drum', 'gym', 'bay', 'Mar', 'Marx']
There are 92 anagrams.
===== The BEST anagram =====
 Marx
