In [1]:
## This program is to generate ALL anagrams for a given word/string ##
## For example: all anagrams for Appleee

## Input: string that contains random letters (upper case or lower case)
# input that contains NOT ONLY alphabet will be regarded as invalid
## Output: ALL matched anagrams in the given dictionary

# Regardless of upper / lower case. For example, apple is an angram for Applee

In [2]:
## Method 1: Recap
# Step 1: sort(each word in dictionary)
# Step 2: Binary search the sort(sub-strings of random_word) in a sort(dictionary)

# n: length of input string; m: length of dictionary
# Time complexity: O(2^n * log(m)) 
# Space complexity: O(2^n + 2m)

In [3]:
class anagram_1:
    
    def __init__(self):
        pass
    
    
    ## implement the functions of itertools.combinations(string,k) ##
    def my_combinations(self, string, k):
        
        string = sorted(string)
        if k < 1:
            return None
        elif k == 1:
            return [[ch] for ch in string]
        else:
            
            ans = []
            stack = []
            n = len(string)
        
            for i in range(0, n-k+1):
                for j in self.my_combinations(string[i+1:], k-1):
                    j.insert(0, string[i])
                    ans.append(j)
        return ans
        pass
    
    
    ## generate a set of ALL sub-strings for a given string ##
    def sub_strings_generator(self, word):
        
        substrs_set = []
        
        for i in range(len(word)):
            # use itertools.combinations() before: for c in combinations(word, i+1):
            # update with self-built my_combinations
            for c in self.my_combinations(word, i+1):
                sub_string = ''.join(c)
                substrs_set.append(sub_string)
        
        substrs_set = list(set(substrs_set))
        print('There are', len(substrs_set), 'sub-strings.')
        return substrs_set
        pass
        
    
    ## define pre-processing of a given dictionary ##
    def dictionary_processor(self, dictionary):
        
        sorted_dictionary = []
        
        # process each word in dict: upper(word) and sort(word)
        for word in dictionary:
            new_word = word.lower()
            sorted_word = ''.join(sorted(new_word))
            sorted_dictionary.append([sorted_word,word])
            
        # sort(whole dictionary)
        sorted_dictionary = sorted(sorted_dictionary)
        return sorted_dictionary
        pass      
    
    
    ## return INDEX of a anagram in the dictionary, if the anagram matches the given word ##
    ## using binary search ##
    def binary_search_dictionary(self, word, dictionary):
        
        left = 0
        right = len(dictionary) - 1  
    
        while left <= right:
            middle = (left + right) // 2  
            key = dictionary[middle][0]
        
            if key > word:
                right = middle - 1
            elif key < word:
                left = middle + 1
            else:
                return middle
        return None
        pass
    
    
    ## judge whether an input string is valid ##
    # valid case: contains ONLY alphabet
    def alphabet_judger(self, word):
        return word.isalpha()
        pass
    
    
    ## solution ##
    def solve(self, random_word, dictionary):
        
        if not self.alphabet_judger(random_word):
            print('Oops~! The input string is NOT VALID!')
            return None
    
        # pre-process given letters
        random_word = random_word.lower()
        candidate_strs = self.sub_strings_generator(random_word)
        # pre-process dictionary
        sorted_dictionary = self.dictionary_processor(dictionary)

        anagrams = []

        for candidate in candidate_strs:
            anagram_index = self.binary_search_dictionary(candidate, sorted_dictionary)
      
            if anagram_index:
                new_anagram = sorted_dictionary[anagram_index][1]
                anagrams.append(new_anagram)      
        
        return anagrams
        pass

In [4]:
## Method 2: Decode string
# Step 1: counter_w = decode_str( random_word ). Ex. apple -> aelpp -> {'a':1, 'e':1, 'l':1, 'p':2}
# Step 2: counter_d = decode_str( each word in dictionary )
# Step 3: if union_set( counter_w, each counter_d ) == set( counter_w ) then append( word ) to anagrams

# solve_1(): solution 1 using built-in collections.Counter() method
# solve_2(): solution 2 using self-buit string_counter() method

# n: length of input string; m: length of dictionary
# Time complexity: O(m) 
# Space complexity: constant space

In [5]:
class anagram_2:
    
    def __init__(self):
        pass    
    
    
    ## judge whether an input string is valid ##
    # valid case: contains ONLY alphabet
    def alphabet_judger(self, word):
        return word.isalpha()
        pass
    
    
    ## solution using decode method (by built-in Counter method) ##
    def solve_1(self, random_word, dictionary):
        
        if not self.alphabet_judger(random_word):
            print('Oops~! The input string is NOT VALID!')
            return None
    
        # pre-process given letters
        random_word = random_word.lower()
        random_word_counter = Counter(random_word)
        
        anagrams = []
        
        for word in dictionary:
            new_word = word.lower()
            word_counter = Counter(new_word)
            
            if random_word_counter | word_counter == random_word_counter and word_counter:
                anagrams.append(word)
                
        return anagrams
        pass
    
    
    ## self-built Counter(method) ##
    # Input: string that contains ONLY lower-case alphabet
    # Output: dictionary
    def string_counter(self, random_word):
        
        s = sorted(random_word)
        counter = {}
        
        for ch in s:
            if ch not in counter:
                counter[ch] = 1
            else:
                counter[ch] += 1
        return counter
        pass
    
    
    ## judge a word in dict is an anagram of input random_word or not
    # Input: type-dict. ex. {'a':1, 'p':2}. 
    # [counter1 for input random_word, counter2 for word in dictionary]
    # Output: boolean value
    def isAnagram(self, counter1, counter2):
        
        if counter1.keys() | counter2.keys() == counter1.keys():
            for key in counter2.keys():
                if counter2[key] > counter1[key]:
                    return False
            return True
        
        return False
        pass
        
            
    ## solution using decode method (by self-built Counter method) ##
    def solve_2(self, random_word, dictionary):
        
        if not self.alphabet_judger(random_word):
            print('Oops~! The input string is NOT VALID!')
            return None
    
        # pre-process given letters
        random_word = random_word.lower()
        random_word_counter = self.string_counter(random_word)
        
        anagrams = []
        
        for word in dictionary:
            new_word = word.lower()
            word_counter = self.string_counter(new_word)
            
            if word_counter and self.isAnagram(random_word_counter, word_counter):
                anagrams.append(word)
                
        return anagrams
        pass

In [6]:
if __name__ == '__main__':
    
    from itertools import combinations
    from collections import Counter
    
    path = '/Users/keely/lectures/STEP2020/Homework 1 Anagram/'
    dictionary_file = open(path + 'dictionary.txt','r')
    dictionary = dictionary_file.read().split('\n')
    
    ## sorted dictionary: type: List, len: 72413 ##
    #print(dictionary)
    #print(len(dictionary))
    dictionary_file.close

In [7]:
print('You can input a random string ~')
random_word = input()

You can input a random string ~


 apple


In [8]:
solution_1 = anagram_1()
anagrams_1 = solution_1.solve(random_word, dictionary)

There are 23 sub-strings.


In [9]:
print('##### Using recap method #####')
print('===== Random word =====\n', random_word)
print('===== All mathced anagrams =====\n', sorted(anagrams_1))

if anagrams_1: 
    print('There are', len(anagrams_1), 'anagrams.')
    

##### Using recap method #####
===== Random word =====
 apple
===== All mathced anagrams =====
 ['Apple', 'Lapp', 'Lea', 'pal', 'pale', 'pap', 'pea', 'pep']
There are 8 anagrams.


In [10]:
solution_2 = anagram_2()
anagrams_2 = solution_2.solve_1(random_word, dictionary)

In [11]:
print('##### Using decode method (with built-in counter method) #####\n')
print('===== Random word =====\n', random_word)
print('===== All mathced anagrams =====\n', sorted(anagrams_2))

if anagrams_2: 
    print('There are', len(anagrams_2), 'anagrams.')

##### Using decode method (with built-in counter method) #####

===== Random word =====
 apple
===== All mathced anagrams =====
 ['Apple', 'Lapp', 'Lea', 'ale', 'ape', 'apple', 'lap', 'lea', 'leap', 'pal', 'pale', 'pap', 'pea', 'peal', 'pep', 'plea']
There are 16 anagrams.


In [12]:
anagrams_3 = solution_2.solve_2(random_word, dictionary)

In [13]:
print('##### Using decode method (with self-built counter method) #####\n')
print('===== Random word =====\n', random_word)
print('===== All mathced anagrams =====\n', sorted(anagrams_3))

if anagrams_3: 
    print('There are', len(anagrams_3), 'anagrams.')

##### Using decode method (with self-built counter method) #####

===== Random word =====
 apple
===== All mathced anagrams =====
 ['Apple', 'Lapp', 'Lea', 'ale', 'ape', 'apple', 'lap', 'lea', 'leap', 'pal', 'pale', 'pap', 'pea', 'peal', 'pep', 'plea']
There are 16 anagrams.
