# Tries
**``**
Also known as a **`prefix tree`**, a **`trie`** is a data structure used to store and retrieve strings from a dictionary or set. The nodes don't store their associated key; instead, each node's position within the trie determines its associated key. 

This data structure is used in **autocomplete, spell checking, and IP routing**.

Every key is a character that maps to a nested dictionary holding the possible next characters of a word, and the end of a complete word is marked by an extra end-symbol key paired with the boolean True:

> **`"*": True`**

## `add()` and `exists()`

In [6]:
import json
class Trie:
    """Prefix tree stored as nested dicts keyed by single characters.

    Each level of ``root`` maps a character to the dict for the next level.
    A complete word is marked by the key ``end_symbol`` mapped to ``True``
    in the dict reached after the word's last character.
    """

    def __init__(self):
        self.root = {}
        # Sentinel key marking "a word ends here"; it shares the key space
        # with ordinary characters, so it must never appear inside a word.
        self.end_symbol = "*"

    def add(self, word):
        """Insert ``word`` into the trie.

        Args:
            word: String to store, one trie level per character.

        Raises:
            ValueError: If ``word`` contains ``end_symbol``; such a word
                would be indistinguishable from an end-of-word marker and
                would corrupt the structure.
        """
        if self.end_symbol in word:
            raise ValueError(
                f"word must not contain the end symbol {self.end_symbol!r}"
            )
        current_level = self.root
        for char in word:
            # Reuse the existing branch or create a new empty one.
            current_level = current_level.setdefault(char, {})
        current_level[self.end_symbol] = True

    def exists(self, word):
        """Return True only if ``word`` was added as a complete word.

        A string that is merely a prefix of a stored word returns False.

        Args:
            word: String to look up.

        Returns:
            bool: Whether ``word`` is stored in the trie.
        """
        current = self.root
        for letter in word:
            # The end symbol is a marker whose value is True, not a child
            # dict; descending into it would break the membership test below.
            if letter == self.end_symbol or letter not in current:
                return False
            current = current[letter]
        # Reaching the last node is not enough: it must carry the marker.
        return self.end_symbol in current
            
# Build a small trie, dump its nested-dict layout, then run two lookups.
trie = Trie()
for word in ("dev", "doc", "dict"):
    trie.add(word)
print(json.dumps(trie.root, sort_keys=True, indent=1))
print(f"Word 'dictionary' exists: {trie.exists('dictionary')}")
print(f"Word 'doc' exists: {trie.exists('doc')}")

{
 "d": {
  "e": {
   "v": {
    "*": true
   }
  },
  "i": {
   "c": {
    "t": {
     "*": true
    }
   }
  },
  "o": {
   "c": {
    "*": true
   }
  }
 }
}
Word 'dictionary' exists: False
Word 'doc' exists: True


## Searching words with specific prefix

The following functions collect all complete words starting with the given prefix and return them as a list (an empty list if no words were found).

In [23]:
class Trie:
    """Prefix tree stored as nested dicts, with prefix search.

    Each level of ``root`` maps a character to the dict for the next level.
    A complete word is marked by the key ``end_symbol`` mapped to ``True``.
    """

    def __init__(self):
        self.root = {}
        # Sentinel key marking "a word ends here"; it shares the key space
        # with ordinary characters, so it must never appear inside a word.
        self.end_symbol = "*"

    def add(self, word):
        """Insert ``word`` into the trie.

        Args:
            word: String to store, one trie level per character.

        Raises:
            ValueError: If ``word`` contains ``end_symbol``; such a word
                would be indistinguishable from an end-of-word marker.
        """
        if self.end_symbol in word:
            raise ValueError(
                f"word must not contain the end symbol {self.end_symbol!r}"
            )
        current_level = self.root
        for char in word:
            current_level = current_level.setdefault(char, {})
        current_level[self.end_symbol] = True

    def search_level(self, current_level, current_prefix, words):
        """Append every complete word at or below ``current_level``.

        Args:
            current_level: Trie node (dict) to search from.
            current_prefix: String spelled by the path from the root to
                ``current_level``.
            words: List that found words are appended to, in place.

        Returns:
            list: The same ``words`` list that was passed in.
        """
        # The path walked so far is itself a stored word.
        if self.end_symbol in current_level:
            words.append(current_prefix)

        # Sorted traversal makes the result order deterministic.
        for character in sorted(current_level):
            # Compare against the configured sentinel, not a literal "*":
            # its value is True, not a child node, so it cannot be recursed.
            if character == self.end_symbol:
                continue
            self.search_level(
                current_level[character], current_prefix + character, words
            )
        return words

    def words_with_prefix(self, prefix):
        """Return all stored words that start with ``prefix``.

        Args:
            prefix: String every returned word must start with.

        Returns:
            list: Matching words in sorted traversal order; empty if no
            stored word starts with ``prefix``.
        """
        current_level = self.root
        for character in prefix:
            # A prefix containing the sentinel can never match a stored
            # word, and following it would land on True instead of a dict.
            if character == self.end_symbol or character not in current_level:
                return []
            current_level = current_level[character]
        # current_level is now the node for the last prefix character.
        return self.search_level(current_level, prefix, [])
    

# Load a list of job titles, then query three prefixes (the last has no match).
trie = Trie()
lst = [
    "dev", "devops", "designer", "director", "manager",
    "intern", "cto", "cfo", "coo", "ceo",
]
for w in lst:
    trie.add(w)
for prefix in ("m", "d", "g"):
    print(trie.words_with_prefix(prefix))

['manager']
['designer', 'dev', 'devops', 'director']
[]
{'d': {'e': {'v': {'*': True, 'o': {'p': {'s': {'*': True}}}}, 's': {'i': {'g': {'n': {'e': {'r': {'*': True}}}}}}}, 'i': {'r': {'e': {'c': {'t': {'o': {'r': {'*': True}}}}}}}}, 'm': {'a': {'n': {'a': {'g': {'e': {'r': {'*': True}}}}}}}, 'i': {'n': {'t': {'e': {'r': {'n': {'*': True}}}}}}, 'c': {'t': {'o': {'*': True}}, 'f': {'o': {'*': True}}, 'o': {'o': {'*': True}}, 'e': {'o': {'*': True}}}}


## Find matches

The complexity of this method is **`O(n * m)`**, where **`n`** is the length of the document, and **`m`** is the depth of the trie

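This method can be used, for example, to find occurrences of bad words in a text. Because the inner loop keeps descending the trie after a match is found, it reports a word as well as any longer word that extends it — for example, both "darn" and "darnit" when both are stored in the trie. 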

In [37]:
class Trie:
    """Prefix tree stored as nested dicts, with substring matching.

    Each level of ``root`` maps a character to the dict for the next level.
    A complete word is marked by the key ``end_symbol`` mapped to ``True``.
    """

    def __init__(self):
        self.root = {}
        # Sentinel key marking "a word ends here"; it shares the key space
        # with ordinary characters, so it must never appear inside a word.
        self.end_symbol = "*"

    def add(self, word):
        """Insert ``word`` into the trie.

        Args:
            word: String to store, one trie level per character.

        Raises:
            ValueError: If ``word`` contains ``end_symbol``; such a word
                would be indistinguishable from an end-of-word marker.
        """
        if self.end_symbol in word:
            raise ValueError(
                f"word must not contain the end symbol {self.end_symbol!r}"
            )
        current_level = self.root
        for char in word:
            current_level = current_level.setdefault(char, {})
        current_level[self.end_symbol] = True

    def find_matches(self, document):
        """Return every stored word that occurs as a substring of ``document``.

        Args:
            document: String to scan.

        Returns:
            set: The distinct substrings of ``document`` that are stored
            words. Matching is exact (case-sensitive).
        """
        matches = set()
        # The outer index fixes where a candidate starts; the inner index
        # extends it one character at a time while the trie still has a path.
        for start in range(len(document)):
            current_level = self.root
            for end in range(start, len(document)):
                character = document[end]
                # The sentinel is a key of word-ending nodes whose value is
                # True, not a child dict; a literal occurrence of it in the
                # text must end the walk instead of being followed.
                if character == self.end_symbol or character not in current_level:
                    break
                current_level = current_level[character]
                if self.end_symbol in current_level:
                    # end + 1 so the slice includes the last character.
                    matches.add(document[start:end + 1])
                # Keep going: a longer stored word may extend this one.
        return matches

# Scan a sentence for buzzwords; the final expression is the cell's result.
trie = Trie()
string = (
    "Let's circle back to touch base, Let's leverage our synergy to realign "
    "our bandwidth, we need to pivot and innovate for truly scalable solutions"
)
lst = [
    "synergy", "alignment", "leverage", "bandwidth", "circle", "back",
    "touch", "base", "pivot", "innovate", "scalable", "proactive",
]
for w in lst:
    trie.add(w)
trie.find_matches(string)

{'back',
 'bandwidth',
 'base',
 'circle',
 'innovate',
 'leverage',
 'pivot',
 'scalable',
 'synergy',
 'touch'}

## **`longest_common_prefix`** and **`advanced_find_matches`** methods

In [21]:
class Trie:
    """Prefix tree stored as nested dicts, with common-prefix and fuzzy matching.

    Each level of ``root`` maps a character to the dict for the next level.
    A complete word is marked by the key ``end_symbol`` mapped to ``True``.
    """

    def __init__(self):
        self.root = {}
        # Sentinel key marking "a word ends here"; it shares the key space
        # with ordinary characters, so it must never appear inside a word.
        self.end_symbol = "*"

    def add(self, word):
        """Insert ``word`` into the trie.

        Args:
            word: String to store, one trie level per character.

        Raises:
            ValueError: If ``word`` contains ``end_symbol``; such a word
                would be indistinguishable from an end-of-word marker.
        """
        if self.end_symbol in word:
            raise ValueError(
                f"word must not contain the end symbol {self.end_symbol!r}"
            )
        current = self.root
        for letter in word:
            current = current.setdefault(letter, {})
        current[self.end_symbol] = True

    def longest_common_prefix(self):
        """Return the longest prefix shared by every stored word.

        Returns:
            str: The shared prefix; ``""`` for an empty trie or when the
            stored words diverge at the first character.
        """
        current_level = self.root
        prefix_chars = []
        # Descend while there is exactly one way forward. Stop at a branch
        # (several children) or at a node where some word already ends.
        while len(current_level) == 1 and self.end_symbol not in current_level:
            only_child = next(iter(current_level))
            prefix_chars.append(only_child)
            current_level = current_level[only_child]
        return "".join(prefix_chars)

    def advanced_find_matches(self, document, variations):
        """Find stored words in ``document``, normalizing characters first.

        Args:
            document: String to scan.
            variations: Dict mapping a document character to the character
                it should be treated as when walking the trie.

        Returns:
            set: The distinct substrings of ``document``, in their original
            unsubstituted spelling, that match a stored word.
        """
        matches = set()
        for i in range(len(document)):
            level = self.root
            for j in range(i, len(document)):
                # Look up the substituted character, or the original one.
                ch = variations.get(document[j], document[j])
                # The sentinel maps to True rather than a child dict, so it
                # must end the walk whether it came from the document or
                # from a variation mapping.
                if ch == self.end_symbol or ch not in level:
                    break
                level = level[ch]
                if self.end_symbol in level:
                    matches.add(document[i:j + 1])
        return matches