# Top 10 Trie algorithms in interview questions

For further references see https://www.geeksforgeeks.org/top-10-algorithms-in-interview-questions-set-2/

# Find duplicate rows in a binary matrix

Given a binary matrix whose elements are only 0 and 1, we need to print the rows which are duplicate of rows which are already present in the matrix.

### Complexity Analysis

This algorithm has time complexity of $\mathcal{O}(m n)$.

In [1]:
class TrieNode:
    def __init__(self):
        self.children = {}
        self.rows = []

class Solution:
    def buildTrie(self, mat):
        self.root = TrieNode()
        for i, row in enumerate(mat):
            p = self.root
            for j in row:
                if j not in p.children:
                    p.children[j] = TrieNode()
                p = p.children[j]
            p.rows.append(i)               
    
    def findDuplicates(self, mat):
        self.buildTrie(mat)
        for i, rows in enumerate(mat):
            p = self.root
            for j in rows:
                p = p.children[j]
            tmp = [k for k in p.rows if k != i]
            if len(tmp) == 0:
                print("There are no duplicates of row {}".format(i))
            elif len(tmp) == 1:
                print("There is a duplicate row {} at position: {}".format(i, tmp[0]))
            else:
                print("There are duplicates row {} at positions: {}".format(i, ', '.join(map(str,tmp))))
    
    
def main():
    mat = [[1, 1, 0, 1, 0, 1], 
           [0, 0, 1, 0, 0, 1], 
           [1, 0, 1, 1, 0, 0], 
           [1, 1, 0, 1, 0, 1], 
           [0, 0, 1, 0, 0, 1], 
           [0, 0, 1, 0, 0, 1]]
    sol = Solution()
    sol.findDuplicates(mat)
      
if __name__ == "__main__":
    main()

There is a duplicate row 0 at position: 3
There are duplicates row 1 at positions: 4, 5
There are no duplicates of row 2
There is a duplicate row 3 at position: 0
There are duplicates row 4 at positions: 1, 5
There are duplicates row 5 at positions: 1, 4


# Word Break Problem

Given an input string and a dictionary of words, find out if the input string can be segmented into a space-separated sequence of dictionary words. See following examples for more details.

### Complexity Analysis

This algorithm has time complexity of $\mathcal{O}(L^2)$.

In [2]:
class TrieNode():
    def __init__(self):
        self.children = {}
        self.isLeaf = False

class Solution:
    def buildTrie(self, words):
        self.root = TrieNode()
        for word in words:
            p = self.root
            for w in word:
                if w not in p.children:
                    p.children[w] = TrieNode()
                p = p.children[w]
            p.isLeaf = True
            
    def search(self, word):
        p = self.root
        for w in word:
            if w not in p.children:
                return False
            p = p.children[w]
        if p and p.isLeaf:
            return True
    
    def wordBreak(self, s):
        n = len(s)
        if n == 0:
            return True
        for i in range(1, n+1):
            if self.search(s[:i]) and self.wordBreak(s[i:]):
                return True
        return False
    
def main():
    sol = Solution()
    s, words = "thequickbrownfox", ["the", "quick", "fox", "brown"]
    sol.buildTrie(words)
    print(sol.wordBreak(s))
    
    s, words = "bedbathandbeyond", ["bed", "bath", "bedbath", "and", "beyond"]
    sol.buildTrie(words)
    print(sol.wordBreak(s))
    
    s, words = "bedbathandbeyond", ["teddy", "bath", "bedbath", "and", "beyond"]
    sol.buildTrie(words)
    print(sol.wordBreak(s))
    
    s, words = "bedbathandbeyond", ["bed", "bath", "bedbath", "and", "away"]
    sol.buildTrie(words)
    print(sol.wordBreak(s))
    
    s = "ilikesamsung"
    words = ["i", "like", "sam", "sung", "samsung", "mobile", "ice", "cream", "icecream", "man", "go", "mango"]
    sol.buildTrie(words)
    print(sol.wordBreak(s))
      
if __name__ == "__main__":
    main()

True
True
True
False
True


# Longest Common Prefix using Trie

Given a set of strings, find the longest common prefix.


### Complexity Analysis
 Inserting all the words in the trie takes $\mathcal{O}(m n)$ time and performing a walk on the trie takes $\mathcal{O}(m)$.

In [3]:
class TrieNode():
    def __init__(self):
        self.children = {}
        self.rank = 0

class Solution:
    def buildTrie(self, words):
        self.root = TrieNode()
        for word in words:
            p = self.root
            for w in word:
                if w not in p.children:
                    p.children[w] = TrieNode()
                p = p.children[w]
                p.rank += 1
    
    def longestCommonPrefix(self, word):
        lcp = 0
        p = self.root
        for w in word:
            if len(p.children) == 1:
                p = p.children[w]
                lcp += 1
        return lcp
    
def main():
    sol = Solution()
    
    words = ["geeksforgeeks", "geeks", "geek", "geezer"]
    sol.buildTrie(words)
    print("The longest common prefix is of length: {}".format(sol.longestCommonPrefix(words[0])))
    
    words = ["apple", "ape", "april"]
    sol.buildTrie(words)
    print("The longest common prefix is of length: {}".format(sol.longestCommonPrefix(words[0])))
      
if __name__ == "__main__":
    main()

The longest common prefix is of length: 3
The longest common prefix is of length: 2


# Find shortest unique prefix for every word in a given list

Given an array of words, find all shortest unique prefixes to represent each word in the given array. Assume that no word is prefix of another.

### Complexity Analysis
This algorithm has time complexity of $\mathcal{O}(nm)$.

In [4]:
class TrieNode:
    def __init__(self):
        self.children = {}
        self.rank = 0

class Solution:
    def buildTrie(self, words):
        self.root = TrieNode()
        for word in words:
            p = self.root
            for w in word:
                if w not in p.children:
                    p.children[w] = TrieNode()
                p = p.children[w]
                p.rank += 1
    
    def shortestUniquePrefix(self, words):
        res = []
        for word in words:
            p = self.root
            cur = ''
            for w in word:
                cur += w
                p = p.children[w]
                if p.rank == 1:
                    res.append(cur)
                    break
        return res
    
def main():
    sol = Solution()
    
    words = ["geeksgeeks", "geeksquiz", "geeksforgeeks"]
    sol.buildTrie(words)
    print("The shortest unique prefixes are: {}".format(sol.shortestUniquePrefix(words)))
    
    words = ["geeksgeeks", "geeksquiz", "geeksforgeeks"]
    sol.buildTrie(words)
    print("The shortest unique prefixes are: {}".format(sol.shortestUniquePrefix(words)))
      
if __name__ == "__main__":
    main()

The shortest unique prefixes are: ['geeksg', 'geeksq', 'geeksf']
The shortest unique prefixes are: ['geeksg', 'geeksq', 'geeksf']


# Longest prefix matching

Given a dictionary of words and an input string, find the longest prefix of the string which is also a word in dictionary.

### Complexity Analysis

This algorithm has time complexity of $\mathcal{O}(n^2)$.

In [5]:
class TrieNode:
    def __init__(self):
        self.children = {}
        self.isWord = False

class Solution:
    def buildTrie(self, words):
        self.root = TrieNode()
        for word in words:
            p = self.root
            for w in word:
                if w not in p.children:
                    p.children[w] = TrieNode()
                p = p.children[w]
            p.isWord = True
    
    def longestPrefixMatching(self, word):
        res = 'No matching prefix'
        p = self.root
        for i, w in enumerate(word):
            if w in p.children:
                p = p.children[w]
                if p.isWord:
                    res = word[:i+1]
            else:
                break
        return res
    
def main():
    sol = Solution()
    
    words = ["are", "area", "base", "cat", "cater", "children", "basement"]
    sol.buildTrie(words)
    word = 'caterer'
    print("Longest prefix mathing {}: {}".format(word, sol.longestPrefixMatching(word)))
    word = 'basement'
    print("Longest prefix mathing {}: {}".format(word, sol.longestPrefixMatching(word)))
    word = 'are'
    print("Longest prefix mathing {}: {}".format(word, sol.longestPrefixMatching(word)))
    word = 'arex'
    print("Longest prefix mathing {}: {}".format(word, sol.longestPrefixMatching(word)))
    word = 'basemexz'
    print("Longest prefix mathing {}: {}".format(word, sol.longestPrefixMatching(word)))
    word = 'xyz'
    print("Longest prefix mathing {}: {}".format(word, sol.longestPrefixMatching(word)))
      
if __name__ == "__main__":
    main()

Longest prefix mathing caterer: cater
Longest prefix mathing basement: basement
Longest prefix mathing are: are
Longest prefix mathing arex: are
Longest prefix mathing basemexz: base
Longest prefix mathing xyz: No matching prefix


# Find maximum XOR of given integer in a stream of integers

You are given a number of queries Q and each query will be of the following types:

    Query 1 : add(x) This means add x into your data structure.
    Query 2 : maxXOR(y) This means print the maximum possible XOR of y with all the elements already stored in the data structure.

1 <= x, y <= 10^9
1 <= 10^5 <= Q
The data structure begins with only a 0 in it. 

### Complexity Analysis
Add and maxXor have time complexity of $\mathcal{O}(\log n)$. The space complexity is $\mathcal{O}(n \log n)$.

In [6]:
class TrieNode:
    def __init__(self):
        self.children = {}
        self.value = None

class Trie:
    def __init__(self):
        self.root = TrieNode()
        self.add(0)
    
    def add(self, x):
        bin32 = "{:032b}".format(x)
        p = self.root
        for c in bin32[::-1]:
            d = int(c)
            if d not in p.children:
                p.children[d] = TrieNode()
            p = p.children[d]
        p.value = x
    
    def maxXOR(self, y):
        bin32 = "{:032b}".format(y)
        p = self.root
        ans = i = 0
        for c in bin32[::-1]:
            d = int(c)
            if d ^ 1 in p.children:
                ans += 1 << i
                p = p.children[d^1]
            else:
                p = p.children[d]
            i += 1
        return ans
    
def main():
    trie = Trie()
    trie.add(10)
    trie.add(13)    
    print(trie.maxXOR(10))
    trie.add(9)
    trie.add(5)    
    print(trie.maxXOR(6))
      
if __name__ == "__main__":
    main()

7
15


# Auto-complete feature using Trie

We are given a Trie with a set of strings stored in it. Now the user types in a prefix of his search query, we need to give him all recommendations to auto-complete his query based on the strings stored in the Trie. We assume that the Trie stores past searches by the users.

### Complexity Analysis
This algorithm has time complexity of $\mathcal{O}(n)$.

In [7]:
class TrieNode:
    def __init__(self):
        self.children = {}
        self.isEnd = False
        self.data = None
        self.rank = 0

class AutocompleteSystem:
    def __init__(self, sentences, times):
        self.root = TrieNode()
        self.keyword = ''
        for sentence, time in zip(sentences,times):
            self.addRecord(sentence, time)
            
    def addRecord(self, sentence, hot):
        p = self.root
        for c in sentence:
            if c not in p.children:
                p.children[c] = TrieNode()
            p = p.children[c]
        p.isEnd = True
        p.data = sentence
        p.rank -= hot

    def input(self, c):
        results = []
        if c != '#':
            self.keyword += c
            results = self.search(self.keyword)
        else:
            self.addRecord(self.keyword, 1)
            self.keyword = ''
        return [item[1] for item in sorted(results)[:3]]
    
    def search(self, sentence):
        p = self.root
        for c in sentence:
            if c not in p.children:
                return []
            p = p.children[c]
        return self.dfs(p)
    
    def dfs(self, node):
        ret = []
        if node:
            if node.isEnd:
                ret.append((node.rank, node.data))
            for child in node.children:
                ret.extend(self.dfs(node.children[child]))
        return ret
    
def main():
    sentences = ["hello", "dog", "hell", "cat", "a", "hel", "help", "helps", "helping"]
    times = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    key = 'hel'
    sol = AutocompleteSystem(sentences, times)
    print(sol.search(key))
      
if __name__ == "__main__":
    main()

[(-1, 'hel'), (-1, 'hell'), (-1, 'hello'), (-1, 'help'), (-1, 'helps'), (-1, 'helping')]


# Count of distinct substrings of a string using Suffix Trie

Given a string of length n of lowercase alphabet characters, we need to count total number of distinct substrings of this string.

### Complexity Analysis

The time complexity of this algorithm is $\mathcal{O}(n)$.

In [8]:
class TrieNode:
    def __init__(self):
        self.children = {}

class Trie:
    def buildTrie(self, txt):
        self.root = TrieNode()
        for i in range(len(txt)):
            p = self.root
            for c in txt[i:]:
                if c not in p.children:
                    p.children[c] = TrieNode()
                p = p.children[c]
    
    def countNodes(self, txt):
        self.buildTrie(txt)
        self.ans = 0        
        
        def dfs(node):
            self.ans += 1
            for p in node.children:
                dfs(node.children[p])
                
        dfs(self.root)
        return self.ans
    
def main():
    trie = Trie()
    text = "ababa"
    print("Count of distinct substrings is ", trie.countNodes(text))
      
if __name__ == "__main__":
    main()

Count of distinct substrings is  10
