## String Search
Find the starting index of the first occurrence of a target string T in a string S. Return -1 if T does not occur in S.
Example:
* S = 'Hello World'
* T = 'ell'
* Output = 1

In [33]:
def rabin_karp(s, t):
    """Return the index of the first occurrence of ``t`` in ``s``, or -1.

    Uses a Rabin-Karp rolling hash: the hash of each length-``len(t)``
    window of ``s`` is derived from the previous window's hash in O(1),
    and a window is only compared character-by-character when its hash
    equals the hash of ``t``.

    Args:
        s: Text to search in.
        t: Pattern to search for. An empty pattern matches at index 0.

    Returns:
        The 0-based start index of the first match, or -1 if there is none.
    """
    m = len(t)
    if m == 0:
        return 0
    if m > len(s):
        return -1

    base = 53
    # Keep every hash below a fixed modulus. Without it the hashes are
    # integers with O(len(t)) digits and each "constant-time" rolling step
    # actually costs O(len(t)).
    mod = (1 << 61) - 1

    hash_t = hash_s = 0
    for ch_t, ch_s in zip(t, s):
        hash_t = (hash_t * base + ord(ch_t)) % mod
        hash_s = (hash_s * base + ord(ch_s)) % mod

    # Equal hashes are only a hint; the slice comparison confirms the match.
    if hash_t == hash_s and s[:m] == t:
        return 0

    # Weight of the window's leading character: base**(m-1) mod `mod`.
    high = pow(base, m - 1, mod)

    for end in range(m, len(s)):
        start = end - m + 1
        # Drop the character leaving the window, shift, append the new one.
        # Python's % always yields a non-negative result, so the
        # subtraction cannot leave a negative hash.
        hash_s = ((hash_s - ord(s[end - m]) * high) * base + ord(s[end])) % mod
        if hash_s == hash_t and s[start:end + 1] == t:
            return start
    return -1

# Quick check: 'll' first appears in 'hello' at index 2.
rabin_karp('hello', 'll')

2

### Longest Repeating Substring
Given a string S, find out the length of the longest repeating substring(s). Return 0 if no repeating substring exists.

**Similar Question** : Longest Duplicate Substring

In [30]:
class Solution:
    """Longest repeating substring via binary search plus a rolling hash."""

    def longestRepeatingSubstring(self, S: str) -> int:
        """Return the length of the longest substring occurring at least twice in ``S``.

        Occurrences may overlap. Returns 0 when no substring repeats.

        If a substring of length k repeats, so does its prefix of length
        k - 1, so "some substring of length k repeats" is monotone in k
        and the largest such k can be found by binary search.
        """
        ans = 0
        lo, hi = 1, len(S) - 1
        while lo <= hi:
            mid = (lo + hi) // 2
            if self.check(mid, S):
                ans = mid
                lo = mid + 1
            else:
                hi = mid - 1
        return ans

    def check(self, n, s):
        """Return True if some substring of length ``n`` occurs at least twice in ``s``.

        Windows are bucketed by rolling hash, and a hash hit is confirmed
        against the actual text, so two different windows that share a hash
        value are never reported as a repeat.
        """
        # A window as long as the whole string (or longer) cannot occur twice.
        if n < 1 or n >= len(s):
            return False

        base = 53
        mod = (1 << 61) - 1  # large prime modulus keeps accidental collisions rare

        h = 0
        for ch in s[:n]:
            h = (h * base + ord(ch)) % mod
        # hash value -> start indices of the distinct windows seen with it
        starts = {h: [0]}

        # Weight of the window's leading character: base**(n-1) mod `mod`.
        high = pow(base, n - 1, mod)

        for end in range(n, len(s)):
            begin = end - n + 1
            h = ((h - high * ord(s[end - n])) * base + ord(s[end])) % mod
            candidates = starts.get(h)
            if candidates is None:
                starts[h] = [begin]
                continue
            # Same hash as an earlier window: compare the real characters.
            window = s[begin:end + 1]
            if any(s.startswith(window, p) for p in candidates):
                return True
            candidates.append(begin)
        return False

# Demo: "aab" is the longest substring occurring more than once, so the
# expected result is 3.
string = "aabcaabdaab"
obj = Solution()
obj.longestRepeatingSubstring(string)

3

### Repeated DNA Sequences
All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

Example:

Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"

Output: ["AAAAACCCCC", "CCCCCAAAAA"]

In [65]:
def findRepeatedDnaSequences(s, length=10):
    """Return every ``length``-character substring that occurs more than once in ``s``.

    Windows are compared by their actual text rather than by an unverified
    polynomial hash, so two different windows can never be reported as a
    repeat. Slicing a fixed-size window is cheap, so this costs
    O(len(s) * length) overall.

    Args:
        s: The sequence to scan.
        length: Window size to look for; defaults to 10.

    Returns:
        The repeated substrings, each listed once, in the order in which
        their first repetition is encountered. Empty when ``s`` has at most
        ``length`` characters.

    Raises:
        ValueError: If ``length`` is less than 1.
    """
    if length < 1:
        raise ValueError("length must be a positive integer")

    seen = set()
    # A dict preserves insertion order, giving a deterministic result while
    # still de-duplicating the repeated windows.
    repeated = {}
    for start in range(len(s) - length + 1):
        window = s[start:start + length]
        if window in seen:
            repeated[window] = None
        else:
            seen.add(window)
    return list(repeated)
# Demo using the example input from the problem statement above.
s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
findRepeatedDnaSequences(s)

['AAAAACCCCC', 'CCCCCAAAAA']

### Maximum Number of Occurrences of a Substring

Given a string s, return the maximum number of occurrences of any substring under the following rules:

The number of unique characters in the substring must be less than or equal to maxLetters.
The substring size must be between minSize and maxSize inclusive.

In [4]:
from collections import Counter
class Solution:
    """Maximum number of occurrences of a size- and alphabet-bounded substring."""

    def maxFreq(self, s: str, maxLetters: int, minSize: int, maxSize: int) -> int:
        """Return the highest occurrence count of any qualifying substring of ``s``.

        A substring qualifies when it has at most ``maxLetters`` distinct
        characters and its length lies in ``[minSize, maxSize]``.

        Only windows of length ``minSize`` are examined. Any longer
        qualifying substring starts with a length-``minSize`` prefix that
        also qualifies and occurs at least as often, so ``maxSize`` never
        changes the answer.

        Args:
            s: The string to scan.
            maxLetters: Maximum number of distinct characters allowed.
            minSize: Minimum substring length; the window size actually used.
            maxSize: Maximum substring length; accepted but not needed.

        Returns:
            The maximum occurrence count, or 0 if no substring qualifies
            (including when ``s`` is shorter than ``minSize``).
        """
        size = minSize
        # No window fits: nothing can qualify.
        if len(s) < size:
            return 0

        first = s[:size]
        letters = Counter(first)   # character counts inside the current window
        occurrences = Counter()    # exact window text -> times seen
        best = 0

        if len(letters) <= maxLetters:
            occurrences[first] += 1
            best = 1

        for end in range(size, len(s)):
            outgoing = s[end - size]
            letters[s[end]] += 1
            letters[outgoing] -= 1
            # Drop exhausted characters so len(letters) is the distinct count.
            if letters[outgoing] == 0:
                del letters[outgoing]
            if len(letters) <= maxLetters:
                # Count by the window's text, not by a hash of it, so two
                # different windows are never merged into one tally.
                window = s[end - size + 1:end + 1]
                occurrences[window] += 1
                best = max(best, occurrences[window])

        return best
            
        
# Demo: "aab" occurs twice and uses only 2 distinct letters, so the
# expected result is 2.
s = "aababcaab"; maxLetters = 2; minSize = 3; maxSize = 4
Solution().maxFreq(s, maxLetters, minSize, maxSize)

2

### Design a Hash Table

In [1]:
class HashMap:
    """Hash map that resolves collisions by chaining ListNode entries per bucket."""

    def __init__(self):
        """
        Create the table: ``m`` bucket slots, all initially empty (None).
        """
        self.m = 100
        self.a = [None] * self.m

    def put(self, key, value):
        """
        Store ``value`` under ``key``, replacing the value if ``key`` already exists.
        """
        slot = key % self.m
        current = self.a[slot]
        if current is None:
            # Empty bucket: the new entry becomes the head of the chain.
            self.a[slot] = ListNode(key, value)
            return
        while True:
            if current.key == key:
                current.val = value
                return
            if current.next is None:
                break
            current = current.next
        # Reached the tail without finding the key: append a new entry.
        current.next = ListNode(key, value)

    def get(self, key):
        """
        Return the value stored under ``key``, or -1 if ``key`` is not present.
        """
        current = self.a[key % self.m]
        while current is not None:
            if current.key == key:
                return current.val
            current = current.next
        return -1

    def remove(self, key):
        """
        Delete the entry for ``key`` if there is one; otherwise do nothing.
        """
        slot = key % self.m
        head = self.a[slot]
        if head is None:
            return
        if head.key == key:
            # Removing the head: the bucket now starts at the second entry.
            self.a[slot] = head.next
            return
        before = head
        while before.next is not None:
            if before.next.key == key:
                # Unlink the matching entry by skipping over it.
                before.next = before.next.next
                return
            before = before.next


class ListNode:
    """One key/value entry in a bucket's singly linked chain."""

    def __init__(self, key, val):
        self.next = None
        self.key = key
        self.val = val


In [2]:
# Build a map and insert two entries: 1 -> 1 and 2 -> 2.
hashMap = HashMap()
hashMap.put(1, 1)     
hashMap.put(2, 2) 

In [3]:
# Key 1 is present, so this returns its value, 1.
hashMap.get(1) 

1

In [4]:
# Key 3 was never inserted, so this returns -1.
hashMap.get(3)

-1

In [5]:
# Putting an existing key overwrites its value: key 2 now maps to 1.
hashMap.put(2, 1)         
hashMap.get(2) 

1

In [6]:
# After removing key 2, looking it up returns -1.
hashMap.remove(2)         
hashMap.get(2)           

-1

### Design a Hash Map (using built-in lists)

In [2]:
class MyHashMap:
    """Hash map whose buckets are plain Python lists of Node entries."""

    def __init__(self):
        """
        Create the table: ``m`` bucket slots, each None until first used.
        """
        self.m = 1000
        self.a = [None] * self.m

    def put(self, key: int, value: int) -> None:
        """
        Store ``value`` under ``key``, replacing the value if ``key`` already exists.
        """
        slot = key % self.m
        bucket = self.a[slot]
        if bucket is None:
            # First entry for this slot: create the bucket list.
            self.a[slot] = [Node(key, value)]
            return

        for entry in bucket:
            if entry.key == key:
                entry.value = value
                return
        bucket.append(Node(key, value))

    def get(self, key: int) -> int:
        """
        Return the value stored under ``key``, or -1 if ``key`` is not present.
        """
        # A never-used slot is None; treat it like an empty bucket.
        bucket = self.a[key % self.m] or ()
        return next((entry.value for entry in bucket if entry.key == key), -1)

    def remove(self, key: int) -> None:
        """
        Delete the entry for ``key`` if there is one; otherwise do nothing.
        """
        bucket = self.a[key % self.m]
        if bucket is None:
            return
        for pos, entry in enumerate(bucket):
            if entry.key != key:
                continue
            # Swap-with-last deletion: take the tail entry off and, unless
            # the tail was the match itself, drop it into the vacated
            # position. Bucket order is not preserved.
            tail = bucket.pop()
            if pos < len(bucket):
                bucket[pos] = tail
            return


class Node:
    """One key/value entry stored in a MyHashMap bucket."""

    def __init__(self, key, value):
        self.value = value
        self.key = key