# String algorithms

#### These are methods or procedures designed to process and manipulate strings (sequences of characters).

In [None]:
# init
from .add_binary import *
from .breaking_bad import *
from .decode_string import *
from .delete_reoccurring import *
from .domain_extractor import *
from .encode_decode import *
from .group_anagrams import *
from .int_to_roman import *
from .is_palindrome import *
from .is_rotated import *
from .license_number import *
from .make_sentence import *
from .merge_string_checker import *
from .multiply_strings import *
from .one_edit_distance import *
from .rabin_karp import *
from .reverse_string import *
from .reverse_vowel import *
from .reverse_words import *
from .roman_to_int import *
from .strip_url_params import *
from .validate_coordinates import *
from .word_squares import *
from .unique_morse import *
from .judge_circle import *
from .strong_password import *
from .caesar_cipher import *
from .check_pangram import *
from .contain_string import *
from .count_binary_substring import *
from .repeat_string import *
from .text_justification import *
from .min_distance import *
from .longest_common_prefix import *
from .rotate import *
from .first_unique_char import *
from .repeat_substring import *
from .atbash_cipher import *
from .longest_palindromic_substring import *
from .knuth_morris_pratt import *
from .panagram import *

## Binary String Addition


In [None]:
"""
Given two binary strings,
return their sum (also a binary string).

For example,
a = "11"
b = "1"
Return "100".
"""

In [None]:
def add_binary(a, b):
    """Return the sum of two binary strings as a binary string.

    Both inputs are walked from their last character towards the first,
    adding one digit of each plus the running carry at every step.
    """
    base = ord('0')
    idx_a = len(a) - 1
    idx_b = len(b) - 1
    carry = 0
    bits = []  # result digits, least significant first

    # Keep going while either input still has digits or a carry is pending.
    while not (idx_a < 0 and idx_b < 0 and carry != 1):
        if idx_a >= 0:
            carry += ord(a[idx_a]) - base
            idx_a -= 1
        if idx_b >= 0:
            carry += ord(b[idx_b]) - base
            idx_b -= 1
        bits.append(chr(carry % 2 + base))
        carry //= 2

    bits.reverse()
    return "".join(bits)


## Atbash Cipher Implementation

In [None]:
"""
The Atbash cipher maps each letter of the alphabet to its counterpart in the reversed alphabet.
For example, "a" is the first letter, so it becomes the last letter, "z".

Example:
Attack at dawn --> Zggzxp zg wzdm

Complexity: O(n)
"""

In [None]:
def atbash(s):
    """Return `s` encoded with the Atbash cipher.

    Each ASCII letter is mapped to the letter at the mirrored position of
    the alphabet (A <-> Z, b <-> y, ...), keeping its case. Every other
    character (digits, punctuation, whitespace, non-ASCII letters) is
    copied through unchanged.

    Args:
        s: the text to translate.

    Returns:
        The translated string, the same length as `s`.
    """
    translated = []
    for ch in s:
        # Compare against explicit ASCII ranges: str.isalpha() also accepts
        # non-ASCII letters, for which the mirror arithmetic is meaningless
        # (and can even push chr() out of range).
        if 'A' <= ch <= 'Z':
            translated.append(chr(ord('Z') - (ord(ch) - ord('A'))))
        elif 'a' <= ch <= 'z':
            translated.append(chr(ord('z') - (ord(ch) - ord('a'))))
        else:
            translated.append(ch)
    return "".join(translated)

## Symbol Matching in Words with Longest Symbol Preference

In [None]:
"""
Given an api which returns an array of words and an array of symbols, display
the word with their matched symbol surrounded by square brackets.

If the word string matches more than one symbol, then choose the one with
longest length. (ex. 'Microsoft' matches 'i' and 'cro'):

Example:
Words array: ['Amazon', 'Microsoft', 'Google']
Symbols: ['i', 'Am', 'cro', 'Na', 'le', 'abc']

Output:
[Am]azon, Mi[cro]soft, Goog[le]

My solution (wrong):
I sorted the symbols array in descending order of length and ran a loop over
the words array to find a symbol match (using indexOf in JavaScript), which
worked. But I didn't make it through the interview; I am guessing my solution
was O(n^2) and they expected a more efficient algorithm.

output:
['[Am]azon', 'Mi[cro]soft', 'Goog[le]', 'Amaz[o]n', 'Micr[o]s[o]ft', 'G[o][o]gle']
"""

In [None]:
from functools import reduce

In [None]:
def match_symbol(words, symbols):
    """Bracket every occurrence of each symbol in each word containing it.

    Symbols are treated as literal text, not as regular expressions, so
    characters such as '.', '(' or '+' only match themselves.

    Args:
        words: iterable of words to search. It is traversed once per
            symbol, so it must be re-iterable (e.g. a list).
        symbols: iterable of symbols to look for.

    Returns:
        A list with one entry per (symbol, word) pair where the word
        contains the symbol, ordered by symbol first and then by word.
        In each entry all occurrences of the symbol are wrapped in
        square brackets.
    """
    combined = []
    for symbol in symbols:
        bracketed = "[{}]".format(symbol)
        for word in words:
            if symbol in word:
                combined.append(word.replace(symbol, bracketed))
    return combined

In [None]:
def match_symbol_1(words, symbols):
    """Bracket the longest matching symbol in each word.

    Symbols are tried from longest to shortest (symbols of equal length
    keep their original relative order). For the first symbol found in a
    word, every occurrence of that symbol in the word is wrapped in
    square brackets. Words that contain no symbol are returned unchanged.

    Args:
        words: iterable of words to process.
        symbols: iterable of symbols to look for; may be empty.

    Returns:
        A list with exactly one entry per word, in input order.
    """
    res = []
    # Longest symbols first, so the first hit is also the longest match.
    by_length = sorted(symbols, key=len, reverse=True)
    for word in words:
        for symbol in by_length:
            if symbol in word:
                res.append(word.replace(symbol, '[' + symbol + ']'))
                break
        else:
            # No symbol matched (this also covers an empty symbol list).
            res.append(word)
    return res

In [None]:
"""
Another approach is to use a Tree for the dictionary (the symbols), and then
match brute force. The complexity will depend on the dictionary;
if all are suffixes of the other, it will be n*m
(where m is the size of the dictionary). For example, in Python:
"""

In [None]:
class TreeNode:
    """Node of the symbol trie used by `bracket`."""

    def __init__(self):
        # Symbol that ends exactly at this node, or None if none does.
        self.sym = None
        # Child nodes keyed by the next character.
        self.c = {}


def bracket(words, symbols):
    """Bracket the longest symbol found in each word.

    A trie is built from `symbols`; every start position of every word is
    then walked through the trie to collect matches. The longest match
    wins; among equally long matches the one found first (smallest start
    position) is kept. Only that single occurrence is wrapped in square
    brackets.

    Returns a tuple with one entry per word: the bracketed form when a
    symbol matched, otherwise the word itself.
    """
    # Build the trie of symbols.
    trie = TreeNode()
    for symbol in symbols:
        node = trie
        for ch in symbol:
            node = node.c.setdefault(ch, TreeNode())
        node.sym = symbol

    def longest_match(word):
        """Return (start, end, symbol) of the best match, or None."""
        best = None
        for start in range(len(word)):
            node = trie
            for pos in range(start, len(word)):
                node = node.c.get(word[pos])
                if node is None:
                    break
                if node.sym is None:
                    continue
                # Strictly-longer comparison keeps the earliest match on ties.
                if best is None or len(node.sym) > best[1] - best[0]:
                    best = (pos + 1 - len(node.sym), pos + 1, node.sym)
        return best

    bracketed = {}
    for word in words:
        hit = longest_match(word)
        if hit is None:
            continue
        begin, end, symbol = hit
        bracketed[word] = "{}[{}]{}".format(word[:begin], symbol, word[end:])

    return tuple(bracketed.get(word, word) for word in words)

## Caesar Cipher Encryption


In [None]:
"""
Julius Caesar protected his confidential information by encrypting it using a cipher.
Caesar's cipher shifts each letter by a number of letters. If the shift takes you
past the end of the alphabet, just rotate back to the front of the alphabet.
In the case of a rotation by 3, w, x, y and z would map to z, a, b and c.
Original alphabet:      abcdefghijklmnopqrstuvwxyz
Alphabet rotated +3:    defghijklmnopqrstuvwxyzabc
"""

In [None]:
def caesar_cipher(s, k):
    """Shift every ASCII letter of `s` by `k` positions, wrapping around.

    Upper- and lower-case letters rotate within their own alphabet; all
    other characters are copied through unchanged.
    """
    upper_a, lower_a = ord('A'), ord('a')
    shifted = []
    for char in s:
        code = ord(char)
        if upper_a <= code <= ord('Z'):
            code = upper_a + (code - upper_a + k) % 26
        elif lower_a <= code <= ord('z'):
            code = lower_a + (code - lower_a + k) % 26
        shifted.append(chr(code))
    return "".join(shifted)

## Pangram Check Algorithm

In [None]:
"""
Algorithm that checks if a given string is a pangram or not
"""

In [None]:
def check_pangram(input_string):
    """Return True if `input_string` contains every letter a-z.

    The check is case-insensitive; characters other than the 26 English
    letters are ignored.

    Args:
        input_string: the text to examine.

    Returns:
        True when each of the 26 letters occurs at least once,
        otherwise False.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    # Lower-case once up front instead of once per alphabet letter.
    lowered = input_string.lower()
    return all(ch in lowered for ch in alphabet)

## Implementing `strStr()` Function


In [None]:
"""
Implement strStr().

Return the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack.

Example 1:
Input: haystack = "hello", needle = "ll"
Output: 2

Example 2:
Input: haystack = "aaaaa", needle = "bba"
Output: -1
"""

In [None]:
# Reference: https://leetcode.com/problems/implement-strstr/description/

def contain_string(haystack, needle):
    """Return the index of the first occurrence of `needle` in `haystack`.

    An empty `needle` is found at index 0. Returns -1 when `needle` does
    not occur in `haystack`.
    """
    needle_len = len(needle)
    if not needle_len:
        return 0

    # Last index at which the needle could still fit; negative when the
    # needle is longer than the haystack, which leaves the range empty.
    last_start = len(haystack) - needle_len
    for start in range(last_start + 1):
        if haystack[start:start + needle_len] == needle:
            return start
    return -1