In [None]:
"""Word Count Engine
===============================
Implement a document scanning function wordCountEngine, which receives a string 
document and returns a list of all unique words in it and their number of
occurrences, sorted by the number of occurrences in a descending order. If
two or more words have the same count, they should be sorted according to
their order in the original sentence. Assume that all letters are in english
alphabet. You function should be case-insensitive, so for instance, the words
“Perfect” and “perfect” should be considered the same word.

The engine should strip out punctuation (even in the middle of a word) and
use whitespaces to separate words.

Analyze the time and space complexities of your solution. Try to optimize
for time while keeping a polynomial space complexity.

Examples:

input:  document = "Practice makes perfect. you'll only
                    get Perfect by practice. just practice!"

output: [ ["practice", "3"], ["perfect", "2"],
          ["makes", "1"], ["youll", "1"], ["only", "1"], 
          ["get", "1"], ["by", "1"], ["just", "1"] ]
"""

"""My solution :  Wrong output, un-optmized"""
from collections import defaultdict
import string

def word_count_engine(document):
    docList = document.lower().split(' ')
    wordMap = defaultdict(list)
    try:
        for wordIndex in range(0, len(docList)):
            word = docList[wordIndex]
            count = 0
            order = 0
            for letter in word:
                if letter not in string.ascii_letters:
                    word = word.replace(letter, "")
            if word in wordMap:
                count = wordMap[word][0] + 1
                order = wordMap[word][1]
            else:
                count = 1
                order = wordIndex
                
            if word == "":
                pass
            else:
                wordMap[word] = [count, order]
    except Exception as e:
        print(e)
  
    wordList = sorted(wordMap.values(), key=lambda x: x[0], reverse=True)  
  
    output = []
    for value in wordList:
        for k, v in wordMap.items():
            if value == v:
                output.append([k,str(v[0])])
      
    return output
    
"""Correct solution :  optmized (time O(n), space O(n))
========================================================
Let document consist of of N words where M of them are unique (M ≤ N). 
The solution consists of two steps: 
1) parsing the string according to the criteria described in the problem 
and counting the number of occurrences of each word. 
2) sorting the [word, occurrence] pairs by the number of words’ occurrences 
in a descending order.

Step 1: we tokenize document into words by using whitespaces as delimiters.
For each word, we clean it from all non-alphabetic characters (digits, punctuation etc)
and convert it to lowercase to make counting case-insensitive. In this part, you should 
be leveraging whatever parsing capabilities your programming language of choice is 
providing. There is really no point of implementing functions that already exist.
As for counting, we’ll use a Map (Hash Table) to store words and their corresponding 
occurrences. A map is optimal in this case because it allows us find, store and
update operations in O(1) time complexity.

Step 2: as for the sorting part, rather than sorting the entries in the map
directly, which takes O(M⋅log(M)) - where M is number of unique words in 
document - a better solution will be to place words into an array of string
arrays indexed by the occurrence number and then iterate through the array 
in the reverse order. This is similar to a Bucket Sort. 
The proposed solution trades off a bit of space for performance, 
which may be a reasonable trade under certain circumstances.

Time Complexity: let N be the number of words in document and M 
the number of unique words in it (M ≤ N). Iterating over all words,
cleaning them and inserting them into a map takes O(N). 
The sorting step takes O(M) since notice that in the second loop, 
every word gets visited only once. The total time complexity is 
therefore O(N + M), which is O(N).

Space Complexity: wordMap takes O(M) space and the array of
strings array, counterList, takes another O(M). So, in total,
the space complexity is O(M).

Note: the reason we’re analyzing the problem complexity 
in terms of the number of words, and not number of characters 
is because the average length of an english word is ~5, 
so from a practical perspective this could be regarded as a
constant and therefore can be ignored (i.e. O(5N) = O(N))
"""
from collections import defaultdict
import string

def wordCountEngineOPTIMIZED(document):
    wordMap = {}
    wordList = document.lower().split(' ')
    
    for index in range(0, len(wordList)):
        currentWord = wordList[index]
        currentWordCount = 0
        
        for letter in currentWord:
            if letter not in string.ascii_letters: 
            # alternatively, you can also do ==> 
            #    if (ord(ch) >= ord('a') and ord(ch) <= ord('z')):
                currentWord = currentWord.replace(letter, "")
                            
        if currentWord in wordMap:
            currentWordCount = wordMap[currentWord] + 1
        else:
            currentWordCount = 1
                            
        if currentWord == "":
            pass
        else:
            wordMap[currentWord] = currentWordCount
                            
    counterList = [None for i in range(0, len(wordMap))]
    
    # add all words to a list indexed by the
    # corresponding occurrence number.
    for word, count in wordMap.items():
        wordCounterList = counterList[count]
        if (wordCounterList == None):
            wordCounterList = []
        wordCounterList.append(word)
        counterList[count] = wordCounterList

    # iterate through the list in reverse order (largest count to lowest count)
    # and add only non-null values to result
    result = []
    for count in range(len(counterList)-1, -1, -1):
        listOfWordsWithSameCount = counterList[count]
        if (listOfWordsWithSameCount == None or len(listOfWordsWithSameCount) == 0):
            continue
        else:
            for wordIndex in range(0, len(listOfWordsWithSameCount)):
                word = listOfWordsWithSameCount[wordIndex]
                result.append([word, str(count)])

    return result

document1 = "Practice makes perfect. you'll only get Perfect makes by practice. just practice!"
expected1 = "[['practice', '3'], ['makes', '2'], ['perfect', '2'], ['youll', '1'], ['only', '1'], ['get', '1'], ['by', '1'], ['just', '1']]" 
print("input: ", expected1)
print("\nMy solution: {}".format(word_count_engine(document1)))
print("\nCorrect solution: {}".format(wordCountEngineOPTIMIZED(document1)))
print(expected1.strip() == str(word_count_engine(document1)).strip() == str(wordCountEngineOPTIMIZED(document1)).strip())

In [None]:
"""      
Flatten a Dictionary
input:  dict = {
            "Key1" : "1",
            "Key2" : {
                "a" : "2",
                "b" : "3",
                "c" : {
                    "d" : "3",
                    "e" : {
                        "" : "1"
                    }
                }
            }
        }

output: {
            "Key1" : "1",
            "Key2.a" : "2",
            "Key2.b" : "3",
            "Key2.c.d" : "3",
            "Key2.c.e" : "1"
        }
A recursion is natural choice for this kind of problem. 
We iterate over the keys in dict and distinguish between two cases:
If the value mapped to a key is a primitive,
we take that key and simply concatenate it to the flattened 
key we created up to this point. We then map the resultant key 
to the value in the output dictionary. If the value is a dictionary,
we do the same concatenation, but instead of mapping the result
to the value in the output dictionary,
we recurse on the value with the newly formed key.

Time Complexity: O(N), where N is the number of keys in the input dictionary. 
We visit every key in dictionary only once, hence the linear time complexity.

Space Complexity: O(N) since the output dictionary is asymptotically as big as 
the input dictionary. We also store recursive calls in the execution 
stack which in the worst case scenario could be O(N), as well. 
The total is still O(N).
"""

def flattenDictionary(dictionary):
    flatDictionary = {}
    flattenDictionaryHelper("", dictionary, flatDictionary)
    
    return flatDictionary

def removeLastDot(key):
    if key.endswith("."):
        return key[:-1]
    else:
        return key

def flattenDictionaryHelper(initialKey, dictionary, flatDictionary):
    for key in dictionary.keys():
        value = dictionary[key]

        if (type(value) != dict): # the value is of a primitive type
            if ((initialKey == None) | (initialKey == "")):
                flatDictionary[key] = value
            else:
                newKey = removeLastDot(initialKey + "." + key)
                flatDictionary[newKey] = value
        else:
            if ((initialKey == None) | (initialKey == "")):
                flattenDictionaryHelper(key, value, flatDictionary)
            else:
                newKey = removeLastDot(initialKey + "." + key)
                flattenDictionaryHelper(newKey, value, flatDictionary)

d = { "Key1" : "1",
      "Key2" : {
        "a" : "2",
        "b" : 3,
        "c" : {
            "d" : "4",
            "e" : {
                "" : 5,
                'k': 6
            }
        }
    }
}
print(flattenDictionary(d))

In [None]:
"""
An H-tree is a geometric shape that consists of a repeating pattern
resembles the letter “H”.

It can be constructed by starting with a line segment 
of arbitrary length, drawing two segments of the same length
at right angles to the first through its endpoints, and continuing 
in the same vein, reducing (dividing) the length of the line segments
drawn at each stage by √2.

Write a function drawHTree that constructs an H-tree, 
given its center (x and y coordinates), a starting length,
and depth. Assume that the starting line is parallel to the X-axis.

Use the function drawLine provided to implement your algorithm.
In a production code, a drawLine function would render a real line
between two points. However, this is not a real production environment,
so to make things easier, implement drawLine such that it simply prints
its arguments (the print format is left to your discretion).

Analyze the time and space complexity of your algorithm.
In your analysis, assume that drawLine's time and space complexities
are constant, i.e. O(1).

We will start from the center point. Compute the coordinates of the 4 
tips of the H. Then we shall draw the 3 line segments of the H, i.e. 
left and right vertical of the H, and the connection of the two vertical 
segments. We will update the length and recursively draw 4 half-size 
H-trees of order one less than the current depth.

Time Complexity: every call of drawHTree invokes 
9 expressions whose time complexity is O(1) and 4 calls of drawHTree
until depth(denoted here as D) reaches to 0. 
Therefore: T(D) = 9 + 4 * T(D-1), where T is the time complexity
function and D is the depth of the H-Tree. 
Now, if we expand T(D-1) recursively all the way to T(0),
it’ll be easy to see that T(D) = O(4^D).

Space Complexity:  recursive calls add overhead since we store them 
in the execution stack. The space occupied in the stack will be then O(D),
in the worst case scenario. 
The stack space occupied will be no more than O(D) at any given
point since a sibling drawHTree will not be called before the current
one being executed returns (i.e. finishes its execution).
"""
import math 
import turtle

def drawLine(x1, y1, x2, y2):
    # draws line, assume implementation available
    print("({x1},{y1}) , ({x2},{y2})".format(x1=x1, y1=y1, x2=x2, y2=y2))
    point1 = (x1, y1)
    point2 = (x2, y2)
    turtle.penup()
    turtle.goto(point1)
    turtle.pendown()
    turtle.goto(point2)
    #turtle.hideturtle()
    turtle.exitonclick()

def drawHTree(x, y, length, depth):
    # recursion base case
    if (depth == 0):
        return

    x1 = x - length/2
    x2 = x + length/2
    y1 = y - length/2
    y2 = y + length/2

    # draw the 3 line segments of the H-Tree
    drawLine(x1, y1, x1, y2)    # left segment
    drawLine(x2, y1, x2, y2)    # right segment
    drawLine(x1, y,  x2,  y)    # connecting segment

    # at each stage, the length of segments decreases by a factor of √2
    newLength = length/math.sqrt(2)

    # decrement depth by 1 and draw an H-tree
    # at each of the tips of the current ‘H’
    drawHTree(x1, y1, newLength, depth-1)     # lower left  H-tree
    drawHTree(x1, y2, newLength, depth-1)     # upper left  H-tree
    drawHTree(x2, y1, newLength, depth-1)     # lower right H-tree
    drawHTree(x2, y2, newLength, depth-1)     # upper right H-tree

drawHTree(0.0, 0.0, 140, 3)

In [None]:
"""
Pairs with Specific Difference
Given an array arr of distinct integers and a nonnegative integer k, 
write a function findPairsWithGivenDifference that returns an array of
all pairs [x,y] in arr, such that x - y = k. If no such pairs exist, 
return an empty array.

Note: the order of the pairs in the output array should maintain the
order of the y element in the original array.
Examples:
    input:  arr = [0, -1, -2, 2, 1], k = 1
    output: [[1, 0], [0, -1], [-1, -2], [2, 1]]

    input:  arr = [1, 7, 5, 3, 32, 17, 12], k = 17
    output: []

Constraints:
    [time limit] 5000ms
    [input] array.integer arr
    0 ≤ arr.length ≤ 100
    [input]integer k
    k ≥ 0
    [output] array.array.integer

Pairs with Specific Difference
A naive approach is is to run two loops. 
The outer loop picks the first element (smaller element) 
and the inner loop looks up for the element picked 
by the outer loop plus k. 
While this solution is done in O(1) space complexity, 
its time complexity is O(N^2), which isn’t asymptotically optimal.

We can use a hash map to improve the time complexity to O(N⋅log(N))
for the worst case and O(N) for the average case. 
We rely on the fact that if x - y = k then x - k = y.

The first step is to traverse the array, and for each element arr[i], 
we add a key-value pair of (arr[i] - k, arr[i]) to a hash map. 
Once the map is populated, we traverse the array again, 
and check for each element if a match exists in the map.

Both the first and second steps take O(N⋅log(N)) for the worst
case and O(N) for the average case. So the overall time complexity
is O(N) for the average case.
    
"""
import itertools

# My Solution: Time: O(n^2), Space:  O(n)
def find_pairs_with_given_difference(arr, k):
    if (len(arr) == 0):
        return []
    
    if k < 0:
        return
    
    result_arr = []
    for elem in arr:
        for (x,y) in itertools.product(arr, [elem]):
            if x - y == k:
                #print("({x},{y}) = {k}: TRUE".format(x=x, y=y, k=k))
                result_arr.append([x, y])
            else:
                pass
    return result_arr

# Correct Solution: Time: O(N) , Space O(n)
def findPairsWithGivenDifference(arr, k):
    # since we don't allow duplicates, no pair can satisfy x - 0 = y
    if k == 0:
        return []
        
    map = {}
    answer = []
    """
    if x - y = k, then
    x - k = y
    """
    for x in arr:
        y = x - k
        map[y] = x
    
    for y in arr:
        if y in map:
            x = map[y]
            answer.append([x, y]) 
            
    return answer

arr = [0, -1, -2, 2, 1]
k = 1
arr2 = [1, 7, 5, 3, 32, 17, 12]
k2 = 17

print("My Brute-force solution: O(n^2)")
print("result1:", find_pairs_with_given_difference(arr, k))
print("result2: ", find_pairs_with_given_difference(arr2, k2))

print("\nFaster solution: O(2n), i.e., O(n)")
print("result1:", findPairsWithGivenDifference(arr, k))
print("result2: ", findPairsWithGivenDifference(arr2, k2))

In [None]:
"""
====================== Award Budget Cuts ============================
The awards committee of your alma mater (i.e. your college/university) 
asked for your assistance with a budget allocation problem they’re facing. 
Originally, the committee planned to give N research grants this year.
However, due to spending cutbacks, the budget was reduced to newBudget 
dollars and now they need to reallocate the grants. The committee made a 
decision that they’d like to impact as few grant recipients as possible by
applying a maximum cap on all grants. Every grant initially planned to be 
higher than cap will now be exactly cap dollars. Grants less or equal to cap, 
obviously, won’t be impacted.

Given an array grantsArray of the original grants and the reduced budget 
newBudget, write a function findGrantsCap that finds in the most efficient
manner a cap such that the least number of recipients is impacted and that
the new budget constraint is met (i.e. sum of the N reallocated grants 
equals to newBudget).

Analyze the time and space complexities of your solution.

Example:
input: 

output: 47 # and given this cap the new grants array would be
           # [2, 47, 47, 47, 47]. Notice that the sum of the
           # new grants is indeed 190
           
Constraints:
    [time limit] 5000ms
    [input] array.double grantsArray
    0 ≤ grantsArray.length ≤ 20
    0 ≤ grantsArray[i]
    [input] double newBudget
    [output] double
"""
def find_grants_cap(grantsArray, newBudget):
    # sort the array in a descending order.
    arr = sorted(grantsArray, reverse=True)
    print(arr)
    
    # pad the array with a zero at the end to
    # cover the case where 0 <= cap <= grantsArray[i]
    arr.append(0)
    n = len(grantsArray)
    oldBudget = 0
    for i in arr:
        oldBudget += i
    
    
    # calculate the total amount we need to
    # cut back to meet the reduced budget
    surplus = oldBudget - newBudget
  
    # if there is nothing to cut, simply return
    # the highest grant as the cap. Recall that
    # the grants array is sorted in a descending
    # order, so the highest grant is positioned
    # at index 0
    if surplus <= 0:
        return grantsArray[0]
     
    # start subtracting from surplus the
    # differences (“deltas”) between consecutive
    # grants until surplus is less or equal to zero.
    # Basically, we are testing out, in order, each
    # of the grants as potential lower bound for
    # the cap. Once we find the first value that
    # brings us below zero we break
    for i in range(0, n):
        surplus = surplus - ((i+1) * (arr[i] - arr[i+1]))  ##important
        if (surplus <= 0):
            break
  
    # since grantsArray[i+1] is a lower bound
    # to our cap, i.e. grantsArray[i+1] <= cap,
    # we  need to add to grantsArray[i+1] the
    # difference: (-total / float(i+1), so the
    # returned value equals exactly to cap.
    newCap = arr[i+1] + (-surplus / float(i+1))   ## important
    
    print(grantsArray)
    modified_array = [] 
    for i in range(0, len(grantsArray)):
        if (grantsArray[i] > int(newCap)):
            modified_array.append(newCap)
        elif (grantsArray[i] <= int(newCap)):
            modified_array.append(grantsArray[i])
            
    return newCap, modified_array

grantsArray = [2, 100, 50, 120, 1000]
newBudget = 190
print(find_grants_cap(grantsArray, newBudget))

In [None]:
"""
========================= Bracket Match =============================
A string of brackets is considered correctly matched if every 
opening bracket in the string can be paired up with a later closing 
bracket, and vice versa. For instance, “(())()” is correctly matched, 
whereas “)(“ and “((” aren’t. For instance, “((” could become correctly 
matched by adding two closing brackets at the end, so you’d return 2.

Given a string that consists of brackets, write a function bracketMatch
that takes a bracket string as an input and returns the minimum number
of brackets you’d need to add to the input in order to make it correctly matched.

Explain the correctness of your code, and analyze its time and space complexities.

Examples:

input:  text = “(()”
output: 1

input:  text = “(())”
output: 0

input:  text = “())(”
output: 2
Constraints:

[time limit] 5000ms

[input] string text

1 ≤ text.length ≤ 5000
[output] integer
"""


"""Only parenthesis matcher"""
from collections import deque
def bracketMatcher(string):
    s = deque()
    balanced = True
    index = 0
    needed = 0
    while index < len(string):
        symbol = string[index]
        if symbol == "(":
            needed += 1
            s.append(symbol)
        else:
            if len(s)==0:
                needed += 1
                balanced = False
            else:
                needed -= 1
                s.pop()

        index = index + 1

    if balanced and len(s)==0:
        return [string, True, "Needed: {} (NO) parenthesis.".format(needed)]
    else:
        return [string, False, "Needed: {} more parenthesis.".format(needed)]

print("Only parenthesis matcher")
print(bracketMatcher('((()))'))
print(bracketMatcher('(()()'))
print(bracketMatcher(')('))
print(bracketMatcher('))))))'))

"""All kinds of bracket matcher"""

from collections import deque
def allBracketMatcher(string):
    s = deque()
    balanced = True
    index = 0
    needed = 0
    while index < len(string):
        symbol = string[index]
        if symbol in "([{":
            needed += 1
            s.append(symbol)
        else:
            if len(s)==0:
                needed += 1
                balanced = False
            else:
                needed -= 1
                s.pop()
        index += 1
        
    if balanced and len(s)==0:
        return [string, True, "Needed: {} (NO) bracket.".format(needed)]
    else:
        return [string, False, "Needed: {} more bracket(s).".format(needed)]

print("\nAll kinds of brackets matcher")
print(allBracketMatcher('{{([][])}()}'))
print(allBracketMatcher(')[{(()]'))
print(allBracketMatcher(']])[{{()]'))

In [1]:
"""
Deletion Distance
The deletion distance of two strings is the minimum number of characters you need to delete in the two 
strings in order to get the same string. For instance, the deletion distance between "heat" and "hit" is 3:

By deleting 'e' and 'a' in "heat", and 'i' in "hit", we get the string "ht" in both cases.
We cannot get the same string from both strings by deleting 2 letters or fewer.
Given the strings str1 and str2, write an efficient function deletionDistance that returns the deletion distance between them. Explain how your function works, and analyze its time and space complexities.

Examples:

input:  str1 = "dog", str2 = "frog"
output: 3

input:  str1 = "some", str2 = "some"
output: 0

input:  str1 = "some", str2 = "thing"
output: 9

input:  str1 = "", str2 = ""
output: 0
"""

def deletionDistance(str1, str2):
    str1Len = len(str1)
    str2Len = len(str2)
    
    # allocate a 2D array with str1Len + 1 rows and str2Len + 1 columns
    memo = []

    for i in range(0, str1Len):
        for j in range(0, str2Len):
            if (i == 0):
                memo[i][j] = j  # Rule #1: This is true because if one string is the empty string, 
                                # we have no choice but to delete all letters in the other string.
            elif (j == 0):
                memo[i][j] = i  # Same as Rule #1 
                
            elif (str1[i-1] == str2[j-1]):        # Rule #2: This holds since we don’t need to delete the last 
                memo[i][j] = memo[i-1][j-1]       # letters in order to get the same string, we simply 
                                                  # use the same deletions we would to the (i-1)'th and 
                                                  # (j-1)'th prefixes.
            else:
                # Rule #3: This holds since we need to delete at least one of the letters str1[i] or str2[j] 
                # and the deletion of one of the letters is counted as 1 deletion (hence the 1 in the 
                # formula). Then, since we’re left with either the (i-1)'th prefix of str1, or the
                # (j-1)'th prefix of str2, need to take the minimum between opt(i-1,j) and opt(i,j-1).
                # We, therefore, get the equation opt(i,j) = 1 + min(opt(i-1,j), opt(i,j-1)).
                memo[i][j] = 1 + min(memo[i-1][j], memo[i][j-1])

    return memo[str1Len][str2Len]

"""
Time Complexity: we have a nested loop that executes O(1) steps at every iteration, 
thus we the time complexity is O(N⋅M) where N and M are the lengths of str1 and str2, respectively.

Space Complexity: we save every value of opt(i,j) in our memo 2D array, 
which takes O(N⋅M) space, where N and M are the lengths of str1 and str2, respectively.
"""
print(deletionDistance("", ""))
print(deletionDistance("", "hit"))
print(deletionDistance("neat", ""))
print(deletionDistance("heat", "hit"))
print(deletionDistance("hot", "not"))
print(deletionDistance("some", "thing"))
print(deletionDistance("abc", "adbc"))
print(deletionDistance("awesome", "awesome"))
print(deletionDistance("ab", "ba"))

IndexError: list index out of range

In [8]:
a = []
for i in range(8):
    for j in range(8):
        a.append([])
        
for i in a:
    print(a)

print("rows:", len(a))
print("columns:", (a[0][1]))

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], 

IndexError: list index out of range