A notebook containing the solutions to most of the CS whiteboarding questions asked during Insight Toronto Session 19C, as well as some other miscellaneous questions from Stats and Machine Learning done during whiteboarding. If imports are required, they are included above each question (instead of in the imports section) so that each section can be run on its own.

## Imports

Not necessary to run this, but I put these in almost every notebook.

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# If input or output variables using extraneous memory run below specify (in or out)
# %reset -f out
# %reset -f in

def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    Adapted from Fred Cirera, https://stackoverflow.com/a/1094933/1870254.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    list of prefixes (up to 'Zi') is exhausted, in which case 'Yi' is used.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num
    position = 0
    while position < len(prefixes):
        if abs(scaled) < 1024.0:
            return "%3.1f %s%s" % (scaled, prefixes[position], suffix)
        scaled = scaled / 1024.0
        position += 1
    # Anything still >= 1024 after 'Zi' is reported in 'Yi'.
    return "%.1f %s%s" % (scaled, 'Yi', suffix)

def get_var_sizes(local_vars=None, max_num=12):
    """Print the largest variables by shallow size, biggest first.

    Args:
        local_vars: iterable of ``(name, value)`` pairs, for example
            ``locals().items()``. When omitted, the global namespace of the
            module defining this function is inspected at call time.
        max_num: maximum number of variables to print.

    Sizes come from ``sys.getsizeof``, which is shallow: it does not follow
    references into contained objects.
    """
    import sys

    if local_vars is None:
        local_vars = globals().items()
    # Snapshot the pairs first so the namespace cannot change mid-iteration.
    sizes = [(name, sys.getsizeof(value)) for name, value in list(local_vars)]
    for name, size in sorted(sizes, key=lambda pair: -pair[1])[:max_num]:
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
get_var_sizes()

## CS: Whiteboarding (Leetcode type)

Categories describing the difficulty are just my guess to what they would be on Leetcode or elsewhere. 

### Hard

#### Return a Deep Copy of a List

A linked list is given such that each node contains an additional random pointer which could point to any node in the list or null.

Return a deep copy of the list.

In [None]:
class Node:
    """Linked-list node that stores a value, a ``next`` link and a ``random`` link."""

    def __init__(self, val, next, random):
        # val: payload; next: following node (or None);
        # random: arbitrary node reference (or None).
        self.val, self.next, self.random = val, next, random

In [None]:
def copyRandomList(head: 'Node') -> 'Node':
    """Return a deep copy of a linked list whose nodes carry a ``random`` pointer.

    Two passes are made over the original list:
      1. create one fresh node per original node and remember the mapping
         original node -> copy;
      2. wire up ``next`` and ``random`` of every copy through that mapping.

    Returns None when ``head`` is None. A ``random`` pointer that refers to a
    node not reachable through ``next`` raises KeyError.
    """
    if head is None:
        return None

    # None maps to None so that a missing next/random pointer needs no special case.
    map_old_to_new = {None: None}

    # Pass 1: clone every node (links are filled in during pass 2).
    cur = head
    while cur is not None:
        map_old_to_new[cur] = Node(cur.val, None, None)
        cur = cur.next

    # Pass 2: translate the original links into links between the copies.
    cur = head
    while cur is not None:
        clone = map_old_to_new[cur]
        clone.next = map_old_to_new[cur.next]
        clone.random = map_old_to_new[cur.random]
        cur = cur.next

    return map_old_to_new[head]


In [None]:
# Make the graph
node1 = Node(1, None, None)
node2 = Node(2, None, None)
node1.next = node2
node1.random = node2
node2.random = node2

graph = copyRandomList(node1)

# Checks to make sure the graph was created correctly
assert graph.val == 1
assert graph != node1
assert graph.next.val == node2.val
assert graph.random != node2
assert graph.random == graph.next
assert graph.random.val == 2
assert graph.next.random == graph.next
assert graph.next.random.val == 2

#### Egg dropping problem (n eggs, k floors)

Functions to get minimum number of trials  needed in worst case with n eggs and k floors.


In [None]:
INT_MAX = 32767
def eggDrop_recur(n, k):
    """Return the minimum number of trials needed in the worst case with
    n eggs and k floors, computed by plain (exponential) recursion.
    """
    # Zero floors need no trial and one floor needs one trial; with a single
    # egg every floor has to be tried in turn, i.e. k trials.
    if k in (0, 1) or n == 1:
        return k

    # For each candidate floor x the egg either breaks (n-1 eggs, x-1 floors
    # below) or survives (n eggs, k-x floors above); keep the worse outcome.
    worst_cases = [INT_MAX]
    for x in range(1, k + 1):
        broke = eggDrop_recur(n - 1, x - 1)
        survived = eggDrop_recur(n, k - x)
        worst_cases.append(max(broke, survived))

    # The best floor to drop from, plus the drop itself.
    return min(worst_cases) + 1
  
assert eggDrop_recur(2, 10) == 4

In [None]:
INT_MAX = 32767
def eggDrop_dynamicp(n, k):
    """Return the minimum number of trials needed in the worst case with
    n eggs and k floors, computed bottom-up with dynamic programming.

    eggFloor[i][j] holds the answer for i eggs and j floors.
    """
    # No floors means no trials; also avoids indexing column 1 of a
    # one-column table.
    if k == 0:
        return 0

    eggFloor = [[0] * (k + 1) for _ in range(n + 1)]

    # One floor always needs exactly one trial (column 0 is already 0).
    for i in range(1, n + 1):
        eggFloor[i][1] = 1

    # With a single egg every floor must be tried in turn: j trials.
    for j in range(1, k + 1):
        eggFloor[1][j] = j

    # Optimal substructure: dropping from floor x either breaks the egg
    # (i-1 eggs, x-1 floors) or not (i eggs, j-x floors); take the worst of
    # the two and minimise over x.
    for i in range(2, n + 1):
        for j in range(2, k + 1):
            eggFloor[i][j] = min(
                1 + max(eggFloor[i - 1][x - 1], eggFloor[i][j - x])
                for x in range(1, j + 1)
            )

    return eggFloor[n][k]

assert eggDrop_dynamicp(2, 10) == 4
assert eggDrop_dynamicp(2, 100) == 14

#### Coin Change

Given a value N, if we want to make change for N cents, and we have infinite supply of each of `S = { S1, S2, .. , Sm}` valued coins, how many ways can we make the change? The order of coins doesn’t matter.

For example, for N = 4 and S = {1,2,3}, there are four solutions: {1,1,1,1},{1,1,2},{2,2},{1,3}. So output should be 4. For N = 10 and S = {2, 5, 3, 6}, there are five solutions: {2,2,2,2,2}, {2,2,3,3}, {2,2,6}, {2,3,5} and {5,5}. So the output should be 5.

[Dynamic Programming solution here](https://www.geeksforgeeks.org/coin-change-dp-7/)

In [None]:
def count_coins(S: "Sequence[int]", n: int) -> int:
    """Count the ways we can sum the coins in S to get sum n.

    Each coin value may be used any number of times and the order of the
    coins does not matter. Coin values are expected to be positive; a zero
    value would make the recursion endless.
    """
    if n == 0:
        return 1  # exactly one way: take nothing more
    if n < 0 or not S:
        return 0  # overshot the target, or no coins left for a positive n

    # Total = ways that never use the last coin + ways that use it at least once.
    return count_coins(S[:-1], n) + count_coins(S, n - S[-1])

In [None]:
assert count_coins([1,2,3], 4) == 4
assert count_coins([2, 5, 3, 6], 10) == 5

### Medium

#### Array of squares (non-decreasing order)

Given an array of integers A sorted in non-decreasing order, return an array of the squares of
each number, also in sorted non-decreasing order.

In [None]:
def array_of_squares(A):
    """Return the squares of the sorted sequence A in non-decreasing order.

    Two indexes walk inwards from both ends; whichever end has the larger
    magnitude contributes the next-largest square.
    """
    lo = 0
    hi = len(A) - 1
    descending = []
    while not lo > hi:
        mag_lo = abs(A[lo])
        mag_hi = abs(A[hi])
        if mag_lo > mag_hi:
            # x*x is cheaper than x**2, and append is cheaper than += [..]
            descending.append(mag_lo * mag_lo)
            lo += 1
            continue
        descending.append(mag_hi * mag_hi)
        hi -= 1
    # Squares were collected largest-first; flip them into ascending order.
    descending.reverse()
    return descending

In [None]:
def array_of_squares_nonegs(A):
    """Return the squares of the sorted sequence A in non-decreasing order.

    Same two-index walk as the plain version, with a shortcut: as soon as the
    left index reaches a non-negative value, everything still unprocessed is
    non-negative and already sorted, so it can be squared in one go. This is
    faster when negative numbers are rare.
    """
    # Squares collected so far, largest first.
    answer = []
    l, r = 0, len(A) - 1
    while l <= r:
        if A[l] >= 0:
            # The remainder A[l..r] is ascending and non-negative, so its
            # squares are ascending and no larger than anything collected.
            return [x * x for x in A[l:r + 1]] + answer[::-1]
        left, right = abs(A[l]), abs(A[r])
        if left > right:
            # x*x is cheaper than x**2, and append is cheaper than += [..]
            answer.append(left * left)
            l += 1
        else:
            answer.append(right * right)
            r -= 1
    # Collected in decreasing order; reverse for non-decreasing order.
    return answer[::-1]

In [None]:
import collections
def array_of_squares_deque(A):
    """Return the squares of the sorted sequence A in non-decreasing order.

    Variant that pushes each square onto the left of a deque, so the result
    comes out in ascending order without a final reversal.
    """
    ordered = collections.deque()
    lo = 0
    hi = len(A) - 1
    while lo <= hi:
        mag_lo = abs(A[lo])
        mag_hi = abs(A[hi])
        take_left = mag_lo > mag_hi
        if take_left:
            ordered.appendleft(mag_lo * mag_lo)
            lo += 1
        else:
            ordered.appendleft(mag_hi * mag_hi)
            hi -= 1
    return list(ordered)

In [None]:
start=-1000
end=100000
skip=3
%timeit array_of_squares(range(start,end,skip))
%timeit array_of_squares_nonegs(range(start,end,skip))
%timeit array_of_squares_deque(range(start,end,skip))

In [None]:
assert array_of_squares([-10,-2,0,1,2,3]) == [0, 1, 4, 4, 9, 100]
assert array_of_squares_deque([-10,-2,0,1,2,3]) == [0, 1, 4, 4, 9, 100]
assert array_of_squares([-1,1,2,3]) == [1,1, 4, 9]
assert array_of_squares_deque([-1,1,2,3]) == [1,1, 4, 9]

#### All permutations of a given string using recursion

Write a program to print all permutations of a given string using recursion.  
A permutation, also called an “arrangement number” or “order,” is a rearrangement of the elements of an
ordered list `S` into a one‐to‐one correspondence with `S` itself. A string of length `n` has `n!` permutations.  
Below are the permutations of string `ABC`.
`[ABC ACB BAC BCA CBA CAB]`

In [None]:
def all_permutations(s, depth=0, perms=None):
    """Collect every permutation of the string s by recursive swapping.

    Position ``depth`` is swapped with each position at or after it, and the
    resulting string is permuted from ``depth + 1`` onwards. Results are
    appended to ``perms`` (created on the first call) and returned.
    """
    if perms is None:
        perms = []
    if depth == len(s) - 1:
        # Only one position left: the string is a finished permutation.
        perms.append(s)
        return perms
    for ind in range(depth, len(s)):
        chars = list(s)
        chars[depth], chars[ind] = s[ind], s[depth]
        all_permutations(''.join(chars), depth + 1, perms)
    return perms

In [None]:
# Much more efficient using lists and backtrack than strings
def all_permutations_backtrack(a, l=0, r=None, perms=None):
    """Collect every permutation of the list of characters a by backtracking.

    Args:
        a: list of single-character strings; permuted in place and restored
            to its original order before returning.
        l: first index of the range still to be permuted.
        r: last index of that range; defaults to ``len(a) - 1``.
        perms: accumulator list, created on the first call.

    Returns:
        The list of permutations, each joined into a string.
    """
    # A default of len(a)-1 cannot go in the signature: defaults are
    # evaluated when the function is defined, before any `a` exists.
    if r is None:
        r = len(a) - 1
    if perms is None:
        perms = []
    if l == r:
        perms.append(''.join(a))
    else:
        for i in range(l, r + 1):
            a[l], a[i] = a[i], a[l]
            all_permutations_backtrack(a, l + 1, r, perms)
            a[l], a[i] = a[i], a[l]  # backtrack
    return perms

In [None]:
%timeit all_permutations("abcdefg")
%timeit all_permutations_backtrack(list("abcdefg"))

In [None]:
assert all_permutations("abc") == ['abc', 'acb', 'bac', 'bca', 'cba', 'cab']
assert all_permutations_backtrack(list("abc")) == ['abc', 'acb', 'bac', 'bca', 'cba', 'cab']

#### Fibonacci Numbers

Memory saving way (non-recursive so doesn't have a bunch of calls on the stack):

In [None]:
def fibonacci(n):
    """Return the n-th Fibonacci number iteratively (0-based: F(0)=0, F(1)=1).

    Only two running values are kept, so memory use is constant and there is
    no recursion. For negative n, "Incorrect input" is printed and None is
    returned.
    """
    if n < 0:
        print("Incorrect input")
        return None
    a, b = 0, 1
    # After i iterations a == F(i); n iterations are needed to reach F(n).
    for _ in range(n):
        a, b = b, a + b
    return a

Dynamic programming (memoization) approach: Fibonacci numbers that have already been computed are stored in a list and reused instead of being recalculated.

In [None]:
# Cache of Fibonacci numbers computed so far; FibArray[i] is the (i+1)-th number.
FibArray = [0,1] 
  
def fibonacci(n): 
    """Return the n-th Fibonacci number using a shared cache (1-based).

    Indexing is 1-based: fibonacci(1) == 0, fibonacci(2) == 1,
    fibonacci(3) == 1, ... Values are stored in the module-level FibArray
    and reused by later calls.

    For n < 1, "Incorrect input" is printed and None is returned.
    """
    if n < 1:
        print("Incorrect input")
        return None
    # Grow the cache one entry at a time until it covers position n; doing
    # this in a loop avoids hitting the recursion limit for large n.
    while len(FibArray) < n:
        FibArray.append(FibArray[-1] + FibArray[-2])
    return FibArray[n-1]

#### Find days until warmer temperature in list

Given a list of daily temperatures T, return a list such that, for each day in the input, tells you how many days you would have to wait until a warmer temperature. If there is no future day for which this is possible, put 0 instead.
For example, given the list of temperatures T = [73, 74, 75, 71, 69, 72, 76, 73], your output should be [1, 1, 4, 2, 1, 1, 0, 0].

Note: The length of temperatures will be in the range [1, 30000]. Each temperature will be an integer in the range [30, 100].

In [None]:
from typing import List

In [None]:
def warmer_temps_store_index(temps: List[int]) -> List[int]:
    """For each day, return how many days pass until a warmer temperature.

    The list is scanned from the last day backwards while remembering, for
    every temperature value, the most recently seen (i.e. nearest future)
    index. For each day the smallest such index among all strictly warmer
    temperatures is the answer; when none exists the entry stays 0.
    """
    never = float('inf')
    nearest_day = [never] * 102
    waits = [0] * len(temps)
    for day in reversed(range(len(temps))):
        today = temps[day]
        # closest future day among all strictly warmer temperature values
        warmer_day = min(nearest_day[hotter] for hotter in range(today + 1, 102))
        if warmer_day < never:
            waits[day] = warmer_day - day
        nearest_day[today] = day
    return waits
    

In [None]:
def warmer_temps_stack(temps: List[int]) -> List[int]:
    """For each day, return how many days pass until a warmer temperature.

    The list is scanned from the last day backwards while a stack holds the
    indexes of future days, hottest at the bottom and coolest on top. Days
    that are not warmer than the current one can never be the answer for
    any earlier day either, so they are discarded.
    """
    total_days = len(temps)
    waits = [0] * total_days
    candidates = []  # indexes of future days, hottest first
    for day in reversed(range(total_days)):
        # Discard future days that are not strictly warmer than today.
        while candidates:
            if temps[candidates[-1]] > temps[day]:
                break
            candidates.pop()

        if candidates:
            waits[day] = candidates[-1] - day
        candidates.append(day)
    return waits

In [None]:
# Both implementations must agree on the example from the problem statement.
assert warmer_temps_store_index([73, 74, 75, 71, 69, 72, 76, 73]) == [1, 1, 4, 2, 1, 1, 0, 0]
assert warmer_temps_stack([73, 74, 75, 71, 69, 72, 76, 73]) == [1, 1, 4, 2, 1, 1, 0, 0]

### Easy

#### Max profit from stock ticker

Calculate the maximum profit (given an array representing the closing stock price each day) if you can only buy and sell once.

In [None]:
def max_buy_sell(arr):
    """Return the best single buy/sell profit and the [buy, sell] day indexes.

    The cheapest price seen so far is tracked while scanning once; each day
    is evaluated as a potential selling day against it. When no profitable
    trade exists the result is (0, [0, 0]).
    """
    low_price, low_day = arr[0], 0
    best_gain = 0
    best_days = [0, 0]
    for day in range(1, len(arr)):
        price = arr[day]
        gain = price - low_price
        if gain > best_gain:
            best_gain = gain
            best_days = [low_day, day]
        if price < low_price:
            # new cheapest buying opportunity for later days
            low_price, low_day = price, day
    return best_gain, best_days

In [None]:
assert max_buy_sell([0,1,2,3,4,1,2,5,6,-1,2,5,2,3,4]) == (6, [0, 8])
assert max_buy_sell([0,1,2,3,4,1,2,5,6,-1,2,7,2,3,4]) == (8, [9, 11])
assert max_buy_sell([6,2,3,1,0]) == (1, [1,2])
assert max_buy_sell([6,4,3,2,1]) == (0, [0,0])

#### Pairs of integers whose sum equals a given number

How do you find all pairs of integers in an integer array whose sum is equal to a given number?

In [None]:
def all_pairs_sum_count(arr, num):
    """Return every [earlier, later] pair of values in arr that sums to num.

    A set of the values seen so far is kept; each new value is paired with
    its complement if that complement has already been seen. A value that
    repeats therefore produces one pair per repeat.
    """
    seen = set()
    found = []
    for value in arr:
        complement = num - value
        if complement in seen:
            found.append([complement, value])
        seen.add(value)
    return found

assert all_pairs_sum_count([1,2,3,19,10,10,9,11], 20) == [[1,19],[10,10],[9,11]]
# Not sure how duplicate pairs (like 10 three times) are supposed to be treated 
# could be tracked using a dictionary instead of a set()
# Then either discarded or a count kept of the number of those pairs seen
assert all_pairs_sum_count([1,2,3,19,10,10,10,9,11], 20) == [[1,19],[10,10],[10,10],[9,11]]

#### Remove duplicates in a sorted array in-place

Given a sorted array nums, remove the duplicates in-place such that each element appears only once and return the new length. Do not allocate extra space for another array; you must do this by modifying the input array in-place with O(1) extra memory.

In [None]:
def remove_dups_in_place(nums):
    """Compact the sorted list nums so its unique values come first.

    Returns the number of unique values; the first that many slots of nums
    hold them in order. Slots beyond that are left with whatever they had.
    """
    if len(nums) < 1:
        return 0
    write = 1  # next slot that receives a not-yet-seen value
    for read in range(1, len(nums)):
        if not nums[read] != nums[read - 1]:
            continue  # same as its left neighbour: a duplicate
        if write != read:
            nums[write] = nums[read]
        write += 1
    return write

assert remove_dups_in_place([1,2,2]) == 2
assert remove_dups_in_place([1,1,2]) == 2
assert remove_dups_in_place([1]) == 1
assert remove_dups_in_place([]) == 0

#### Largest Perimeter of a Triangle

Given an array A of positive lengths, return the largest perimeter of a triangle with non-zero area, formed from 3 of these lengths.

If it is impossible to form any triangle of non-zero area, return 0.


In [None]:
def largest_perim(arr):
    """Return the largest perimeter of a non-degenerate triangle built from
    three of the lengths in arr, or 0 when no such triangle exists.

    After sorting in decreasing order, the first run of three neighbours in
    which the longest side is shorter than the sum of the other two gives
    the largest perimeter. The input list is left unmodified.
    """
    # Work on a sorted copy so the caller's list keeps its order.
    sides = sorted(arr, reverse=True)
    for longest, middle, shortest in zip(sides, sides[1:], sides[2:]):
        if longest < middle + shortest:
            return longest + middle + shortest
    # Fewer than three lengths, or no triple satisfies the triangle inequality.
    return 0

In [None]:
assert largest_perim([1,2]) == 0
assert largest_perim([1,2,2]) == 5
assert largest_perim([1,2,5]) == 0
assert largest_perim([1,2,3,5,7,15]) == 15

#### Longest non-repeating substring (by characters) in a string

Return the length of the longest substring with non-repeating characters.

In [None]:
def non_repeating_substring(s):
    """Return the length of the longest substring of s without repeated characters.

    Sliding window: ``window_start`` marks the left edge of the current
    repeat-free substring. When a character already inside the window shows
    up again, the window is shrunk to start just after its previous
    occurrence rather than being thrown away entirely.
    """
    last_seen = {}  # character -> index of its most recent occurrence
    window_start = 0
    best = 0
    for idx, char in enumerate(s):
        previous = last_seen.get(char, -1)
        if previous >= window_start:
            window_start = previous + 1
        last_seen[char] = idx
        best = max(best, idx - window_start + 1)
    return best

In [None]:
assert non_repeating_substring('abaacde') == 4
assert non_repeating_substring('abaacdeacde') == 4
assert non_repeating_substring('abcde') == 5
assert non_repeating_substring('abaac') == 2
assert non_repeating_substring('') == 0
assert non_repeating_substring('aaaa') == 1

#### Convert a roman numeral to an integer

Given a Roman numeral, convert it to an integer. Input is guaranteed to be within the range from 1 to 3999.

In [None]:
def convert_roman_to_num(s):
    """Convert the Roman numeral string s to an integer.

    Each symbol is compared with its right-hand neighbour: a symbol smaller
    than the one after it is subtracted (as in IV or CM), otherwise it is
    added. The final symbol has no neighbour and is always added.
    """
    symbol_value = {"I": 1, "V": 5, "X": 10, "L": 50,
                    "C": 100, "D": 500, "M": 1000}
    total = 0
    for symbol, following in zip(s, s[1:]):
        here = symbol_value[symbol]
        after = symbol_value[following]
        total += -here if here < after else here
    return total + symbol_value[s[-1:]]

In [None]:
assert convert_roman_to_num("MCMXCIV") == 1994
assert convert_roman_to_num("LVIII") == 58
assert convert_roman_to_num("IV") == 4
assert convert_roman_to_num("I") == 1
assert convert_roman_to_num("MMMCMXCIX") == 3999

#### Calculate maximum sum of a contiguous subarray

Given an integer array nums, find the contiguous subarray (containing at least one number)
which has the largest sum and return its sum.

In [None]:
def calculate_max_sum_subarry(nums):
    """Return the largest sum of any non-empty contiguous subarray of nums.

    Kadane's algorithm: ``cur_sum`` is the best sum of a subarray ending at
    the current element, which is either the element alone or the element
    appended to the best subarray ending just before it.

    Raises IndexError when nums is empty (there is no non-empty subarray).
    """
    max_sum = cur_sum = nums[0]
    for value in nums[1:]:
        # Starting afresh beats extending whenever the running sum is negative.
        cur_sum = max(value, cur_sum + value)
        max_sum = max(max_sum, cur_sum)
    return max_sum
            
assert calculate_max_sum_subarry([1, -5, -10, -2, 2]) == 2
assert calculate_max_sum_subarry([1, 3, -1, 5, 2]) == 10
assert calculate_max_sum_subarry([1, -1, 5, 2]) == 7
assert calculate_max_sum_subarry([1, -10, -5, -2]) == 1
assert calculate_max_sum_subarry([-10, -5, -2]) == -2
assert calculate_max_sum_subarry([-2, -5]) == -2
assert calculate_max_sum_subarry([-5]) == -5

#### Random uniform sampling from triangle

Write a Python function sampleFromTriangle(n) that return a list of n points (x,y) uniformly sampled from within the triangle bounded by (0,0),(0,1),(1,0). Use random.uniform(a, b)  from the random package.

In [None]:
def turn_off_top_right_axis_ticks(ax):
    """Hide the right and top spines of ax and keep ticks on the left/bottom only."""
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')


In [None]:
import random
import matplotlib.pyplot as plt

def sample_from_triangle(n, method='throwaway'):
    """Return n points (x, y) sampled from the triangle (0,0), (0,1), (1,0).

    Candidate points are drawn uniformly from the unit square. Points below
    the diagonal x + y = 1 are kept. The others are discarded by default, or,
    with method='remap', mirrored through the centre of the square so that
    they land inside the triangle.
    """
    mirror_rejects = method == 'remap'
    accepted = []
    while len(accepted) < n:
        a = random.uniform(0, 1)
        b = random.uniform(0, 1)
        inside = a + b < 1
        if inside:
            accepted.append((a, b))
        elif mirror_rejects:
            # reflect the point back into the triangle
            accepted.append((1 - a, 1 - b))

    return accepted

# Both triangles seeded the same (would get the same points 
# if the points outside the triangle not thrown away)
rand_seed = random.randint(1,100000)
random.seed(rand_seed)
points = sample_from_triangle(200)
random.seed(rand_seed)
points_re = sample_from_triangle(200, 'remap')
# unzip points into x, y for plotting
x, y = zip(*points)
x_re, y_re = zip(*points_re)

# Setup and plot both triangles
fig = plt.figure(figsize=(9,4))

ax = fig.add_subplot(1, 2, 1, xlim=(0,1), ylim=(0,1))
ax.scatter(x, y)
ax.plot((0,1), (1,0), 'r--')
turn_off_top_right_axis_ticks(ax)

ax1 = fig.add_subplot(1, 2, 2, xlim=(0,1), ylim=(0,1))
ax1.scatter(x_re, y_re)
ax1.plot((0,1), (1,0), 'r--')
turn_off_top_right_axis_ticks(ax1)

#### Transpose a Matrix

##### Standard

In [None]:
def transpose(A, B, M=3, N=4): 
    """Write the transpose of the M x N matrix A into the N x M matrix B.

    B must already exist with N rows of at least M entries each; it is
    filled in place and nothing is returned.
    """
    out_row = 0
    while out_row < N:
        out_col = 0
        while out_col < M:
            B[out_row][out_col] = A[out_col][out_row]
            out_col += 1
        out_row += 1

##### One Line (using Pythons fancy list indexing)

In [None]:
def transpose_fancy_index(A):
    """Return the transpose of the list-of-lists matrix A.

    The rows are concatenated into one flat list; column i of A is then
    every width-th element of that list starting at offset i.
    """
    width = len(A[0])
    flat = sum(A, [])
    columns = []
    for offset in range(width):
        columns.append(flat[offset::width])
    return columns

In [None]:
def transpose_fancy_index_one_line(A):
    """Return the transpose of the list-of-lists matrix A in one expression.

    Column i is every len(A[0])-th element of the flattened matrix starting
    at offset i. Note that the matrix is re-flattened for every column, so
    this is a compact form rather than an efficient one.
    """
    return [sum(A, [])[i::len(A[0])] for i in range(len(A[0]))]

##### One Line (using list comprehension)

In [None]:
# mat = [[1,2],[3,4],[5,6]] 
mat = [[1,2,3],[4,5,6],[7,8,9]] 
new_mat = [[row[i] for row in mat] for i in range(len(mat[0]))]
print("Transpose is:")
for row in new_mat: 
    print(row) 

##### One Line (using zip)

In [None]:
# mat = [[1,2],[3,4],[5,6]] 
mat = [[1,2,3],[4,5,6],[7,8,9]] 
result_zip = zip(*mat) 
print("Transpose using zip is:")
for row in result_zip:
    print(row) 

#### Check if Two Strings are Permutation of Each Other

Given two strings, write a method to decide if one is a permutation of the other.

In [None]:
from collections import defaultdict
import re


def is_permutation(str1: str, str2: str, case: bool = True, space: bool = True) -> bool:
    """Tell whether str1 and str2 are permutations of each other.

    By default both letter case and whitespace are significant. With
    case=False the comparison ignores case; with space=False all whitespace
    (spaces, tabs, newlines) is removed before comparing.
    """
    if case == False:
        str1, str2 = str1.lower(), str2.lower()
    if space == False:
        # regex strips every kind of whitespace, not only plain spaces
        str1, str2 = (re.sub(r"\s+", "", text) for text in (str1, str2))
    if len(str1) != len(str2):
        return False
    # One tally for both strings: +1 for each character of str1, -1 for each
    # character of str2. They are permutations exactly when everything
    # cancels out to zero.
    balance = defaultdict(int)
    for from_first, from_second in zip(str1, str2):
        balance[from_first] += 1
        balance[from_second] -= 1
    return not any(balance.values())


assert is_permutation("blah", "halb") == True
assert is_permutation("blah", "halB") == False
assert is_permutation("bLah", "halB") == False
assert is_permutation("bla h", "halb ") == True
assert is_permutation("bla h", "halb") == False
assert is_permutation("bla hh", "halb") == False
assert is_permutation("bla h\n", "h\nalb ") == True
assert is_permutation("bla h\n", "h\n\nalb ") == False
assert is_permutation("bLah", "halB", case=False, space=False) == True
assert is_permutation("bla h", "halb", case=False, space=False) == True
assert is_permutation("bla hh", "halb", case=False, space=False) == False
assert is_permutation("bla h\nh", "h\n\nalb", case=False, space=False) == False
assert is_permutation("bla \t\nh\n", "halb\n\n\n",
                      case=False, space=False) == True

#### Counting valleys and peaks of mountain

Gary is a hiker and for every step he took he noted down if it was an uphill, U , or a downhill, D , step. Gary's hikes start and end at sea level and each step up or down represents a unit change in altitude.

We define the following terms: 
  ● A mountain is a sequence of consecutive steps above sea level, starting with a step up from sea level and ending with a step down to sea level. 
  ● A valley is a sequence of consecutive steps below sea level, starting with a step down from sea level and ending with a step up to sea level. 

Given Gary's sequence of up and down steps during his last hike, find and print the number of (peaks, valleys) he walked through ? 


In [None]:
def find_num_peaks_valleys(s):
    """Count the (peaks, valleys) walked through in a string of U and D steps.

    Only gives correct counts when the hike starts and ends at sea level.
    Any character other than "D" is treated as a step up.
    """
    altitude = 0
    peak_count = 0
    valley_count = 0
    for step in s:
        # Remember where we were, to know which side sea level is reached from.
        before = altitude
        altitude += -1 if step == "D" else 1
        if altitude != 0:
            continue
        if before < 0:
            valley_count += 1  # climbed back up out of a valley
        else:
            peak_count += 1    # came back down from a mountain
    return peak_count, valley_count

assert find_num_peaks_valleys("DDUUUUUDDD") == (1, 1) # 1 peak and 1 valley
assert find_num_peaks_valleys("DUDUDU") == (0, 3)
assert find_num_peaks_valleys("UUDUDUDUDD") == (1, 0)

#### Determine if input string of different brackets is valid "[({})]"

Given a string containing just the characters '(', ')', '{', '}', '[' and ']', determine if the input string is valid.   
An input string is valid if:  
  • Open brackets must be closed by the same type of brackets.  
  • Open brackets must be closed in the correct order.  
  • Note that an empty string is also considered valid.  


In [None]:
b = []
b.append('[')
b.append('{')
a = b.pop()
print(a, b)

In [None]:
def brackets_are_valid(s: str) -> bool:
    """Tell whether every bracket in s is closed by the right type, in order.

    Spaces are ignored; any other non-bracket character makes the string
    invalid. An empty string is valid.
    """
    close_to_open = {')': '(', '}': '{', ']': '['}
    opening = set(close_to_open.values())
    brac_stack = []
    for brac in s:
        if brac in opening:
            brac_stack.append(brac)
        elif brac in close_to_open:
            # A closer with nothing open, or one that does not match the
            # most recent opener, invalidates the string.
            if not brac_stack or brac_stack.pop() != close_to_open[brac]:
                return False
        elif brac != " ":
            # Non-bracket characters other than a space are rejected.
            # Remove this branch to ignore arbitrary characters instead.
            return False
    # Anything left on the stack was opened but never closed.
    return not brac_stack
    

In [None]:
assert brackets_are_valid("()") == True
assert brackets_are_valid("") == True
assert brackets_are_valid("()[]{}") == True
assert brackets_are_valid("(]") == False
assert brackets_are_valid("(]") == False
assert brackets_are_valid("([)]") == False
assert brackets_are_valid("{[()]}") == True
assert brackets_are_valid("{[()]") == False
assert brackets_are_valid("{ [( )]}") == True

#### Maximum amount of split balanced strings

Balanced strings are those that have an equal number of ’L’ and ’R’ characters. Given a
balanced string s, split it into the maximum number of balanced strings. Return the maximum
number of balanced strings obtained.  

**Example 1:**  
Input: s = "RLRRLLRLRL"  
Output: 4  
Explanation: s can be split into "RL", "RRLL", "RL", "RL", each substring contains same number of ’L’ and ’R’.

In [None]:
def split_strings(s: str) -> int:
    """Return the maximum number of balanced pieces s can be split into.

    A running balance goes up for every 'R' and down for any other
    character; each time it returns to zero a balanced piece has just ended.
    """
    if not len(s):
        return 0
    balance = 0
    pieces = 0
    for letter in s:
        balance += 1 if letter == 'R' else -1
        if not balance:
            pieces += 1

    return pieces

assert split_strings('RLRRLLRLRL') == 4
assert split_strings('RLLLLRRRLR') == 3
assert split_strings('LLLLRRRR') == 1

#### Given two binary strings, return their sum

In [None]:
def binary_sum_oneline(str1: str, str2: str) -> str:
    """Add two binary strings using the built-in base-2 parsing and formatting."""
    total = int(str1, 2) + int(str2, 2)
    return '{:b}'.format(total)

In [None]:
def get_num_from_binary_string_expanded(s: str) -> int:
    """Convert a binary string to an integer with an explicit loop.

    Characters are visited from the least significant end; every '1' adds
    the weight of its position. Any other character contributes nothing.
    """
    total = 0
    weight = 1  # value of the current bit position: 1, 2, 4, ...
    for bit in reversed(s):
        if bit == '1':
            total += weight
        weight *= 2
    return total

In [None]:
def get_num_from_binary_string(s: str) -> int:
    """Convert a binary string to an integer by summing the weights of its '1' bits."""
    return sum(1 << position
               for position, bit in enumerate(reversed(s))
               if bit == '1')

In [None]:
def binary_sum(str1: str, str2: str) -> str:
    """Add two binary strings and return the sum as a binary string.

    Both inputs are converted with get_num_from_binary_string, added, and
    the result is converted back by peeling off the lowest bit repeatedly.
    This works for sums of any size and returns '0' for a zero sum.
    """
    n_sum = get_num_from_binary_string(str1) + get_num_from_binary_string(str2)
    if n_sum == 0:
        return '0'
    bits = []
    while n_sum:
        # the lowest bit comes out first, so digits are gathered in reverse
        bits.append(str(n_sum % 2))
        n_sum //= 2
    return ''.join(reversed(bits))

In [None]:
assert binary_sum('11', '1') == '100'
assert binary_sum('11', '10') == '101'
assert binary_sum_oneline('11', '1') == '100'
assert binary_sum_oneline('11', '10') == '101'

#### Share contiguous segment of a bar such that length matches birth month and sum is birth day

Lily has a chocolate bar and she wants to share it with Ron for his birthday. Each of the squares has an integer on it. She decides to share a contiguous segment of the bar selected such that the length of the segment matches Ron's birth month and the sum of the integers on the squares is equal to his birth day. You must determine how many ways she can divide the chocolate.

Consider the chocolate bar as an array of squares, [2, 2, 1, 3, 2]. She wants to find segments summing to Ron's birth day, d=4, with a length equalling his birth month, m=2. In this case, there are two segments meeting her criteria: [2, 2]  and [1,3].



In [None]:
def share_bar(arr, d, m):
    """Count the contiguous segments of arr that have length m and sum d."""
    segment_starts = range(len(arr) - m + 1)
    return sum(1 for start in segment_starts
               if sum(arr[start:start + m]) == d)

assert share_bar([2, 2, 1, 3, 2], 4, 2) == 2

#### Replace letter i with number 1 in string

Write a function that takes in a string consisting of only letters and replaces any occurrence of the letter 'i' with the number '1'.  
`Example: input 'Insight' returns output '1ns1ght'`

In [None]:
def replace_i(s: str) -> str:
    """Replace every "I"/"i" in s with "1", building the result in a list."""
    pieces = []
    for char in s:
        pieces.append(str(1) if char.upper() == "I" else char)
    return "".join(pieces)

def replace_i_map(s: str) -> str:
    """Replace every "I"/"i" in s with "1" by mapping a helper over the characters."""
    def _one_for_i(char):
        if char.upper() == "I":
            return str(1)
        return char

    return "".join(map(_one_for_i, s))

def replace_i_strreplace(s: str) -> str:
    """Replace every "I"/"i" in s with "1" using str.replace().

    Strings are immutable, so str.replace() returns a new string; the calls
    are chained and the final result is returned.
    """
    str1 = str(1)
    return s.replace("i", str1).replace("I", str1)

%timeit replace_i("Insight") == '1ns1ght'
%timeit replace_i_map("Insight") == '1ns1ght'
%timeit replace_i_strreplace("Insight") == '1ns1ght'

#### Swaps the case of every character

Write a function that takes in a string consisting of only letters and swaps the case of every character.  
`Example: input 'Insight' returns output 'iNSIGHT'`

In [None]:
for cnum in [ord(char) for char  in "test"]:
    print(cnum)

In [None]:
def swap_case_charcodes(s: str) -> str:
    """Swap the case of every letter in s using character codes.

    A code above 90 (capital "Z") is treated as lower case and has 32
    subtracted to get the upper-case code; any other code has 32 added to
    get the lower-case code. Intended for strings made of ASCII letters only.
    """
    swapped = []
    for char in s:
        code = ord(char)
        if code > 90:
            swapped.append(chr(code - 32))
        else:
            swapped.append(chr(code + 32))
    return "".join(swapped)

def swap_case_map(s: str) -> str:
    """Swap case using a character lookup table built on every call.

    The table pairs each ASCII lower-case letter with its upper-case partner
    and vice versa. Characters that are not ASCII letters are returned unchanged.
    """
    lower = [chr(i) for i in range(ord("a"), ord("z") + 1)]
    # range() excludes its end, so the upper bound must be one past "Z".
    upper = [chr(i) for i in range(ord("A"), ord("Z") + 1)]
    swap = dict(zip(lower, upper))
    swap.update(zip(upper, lower))
    return "".join([swap.get(char, char) for char in s])

# Lookup tables built once, outside the function, so every call reuses them.
lower = [chr(i) for i in range(97, 123)]  # "a".."z"
upper = [chr(i) for i in range(65, 91)]   # "A".."Z" (range end is exclusive, so 91 not 90)
map_l_u = dict(zip(lower,upper))
map_u_l = dict(zip(upper,lower))
def swap_case_map_outside(s: str) -> str:
    """Swap case using the module-level maps ``map_l_u`` and ``map_u_l``.

    Lower-case ASCII letters are looked up in ``map_l_u`` and upper-case ones in
    ``map_u_l``. Characters found in neither map are returned unchanged.
    """
    return "".join([map_l_u[char]
                    if char in map_l_u else map_u_l.get(char, char)
                    for char in s])

def swap_case_str(s: str) -> str:
    """Swap case by delegating to the string's own ``swapcase()`` method.
    """
    swapped = s.swapcase()
    return swapped

%timeit assert swap_case_charcodes("Insight") == 'iNSIGHT'
%timeit assert swap_case_map("Insight") == 'iNSIGHT'
%timeit assert swap_case_map_outside("Insight") == 'iNSIGHT'
%timeit assert swap_case_str("Insight") == 'iNSIGHT'

## CS: Sorting and Searching

### Binary Search

In [None]:
def binary_search(alist, start, end, key):
    """Look for ``key`` in the sorted slice alist[start ... end - 1].

    Returns the index at which ``key`` was found, or -1 when it is absent
    (including when the slice is empty).
    """
    lo, hi = start, end
    # Invariant: if key is present, its index lies in the half-open range [lo, hi).
    while lo < hi:
        mid = (lo + hi)//2
        probe = alist[mid]
        if probe < key:
            lo = mid + 1      # key can only be to the right of mid
        elif probe > key:
            hi = mid          # key can only be to the left of mid
        else:
            return mid
    return -1

In [None]:
# Demo: binary search needs sorted input, so sort the sample values first.
arr = [8, 9, 3, 2, 6, 7, 4]
arr.sort()
print(arr)
key = 8
# Search the whole list: the half-open range [0, len(arr)).
start, end = 0, len(arr)
print("Index is:", binary_search(arr, start, end, key))

### Bubble Sort

Write a Python function that implements bubble sort on a non-empty array. What is the algorithm’s time complexity? How many times is the ‘if’ statement tested as a function of the length n of the array to sort? If your answer is $n^{2}$, propose a small modification of the algorithm that reduces it to $\frac{n(n+1)}{2}$. Following this modification, has the algorithm’s time complexity changed?

In [None]:
def swap_nums(arr, i, j):
    """Exchange the elements at positions ``i`` and ``j`` of ``arr`` in place."""
    arr[i], arr[j] = arr[j], arr[i]

def bubble_sort(nums):
    """Sort ``nums`` in place in ascending order with bubble sort and return it.

    nums: a mutable sequence of mutually comparable items.
    Returns the same (now sorted) list object.

    Worst-case time is O(n^2); an already-sorted input stops after a single
    pass, i.e. O(n).
    """
    # After each pass the largest remaining value sits at index ``last``, so
    # the next pass only scans nums[0..last].  This shrinks the number of
    # comparisons from n^2 to at most n(n-1)/2 (still O(n^2)).
    for last in range(len(nums) - 1, 0, -1):
        swapped = False
        for j in range(last):
            if nums[j] > nums[j + 1]:
                nums[j], nums[j + 1] = nums[j + 1], nums[j]
                swapped = True
        # A pass without any swap means the list is already sorted.
        if not swapped:
            break
    return nums
assert bubble_sort([1, 2, 3]) == [1,2,3]
assert bubble_sort([3, 2, 1]) == [1,2,3]
assert bubble_sort([10, 8, -10, 20, 5]) == [-10, 5, 8, 10, 20]
assert bubble_sort([64, 34, 25, 12, 22, 11, 90]) == [11, 12, 22, 25, 34, 64, 90]

## Stats

### Cross-Validation (SKLearn)

What are different cross validation strategies implemented in SKLearn?

In [None]:
from sklearn import datasets
from sklearn.model_selection import ShuffleSplit, cross_val_score
from sklearn import svm
from sklearn import metrics

iris = datasets.load_iris()
clf = svm.SVC(kernel='linear', C=1)

# Cross validate with f1-score (leave scoring blank to get accuracy)
# No overlap in testing values for each fold
cvs = cross_val_score(clf, iris.data, 
                      iris.target, cv=5, scoring='f1_macro')
print("Scores:", cvs)
print("Avg Score:", sum(cvs)/len(cvs))

# Can use ShuffleSplit to specify the size of the train/test sets for each split
# Therefore, there can be overlap in testing values across folds
cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
print("\nPrint out all the splits (shows test repeats values):")
# Split the sample matrix, not `iris` itself: `iris` is a dict-like container,
# so splitting it would index its fields instead of the data rows.
for tr, te in cv.split(iris.data):
    print("tr:", tr, "test:", te)
cvs1 = cross_val_score(clf, iris.data, iris.target, cv=cv)  
print("\nScores:", cvs1)
print("Avg Score:", sum(cvs1)/len(cvs1))

### ANOVA

In [None]:
import numpy as np

Use the data below, showing a summary of highway gas mileage for several observations, to decide if 
the average highway gas mileage is the same for midsize cars, SUV’s, and pickup trucks.

Test the appropriate hypotheses at the α = 0.01 level.

In [None]:
# Summary of highway gas mileage per vehicle class; position i of every
# array below belongs to cars[i].
cars = ['mid', 'suv', 'pickup']
mean = np.array([25.8, 22.68, 21.29])  # group means
n = np.array([31, 31, 14])  # presumably the number of observations per group -- confirm
std = np.array([2.56, 3.67, 2.76])  # presumably sample standard deviations -- confirm

#### Solution

$H_0$: all the group means are the same ($\mu_M = \mu_S = \mu_P$).  
$H_a$: $H_0$ is not true, i.e. at least one group mean differs from the others.

Run an ANOVA to check if the means of these three groups are different from each other.

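First find the Sum of Squares Between, which is the variability due to differences between the group means (here $y_{ij}$ is observation $j$ of group $i$, $n_i$ is the size of group $i$, $T$ is the grand total of all observations and $N = \sum_i n_i$):    
$SSbetween = \sum_i \frac{(\sum_j y_{ij})^2}{n_i} - \frac{T^2}{N}$  
When only summary statistics are available, as here, this equals $\sum_i n_i (\bar{y}_i - \bar{y})^2$, where $\bar{y} = \frac{\sum_i n_i \bar{y}_i}{N}$ is the grand mean.  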

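Then the variability in the data due to differences within each group (here $y_{ij}$ is observation $j$ of group $i$ and $n_i$ is the size of group $i$):  
$SSwithin = \sum Y^2 - \sum_i \frac{(\sum_j y_{ij})^2}{n_i}$  
From summary statistics this equals $\sum_i (n_i - 1) s_i^2$, with $s_i$ the sample standard deviation of group $i$.  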

Sum of Squares Total will be needed to calculate eta-squared later. This is the total variability in the data:  
$SStotal = \sum Y^2 - \frac{T^2}{N}$

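Then calculate (with $k$ the number of groups and $N$ the total number of observations):  
`DFbetween = k - 1`  
`DFwithin = N - k`  
`MSbetween = SSbetween/DFbetween`  
`MSwithin = SSwithin/DFwithin`  
`F = MSbetween/MSwithin`  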

Use F to compute the p-value, and reject the null hypothesis if $p < \alpha = 0.01$:
```
import scipy.stats as stats
p = stats.f.sf(F, DFbetween, DFwithin)
```


## Machine Learning

### Iris SKLearn Modeling

Run the Iris dataset through basic models in SKLearn and compare scores for different classes using the whole training set and with a train/test split.

#### No Train/Test Split

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [None]:
iris = load_iris()

In [None]:
print(iris.data.shape, iris.target_names)

In [None]:
# Logistic regression trained on the complete data set (no hold-out).
reg = LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=500)
reg.fit(iris.data, iris.target)

In [None]:
print("Accuracy is:", reg.score(iris.data, iris.target))

In [None]:
# One boolean mask per class label; later per-class cells reuse `targets`.
targets = tuple(iris.target == label for label in (0, 1, 2))
print("Per class accuracy is:")
for i, targ in enumerate(targets):
    # Score only the samples whose true label is class i.
    class_score = reg.score(iris.data[targ], iris.target[targ])
    print(iris.target_names[i], class_score)

In [None]:
# Gaussian naive Bayes trained on the complete data set (no hold-out).
naive = GaussianNB()
naive.fit(iris.data, iris.target)

In [None]:
print("Accuracy is:", naive.score(iris.data, iris.target))

In [None]:
print("Per class accuracy is:")
for i, targ in enumerate(targets):
    # Score only the samples whose true label is class i.
    class_score = naive.score(iris.data[targ], iris.target[targ])
    print(iris.target_names[i], class_score)

In [None]:
# Random forest with 100 trees trained on the complete data set (no hold-out).
rf = RandomForestClassifier(n_estimators=100)
rf.fit(iris.data, iris.target)

In [None]:
print("Accuracy is:", rf.score(iris.data, iris.target))

In [None]:
print("Per class accuracy is:")
for i, targ in enumerate(targets):
    # Score only the samples whose true label is class i.
    class_score = rf.score(iris.data[targ], iris.target[targ])
    print(iris.target_names[i], class_score)

#### Train and Test Split

In [None]:
# Hold out part of the data for testing. No random_state is passed, so the
# split (and every score computed from it) can differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

In [None]:
# Train on the training portion only, then report the score on the held-out test set.
reg = LogisticRegression(solver='lbfgs', max_iter=500, multi_class='auto')
reg.fit(X_train, y_train)
reg.score(X_test, y_test)

In [None]:
# One boolean mask per class label over the test set; later per-class cells reuse `targets`.
targets = tuple(y_test == label for label in (0, 1, 2))
print("Per class accuracy is:")
for i, targ in enumerate(targets):
    # Score only the test samples whose true label is class i.
    class_score = reg.score(X_test[targ], y_test[targ])
    print(iris.target_names[i], class_score)

In [None]:
# Train on the training portion only, then report the score on the held-out test set.
gauss = GaussianNB()
gauss.fit(X_train, y_train)
gauss.score(X_test, y_test)

In [None]:
print("Per class accuracy is:")
for i, targ in enumerate(targets):
    # Score only the test samples whose true label is class i.
    class_score = gauss.score(X_test[targ], y_test[targ])
    print(iris.target_names[i], class_score)

In [None]:
# Train on the training portion only, then report the score on the held-out test set.
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

In [None]:
# Per-class scores of the random forest on the held-out test set.
print("Per class accuracy is:")
for i, targ in enumerate(targets):
    # Score the random forest here (not the naive Bayes model from the
    # previous cells), using only the test samples whose true label is class i.
    print(iris.target_names[i], rf.score(X_test[targ], y_test[targ]))

## End