In [9]:
# Copying a dictionary into another

In [10]:
a = {"a":2, "an":5, "the":3}
b = a.copy()
print(b)

{'a': 2, 'an': 5, 'the': 3}


In [11]:
c = dict([("a",2),("an",5),("the",3)])
print(c)

{'a': 2, 'an': 5, 'the': 3}


In [12]:
d = dict.fromkeys(["a","an","the"], 10)
print(d)

{'a': 10, 'an': 10, 'the': 10}


In [13]:
# Dictionaries are mutable. Tuples are immutable

In [14]:
print(d["a"])
print(d.get("a"))

10
10


In [15]:
# d["abc"] gives error because the key "abc" doesn't exist in the dictionary.
# Whereas d.get("abc") doesn't give error and returns None.

In [16]:
print(d.get("abc"))
print(d.get("abc", 100)) # If the key is not present, it will return 100

None
100


In [17]:
print(a.keys()) # Returns all the keys
print(a.items()) # Returns pair values of keys, values

dict_keys(['a', 'an', 'the'])
dict_items([('a', 2), ('an', 5), ('the', 3)])


In [18]:
# Prints all the keys
for x in a:
    print(x)

a
an
the


In [19]:
# Prints all the values of keys

for x in a:
    print(a[x])

2
5
3


In [20]:
# Prints all the keys

for x in a.keys():
    print(x)

a
an
the


In [21]:
# Prints all the values of keys

for x in a.values():
    print(x)

2
5
3


In [24]:
# Updating one dictionary with another

dict1 = {'a':5,'b':10,'c':15}
dict2 = {'b':2,'c':3,'d':4}
dict1.update(dict2) # The values of common keys in dict1 will be updated as per dict2. 
# Any keys present in dict2 but not in dict1 will be added to dict1
dict1

{'a': 5, 'b': 2, 'c': 3, 'd': 4}

In [27]:
# Removing keys from a dictionary

a = {'a':5,'b':6,'c':7}
print(a.pop('b')) # a.pop('b') will print the value of the key 'b'
print(a)

6
{'a': 5, 'c': 7}


In [49]:
a = {'a':5,'b':6,'c':7,'d':6}
del a['c']
max(a)

'd'

In [34]:
a = {'a':5,'b':6,'c':7}
a.clear()
a

{}

In [None]:
del a # This deletes the dictionary itself

### --------------------------------------------------------------------------------------------------------------------------------------------------------------

In [35]:
# Print all the words with frequency k

In [40]:
def frequency(string, k):
    words = string.split(" ")
    freq = {}
    
    for word in words:
        freq[word] = freq.get(word,0) + 1
    
    for key in freq:
        if freq[key] == k:
            print(key)

In [41]:
def frequency_2(string):
    words = string.split(" ")
    freq = {}
    
    for word in words:
        if word not in freq:
            freq[word] = 1
        else:
            freq[word] += 1
    
    for key in freq:
        if freq[key] == k:
            print(key)

### --------------------------------------------------------------------------------------------------------------------------------------------------------------

In [50]:
# You are given an array of integers that contain numbers in random order. 
# Write a program to find and return the number which occurs the maximum times in the given input.
# If two or more elements contend for the maximum frequency, return the element which occurs in the array first.

In [51]:
def frequency(arr):
    freq = {}
    for num in arr:
        freq[num] = freq.get(num,0) + 1

    ans = arr[0]
    for num in arr:
        if freq[num] > freq[ans]:
            ans = num

    return ans

In [1]:
# Given a random integer array A of size N. Find and print the count of pair of elements in the array which sum up to 0.
# Note: Array A can contain duplicate elements as well.

In [4]:
def pair_sum(arr, n):
    count = 0
    for i in range(n):
        for j in range(i+1,n):
            if arr[i] + arr[j] == 0:
                count += 1
    return count

# Time complexity = O(n^2)

In [13]:
arr = [2,-2,1,-1,3,4,3,-1,-2,-3,0,0,0,0,0]
n = len(arr)
pair_sum(arr,n)

16

In [18]:
def pair_sum_2(arr, n):
    arr_dict = {}
    for element in arr:
        arr_dict[element] = arr_dict.get(element, 0) + 1
    count = 0
    for key in arr_dict:
        if key > 0:
            count += arr_dict[key] * arr_dict.get(-key, 0)
        if key == 0:
            x = arr_dict[key]
            count += int(((x-1)*x)/2)
    return count

# Time complexity = O(n)
# Space complexity = O(n)

In [19]:
pair_sum_2(arr,n)

16

In [20]:
# In a hashmap/dictionary, the insertion of key and access of key is very fast

**Building a hashmap/dictionary**

The keys of a hashmap are stored in a bucket array. Before storing the key in a bucket array, we need to convert the key into
an integer (the integer is an index of the bucket array) which is done with the help of a hashfunction.

What is hashfunction?
A hashfunction consists of 2 parts - Hashcode and Compression function.

When we pass a key through a hashfunction, we get a hashcode. The hashcode is then passed through the compression function. Compression function makes sure the integer (index) returned by hashfunction is less than the size of bucket array.

One example of compression function = hashcode % bucketsize

**Calculating Hashcode**

The hashcode of the string 'abc' can be represented as 10^2 * ascii value of 'a' + 10^1 * ascii value of 'b' + 10^0 * ascii value of 'c'

If the base is a prime number, we get a good hashcode
'abc' = p^2 * ascii value of 'a' + p^1 * ascii value of 'b' + p^0 * ascii value of 'c' where p is a prime number gives a good hashcode

**Collision Handling**

Sometimes we may get different hashcodes for 2 different strings. But when we pass the hashcodes through the compression function (hashcode % bucketsize), we may end up with the same answer. This is known as Collision Handling.

Suppose we get 2 hashcodes of 205 and 305 and suppose the bucket size is 20. Then after passing both the hashcodes through the compression function, we get the same index for both the keys. i.e 5. Multiple keys are having the same index.

**Handling Collision Handling**

Collision Handling is handled by 2 ways

**Closed Hashing**

Multiple elements are stored at the same index. Multiple elements at the same index are stored in the form of a linked list.

**Open Addressing**

Suppose the string(element) 'a' is stored at the index 'x'. i.e After passing the element through the hashfunction we get the index 'x'. Suppose index of another string (element) 'b' is also calculated by hashfunction as 'x'. Then we won't store the element 'b' at 'x'. But we will calculate the index of 'b' as hashfunction(b) + f(i) where i is the attempt number.

f(0) = 0 So index at 0th attempt is h(b). If this index is filled, we go to the next attempt (first attempt)
Index at 1st attempt is h(b) + f(1). If this index is filled, we go to the next attemp (second attempt). We keep repeating until we get an unfilled index

**Types of Open Addressing techniques**

In Linear Probing, f(i) = i
In Quadratic Probing, f(i) = i^2
In Double Hashing, f(i) = i * h'(b) where h'(b) is a new hashfunction of b.

In [1]:
# Create a hashmap

In [8]:
class MapNode:
    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.next = None

class Map:
    def __init__(self):
        self.bucketSize = 10
        self.buckets = [None for x in range(self.bucketSize)]
        self.count = 0
    
    def size(self):
        return self.count
    
    def getHashIndex(self, hc):
        return abs(hc) % self.bucketSize
    
    def insert(self, key, value):
        
        hc = hash(key)
        index = getHashIndex(hc)
        
        head = self.buckets[index]
        while head is not None:
            if head.key == key:
                head.value = value
                return
            head = head.next
        
        newNode = MapNode(key, value)
        newNode.next = self.buckets[index]
        self.buckets[index] = newNode

In [9]:
m = Map()
m.count

0

In [1]:
class MapNode:
    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.next = None

class Map:
    def __init__(self):
        self.bucketSize = 10
        self.buckets = [None for x in range(self.bucketSize)]
        self.count = 0
    
    def size(self):
        return self.count
    
    def getIndex(self, hc):
        return abs(hc) % self.bucketSize
    
    def insert(self, key, value):
        hc = hash(key)
        index = self.getIndex(hc)
        
        head = self.buckets[index]
        while head is not None:
            if head.key == key:
                head.value = value
                return
            head = head.next
        
        if (self.count/self.bucketSize) >= 0.7:
            self.rehash()
        newNode = MapNode(key, value)
        newNode.next = self.buckets[index]
        self.buckets[index] = newNode
        self.count += 1
    
    def rehash(self):
        temp = self.buckets
        self.bucketSize = 2 * self.bucketSize
        self.buckets = [None for x in self.bucketSize]
        
        self.count = 0
        
        for bucket in temp:
            head = bucket
            while head is not None:
                self.insert(head, head.value)
                self.count += 1
                head = head.next
    
    def loadFactor(self):
        return self.count / self.bucketSize
    
    def search(self, key):
        
        hc = hash(key)
        index = self.getIndex(hc)
        head = self.buckets[index]
        
        while head is not None:
            if head.key == key:
                return head.value
        
        return None
    
    def remove_1(self, key):
        
        hc = hash(key)
        index = self.getIndex(hc)
        head = self.buckets[index]
        prev = None
        
        while head is not None:
            if head.key == key:
                if prev is None:
                    self.buckets[index] = head.next
                else:
                    prev.next = head.next
                self.count -= 1
                return head.value
            prev = head
            head = head.next
    
    def remove_2(self, key):
        
        hc = hash(key)
        index = self.getIndex(hc)
        head = self.buckets[index]
        
        if head.key == key:
            self.buckets[index] = head.next
            self.count -= 1
            return head.value
        while head.next is not None:
            if head.next.key == key:
                head.next = head.next.next
                self.count -= 1
                return head.next.value

In [45]:
m = Map()
m.insert('a',10)
print(m.size())
m.insert('b',20)
print(m.size())
m.insert('c',30)
print(m.size())
m.insert('b',50)
print(m.size())
print(m.search('b'))
print(m.remove_2('b'))
print(m.size())
print(m.search('b'))

1
2
3
3
50
50
3
None


In [None]:
m = Map()
m.insert('a',10)
print(m.size())
m.insert('b',20)
print(m.size())
m.insert('c',30)
print(m.size())
m.insert('d', 40)

In [46]:
# Worst case Time complexity of Search, Delete, getValue and Insert operation in a Hashmap

# (1 Hash function time complexity = O(l) where l is the length of the string. We had already discussed how hash function is calculated.
# We iterate through the string
# Hash('abc') = 10^2 * ascii value of 'a' + 10^1 * ascii value of 'b' + 10^0 * ascii value of 'c'

# (2 Time complexity of iterating through the linked list = O(n) assuming that all n elements of the linked list are at the same index

# Total worst case time complexity = O(l) + O(n). We can ignore O(l) when compared to O(n)
# Total worst case time complexity = O(n)

# Average Time complexity of Search operation in a Hashmap

# Suppose there are n entries and b buckets. On an average, there are n/b entries per bucket
# We make sure that n/b < 0.7 (n/b) is also called Load Factor

# Average time complexity = O(0.7) = O(1)

# Rehashing

# Suppose n keeps increasing (i.e we keep inserting the entries), then at some point of time, n/b becomes greater than 0.7
# At such a time, we increase the number of buckets to 2 * b. This is the concept of Rehashing.

In [None]:
# Given a string S, you need to remove all the duplicates. That means, the output string should contain each character only once.
# The respective order of characters should remain same, as in the input string.

In [11]:
def unique_character(s):
    d = {}
    result = ""
    for character in s:
        if character not in d:
            result += character
        d[character] = True
    return result

# Time complexity = O(n)

In [12]:
unique_character("dftredfgcvdertfgbh")

'dftregcvbh'

In [9]:
def unique_character_2(s):
    
    char_dict = {}
    
    for character in s:
        char_dict[character] = char_dict.get(character, 0) + 1
    
    result = ""
    for character in s:
        if character in char_dict:
            result += character
            char_dict.pop(character)
    
    return result

# Time complexity = O(n)

In [10]:
unique_character_2("abddesftbd")

'abdesft'

In [1]:
from collections import OrderedDict

def unique_character_3(s): 
    
    ord_dict = OrderedDict()
    for character in s:
        ord_dict[character] = ord_dict.get(character, 0) + 1
    
    result = ""
    for key in ord_dict:
        result = result + key
    
    return result

# Time complexity = O(n)

In [13]:
# You are given an array of unique integers that contain numbers in random order. 
# You have to find the longest possible sequence of consecutive numbers using the numbers from given array.
# You need to return the output array which contains starting and ending element. 
# If the length of the longest possible sequence is one, then the output array must contain only single element.

In [81]:
def longest_subsequence(arr):
    
    sequences = {}
    sequence = []
    start = None
    n = len(arr)
    for i in range(n):
        if sequence == []:
            sequence = [arr[i]]
            count = 1
        if i+1 < n and arr[i+1] == arr[i] + 1:
            sequence.append(arr[i+1])
            count += 1
            if i == n - 2:
                sequences[sequence[0]] = count
                sequence = []           
        else:
            sequences[sequence[0]] = count
            sequence = [] 
    
    start = max(sequences, key = sequences.get)
    end = start + sequences[start] - 1
    
    if start is None:
        return [start]
    else:
        return [start, end]
    

# Time complexity = O(n)
# We assume that the input arr is sorted array

In [82]:
a = longest_subsequence([0,2,3,4,6,7,8,9,11,12,13,15,16,17,18,19])
print(a)

[15, 19]


In [83]:
def longest_subsequence_2(arr):
    
    diff = 0
    start = None
    n = len(arr)
    for i in range(n-1):
        if arr[i] == arr[i+1] - 1:
            temp_start = arr[i]
            j = i+1
            while (j < n - 1) and (arr[j+1] - arr[j] == 1):
                j = j + 1
            temp_end = arr[j]
            if temp_end - temp_start > diff:
                start = temp_start
                end = temp_end
                diff = temp_end - temp_start
    
    if start is None:
        start = arr[0]
        return [start]
    
    return [start, end]

# Time complexity = O(n^2)

In [84]:
longest_subsequence_2([0,2,3,4])

[2, 4]

In [None]:
# You are given an array of unique integers that contain numbers in random order. 
# You have to find the longest possible sequence of consecutive numbers using the numbers from given array.
# The consecutive numbers may be arranged in random order in the array
# You need to return the output array which contains starting and ending element. 
# If the length of the longest possible sequence is one, then the output array must contain only single element.