**Python Implementation of Heaps**

**1. Min Heap Class**

In [None]:
class MinHeap:
  """Binary min-heap kept in a flat list.

  The smallest value sits at index 0. For the node at index ``i`` the parent
  is at ``(i - 1) // 2`` and the children are at ``2 * i + 1`` and
  ``2 * i + 2``.
  """

  def __init__(self):
    self.storage = []  # heap elements in level order
    self.size = 0      # number of elements held in ``storage``

  def get_parent_index(self, index):
    """Return the parent position of ``index`` (``-1`` for the root)."""
    return (index - 1) // 2

  def get_leftChild_index(self, index):
    """Return the position the left child of ``index`` would occupy."""
    return 2 * index + 1

  def get_rightChild_index(self, index):
    """Return the position the right child of ``index`` would occupy."""
    return 2 * index + 2

  def has_parent(self, index):
    """Return True when ``index`` is not the root."""
    return self.get_parent_index(index) >= 0

  def has_leftChild(self, index):
    """Return True when ``index`` has a left child inside the heap."""
    # The helper must be *called*; comparing the bound method itself to an
    # int raises TypeError.
    return self.get_leftChild_index(index) < self.size

  def has_rightChild(self, index):
    """Return True when ``index`` has a right child inside the heap."""
    return self.get_rightChild_index(index) < self.size

  def swap(self, index1, index2):
    """Exchange the elements stored at ``index1`` and ``index2``."""
    self.storage[index1], self.storage[index2] = self.storage[index2], self.storage[index1]

  def heapify_up(self, index):
    """Move the element at ``index`` towards the root while its parent is larger."""
    while self.has_parent(index):
      parent_index = self.get_parent_index(index)
      if self.storage[parent_index] > self.storage[index]:
        self.swap(parent_index, index)
        index = parent_index
      else:
        break

  def heapify_down(self, index = 0):
    """Move the element at ``index`` towards the leaves while a child is smaller."""
    while index < self.size:
      smallestChild_index = index
      if self.has_leftChild(index):
        leftChild_index = self.get_leftChild_index(index)
        if self.storage[leftChild_index] < self.storage[smallestChild_index]:
          smallestChild_index = leftChild_index
      if self.has_rightChild(index):
        rightChild_index = self.get_rightChild_index(index)
        if self.storage[rightChild_index] < self.storage[smallestChild_index]:
          smallestChild_index = rightChild_index
      if smallestChild_index == index:
        break
      self.swap(index, smallestChild_index)
      index = smallestChild_index

  def insert(self, value):
    """Add ``value`` to the heap."""
    self.storage.append(value)
    self.size += 1
    self.heapify_up(self.size - 1)

  def delete_root(self):
    """Remove and return the smallest element.

    Raises:
      IndexError: if the heap is empty.
    """
    if self.size == 0:
      raise IndexError("Empty Heap")
    root = self.storage[0]
    # Overwrite the root with the last element, drop the last slot, then
    # sift the moved element down to its place.
    self.storage[0] = self.storage[self.size - 1]
    self.storage.pop()
    self.size -= 1
    self.heapify_down(0)
    return root

  def delete_at_index(self, index):
    """Remove and return the element stored at position ``index``.

    Raises:
      IndexError: if the heap is empty or ``index`` is outside
        ``0 .. size - 1``.
    """
    if self.size == 0:
      raise IndexError("Empty Heap")
    if index > (self.size - 1) or index < 0:
      raise IndexError("Invalid Index")
    current = self.storage[index]
    self.storage[index] = self.storage[self.size - 1]
    self.storage.pop()
    self.size -= 1
    self.heapify_down(index)
    # The former last element may be smaller than its new parent when it
    # came from a different subtree, so it has to be sifted up as well.
    # heapify_up does nothing when the parent is already not larger.
    if index < self.size:
      self.heapify_up(index)
    return current

  def sort(self):
    """Print the elements in ascending order and return them as a list.

    The heap itself is left untouched; a scratch copy is drained instead.
    """
    temp_heap = MinHeap()
    temp_heap.storage = self.storage[:]
    temp_heap.size = self.size
    sorted_arr = []
    while temp_heap.size > 0:
      sorted_arr.append(temp_heap.delete_root())
    print(sorted_arr)
    return sorted_arr

  def print(self):
    """Print the underlying list in level order."""
    print(self.storage[:])

# Main Program
if __name__ == '__main__':
  # Demo driver: fill a min-heap, show it sorted, then try both deletions.
  min_heap = MinHeap()

  # insertion operation
  for item in (30, 15, 8, 20, 10, 5, 0, 77, 99):
    min_heap.insert(item)

  print("min_heap after insertion:")
  min_heap.print()
  print()

  print("sorted min_heap after insertion:")
  min_heap.sort()
  print()

  # delete_root operation
  min_heap.delete_root()
  print("min_heap after delete_root operation:")
  min_heap.print()
  print()

  # delete_at_index operation (position 7 is the last slot at this point)
  min_heap.delete_at_index(7)
  print("min_heap after delete_at_index operation:")
  min_heap.print()

min_heap after insertion:
[0, 10, 5, 30, 20, 15, 8, 77, 99]

sorted min_heap after insertion:
[0, 5, 8, 10, 15, 20, 30, 77, 99]

min_heap after delete_root operation:
[5, 10, 8, 30, 20, 15, 99, 77]

min_heap after delete_at_index operation:
[5, 10, 8, 30, 20, 15, 99]


**2. Max Heap Class**

In [None]:
class MaxHeap:
  """Binary max-heap kept in a flat list.

  The largest value sits at index 0. For the node at index ``i`` the parent
  is at ``(i - 1) // 2`` and the children are at ``2 * i + 1`` and
  ``2 * i + 2``.
  """

  def __init__(self):
    self.storage = []  # heap elements in level order
    self.size = 0      # number of elements held in ``storage``

  def get_parent_index(self, index):
    """Return the parent position of ``index`` (``-1`` for the root)."""
    return (index - 1) // 2

  def get_leftChild_index(self, index):
    """Return the position the left child of ``index`` would occupy."""
    return 2 * index + 1

  def get_rightChild_index(self, index):
    """Return the position the right child of ``index`` would occupy."""
    return 2 * index + 2

  def has_parent(self, index):
    """Return True when ``index`` is not the root."""
    return self.get_parent_index(index) >= 0

  def has_leftChild(self, index):
    """Return True when ``index`` has a left child inside the heap."""
    # The helper must be *called*; comparing the bound method itself to an
    # int raises TypeError.
    return self.get_leftChild_index(index) < self.size

  def has_rightChild(self, index):
    """Return True when ``index`` has a right child inside the heap."""
    return self.get_rightChild_index(index) < self.size

  def swap(self, index1, index2):
    """Exchange the elements stored at ``index1`` and ``index2``."""
    self.storage[index1], self.storage[index2] = self.storage[index2], self.storage[index1]

  def heapify_up(self, index):
    """Move the element at ``index`` towards the root while its parent is smaller."""
    while self.has_parent(index):
      parent_index = self.get_parent_index(index)
      if self.storage[parent_index] < self.storage[index]:
        self.swap(parent_index, index)
        index = parent_index
      else:
        break

  def heapify_down(self, index = 0):
    """Move the element at ``index`` towards the leaves while a child is larger."""
    while index < self.size:
      biggestChild_index = index
      if self.has_leftChild(index):
        leftChild_index = self.get_leftChild_index(index)
        if self.storage[leftChild_index] > self.storage[biggestChild_index]:
          biggestChild_index = leftChild_index
      if self.has_rightChild(index):
        rightChild_index = self.get_rightChild_index(index)
        if self.storage[rightChild_index] > self.storage[biggestChild_index]:
          biggestChild_index = rightChild_index
      if biggestChild_index == index:
        break
      self.swap(index, biggestChild_index)
      index = biggestChild_index

  def insert(self, value):
    """Add ``value`` to the heap."""
    self.storage.append(value)
    self.size += 1
    self.heapify_up(self.size - 1)

  def delete_root(self):
    """Remove and return the largest element.

    Raises:
      IndexError: if the heap is empty.
    """
    if self.size == 0:
      raise IndexError("Empty Heap")
    root = self.storage[0]
    # Overwrite the root with the last element, drop the last slot, then
    # sift the moved element down to its place.
    self.storage[0] = self.storage[self.size - 1]
    self.storage.pop()
    self.size -= 1
    self.heapify_down(0)
    return root

  def delete_at_index(self, index):
    """Remove and return the element stored at position ``index``.

    Raises:
      IndexError: if the heap is empty or ``index`` is outside
        ``0 .. size - 1``.
    """
    if self.size == 0:
      raise IndexError("Empty Heap")
    if index > (self.size - 1) or index < 0:
      raise IndexError("Invalid Index")
    current = self.storage[index]
    self.storage[index] = self.storage[self.size - 1]
    self.storage.pop()
    self.size -= 1
    self.heapify_down(index)
    # The former last element may be larger than its new parent when it
    # came from a different subtree, so it has to be sifted up as well.
    # heapify_up does nothing when the parent is already not smaller.
    if index < self.size:
      self.heapify_up(index)
    return current

  def sort(self):
    """Print the elements in descending order and return them as a list.

    The heap itself is left untouched; a scratch copy is drained instead.
    """
    temp_heap = MaxHeap()
    temp_heap.storage = self.storage[:]
    temp_heap.size = self.size
    sorted_arr = []
    while temp_heap.size > 0:
      sorted_arr.append(temp_heap.delete_root())
    print(sorted_arr)
    return sorted_arr

  def print(self):
    """Print the underlying list in level order."""
    print(self.storage[:])

# Main Program
if __name__ == '__main__':
  # Demo driver: fill a max-heap, show it sorted, then try both deletions.
  max_heap = MaxHeap()

  # insertion operation
  for item in (30, 15, 8, 20, 10, 5, 0, 77, 99):
    max_heap.insert(item)

  print("max_heap after insertion:")
  max_heap.print()
  print()

  print("sorted max_heap after insertion:")
  max_heap.sort()
  print()

  # delete_root operation
  max_heap.delete_root()
  print("max_heap after delete_root operation:")
  max_heap.print()
  print()

  # delete_at_index operation (position 7 is the last slot at this point)
  max_heap.delete_at_index(7)
  print("max_heap after delete_at_index operation:")
  max_heap.print()

max_heap after insertion:
[99, 77, 8, 30, 10, 5, 0, 15, 20]

sorted max_heap after insertion:
[99, 77, 30, 20, 15, 10, 8, 5, 0]

max_heap after delete_root operation:
[77, 30, 8, 20, 10, 5, 0, 15]

max_heap after delete_at_index operation:
[77, 30, 8, 20, 10, 5, 0]


**Python Implementation of Hashing**

1. nyc_weather.csv contains New York City weather for the first few days of January. Write a program that can answer the following:
  *   What was the average temperature in the first week of Jan?
  *   What was the maximum temperature in the first 10 days of Jan?

2. nyc_weather.csv contains New York City weather for the first few days of January. Write a program that can answer the following:
  *   What was the temperature on Jan 9?
  *   What was the temperature on Jan 4?

3. poem.txt contains the famous poem "The Road Not Taken" by Robert Frost. Read this file in Python and print every word and its count, as shown below.
  *   'diverged': 2,
  *   'in': 3,
  *   'I': 8

**Answers to Questions 1 and 2**

In [None]:
import pandas as pd

class Node:
  """A single key/value entry with two link fields that start out unset."""

  def __init__(self, key, value):
    self.key, self.value = key, value
    # Neither link points anywhere until some other code assigns it.
    self.prev = self.next = None

class HashTable:
  """Fixed-capacity hash table that resolves collisions by linear probing.

  Every slot of ``table`` is either ``None`` or a single ``Node``. A key that
  hashes to an occupied slot is stored in the next free slot, wrapping around
  at the end of the table, so lookups and removals have to follow the same
  probe sequence.
  """

  def __init__(self, size):
    """Create a table with ``size`` slots.

    Raises:
      ValueError: if ``size`` is smaller than 1 (the hash is taken modulo
        ``size``).
    """
    if size < 1:
      raise ValueError("size must be at least 1")
    self.size = size
    self.table = [None] * size

  def hash(self, key):  # using djb2
    """Return the home slot of ``key``; ``key`` is iterated character by character."""
    digest = 5381
    for ch in key:
      digest = digest * 33 + ord(ch)
    return digest % self.size

  def _probe(self, key):
    """Return the slot holding ``key``, else the first free slot on its probe path.

    Returns ``None`` when every slot is occupied by a different key.
    """
    start = self.hash(key)
    for offset in range(self.size):
      index = (start + offset) % self.size
      node = self.table[index]
      if node is None or node.key == key:
        return index
    return None

  def insert(self, key, value):
    """Store ``value`` under ``key``, replacing the value of an existing key.

    Raises:
      RuntimeError: if the table is full and ``key`` is not already present.
    """
    index = self._probe(key)
    if index is None:
      raise RuntimeError("Hash table is full")
    if self.table[index] is None:
      self.table[index] = Node(key, value)
    else:
      self.table[index].value = value

  def get(self, key):
    """Return the value stored under ``key``, or ``None`` if it is absent."""
    index = self._probe(key)
    if index is None or self.table[index] is None:
      return None
    return self.table[index].value

  def remove(self, key):
    """Delete ``key`` from the table; do nothing if it is absent."""
    index = self._probe(key)
    if index is None or self.table[index] is None:
      return
    self.table[index] = None
    # Emptying a slot can cut the probe path of entries stored after it, so
    # every entry in the run that follows is lifted out and placed again.
    cursor = (index + 1) % self.size
    while self.table[cursor] is not None:
      node = self.table[cursor]
      self.table[cursor] = None
      self.table[self._probe(node.key)] = node
      cursor = (cursor + 1) % self.size

  def print_table(self):
    """Print one line per slot in the form ``Index i: [...]``."""
    for i in range(self.size):
      node = self.table[i]
      entries = [] if node is None else [f"({node.key}: {node.value})"]
      print(f"Index {i}: {entries}")

# Main Program
if __name__ == '__main__':
  ht = HashTable(10)

  # import the csv file and insert data
  df = pd.read_csv(r"/content/drive/MyDrive/nyc_weather.csv")

  for _, row in df.iterrows():
    key = row['date']
    value = row['temperature(F)']
    ht.insert(key, value)

  # display the hash table
  print("Hash table of New York city weather from Jan 1-10")
  ht.print_table()

  # Look every day up once. A day that is not in the table comes back as
  # None and is left out of the statistics instead of breaking the sum.
  readings = [ht.get(f"Jan {day}") for day in range(1, 11)]

  # average temperature for the first week of Jan
  first_week = [temp for temp in readings[:7] if temp is not None]
  avg_temp = sum(first_week) / len(first_week) if first_week else float("nan")

  # maximum temperature in the first 10 days of Jan
  # (taken from the data itself so that readings at or below zero still count)
  first_ten = [temp for temp in readings if temp is not None]
  max_temp = max(first_ten, default=None)

  # temperature on Jan 4 and 9
  jan_4 = ht.get('Jan 4')
  jan_9 = ht.get('Jan 9')

  # print the results
  print(f"\nThe average temperature for the first week of January is {avg_temp:.5} degrees.")
  print(f"The maximum temperature in the first 10 days of January is {max_temp} degrees.")
  print(f"The temperature on January 4 is {jan_4} degrees.")
  print(f"The temperature on January 9 is {jan_9} degrees.")

Hash table of New York city weather from Jan 1-10
Index 0: ['(Jan 10: 30)']
Index 1: ['(Jan 1: 27)']
Index 2: ['(Jan 2: 31)']
Index 3: ['(Jan 3: 23)']
Index 4: ['(Jan 4: 34)']
Index 5: ['(Jan 5: 37)']
Index 6: ['(Jan 6: 38)']
Index 7: ['(Jan 7: 29)']
Index 8: ['(Jan 8: 30)']
Index 9: ['(Jan 9: 35)']

The average temperature for the first week of January is 31.286 degrees.
The maximum temperature in the first 10 days of January is 38 degrees.
The temperature on January 4 is 34 degrees.
The temperature on January 9 is 35 degrees.


**Answer to Question 3**

In [3]:
class Node:
  """One key/value entry of a bucket chain; both link fields start as None."""

  def __init__(self, key, value):
    # Links first, payload second; nothing is connected at construction time.
    self.prev = self.next = None
    self.key, self.value = key, value

class HashTable:
  """Hash table that resolves collisions by separate chaining.

  Every slot of ``table`` is either ``None`` or the first ``Node`` of a
  chain linked through ``next``. This class does not maintain ``prev``.
  """

  def __init__(self, size):
    """Create a table with ``size`` buckets.

    Raises:
      ValueError: if ``size`` is smaller than 1 (the hash is taken modulo
        ``size``).
    """
    if size < 1:
      raise ValueError("size must be at least 1")
    self.size = size
    self.table = [None] * size

  def hash(self, key):  # using djb2
    """Return the bucket of ``key``; ``key`` is iterated character by character."""
    digest = 5381
    for ch in key:
      digest = digest * 33 + ord(ch)
    return digest % self.size

  def insert(self, key, value):
    """Store ``value`` under ``key``, replacing the value of an existing key."""
    index = self.hash(key)
    current = self.table[index]
    if current is None:
      self.table[index] = Node(key, value)
      return
    # address collision using chaining: walk the chain, update on a match,
    # otherwise append at the tail
    while True:
      if current.key == key:
        current.value = value
        return
      if current.next is None:
        break
      current = current.next
    current.next = Node(key, value)

  def get(self, key):
    """Return the value stored under ``key``, or ``None`` if it is absent."""
    current = self.table[self.hash(key)]
    while current is not None:
      if current.key == key:
        return current.value
      current = current.next
    return None

  def remove(self, key):
    """Unlink ``key`` from its bucket chain; do nothing if it is absent."""
    index = self.hash(key)
    previous = None
    current = self.table[index]
    while current is not None:
      if current.key == key:
        if previous is None:
          # The match is the head of the chain: the bucket now starts at
          # its successor.
          self.table[index] = current.next
        else:
          previous.next = current.next
        return
      previous, current = current, current.next

  def print_table(self):
    """Print one line per bucket in the form ``Index i: [...]``."""
    for i in range(self.size):
      entries = []
      current = self.table[i]
      while current is not None:
        entries.append(f"({current.key}: {current.value})")
        current = current.next
      print(f"Index {i}: {entries}")

In [4]:
import string
from collections import Counter

def remove_punctuation(text):
  """Return ``text`` without punctuation, keeping the ASCII hyphen.

  Dropped characters are everything in ``string.punctuation`` except ``-``,
  plus the en dash and the em dash.
  """
  unwanted = set(string.punctuation) - {"-"}
  unwanted.update("–—")
  return "".join(ch for ch in text if ch not in unwanted)

# Main Program
if __name__ == '__main__':

  # import txt file
  # The context manager closes the file once it has been read. The encoding
  # is given explicitly because the text contains an em dash, which decodes
  # differently depending on the platform default.
  with open("/content/drive/MyDrive/poem.txt", "r", encoding="utf-8") as poem_file:
    poem = poem_file.read()
  print(f"\x1B[3m{poem} \x1B[0m \n")

  # clean and extract words
  cleaned_poem = remove_punctuation(poem).lower().split()
  print(f"Number of unique words in the poem: {len(set(cleaned_poem))}\n")

  # display the hash table
  print("Hash table of unique words (16 indices)")
  ht = HashTable(16)
  word_count = Counter(cleaned_poem)
  for word, count in word_count.items():
    ht.insert(word, count)
  ht.print_table()

[3mTwo roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;

Then took the other, as just as fair,
And having perhaps the better claim,
Because it was grassy and wanted wear;
Though as for that the passing there
Had worn them really about the same,

And both that morning equally lay
In leaves no step had trodden black.
Oh, I kept the first for another day!
Yet knowing how way leads on to way,
I doubted if I should ever come back.

I shall be telling this with a sigh
Somewhere ages and ages hence:
Two roads diverged in a wood, and I—
I took the one less traveled by,
And that has made all the difference. [0m 

Number of unique words in the poem: 94

Hash table of unique words (16 indices)
Index 0: ['(where: 1)', '(was: 1)', '(about: 1)', '(sigh: 1)', '(by: 1)', '(difference: 1)']
Index 1: ['(yellow: 1)', '(step: 1)', '(with: 1)', '(has: 1)']
Index 2: ['(both: 2)', 