### Hash Table

In [42]:
class HashTable:
    """Fixed-size hash table that keeps a single value per slot.

    There is no collision handling: two keys that hash to the same slot
    share it, so the most recent write wins and either key reads it back.
    """

    def __init__(self):
        self.MAX = 100
        self.arr = [None] * self.MAX

    def get_hash(self, key):
        """Return the slot index for ``key``.

        The index is the sum of the code points of the key's characters
        (``ord`` of each one), reduced modulo ``MAX``.
        """
        total = sum(ord(ch) for ch in key)
        return total % self.MAX

    # Dunder methods give the table dict-style syntax:
    # ht[key] = value, ht[key] and del ht[key].
    def __setitem__(self, key, value):
        self.arr[self.get_hash(key)] = value

    def __getitem__(self, key):
        return self.arr[self.get_hash(key)]

    def __delitem__(self, key):
        # "Deleting" only resets the slot to None.
        self.arr[self.get_hash(key)] = None

    # Method-call spellings of the same store / lookup operations.
    def add(self, key, value):
        self[key] = value

    def get(self, key):
        return self[key]
    
        

In [43]:
# Hash of a sample key: the sum of its character codes modulo the table size.
ht = HashTable()
ht.get_hash('march 6 1')

90

In [44]:
# Store 31 in the slot that 'march' hashes to.
ht.add('march', 31)

In [45]:
ht.get('march')

31

In [46]:
ht.arr

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 31,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [47]:
# Same store operation as add(), written with item-assignment syntax (__setitem__).
ht['april'] = 37
ht['may'] = 38

In [48]:
ht['march']

31

In [49]:
ht['april']

37

In [50]:
ht['may']

38

In [51]:
# __delitem__ resets the slot that 'may' hashes to back to None.
del ht['may']

In [52]:
# The slot now holds None, so this lookup returns None (no KeyError) and the cell displays nothing.
ht['may']

In [53]:
ht.arr

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 31,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 37,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

### Handling collisions in a Hash Table

In [1]:
class HashTable:
    """Hash table that resolves collisions by chaining.

    Every slot of ``arr`` is a list (a "bucket") of ``(key, value)`` tuples,
    so keys that hash to the same slot are stored side by side instead of
    overwriting one another.
    """

    def __init__(self, size=10):
        """Create an empty table.

        Args:
            size: number of buckets; defaults to 10.

        Raises:
            ValueError: if ``size`` is smaller than 1.
        """
        if size < 1:
            raise ValueError("size must be at least 1")
        self.MAX = size
        self.arr = [[] for _ in range(self.MAX)]

    def get_hash(self, key):
        """Return the bucket index for ``key``.

        The index is the sum of the code points of the key's characters,
        reduced modulo ``MAX``.
        """
        return sum(ord(char) for char in key) % self.MAX

    def __getitem__(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent."""
        for stored_key, stored_val in self.arr[self.get_hash(key)]:
            if stored_key == key:
                return stored_val
        return None

    def __setitem__(self, key, val):
        """Store ``val`` under ``key``, replacing the existing pair if there is one."""
        bucket = self.arr[self.get_hash(key)]
        for idx, (stored_key, _) in enumerate(bucket):
            if stored_key == key:
                bucket[idx] = (key, val)
                # Keys are unique within a bucket, so there is nothing left to scan.
                return
        bucket.append((key, val))

    def __delitem__(self, key):
        """Remove ``key`` and its value; a missing key is silently ignored.

        Prints ``del <position>`` for the removed pair, where the position is
        its index inside the bucket.
        """
        bucket = self.arr[self.get_hash(key)]
        for index, (stored_key, _) in enumerate(bucket):
            if stored_key == key:
                print("del",index)
                del bucket[index]
                # Stop here: the list was just mutated, so iteration must not continue.
                return

In [2]:
# 'march 6' and 'march 17' hash to the same bucket, so these inserts exercise chaining.
t = HashTable()
t["march 6"] = 310
t["march 7"] = 420
t["march 8"] = 67
t["march 17"] = 63457

In [3]:
t["march 6"]

310

In [4]:
t["march 17"]

63457

In [5]:
t.arr

[[('march 7', 420)],
 [('march 8', 67)],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [('march 6', 310), ('march 17', 63457)]]

In [6]:
# Assigning to an existing key replaces its (key, value) pair in the bucket rather than appending a second one.
t["march 6"] = 11

In [7]:
t.arr

[[('march 7', 420)],
 [('march 8', 67)],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [('march 6', 11), ('march 17', 63457)]]

In [8]:
t["march 6"]

11

In [9]:
# Removes the pair from its bucket; __delitem__ also prints the position it removed.
del t["march 6"]

del 0


In [11]:
t.arr

[[('march 7', 420)],
 [('march 8', 67)],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [('march 17', 63457)]]

# Exercise 1: nyc_weather.csv contains New York City weather for the first few days of January. Write a program that can answer the following:
1. What was the average temperature in the first week of Jan?
2. What was the maximum temperature in the first 10 days of Jan?
3. Figure out the data structure that is best for this problem.

###### A: The best data structure here is a list, because all we need is positional access to the temperature values.

In [18]:
# Collect the temperature column (second CSV field) of every data row, in file order.
arr = []

with open("Datasets/nyc_weather.csv","r") as f:
    for line in f:
        tokens = line.split(',')
        try:
            temperature = int(tokens[1])
        except (IndexError, ValueError):
            # Rows without a second field or with a non-numeric one are
            # reported and skipped; any other error is allowed to surface.
            print("Invalid temperature.Ignore the row")
            continue
        arr.append(temperature)

Invalid temperature.Ignore the row


In [19]:
arr

[27, 31, 23, 34, 37, 38, 29, 30, 35, 30]

What was the average temperature in the first week of Jan?

In [20]:
# Mean of the first seven readings in arr.
sum(arr[0:7])/len(arr[0:7])

31.285714285714285

What was the maximum temperature in the first 10 days of Jan?

In [21]:
# Largest of the first ten readings in arr.
max(arr[0:10])

38

# Exercise 2: nyc_weather.csv contains New York City weather for the first few days of January. Write a program that can answer the following:
1. What was the temperature on Jan 9?
2. What was the temperature on Jan 4?
* Figure out the data structure that is best for this problem.

###### A: The best data structure here is a dictionary (internally a hash table), because we want the temperature for a specific day. That calls for key-value access, where an element can be looked up by day in O(1) time on average.

In [24]:
# Map each day label (first CSV field) to its temperature (second CSV field).
weather_dict = {}

with open("Datasets/nyc_weather.csv","r") as f:
    for line in f:
        tokens = line.split(',')
        try:
            day, temperature = tokens[0], int(tokens[1])
        except (IndexError, ValueError):
            # Rows without a second field or with a non-numeric one are
            # reported and skipped; any other error is allowed to surface.
            print("Invalid temperature.Ignore the row")
            continue
        weather_dict[day] = temperature

Invalid temperature.Ignore the row


In [25]:
weather_dict

{'Jan 1': 27,
 'Jan 2': 31,
 'Jan 3': 23,
 'Jan 4': 34,
 'Jan 5': 37,
 'Jan 6': 38,
 'Jan 7': 29,
 'Jan 8': 30,
 'Jan 9': 35,
 'Jan 10': 30}

What was the temperature on Jan 9?

In [28]:
weather_dict['Jan 9']

35

What was the temperature on Jan 4?

In [29]:
weather_dict['Jan 4']

34