In [12]:
from Model import *
from DataLoader import *
from Setting import *
import collections
import os
import numpy as np
import pickle
import heapq

## Week 3, Code 1: Huffman coding (greedy)

In [13]:
class Node():
    """One vertex of a binary code tree.

    Every field starts out empty; the code that builds the tree fills them in
    after construction.
    """

    def __init__(self):
        # True for a leaf, False for a merged (internal) node, None until set.
        self.isleaf = None
        # Reserved for a parent link; nothing in the visible code assigns it.
        self.father = None
        # Child nodes in branch order (first child -> bit '0', second -> '1').
        self.children = []
        # Weight carried by this node.
        self.value = None
        # Bit string assigned to this node while the tree is walked.  Declared
        # here so that reading it before the walk yields '' instead of raising
        # AttributeError.
        self.code = ''
        # Identifier set by the tree builder (a tuple of symbol indices in the
        # Huffman model); None until assigned.
        self.id = None


In [14]:
# Priority queue helpers built on the standard-library heapq module.
class _DescendingKey(object):
    """Wrap a sort key so that the largest key compares as the smallest.

    heapq only provides min-heaps; wrapping every key in this class inverts
    the ordering, which lets Heap honour ``reverse=True`` for any comparable
    key type (not only numbers that could be negated).
    """

    __slots__ = ('key',)

    def __init__(self, key):
        self.key = key

    def __lt__(self, other):
        return other.key < self.key

    def __eq__(self, other):
        return self.key == other.key


class Heap(object):
    """Priority queue ordered by ``key(item)``.

    Parameters
    ----------
    initial : iterable, optional
        Items to load into the heap at construction time.
    key : callable, optional
        Maps an item to the value it is ordered by (identity by default).
    reverse : bool, optional
        When true, ``pop`` returns the item with the largest key first.
        The default (False) pops the smallest key first.

    Items with equal keys are popped in insertion order; the items themselves
    are never compared, so they do not need to define an ordering.
    """

    def __init__(self, initial=None, key=lambda x:x , reverse = False):
        self.key = key
        self._reverse = reverse
        # Monotonic insertion counter: a deterministic tie-breaker that also
        # keeps tuple comparison from ever reaching the item itself.
        self._count = 0
        if initial:
            self._data = [self._entry(item) for item in initial]
            heapq.heapify(self._data)
        else:
            self._data = []

    def _entry(self, item):
        """Build the (sort key, insertion number, item) tuple stored in the heap."""
        sort_key = self.key(item)
        if self._reverse:
            sort_key = _DescendingKey(sort_key)
        entry = (sort_key, self._count, item)
        self._count += 1
        return entry

    def push(self, item):
        """Insert ``item`` into the heap."""
        heapq.heappush(self._data, self._entry(item))

    def pop(self):
        """Remove and return the first item in heap order, or None if empty."""
        try:
            return heapq.heappop(self._data)[2]
        except IndexError:
            # Only "heap is empty" maps to None; any other error propagates.
            return None

    def length(self):
        """Return the number of items currently stored."""
        return len(self._data)

In [45]:
class Huffman(Model):
    """Greedy Huffman coding of the weights supplied by the data loader.

    ``preprocess`` picks up the loaded rows, ``model`` builds the code tree by
    repeatedly merging the two lightest nodes, and ``expend_fathernode`` walks
    the finished tree to fill ``self.codedict`` (symbol index -> bit string).
    """

    def __init__(self):
        super().__init__()

    def preprocess(self):
        """Take the loaded rows (minus the first one) and reset the code table."""
        # The first loaded row is skipped -- presumably a header holding the
        # number of symbols; TODO confirm against the input file format.
        self.data = self.dataLoader.data[1:]
        self.codedict = {}

    def expend_fathernode(self,node):
        """Assign bit strings to every node below ``node`` and record the leaves.

        The first child of a node extends the parent's code with '0', the
        second with '1'.  Each leaf's code is stored in ``self.codedict`` under
        the integer found in ``leaf.id[0]``.

        The walk uses an explicit stack instead of recursion so that a very
        lopsided tree cannot exceed the interpreter's recursion limit.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.isleaf:
                self.codedict[int(current.id[0])] = current.code
                continue

            # zip with '01' labels at most the first two children.
            branches = list(zip('01', current.children))
            for bit, child in branches:
                child.code = current.code + bit
            # Push in reverse so the '0' branch is visited first.
            for _, child in reversed(branches):
                pending.append(child)

    def model(self):
        """Build the Huffman tree and return ``(max_length, min_length)``.

        Each row of ``self.data`` contributes one symbol whose weight is the
        row's first field; the symbol's identifier is its row index.  The total
        encoded size (sum of code length times weight) is printed.

        Returns
        -------
        tuple
            Length of the longest and of the shortest code word.

        Raises
        ------
        ValueError
            If there are no symbols to encode.
        """
        weights = [row[0] for row in self.data]
        if not weights:
            raise ValueError('Huffman coding needs at least one symbol weight')

        # One leaf per symbol.
        leaves = []
        for index, weight in enumerate(weights):
            leaf = Node()
            leaf.value = weight
            leaf.code = ''
            leaf.isleaf = True
            leaf.id = (index,)
            leaves.append(leaf)

        # Heap orders its entries by the key plus a per-entry tie-breaker, so
        # the Node objects themselves never need to be comparable.
        hp = Heap(leaves, key=lambda tree_node: tree_node.value)

        while hp.length() > 1:
            # Merge the two lightest nodes under a new internal node.
            lighter = hp.pop()
            heavier = hp.pop()

            father_node = Node()
            father_node.code = ''
            father_node.isleaf = False
            # id lists the symbol indices of every leaf below this node.
            father_node.id = lighter.id + heavier.id
            father_node.value = lighter.value + heavier.value
            father_node.children.append(lighter)
            father_node.children.append(heavier)

            hp.push(father_node)

        # Exactly one node is left in the heap: the root of the tree.
        root = hp.pop()
        if root.isleaf:
            # A lone symbol has no branches to label; give it a one-bit code.
            root.code = '0'

        # Expand the root node and assign codes
        self.expend_fathernode(root)

        # Longest and shortest code word
        code_lengths = [len(code) for code in self.codedict.values()]
        max_length = max(code_lengths)
        min_length = min(code_lengths)

        print('Need bits:',sum(len(self.codedict[index]) * weight
                               for index, weight in enumerate(weights)))

        return (max_length,min_length)

In [46]:
def processLine(x):
    """Split one input line on whitespace and convert every field to int."""
    return [int(str.strip(field)) for field in x.split()]


# Keyword arguments handed to the project's DataLoader.
arg = dict(
    fileName='./data/huffman.txt',
    numLines=None,
    processLine=processLine,
)

d = DataLoader(**arg)
m = Huffman()
# NOTE(review): the meaning of the third Setting argument is not visible here.
s = Setting(d, m, False)
s.run()

Loading data ...
Total 1001 lines read
--------------------------------------------------
Need bits: 48457275093
result:(19, 9)


## Week 3, Code 2: Maximum-weight independent set (path graph)

In [625]:
class mwi_set(Model):
    """Maximum-weight independent set of a path graph by dynamic programming.

    ``mwi`` fills ``self.result_dict`` with the best total weight of every
    prefix of the path, ``find_path`` walks that table backwards to recover
    which vertices were chosen, and ``compute_bit`` reports membership for a
    list of vertices.
    """

    def __init__(self):
        super().__init__()

    def preprocess(self):
        """Read the vertex weights and reset the result table."""
        # The first loaded row is skipped -- presumably a header holding the
        # number of vertices; TODO confirm against the input file format.
        rows = self.dataLoader.data[1:]
        self.data = [row[0] for row in rows]

        self.result_dict = {}

    def mwi(self,data):
        """Compute the best independent-set weight for every prefix of ``data``.

        After the call ``self.result_dict[k]`` holds the optimum for the first
        ``k`` weights, for k = 1 .. len(data).  The table is rebuilt from
        scratch on every call, so results of an earlier call on different data
        cannot leak in.

        The table is filled bottom-up rather than by recursing on slices, so
        long inputs neither approach the recursion limit nor copy the input
        repeatedly.

        Returns
        -------
        The optimum for the whole of ``data`` (0 when ``data`` is empty).
        """
        table = {}
        for size in range(1, len(data) + 1):
            if size == 1:
                best = data[0]
            elif size == 2:
                best = max(data[0], data[1])
            else:
                # Either leave the last vertex out, or take it together with
                # the optimum that excludes its neighbour.
                best = max(table[size - 1], table[size - 2] + data[size - 1])
            table[size] = best

        self.result_dict = table
        return table[len(data)] if table else 0

    def find_path(self,result_list):
        """Recover the chosen vertices from the table of prefix optima.

        Parameters
        ----------
        result_list : list
            ``result_list[j]`` is the optimum for vertices 0..j (as produced
            by ``mwi`` for ``self.data``).

        Returns
        -------
        set
            0-based indices of the vertices in the reconstructed set.
        """
        S = set()
        i = len(result_list)-1
        while i >= 1:
            best_without_i = result_list[i-1]
            # With fewer than two vertices before i the earlier optimum is 0.
            # (Indexing result_list[i-2] at i == 1 would wrap around to the
            # last entry of the table.)
            best_before_neighbour = result_list[i-2] if i >= 2 else 0
            if best_without_i > best_before_neighbour + self.data[i]:
                i = i - 1
            else:
                S.add(i)
                i = i - 2
        # Arriving exactly at vertex 0 means its neighbour was not taken.
        if i == 0:
            S.add(0)
        return S

    def compute_bit(self,S,targ_v=(1,2,3,4,17,117,517,997)):
        """Return a 0/1 membership flag for each 1-based vertex in ``targ_v``.

        ``S`` holds 0-based indices, hence the ``- 1`` shift.
        """
        return [1 if (vertex - 1) in S else 0 for vertex in targ_v]

    def model(self):
        """Run the dynamic program and return the membership bits."""
        self.mwi(self.data)
        result_list = [self.result_dict[size]
                       for size in range(1, len(self.result_dict) + 1)]
        S = self.find_path(result_list)
        return self.compute_bit(S)

In [626]:
def processLine(x):
    """Split one input line on whitespace and convert every field to int."""
    return [int(str.strip(field)) for field in x.split()]


# Keyword arguments handed to the project's DataLoader.
arg = dict(
    fileName='./data/mwis.txt',
    numLines=None,
    processLine=processLine,
)

d = DataLoader(**arg)
m = mwi_set()
# NOTE(review): the meaning of the third Setting argument is not visible here.
s = Setting(d, m, False)
s.run()

Loading data ...
Total 1001 lines read
--------------------------------------------------
result:[1, 0, 1, 0, 0, 1, 1, 0]
