In [1]:
from Model import *
from DataLoader import *
from Setting import *
import collections
import os
import numpy as np
import pickle

## Week3 code1 Huffman coding (Greedy)

In [350]:
class Node():
    def __init__(self):
        self.isleaf = None
        self.father = None
        self.children = []
        self.value = None


In [419]:
# heapq class
class Heap(object):
    def __init__(self, initial=None, key=lambda x:x , reverse = False):
        self.key = key
        if initial:
            self._data = [(key(item),id(item), item) for item in initial]
            heapq.heapify(self._data)
        else:
            self._data = []

    def push(self, item):
        print((self.key(item),id(item), item))
        heapq.heappush(self._data, (self.key(item),id(item), item))

    def pop(self):
        try:
            return heapq.heappop(self._data)[2]
        except:
            return None
        
    def length(self):
        return len(self._data)

In [453]:
class Huffman(Model):
    def __init__(self):
        super().__init__()

        
    def preprocess(self):
        self.data = self.dataLoader.data[1:]
        self.codedict = {}
    
    
    def expend_fathernode(self,node):
        if node.isleaf == True:
            print('leaf:',node.code)
            print('leaf_id:',node.id)
            self.codedict[int(node.id[0])] = node.code
            return 

        for index in range(len(node.children)):
            if index == 0:
                node.children[index].code =node.code +  '0'
                self.expend_fathernode(node.children[index])

            if index == 1:
                node.children[index].code =node.code +   '1'
                self.expend_fathernode(node.children[index])
        
    def model(self):
        
        indexed_data = dict(zip(range(len(self.data)),self.data))
        listed_data = list(zip(self.data,range(len(self.data))))
        listed_data = [(i[0][0],(i[1])) for i in listed_data]
        
        # Initialize heapq
        hp = Heap(listed_data, key = lambda x:x[0])
        
        
        
        # Initialize node
        for i in indexed_data.keys():
            node = Node()
            node.value = indexed_data[i][0]
            node.code = ''
            node.isleaf = True
            node.id = (i,)
            indexed_data[i] = node
            
            
        while hp.length() > 1:
            # Pop two nodes from heapq
            popnode1 = indexed_data[hp.pop()[1]]
            popnode2 = indexed_data[hp.pop()[1]]
            
            '''
            initiate a father node, 
            isleaf = false, 
            value = [subnode1_value, subnode2_value]
            Children = [popnode1, popnode2]
            '''
            father_node = Node()
            father_node.code = ''
            father_node.isleaf = False
            father_node.id = popnode1.id + popnode2.id       
            father_node.value = popnode1.value + popnode2.value
            father_node.children.append(popnode1)
            father_node.children.append(popnode2)

            
            # push father node to the heapq
            print('push:',(father_node.value,father_node.id   ))
            hp.push((father_node.value,father_node.id))
            
            # store father node into hashtable
            indexed_data[father_node.id] = father_node
        
        # Expend the root node and assign code
        self.expend_fathernode(father_node)
        
        # Compute the max length of code
        max_length = max([len(i) for i in self.codedict.values()])
        min_length = min([len(i) for i in self.codedict.values()])
        print(max_length,min_length)
        return max_length

In [454]:
processLine = lambda x : list(map(int,map(str.strip,x.split())))
arg = {'fileName':'./data/huffman.txt','numLines':None ,  'processLine' : processLine  }

d = DataLoader(**arg)
m = Huffman()
s = Setting(d,m,False)
s.run()

Loading data ...
Total 1001 lines read
--------------------------------------------------
push: (14583, (471, 798))
(14583, 4629061000, (14583, (471, 798)))
push: (51747, (471, 798, 752))
(51747, 4629061000, (51747, (471, 798, 752)))
push: (92629, (448, 471, 798, 752))
(92629, 4629061000, (92629, (448, 471, 798, 752)))
push: (119084, (554, 867))
(119084, 4625332936, (119084, (554, 867)))
push: (134653, (356, 957))
(134653, 4628494472, (134653, (356, 957)))
push: (174368, (598, 382))
(174368, 4625162888, (174368, (598, 382)))
push: (190050, (448, 471, 798, 752, 210))
(190050, 4629061000, (190050, (448, 471, 798, 752, 210)))
push: (221607, (538, 371))
(221607, 4623538632, (221607, (538, 371)))
push: (253031, (554, 867, 804))
(253031, 4625332936, (253031, (554, 867, 804)))
push: (276550, (356, 957, 878))
(276550, 4628494472, (276550, (356, 957, 878)))
push: (298907, (971, 174))
(298907, 4625331848, (298907, (971, 174)))
push: (320558, (756, 941))
(320558, 4632267144, (320558, (756, 941)))

push: (22824170, (241, 364, 667, 703, 83))
(22824170, 4622568072, (22824170, (241, 364, 667, 703, 83)))
push: (22927533, (203, 828, 547, 530, 72))
(22927533, 4628208904, (22927533, (203, 828, 547, 530, 72)))
push: (23035309, (453, 707, 354, 600, 261, 948))
(23035309, 4627951560, (23035309, (453, 707, 354, 600, 261, 948)))
push: (23160151, (65, 764, 459, 792))
(23160151, 4631008776, (23160151, (65, 764, 459, 792)))
push: (23306258, (283, 24, 294, 918, 648, 671, 980))
(23306258, 4625210632, (23306258, (283, 24, 294, 918, 648, 671, 980)))
push: (23434488, (771, 642, 598, 382, 448, 471, 798, 752, 210, 936, 952, 823, 426, 232))
(23434488, 4632266888, (23434488, (771, 642, 598, 382, 448, 471, 798, 752, 210, 936, 952, 823, 426, 232)))
push: (23527923, (623, 224, 7, 78))
(23527923, 4625927112, (23527923, (623, 224, 7, 78)))
push: (23626474, (811, 89, 417, 590, 296, 49, 184, 714))
(23626474, 4625966408, (23626474, (811, 89, 417, 590, 296, 49, 184, 714)))
push: (23989549, (949, 392, 564, 972, 44

leaf: 0111010011111
leaf_id: (188,)
leaf: 011101010
leaf_id: (717,)
leaf: 011101011
leaf_id: (195,)
leaf: 011101100
leaf_id: (708,)
leaf: 011101101
leaf_id: (587,)
leaf: 0111011100
leaf_id: (491,)
leaf: 0111011101
leaf_id: (743,)
leaf: 0111011110
leaf_id: (834,)
leaf: 0111011111
leaf_id: (746,)
leaf: 011110000
leaf_id: (872,)
leaf: 011110001
leaf_id: (727,)
leaf: 011110010
leaf_id: (629,)
leaf: 011110011
leaf_id: (536,)
leaf: 01111010000
leaf_id: (870,)
leaf: 01111010001
leaf_id: (284,)
leaf: 0111101001
leaf_id: (544,)
leaf: 011110101
leaf_id: (398,)
leaf: 0111101100
leaf_id: (449,)
leaf: 0111101101
leaf_id: (134,)
leaf: 011110111
leaf_id: (303,)
leaf: 011111000
leaf_id: (320,)
leaf: 011111001
leaf_id: (831,)
leaf: 01111101000
leaf_id: (605,)
leaf: 01111101001
leaf_id: (841,)
leaf: 0111110101
leaf_id: (887,)
leaf: 011111011
leaf_id: (231,)
leaf: 011111100
leaf_id: (85,)
leaf: 0111111010
leaf_id: (335,)
leaf: 011111101100
leaf_id: (323,)
leaf: 0111111011010
leaf_id: (725,)
leaf: 0111111

leaf: 1110110101
leaf_id: (555,)
leaf: 11101101100
leaf_id: (218,)
leaf: 11101101101
leaf_id: (488,)
leaf: 1110110111
leaf_id: (612,)
leaf: 1110111000
leaf_id: (1,)
leaf: 11101110010
leaf_id: (919,)
leaf: 11101110011
leaf_id: (336,)
leaf: 1110111010
leaf_id: (750,)
leaf: 1110111011
leaf_id: (454,)
leaf: 11101111000
leaf_id: (71,)
leaf: 11101111001
leaf_id: (566,)
leaf: 1110111101
leaf_id: (358,)
leaf: 1110111110
leaf_id: (483,)
leaf: 1110111111
leaf_id: (682,)
leaf: 11110000000
leaf_id: (816,)
leaf: 11110000001
leaf_id: (584,)
leaf: 1111000001
leaf_id: (898,)
leaf: 1111000010
leaf_id: (216,)
leaf: 1111000011
leaf_id: (738,)
leaf: 111100010000
leaf_id: (474,)
leaf: 111100010001
leaf_id: (869,)
leaf: 11110001001
leaf_id: (638,)
leaf: 1111000101
leaf_id: (259,)
leaf: 1111000110
leaf_id: (463,)
leaf: 1111000111
leaf_id: (60,)
leaf: 1111001000
leaf_id: (329,)
leaf: 1111001001
leaf_id: (628,)
leaf: 1111001010
leaf_id: (420,)
leaf: 1111001011
leaf_id: (702,)
leaf: 1111001100
leaf_id: (964,)
l

## Week3 code2 Maximum weight independent set (path graph)

In [563]:
class mwi_set(Model):
    def __init__(self):
        super().__init__()

        
    def preprocess(self):
        self.data = self.dataLoader.data[1:]
        self.result_dict = {}
    
    def mwi(self,data):
        if len(data) == 1: 
            self.result_dict[len(data)] = data[0] 
            return data[0]
        if len(data) == 2: 
            self.result_dict[len(data)] = max(data)
            return max(data)

        if len(data) > 2:

            length = len(data)
            try:
                value1 = self.result_dict[length - 1]
            except:
                value1 = self.mwi(data[:length - 1])
                self.result_dict[length - 1] = value1
                
            try:
                value2 = self.result_dict[length - 2]
            except:
                value2 = self.mwi(data[:length - 2])
                self.result_dict[length - 2] = value2

            self.result_dict[length] =  max(value1,value2  + data[len(data)-1])
            return result_dict[length]

    def find_path(self,data):
        S = set()
        i = len(result_list)-1
        while i>=0:
            if result_list[i-1] > result_list[i-2] + data[i]:
                i = i - 1
            else:
                S.add(i)
                i = i-2
                if i == 0:
                    S.add(i)
        return S
    
    def model(self):
        


        data = self.data
        self.mwi(data)
        result_list = [result_dict[i+1] for i in range(len(self.result_dict))]
        find_path(result_list,data)
        

        return 

In [564]:
processLine = lambda x : list(map(int,map(str.strip,x.split())))
arg = {'fileName':'./data/mwis.txt','numLines':None ,  'processLine' : processLine  }

d = DataLoader(**arg)
m = mwi_set()
s = Setting(d,m,False)
s.run()

Loading data ...
Total 1001 lines read
--------------------------------------------------


NameError: name 'mwi' is not defined

In [552]:


def model(data):
    
    if len(data) == 1: 
        result_dict[len(data)] = data[0] 
        return data[0]
    if len(data) == 2: 
        result_dict[len(data)] = max(data)
        return max(data)
    
    if len(data) > 2:

        length = len(data)
        result_dict[length - 1] = model(data[:length - 1])
        result_dict[length - 2] = model(data[:length - 2]) 
        result_dict[length] =  max(   result_dict[length - 1],  result_dict[length - 2]  + data[len(data)-1])
        return result_dict[length]

def find_path(result_list,data):
    S = set()
    i = len(result_list)-1
    while i>=0:
        print(i)

        if result_list[i-1] > result_list[i-2] + data[i]:
            i = i - 1
        else:
            print('**')
            S.add(i)
            i = i-2
            if i == 0:
                S.add(i)
            
    return S
    




model(data)
result_list = [result_dict[i+1] for i in range(len(result_dict))]
find_path(result_list,data)





3
**
1
**


{1, 3}