In [1]:
import os
import heapq
from collections import namedtuple

In [5]:
# Input files are expected in a "data" directory two levels above the current
# working directory. os.path.join avoids a hard-coded "/" and a doubled
# separator when the grandparent directory is the filesystem root.
data_folder = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), "data")
# Symbol weights consumed by the Huffman-coding section.
fname_input_weight = os.path.join(data_folder, 'huffman.txt')
# Vertex weights consumed by the independent-set section. Despite "min" in the
# variable name, that section computes a maximum-weight independent set.
fname_min_weight_set = os.path.join(data_folder, "mwis.txt")
print(fname_input_weight)
print(fname_min_weight_set)

/Users/shisi/ipython_projects/data/huffman.txt
/Users/shisi/ipython_projects/data/mwis.txt


### Huffman coding with Greedy Algorithm

In [3]:
# A heap entry: the total weight of a subtree and the "+"-joined indices of
# the nodes that were merged to form it. Tuples compare by weight first.
Node = namedtuple('Node', ['weight', 'index'])


def combine_nodes(node1, node2):
    """Merge two nodes into a single parent node.

    The parent's weight is the sum of both weights, and its index is the two
    index strings joined with "+" (node1's index first).
    """
    merged_weight = node1.weight + node2.weight
    merged_index = "+".join((node1.index, node2.index))
    return Node(merged_weight, merged_index)

def read_symbol_weights(fname):
    """Read symbol weights from a file and return them as a list of nodes.

    The first line of the file is skipped as a header. Every following
    non-blank line must contain a single integer weight.

    Args:
        fname: Path of the weights file.

    Returns:
        A list of Node(weight, index) in file order, where index is the
        zero-based position of the weight, stored as a string. An empty file
        yields an empty list.

    Raises:
        ValueError: If a non-blank line is not a valid integer.
    """
    nodes = []
    with open(fname, 'rb') as file:
        # Skip the header line; the default keeps an empty file from leaking
        # StopIteration out of this function.
        next(file, None)
        for line in file:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. at the end of the file) instead
                # of crashing inside int().
                continue
            # int() accepts ASCII digit bytes directly, so no decoding is
            # needed. len(nodes) is the zero-based position of this weight.
            nodes.append(Node(int(line), str(len(nodes))))
    return nodes

In [4]:
# Load one node per symbol, then rearrange the list in place into a min-heap.
# Node tuples compare by weight first, so the lightest node sits at tree[0].
tree = read_symbol_weights(fname_input_weight)
heapq.heapify(tree)

In [37]:
# One code-length counter per entry currently in the heap, all starting at 0.
code_lens = [0 for _ in tree]
print("len of code_lengths:", len(code_lens))
# NOTE: this loop consumes `tree` (it ends with a single node), so the heap
# must be rebuilt before this cell is run again.
while len(tree) >= 2:
    # Remove the two lightest nodes: a is popped first, then b.
    a, b = heapq.heappop(tree), heapq.heappop(tree)
    # Replace them with a single merged node.
    combined_node = combine_nodes(a, b)
    heapq.heappush(tree, combined_node)
    # Every original index recorded in the merged node gets one more unit of
    # code length for this merge.
    for node_idx in combined_node.index.split("+"):
        code_lens[int(node_idx)] += 1

len of code_lengths: 1000


In [38]:
# Q1. What is the maximum length of a codeword in the resulting Huffman code?
print(max(code_lens))
# Q2. What is the minimum length of a codeword in your Huffman code?
print(min(code_lens))

19
9


### Dynamic programming algorithm for computing a maximum-weight independent set of a path graph
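Your task in this problem is to run the dynamic programming algorithm (and the reconstruction procedure) from lecture on this data set. The question is: of the vertices 1, 2, 3, 4, 17, 117, 517, and 997, which ones belong to the maximum-weight independent set? The answer is reported as an 8-bit string, one bit per listed vertex in that order, where 1 means the vertex is in the set and 0 means it is not.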

In [9]:
def read_weights(fname):
    """Read vertex weights into a 1-indexed list.

    The first line of the file holds the number of vertices n. Every
    following non-blank line holds one integer weight; the k-th such line is
    stored at index k.

    Args:
        fname: Path of the weights file.

    Returns:
        A list of length n + 1. Index 0 is a placeholder that stays 0, and
        indices without a corresponding line also stay 0.

    Raises:
        ValueError: If the file is empty, or the header or a weight line is
            not a valid integer.
        IndexError: If the file holds more weights than the header declares.
    """
    with open(fname, "rb") as f:
        header = next(f, None)
        if header is None:
            # Report an empty file clearly instead of leaking StopIteration.
            raise ValueError("weights file is empty: {}".format(fname))
        weights = [0] * (int(header) + 1)
        vertex = 0
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. at the end of the file).
                continue
            vertex += 1
            # int() accepts ASCII digit bytes directly.
            weights[vertex] = int(line)
    return weights

In [20]:
# weights is 1-indexed: weights[0] is a placeholder, hence the "- 1" below.
weights = read_weights(fname_min_weight_set)
# Vertices whose membership in the independent set is reported at the end.
test_vertices = [1, 2, 3, 4, 17, 117, 517, 997]
print("num of node weights available:", len(weights)-1, weights[:5])

num of node weights available: 1000 [0, 4962786, 6395702, 5601590, 3803402]


In [27]:
def WIS(weights):
    """Compute a maximum-weight independent set of a path graph.

    Args:
        weights: 1-indexed list of vertex weights; weights[0] is a
            placeholder and weights[i] is the weight of vertex i. Weights are
            assumed to be non-negative.

    Returns:
        The chosen vertex indices as a list in decreasing order. The list is
        empty when there are no vertices.
    """
    #https://github.com/SSQ/Coursera-Stanford-Greedy-Algorithms-Minimum-Spanning-Trees-and-Dynamic-Programming/blob/master/Lecture%20Slides/25.4-algo2-dp-wis4-typed.pdf
    n = len(weights) - 1
    if n < 1:
        # No vertices at all: nothing to select (previously an IndexError).
        return []

    # A[i] is the optimal total weight using only vertices 1..i.
    A = [0] * (n + 1)
    A[1] = weights[1]
    # Forward pass; the range includes n so the full table is filled in.
    for node_idx in range(2, n + 1):
        A[node_idx] = max(A[node_idx - 1], A[node_idx - 2] + weights[node_idx])

    # Backward pass: reconstruct the optimal solution from the table.
    S = []
    i = n
    while i >= 2:
        if A[i - 1] >= A[i - 2] + weights[i]:
            # Skipping vertex i is at least as good as taking it.
            i -= 1
        else:
            S.append(i)
            i -= 2
    # Vertex 1 has no predecessors, so it is handled explicitly rather than
    # by reading A[-1]; it is taken only when it adds weight.
    if i == 1 and weights[1] > 0:
        S.append(1)
    return S
    

In [28]:
# Vertex indices selected by WIS for the loaded path-graph weights.
optimal_mis = WIS(weights)

In [29]:
# Build one character per queried vertex, in the order of test_vertices:
# "1" when the vertex is in optimal_mis, "0" otherwise.
ans = ""
for vid in test_vertices:
    ans += "1" if vid in optimal_mis else "0"
print("answer:", ans)

answer: 10100110
