In [64]:
import numpy as np
from skimage import io
from sklearn.cluster import KMeans
import cv2
import warnings
# Install a global "ignore" filter: no warning of any category is shown from
# this point on.
# NOTE(review): this also hides warnings raised by the libraries imported
# above -- confirm that blanket suppression is intended.
warnings.filterwarnings("ignore")

In [65]:
# Colour-quantise cat.jpg: cluster the colours of all pixels into 8 groups
# with k-means, then repaint every pixel with the centre of its cluster.
img = io.imread('cat.jpg')
# One row per pixel, one column per channel.
# NOTE(review): assumes a 3-channel image; confirm for other inputs.
pixels = img.reshape((-1, 3))
kmeans = KMeans(n_clusters=8, random_state=0).fit(pixels)
# Look up the centre colour for each pixel's label, then restore the layout.
quantized_pixels = kmeans.cluster_centers_[kmeans.labels_]
quantized_img = quantized_pixels.reshape(img.shape)
# Cluster centres are floating-point means. A bare astype(np.uint8) truncates
# toward zero, which darkens every channel by up to one level, so round to the
# nearest integer first. The clip guards the cast; presumably the source is
# 8-bit so values already lie in 0..255 -- TODO confirm for non-JPEG inputs.
quantized_img = np.clip(np.rint(quantized_img), 0, 255).astype(np.uint8)
io.imsave('output_image2.jpg', quantized_img)

In [66]:
import gzip
# Gzip-compress full_text.txt into compressed_text.txt.gz. The input is read
# fully into memory and handed to the gzip writer in a single call.
with open('full_text.txt', 'rb') as f_in, gzip.open('compressed_text.txt.gz', 'wb') as f_out:
    f_out.write(f_in.read())

In [67]:
import heapq
from heapq import heappop, heappush


def isLeaf(root):
    """Return True when ``root`` has neither a left nor a right child."""
    if root.left is not None:
        return False
    return root.right is None


class Node:
    """One vertex of a Huffman tree.

    Attributes are set straight from the constructor arguments:
    ``ch`` (the symbol, or None as passed for merged nodes), ``freq`` (the
    weight used for ordering) and the optional ``left`` / ``right`` children.
    """

    def __init__(self, ch, freq, left=None, right=None):
        self.ch, self.freq = ch, freq
        self.left, self.right = left, right

    def __lt__(self, other):
        # Ordering is by frequency only, which is what lets heapq pop the
        # lightest node first.
        return self.freq < other.freq


def encode(root, s, huffman_code):
    """Fill ``huffman_code`` with a bit string for every leaf below ``root``.

    The tree is walked depth-first, left branch before right. Going left
    appends '0' to the prefix ``s`` and going right appends '1'. A leaf reached
    with an empty prefix (a tree that is a single leaf) is given the code '1'.
    The dict is mutated in place and nothing is returned.
    """
    pending = [(root, s)]
    while pending:
        node, prefix = pending.pop()
        if node is None:
            continue
        if isLeaf(node):
            huffman_code[node.ch] = prefix if prefix else '1'
        # Push right first so the left subtree is popped (visited) first.
        pending.append((node.right, prefix + '1'))
        pending.append((node.left, prefix + '0'))


def decode(root, index, s):
    """Decode one symbol from the bit string ``s`` and print it.

    ``index`` is the position of the last bit already consumed. Each step
    advances it by one and reads ``s[index]``: '0' moves to the left child,
    anything else moves to the right child. When a leaf is reached its
    character is printed without a trailing newline.

    Returns the index of the last bit consumed. If ``root`` is None, or the
    walk steps onto a missing child, ``index`` is returned and nothing is
    printed.
    """
    node = root
    while node is not None:
        if isLeaf(node):
            print(node.ch, end='')
            break
        index = index + 1
        bit = s[index]
        node = node.left if bit == '0' else node.right
    return index


def buildHuffmanTree(text):
    """Build a Huffman tree for ``text`` and return its root ``Node``.

    Every distinct symbol becomes a leaf weighted by its number of
    occurrences. The two lightest nodes are repeatedly merged under a new
    parent (``ch=None``, weight = sum of both) until one node remains.

    Returns None for empty input. Text with a single distinct symbol yields
    a tree that is just that one leaf.
    """
    # Function-scope import keeps this cell runnable on its own.
    from collections import Counter

    if len(text) == 0:
        return None

    # One pass over the text. Calling text.count() per distinct symbol
    # rescans the whole text each time. Counter also keeps first-seen order,
    # so equal-weight ties are broken the same way on every run, unlike
    # iterating a set of strings.
    freq = Counter(text)
    pq = [Node(symbol, count) for symbol, count in freq.items()]
    heapq.heapify(pq)

    while len(pq) > 1:
        # The first node popped (the lightest) becomes the left child.
        left = heappop(pq)
        right = heappop(pq)
        heappush(pq, Node(None, left.freq + right.freq, left, right))

    return pq[0]


def compress_and_save(file_path, decompressed_path, huffman_path):
    """Huffman-encode a text file and save the bit string and the code table.

    Args:
        file_path: text file to read and encode.
        decompressed_path: despite its name, this is where the *encoded*
            output goes: the codes of all characters concatenated, written
            as a text string of '0'/'1' characters.
        huffman_path: receives ``str()`` of the character -> code dict.
    """
    with open(file_path, 'r') as source:
        text = source.read()

    huffmanCode = {}
    encode(buildHuffmanTree(text), '', huffmanCode)

    # Concatenate the code of every character, in input order.
    bit_string = ''.join(huffmanCode.get(symbol) for symbol in text)

    with open(decompressed_path, 'w') as bits_file:
        bits_file.write(bit_string)
    with open(huffman_path, 'w') as table_file:
        table_file.write(str(huffmanCode))


def decompress_and_save(file_path, huffman_path, decompressed_path):
    """Decode a '0'/'1' bit-string file using a saved Huffman code table.

    Args:
        file_path: text file holding the encoded bit string.
        huffman_path: text file holding the character -> code dict as a
            Python literal (the ``str()`` of a dict).
        decompressed_path: where the decoded text is written.

    Raises:
        ValueError / SyntaxError: if the code-table file is not a plain
            Python literal.

    Bits left over at the end that do not complete a code are dropped.
    """
    # Function-scope import keeps this cell runnable on its own.
    import ast

    with open(huffman_path, 'r') as h:
        # SECURITY: this used to be eval(), which executes arbitrary code
        # found in the file. literal_eval only accepts literals, which is all
        # that str() of a dict of strings produces.
        huffmanCode = ast.literal_eval(h.read())
    with open(file_path, 'r') as f:
        compressed = f.read()

    # Invert the table once so each bit costs one dict lookup instead of a
    # scan over every code. setdefault keeps the first character for a
    # duplicated code, matching a first-match scan in table order.
    code_to_char = {}
    for char, code in huffmanCode.items():
        code_to_char.setdefault(code, char)

    pieces = []
    curr_code = ""
    for bit in compressed:
        curr_code += bit
        if curr_code in code_to_char:
            pieces.append(code_to_char[curr_code])
            curr_code = ""

    with open(decompressed_path, 'w') as d:
        d.write(''.join(pieces))


# Round trip: encode test.txt into test_comp.txt (code table in huffman.txt),
# then decode it back into test_decomp.txt.
compress_and_save(
    file_path='test.txt',
    decompressed_path='test_comp.txt',  # the encoded output, despite the name
    huffman_path='huffman.txt',
)
decompress_and_save(
    file_path='test_comp.txt',
    huffman_path='huffman.txt',
    decompressed_path='test_decomp.txt',
)
