In [132]:
from PIL import Image    #for loading the image
import numpy as np
from collections import Counter
import pickle       # for saving the compressed image
import heapq


In [133]:
# Open the file inside a context manager so its handle is released as soon as
# the pixels have been decoded. convert('L') returns a new 8-bit grayscale
# image that does not depend on the file staying open.
with Image.open("images/sample_image3mb.png") as source_image:
    image = source_image.convert('L')
image.show()  # opens the image in an external viewer for a quick visual check

In [134]:
# Convert the grayscale PIL image into a NumPy array of pixel intensities.
image_data = np.array(image)

In [135]:
# Preview the pixel array (NumPy abbreviates large arrays in the repr).
image_data

array([[11, 11, 11, ..., 11, 12, 11],
       [11, 11, 11, ..., 12, 12, 12],
       [11, 11, 11, ..., 12, 12, 12],
       ...,
       [32, 30, 30, ..., 31, 31, 31],
       [29, 29, 29, ..., 35, 35, 35],
       [29, 29, 29, ..., 33, 31, 31]], dtype=uint8)

In [136]:
# Remember the 2-D shape; it is saved together with the compressed data later.
original_shape = image_data.shape

In [137]:
# Display the recorded (rows, columns) shape.
original_shape

(1080, 1920)

In [138]:
# Flatten to 1-D so the pixels can be counted and encoded as a single sequence.
flattened_image_data = image_data.flatten()

In [139]:
# Preview the flattened pixel sequence.
flattened_image_data

array([11, 11, 11, ..., 33, 31, 31], dtype=uint8)

In [140]:
# image_data = np.array(image)
# original_shape = image_data.shape
# flattened_image_data  = image_data.flatten() # Flattening the 2D image to 1d for frequency analysis

In [141]:
# Shape of the 2-D pixel array.
image_data.shape

(1080, 1920)

In [142]:
# Length of the flattened array, i.e. the total number of pixels.
flattened_image_data.shape

(2073600,)

In [143]:
# Count how often each pixel value occurs; these counts become the weights of
# the Huffman tree nodes.
frequency = Counter(flattened_image_data)

In [144]:
class Node:
    """A Huffman tree node that is ordered by its frequency.

    Attributes:
        pixel: the pixel value stored at a leaf; nodes built by merging two
            subtrees are created with ``pixel=None``.
        freq: the weight of the node (occurrence count of the pixel, or the
            combined count of both children for a merged node).
        left, right: child nodes, ``None`` until assigned by the caller.
    """

    def __init__(self, pixel, freq):
        self.pixel, self.freq = pixel, freq
        self.left = self.right = None

    def __lt__(self, other):
        """Compare by frequency only; heapq relies on ``<`` to order nodes."""
        return self.freq < other.freq

In [145]:
# Re-check the flattened length before building the heap (repeats the earlier
# shape check).
flattened_image_data.shape

(2073600,)

In [146]:
# One leaf node per distinct pixel value, weighted by its occurrence count.
# Each (pixel, count) pair from the counter is unpacked straight into Node.
heap = [Node(*entry) for entry in frequency.items()]

In [147]:
# Rearrange the list in place into a min-heap; nodes compare by frequency via
# Node.__lt__, so the least frequent node is always popped first.
heapq.heapify(heap)

In [148]:
# for node in heap:
#     print(f"Pixel: {node.pixel}, Frequency: {node.freq}")

In [149]:
# Huffman construction: keep merging the two lowest-frequency nodes into a new
# internal node (pixel=None) until a single tree is left on the heap.
while len(heap) >= 2:
    # The first pop becomes the left child, the second pop the right child.
    node1, node2 = heapq.heappop(heap), heapq.heappop(heap)
    merged = Node(pixel=None, freq=node1.freq + node2.freq)
    merged.left, merged.right = node1, node2
    heapq.heappush(heap, merged)

# The only node remaining on the heap is the root of the Huffman tree.
root = heap[0]

In [150]:
# Sanity check: after merging, the heap should hold only the root, and its
# frequency should equal the total number of pixels.
for node in heap:
    print(f"Pixel: {node.pixel}, Frequency: {node.freq}")
# Node defines no __repr__, so this prints the default object representation.
print(root)

Pixel: None, Frequency: 2073600
<__main__.Node object at 0x0000020FEB616FC0>


In [151]:
def generate_codes(node, current_code="", codes=None):
    """Build the pixel -> bit-string code table for a Huffman (sub)tree.

    Walks the tree depth-first, appending "0" when descending to the left
    child and "1" when descending to the right child. Every node whose
    ``pixel`` is not ``None`` receives the path accumulated so far as its code.

    Args:
        node: root of the (sub)tree to traverse; a falsy value (e.g. ``None``)
            contributes nothing.
        current_code: bit string accumulated on the path from the root.
        codes: dict to fill in. When omitted, a new dict is created for this
            call, so separate calls never share (or leak) entries.

    Returns:
        The ``codes`` dict mapping each pixel value to its bit string.
    """
    # Create the dict per call: a mutable default would be shared by every
    # call and keep stale codes from earlier trees (e.g. on cell re-runs).
    if codes is None:
        codes = {}
    if node:
        if node.pixel is not None:
            # A tree that is just one leaf has an empty path; give it a
            # one-bit code so the encoded stream is not empty/undecodable.
            codes[node.pixel] = current_code or "0"
        generate_codes(node.left, current_code + "0", codes)
        generate_codes(node.right, current_code + "1", codes)
    return codes

In [152]:
# Pass a fresh dict explicitly so the table contains only codes derived from
# this tree, regardless of any earlier generate_codes calls in the same kernel.
huffman_codes = generate_codes(root, "", {})

In [153]:
# Look up the code of every pixel in order and concatenate them into one long
# string of '0'/'1' characters.
compressed_data = "".join(map(huffman_codes.__getitem__, flattened_image_data))

In [154]:
def binary_string_to_bytes(binary_str):
    """Pack a string of '0'/'1' characters into bytes, zero-padding the tail.

    Args:
        binary_str: text consisting of the characters '0' and '1'.

    Returns:
        A ``(byte_array, padded_length)`` tuple: the packed data as a
        ``bytearray`` and the number of '0' bits appended at the end.

    Note:
        ``padded_length`` is always between 1 and 8. An input whose length is
        already a multiple of 8 still receives one full byte of padding, so a
        reader can always strip exactly ``padded_length`` trailing bits.
    """
    padded_length = 8 - len(binary_str) % 8
    padded_bits = binary_str + "0" * padded_length

    # Interpret each consecutive 8-character slice as one base-2 byte value.
    byte_array = bytearray(
        int(padded_bits[start:start + 8], 2)
        for start in range(0, len(padded_bits), 8)
    )
    return byte_array, padded_length



In [155]:
# Pack the bit string into bytes and keep the number of padding bits that were
# appended, so they can be told apart from real data when reading it back.
byte_data, padding = binary_string_to_bytes(compressed_data)

In [156]:
# Persist everything in a single pickle: the code table, the packed bytes, the
# padding bit count, and the original image shape.
# NOTE(review): pickle can execute arbitrary code on load, so this file should
# only ever be loaded from a trusted source.
with open("compressed_image.bin", "wb") as f:
    pickle.dump((huffman_codes, byte_data, padding, original_shape), f)