In [None]:
import cv2
import numpy as np
import os

# Define the block size. This is the core parameter controlling compression/loss.
# Each block_size x block_size tile of pixels is replaced by one average color.
# Smaller BLOCK_SIZE -> Less compression, less loss (higher fidelity)
# Larger BLOCK_SIZE -> More compression, more loss (lower fidelity)
BLOCK_SIZE = 2 # Default block edge length in pixels; used as the default `block_size` of compress_image_block_average

def get_block_average_color(image, y_start, x_start, block_size, img_h, img_w):
    """Return the average color of one block of ``image`` as a uint8 array.

    The block covers rows ``y_start`` to ``y_start + block_size`` and columns
    ``x_start`` to ``x_start + block_size``, clipped to ``img_h`` / ``img_w``
    so that partial blocks on the bottom/right edge are averaged over the
    pixels that actually exist.

    Args:
        image: Pixel array indexed as ``image[row, col, channel]``.
        y_start: First row of the block.
        x_start: First column of the block.
        block_size: Edge length of the block in pixels.
        img_h: Image height used to clip the block.
        img_w: Image width used to clip the block.

    Returns:
        A ``np.uint8`` array with one entry per channel. If the clipped block
        contains no pixels, ``[0, 0, 0]`` is returned.
    """
    y_end = min(y_start + block_size, img_h)
    x_end = min(x_start + block_size, img_w)
    block = image[y_start:y_end, x_start:x_end]

    if block.size == 0:
        return np.array([0, 0, 0], dtype=np.uint8)

    avg_color = block.mean(axis=(0, 1))
    # Round to nearest before casting: a bare astype() truncates toward zero,
    # which systematically darkens every block by up to one intensity level.
    return np.rint(avg_color).astype(np.uint8)

def compress_image_block_average(image_path, output_path="test_blockavg.npz", block_size=BLOCK_SIZE):
    """
    Compresses a color image by dividing it into blocks and storing the
    average color of each block. Saves using numpy's compressed format (.npz).

    This is a LOSSY compression method. The degree of loss depends on
    ``block_size``. Compression is measured by the size of the .npz file
    compared to the original raw pixel data size.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        output_path: Destination of the archive. numpy appends ``.npz`` when
            the name does not already end with it; the size report uses the
            file that was actually written.
        block_size: Edge length of each averaging block in pixels (must be >= 1).

    Returns:
        None. Progress, metrics and errors are printed; errors are not raised.
    """
    try:
        if block_size < 1:
            print(f"[!] Error: block_size must be a positive integer, got {block_size}")
            return

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            print(f"[!] Error: Could not load image from {image_path}")
            return

        img_h, img_w, img_c = image.shape  # Get height, width, and channels

        # Calculate dimensions of the block grid (ceiling division so that
        # partial blocks on the bottom/right edges get their own cell)
        grid_h = (img_h + block_size - 1) // block_size
        grid_w = (img_w + block_size - 1) // block_size

        # Array to store the average color (B, G, R) for each block
        block_avg_colors = np.zeros((grid_h, grid_w, 3), dtype=np.uint8)

        print(f"Processing image {img_w}x{img_h} ({img_c} channels) in {block_size}x{block_size} blocks...")

        for i in range(grid_h):
            for j in range(grid_w):
                y_start = i * block_size
                x_start = j * block_size
                avg_color = get_block_average_color(image, y_start, x_start, block_size, img_h, img_w)
                block_avg_colors[i, j] = avg_color

        # np.savez_compressed silently appends ".npz" to names lacking it, so
        # resolve the real file name up front; otherwise the size lookup below
        # would target a file that does not exist.
        saved_path = os.fspath(output_path)
        if not saved_path.endswith(".npz"):
            saved_path += ".npz"

        # Save original dimensions, block size, and the block average colors
        # using numpy's compressed format (.npz)
        np.savez_compressed(saved_path,
                            original_height=img_h,
                            original_width=img_w,
                            block_size=block_size,
                            block_avg_colors=block_avg_colors)

        print(f"[✓] Processed {image_path}. Saved block average colors to {saved_path}")

        # --- Report Sizes ---
        original_image_file_size = os.path.getsize(image_path)
        compressed_npz_size = os.path.getsize(saved_path)
        # Estimate original raw pixel data size (assuming 1 byte per channel)
        original_raw_size = img_h * img_w * img_c

        print(f"\n--- Compression Metrics ---")
        print(f"    Original image file size (e.g., JPEG): {original_image_file_size} bytes")
        print(f"    Estimated original raw pixel data size: {original_raw_size} bytes")
        print(f"    Compressed file size (.npz): {compressed_npz_size} bytes")

        # Compression is measured against the *raw* data; guard the divisor.
        if compressed_npz_size > 0:
            print(f"    Compression ratio (vs. raw data): {original_raw_size / compressed_npz_size:.2f}x")
        # Comparison vs. the already compressed original file
        if original_image_file_size > 0:
            print(f"    Size comparison (vs. original file): {compressed_npz_size / original_image_file_size:.2f}x the size")
            if compressed_npz_size < original_image_file_size:
                print(f"    Compressed file is smaller than original image file.")
            else:
                print(f"    Compressed file is larger than original image file (Original was already highly compressed).")

        print(f"\n    Note: This process is LOSSY for the original pixel data due to block averaging (detail within blocks is lost).")

    except Exception as e:
        # Top-level boundary for a script-style helper: report and return.
        print(f"[!] An error occurred during compression: {e}")

def decompress_block_average_to_image(input_path="test_blockavg.npz", output_path="decompressed_blockavg.jpg"):
    """
    Decompresses block average color data back into a color image.
    Saves the resulting image as a JPEG (quality 90).

    Args:
        input_path: Path of the .npz archive holding ``original_height``,
            ``original_width``, ``block_size`` and ``block_avg_colors``.
        output_path: Path of the image file to write with ``cv2.imwrite``.

    Returns:
        None. Progress, metrics and errors are printed; errors are not raised.
    """
    try:
        # Load the data from the numpy compressed file. The context manager
        # closes the underlying archive handle once the arrays are read.
        with np.load(input_path) as data:
            # Stored scalars come back as 0-d arrays; convert to plain ints
            # so they behave as ordinary sizes and slice bounds.
            img_h = int(data['original_height'])
            img_w = int(data['original_width'])
            block_size = int(data['block_size'])
            block_avg_colors = data['block_avg_colors']  # Shape (grid_h, grid_w, 3)

        grid_h, grid_w, _ = block_avg_colors.shape

        # Create a blank image with the original dimensions
        reconstructed_image = np.zeros((img_h, img_w, 3), dtype=np.uint8)

        print(f"Reconstructing image {img_w}x{img_h} from {grid_w}x{grid_h} blocks...")

        # Iterate through the stored block average colors
        for i in range(grid_h):
            y_start = i * block_size
            y_end = min(y_start + block_size, img_h)
            for j in range(grid_w):
                x_start = j * block_size
                x_end = min(x_start + block_size, img_w)

                # Fill the corresponding block area with its average color
                reconstructed_image[y_start:y_end, x_start:x_end] = block_avg_colors[i, j]

        # Save the reconstructed color image
        # You can control JPEG quality using a parameter like cv2.IMWRITE_JPEG_QUALITY
        # Saving at lower quality might make the output file smaller, but adds more loss.
        # cv2.imwrite signals failure through its return value; without this
        # check a failed write would surface below as a misleading
        # "Input file not found" message.
        written = cv2.imwrite(output_path, reconstructed_image, [cv2.IMWRITE_JPEG_QUALITY, 90])
        if not written:
            print(f"[!] Error: Could not write image to {output_path}")
            return

        print(f"[✓] Decompressed from {input_path} to {output_path}")

        # --- Report Decompression Metrics ---
        decompressed_image_file_size = os.path.getsize(output_path)
        print(f"\n--- Decompression Metrics ---")
        print(f"    Decompressed image file size ({os.path.basename(output_path)}): {decompressed_image_file_size} bytes")
        print(f"    Note: This output image is an approximation due to lossy block averaging. Its file size depends on the output format's own compression (e.g., JPEG quality).")

    except FileNotFoundError:
        print(f"[!] Error: Input file not found at {input_path}")
    except KeyError as e:
        print(f"[!] Error: Missing expected key in the compressed file: {e}")
    except Exception as e:
        # Top-level boundary for a script-style helper: report and return.
        print(f"[!] An error occurred during decompression: {e}")

if __name__ == "__main__":
    # Demo driver: compress a color image, rebuild it, then print a summary.
    # A color 'test.jpg' is expected next to this script; edit the paths
    # below to point at a different image.
    input_image = "test.jpg"
    compressed_file = "test_blockavg.npz"
    decompressed_image = "decompressed_blockavg.jpg"

    # Step 1: block-average compression. Larger block sizes (e.g. 4, 8, 16)
    # give a smaller .npz file at the price of a blockier reconstruction.
    print("--- Running Block Average Compression ---")
    compress_image_block_average(input_image, compressed_file, block_size=BLOCK_SIZE)

    # Step 2: rebuild an image from the stored block averages.
    print("\n--- Running Block Average Decompression ---")
    decompress_block_average_to_image(compressed_file, decompressed_image)

    # Step 3: recap which files were involved and how to judge the result.
    print(
        "\n--- Summary ---",
        f"Original image: {input_image} (size reported during compression)",
        f"Compressed data: {compressed_file} (size reported during compression)",
        f"Decompressed image: {decompressed_image} (size reported during decompression)",
        f"\nTo evaluate the compression method, compare the '{compressed_file}' size to the estimated raw data size or the original file size as explained above.",
        f"To evaluate the lossiness, compare the '{input_image}' and '{decompressed_image}' visually.",
        sep="\n",
    )

--- Running Block Average Compression ---
Processing image 534x400 (3 channels) in 8x8 blocks...
[✓] Processed test.jpg. Saved block average colors to test_blockavg.npz

--- Compression Metrics ---
    Original image file size (e.g., JPEG): 17281 bytes
    Estimated original raw pixel data size: 640800 bytes
    Compressed file size (.npz): 5474 bytes
    Compression ratio (vs. raw data): 117.06x
    Size comparison (vs. original file): 0.32x the size
    Compressed file is smaller than original image file.

    Note: This process is LOSSY for the original pixel data due to block averaging (detail within blocks is lost).

--- Running Block Average Decompression ---
Reconstructing image 534x400 from 67x50 blocks...
[✓] Decompressed from test_blockavg.npz to decompressed_blockavg.jpg

--- Decompression Metrics ---
    Decompressed image file size (decompressed_blockavg.jpg): 8050 bytes
    Note: This output image is an approximation due to lossy block averaging. Its file size depends on 