# Pixelize Analysis Notebook

This notebook analyzes the **Pixelize (Mosaic)** algorithm.

**Architectural Significance**: Unlike KNN or NLM, Pixelize has **Low Arithmetic Intensity** (very few calculations per byte of data loaded).
This makes it a perfect test case to demonstrate **Memory Bottlenecks** and **Data Transfer Overhead**.
We expect the GPU Speedup to be low (or even < 1.0) compared to the other algorithms.

Tasks:
1.  **Visual Effect**: Demonstrate the "Lego" aesthetic.
2.  **Bottleneck Analysis**: Show that for simple tasks, the CPU can be faster than the GPU because of PCIe data-transfer overhead.

In [None]:
import os
import time
import matplotlib.pyplot as plt
import cv2
import numpy as np

# Config & Paths
# All locations are derived from the parent of the current working directory,
# so the notebook is expected to be launched from a sub-folder of the project.
PROJECT_ROOT = os.path.abspath("..")
IMAGES_DIR = os.path.join(PROJECT_ROOT, "images")
INPUT_CLEAN = os.path.join(IMAGES_DIR, "input.jpg")
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "analysis_output")

# exist_ok avoids the check-then-create race and is a no-op on re-runs.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Project Root: {PROJECT_ROOT}")
print(f"Input Clean Image: {INPUT_CLEAN}")

In [None]:
def compile_gpu(tile_width):
    """Re-compile the GPU code with a specific TILE_WIDTH.

    Runs ``make clean``, ``cmake`` and ``make`` inside
    ``<PROJECT_ROOT>/src/gpu/build``. The CMake step passes *tile_width* as
    both ``-DTILE_WIDTH`` and ``-DTILE_HEIGHT`` through ``CMAKE_CUDA_FLAGS``.

    Args:
        tile_width: Value used for both the TILE_WIDTH and TILE_HEIGHT
            preprocessor definitions.

    Returns:
        None. Build failures are reported on stdout and stop the remaining
        build steps.

    Side effects:
        Creates the build directory (and its ``output`` sub-directory) if
        missing, and always leaves the process working directory at
        ``PROJECT_ROOT``.
    """
    import subprocess  # local import keeps this cell self-contained

    print(f"[Build] Compiling GPU with TILE_WIDTH={tile_width}...")
    build_dir = os.path.join(PROJECT_ROOT, "src/gpu/build")
    # Creating the nested "output" folder also creates build_dir itself.
    os.makedirs(os.path.join(build_dir, "output"), exist_ok=True)

    cuda_flags = f"-DTILE_WIDTH={tile_width} -DTILE_HEIGHT={tile_width}"
    build_steps = (
        ["cmake", f"-DCMAKE_CUDA_FLAGS={cuda_flags}", ".."],
        ["make"],
    )

    try:
        # Best effort: on a fresh build directory there is no Makefile yet,
        # so a failing "make clean" is expected and not treated as an error.
        subprocess.run(["make", "clean"], cwd=build_dir, check=False)

        for step in build_steps:
            # Argument lists (no shell) so tile_width is never shell-parsed.
            if subprocess.run(step, cwd=build_dir, check=False).returncode != 0:
                print(f"[Error] GPU build step failed: {' '.join(step)}")
                break
    except OSError as exc:
        # Raised when the tool itself cannot be launched (e.g. not installed).
        print(f"[Error] Could not launch build tool: {exc}")
    finally:
        # Code that runs after this call uses project-relative paths, so the
        # working directory must end up at PROJECT_ROOT even on failure.
        os.chdir(PROJECT_ROOT)

def run_algo(start_dir, image_path, algo, params):
    """Run the ``main`` binary found in *start_dir* and time the invocation.

    The binary is executed with *start_dir* as its working directory and the
    arguments ``image_path algo params...``. The measured time is the
    wall-clock duration of the whole child process, not of the algorithm
    kernel alone.

    Args:
        start_dir: Directory containing the ``main`` executable. Relative
            paths are resolved against the current working directory.
        image_path: Image path passed as the first argument. It is passed as
            a single argument even if it contains spaces.
        algo: Algorithm name passed as the second argument.
        params: Extra arguments as one string; split with shell-like quoting
            rules into separate arguments.

    Returns:
        Elapsed seconds as a float, or ``None`` if the binary is missing,
        cannot be launched, or exits with a non-zero code.
    """
    import shlex
    import subprocess

    abs_start_dir = os.path.abspath(start_dir)
    binary_path = os.path.join(abs_start_dir, "main")

    if not os.path.exists(binary_path):
        print(f"[Error] Binary not found at {binary_path}")
        return None

    # Ensure executable permissions (Linux/Mac) without spawning a shell.
    if os.name != 'nt' and not os.access(binary_path, os.X_OK):
        try:
            os.chmod(binary_path, os.stat(binary_path).st_mode | 0o111)
        except OSError as exc:
            print(f"[Warning] Could not mark {binary_path} executable: {exc}")

    # Argument list instead of a shell string: no quoting pitfalls, and the
    # parent's working directory is never changed (cwd= applies to the child).
    cmd = [binary_path, str(image_path), str(algo), *shlex.split(str(params))]

    # perf_counter is monotonic, unlike time.time(), so the interval cannot
    # be distorted by system clock adjustments.
    start_time = time.perf_counter()
    try:
        ret_code = subprocess.run(cmd, cwd=abs_start_dir, check=False).returncode
    except OSError as exc:
        print(f"[Error] Could not launch {binary_path}: {exc}")
        return None
    elapsed = time.perf_counter() - start_time

    if ret_code != 0:
        print(f"[Error] Execution failed for {algo} in {start_dir} (RetCode: {ret_code})")
        return None
    return elapsed

## 1. Visual Verification
Apply Pixelize (Block Size 16) to create the mosaic effect.

In [None]:
# Ensure basic compilation (default block size 16)
compile_gpu(16)

# Build CPU
print("[Build] Compiling CPU...")
cpu_build_dir = os.path.join(PROJECT_ROOT, "src/cpu/build")
gpu_build_dir = os.path.join(PROJECT_ROOT, "src/gpu/build")
# Creating the nested "output" folder also creates the build folder itself.
os.makedirs(os.path.join(cpu_build_dir, "output"), exist_ok=True)

os.chdir(cpu_build_dir)
try:
    if os.system("cmake .. && make") != 0:
        print("[Error] CPU build failed; CPU output may be missing or stale.")
finally:
    # Always return to the project root, even if the build step raises.
    os.chdir(PROJECT_ROOT)

# Pixelize block size passed to both binaries (as a command-line string).
params = "16"

if os.path.exists(INPUT_CLEAN):
    print("Running CPU Pixelize...")
    run_algo(cpu_build_dir, INPUT_CLEAN, "pixelize", params)

    print("Running GPU Pixelize...")
    run_algo(gpu_build_dir, INPUT_CLEAN, "pixelize", params)

    # Load Results (absolute paths, so this does not depend on the cwd)
    img_in = cv2.imread(INPUT_CLEAN)
    # Note: verify your CPU main.cpp saves as cpu_pixelize_result.jpg
    img_cpu = cv2.imread(os.path.join(cpu_build_dir, "output", "cpu_pixelize_result.jpg"))
    img_gpu = cv2.imread(os.path.join(gpu_build_dir, "output", "pixelize_result.jpg"))

    # cv2.imread returns None for unreadable/missing files, so every panel
    # (including the input) falls back to a text placeholder in that case.
    panels = [
        ("Clean Input", img_in, "Input Could Not Be Read"),
        ("CPU Output", img_cpu, "CPU Output Not Found"),
        ("GPU Output", img_gpu, "GPU Output Not Found"),
    ]

    # Visualize
    fig, ax = plt.subplots(1, 3, figsize=(15, 6))
    for axis, (title, image, missing_text) in zip(ax, panels):
        if image is not None:
            # OpenCV loads BGR; matplotlib expects RGB.
            axis.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            axis.set_title(title)
        else:
            axis.text(0.5, 0.5, missing_text, ha='center')
        axis.axis('off')

    plt.savefig(os.path.join(OUTPUT_DIR, 'pixelize_visual_verification.png'))
    plt.show()
else:
    print("⚠️ ERROR: input.jpg not found! Please upload it to 'images/' folder.")

## 2. Benchmarking: Resolution Scaling & Bottleneck Analysis
We investigate whether the GPU is actually faster for this lightweight task.

In [None]:
resolutions = [512, 1024, 2048] # Add 4096 if you have a 4K image
# Only resolutions for which BOTH runs succeeded are recorded, so the four
# lists below always have the same length and can be plotted together.
measured_resolutions = []
cpu_times = []
gpu_times = []
speedups = []

# `params` (the block size string) is the value set in the visual
# verification cell above.
cpu_bench_dir = os.path.join(PROJECT_ROOT, "src/cpu/build")
gpu_bench_dir = os.path.join(PROJECT_ROOT, "src/gpu/build")

# cv2.imread returns None when the file is missing or cannot be decoded.
base_img = cv2.imread(INPUT_CLEAN) if os.path.exists(INPUT_CLEAN) else None

if base_img is None:
    print(f"[Error] Could not read input image: {INPUT_CLEAN}")
else:
    for res in resolutions:
        print(f"Benchmarking Resolution: {res}x{res}...")

        # Create Temp Image
        temp_img_path = os.path.join(OUTPUT_DIR, f"temp_{res}.jpg")
        try:
            resized = cv2.resize(base_img, (res, res))
            if not cv2.imwrite(temp_img_path, resized):
                print(f"  Skipping {res}x{res}: could not write {temp_img_path}")
                continue

            # Run (each timing covers the whole process invocation)
            t_cpu = run_algo(cpu_bench_dir, temp_img_path, "pixelize", params)
            t_gpu = run_algo(gpu_bench_dir, temp_img_path, "pixelize", params)
        finally:
            # Cleanup, even when a run raises
            if os.path.exists(temp_img_path):
                os.remove(temp_img_path)

        # Explicit None checks: a legitimate 0.0 timing must not be treated
        # as a failure, and t_gpu must be positive to compute a ratio.
        if t_cpu is None or t_gpu is None or t_gpu <= 0:
            print(f"  Skipping {res}x{res}: no valid timing for CPU and/or GPU")
            continue

        speedup = t_cpu / t_gpu
        measured_resolutions.append(res)
        cpu_times.append(t_cpu)
        gpu_times.append(t_gpu)
        speedups.append(speedup)
        print(f"  CPU: {t_cpu:.4f}s | GPU: {t_gpu:.4f}s | Speedup: {speedup:.2f}x")

    if measured_resolutions:
        # Plot Res Scaling
        fig, ax1 = plt.subplots(figsize=(10, 5))

        ax1.set_xlabel('Resolution (NxN)')
        ax1.set_ylabel('Execution Time (s)', color='tab:blue')
        ax1.plot(measured_resolutions, cpu_times, label='CPU Time', color='tab:blue', marker='o')
        ax1.plot(measured_resolutions, gpu_times, label='GPU Time', color='tab:cyan', marker='o')
        ax1.tick_params(axis='y', labelcolor='tab:blue')
        ax1.legend(loc='upper right')

        # Note: Speedup might be < 1, so we plot it carefully
        ax2 = ax1.twinx()
        ax2.set_ylabel('Speedup Factor (Below 1.0 = CPU is Faster)', color='tab:red')
        ax2.plot(measured_resolutions, speedups, label='Speedup', color='tab:red', marker='x', linestyle='--')
        ax2.tick_params(axis='y', labelcolor='tab:red')

        # Draw line at 1.0 to show break-even point. This must happen before
        # legend() so the 'Break-even' entry is included in the legend.
        ax2.axhline(y=1.0, color='gray', linestyle=':', label='Break-even')
        ax2.legend(loc='lower right')

        plt.title("Performance vs Resolution (Pixelize - Memory Bound)")
        plt.grid(True)
        plt.xticks(measured_resolutions)
        plt.savefig(os.path.join(OUTPUT_DIR, 'pixelize_resolution_benchmark.png'))
        plt.show()
    else:
        print("[Error] No successful benchmark runs; nothing to plot.")

### Conclusion
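If the red line (Speedup) is near or below 1.0, the GPU run is dominated by overhead rather than computation, which is consistent with the algorithm being **Memory Bound**.
In that case, the time taken to copy the image to and from the GPU (transfer overhead) is greater than the time saved by parallel calculation. Note that the timings above measure the whole `main` process invocation, not the kernel alone, so process start-up and file I/O are included in both the CPU and GPU figures.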