# Profiling Analysis
Baseline: `Mosaic_Generator` reference implementation.<br>
Optimized: `Mosaic_Gnerator_Improved` package.


## 1. Environment Setup
Install profiling tools and import both pipelines.


In [3]:
%pip install -q line_profiler
from pathlib import Path
import time
import cProfile, pstats
import numpy as np
from PIL import Image
from Mosaic_Generator.src.config import Config as LegacyConfig
from Mosaic_Generator.src.pipeline import MosaicPipeline as LegacyPipeline
from Mosaic_Gnerator_Improved.config import Config as ImprovedConfig
from Mosaic_Gnerator_Improved.mosaic_builder import MosaicPipeline as ImprovedPipeline
# Directory the benchmark cells read their sample image from. It is created on
# demand so a fresh checkout fails on the explicit check below, not on a
# missing folder.
ASSETS = Path('data') / 'test_images'
ASSETS.mkdir(parents=True, exist_ok=True)
# At least one entry has to exist before any of the later cells can run.
assert next(ASSETS.glob('*'), None) is not None, 'Place sample square images in data/test_images'



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


ModuleNotFoundError: No module named 'Mosaic_Generator'

## 2. Benchmark Harness
Measure total runtime for selected resolutions and grid sizes.


In [None]:
def benchmark(image_path: Path, size: int, grid: int):
    """Time one full run of the legacy and the improved pipeline.

    The image at ``image_path`` is opened, converted to RGB and resized to
    ``size`` x ``size``. Both pipelines are built with the same ``grid``,
    output dimensions and tile cache directory, and each processes that same
    image once.

    Args:
        image_path: Path of the image file to load.
        size: Edge length in pixels for the resized input; also passed as
            ``out_w`` and ``out_h`` to both configs.
        grid: Value passed as ``grid`` to both configs.

    Returns:
        ``(legacy_time, improved_time)``: wall-clock seconds for each
        ``run_full_pipeline`` call, measured with ``time.perf_counter``.
    """
    cache_dir = 'Mosaic_Gnerator_Improved/tile_cache'
    source = Image.open(image_path).convert('RGB').resize((size, size))

    # Build both pipelines before starting any timer so that construction
    # cost is excluded from the measurements.
    legacy_cfg = LegacyConfig(grid=grid, out_w=size, out_h=size, tiles_cache_dir=cache_dir)
    improved_cfg = ImprovedConfig(grid=grid, out_w=size, out_h=size, tiles_cache_dir=cache_dir)
    legacy = LegacyPipeline(legacy_cfg)
    improved = ImprovedPipeline(improved_cfg)

    started = time.perf_counter()
    legacy.run_full_pipeline(source)
    legacy_time = time.perf_counter() - started

    started = time.perf_counter()
    improved.run_full_pipeline(source)
    improved_time = time.perf_counter() - started

    return legacy_time, improved_time


## 3. Timing Sweep
Run benchmarks for multiple (image size, grid) combinations and tabulate results.


In [None]:
# (image edge length in pixels, grid) pairs to benchmark.
tests = [(256, 16), (512, 32), (1024, 64)]

# Path.glob yields entries in no guaranteed order and also returns hidden
# files and directories, so take the lexicographically first regular,
# non-hidden file: the benchmark input is then the same on every run.
sample = min(
    (p for p in ASSETS.glob('*') if p.is_file() and not p.name.startswith('.')),
    default=None,
)
if sample is None:
    raise FileNotFoundError('No sample image files found in data/test_images')

# One row per configuration: size, grid, legacy seconds, improved seconds,
# and the legacy/improved ratio.
results = []
for size, grid in tests:
    legacy_time, improved_time = benchmark(sample, size, grid)
    results.append((size, grid, legacy_time, improved_time, legacy_time / improved_time))

# Tab-separated table of the collected timings.
print('Image\tGrid\tLegacy(s)\tImproved(s)\tSpeedup')
for image, grid, legacy_time, improved_time, speedup in results:
    print(f'{image}\t{grid}\t{legacy_time:.3f}\t{improved_time:.3f}\t{speedup:.1f}x')


## 4. cProfile Analysis
Capture function-level hotspots for both implementations.


In [None]:
def profile_pipeline(pipeline_cls, config, image, outfile):
    """Profile one pipeline construction and run with cProfile.

    The profiler covers ``pipeline_cls(config)`` and the following
    ``run_full_pipeline(image)`` call. The raw stats are written to
    ``outfile`` and the 15 entries with the highest cumulative time are
    printed with directory names stripped.

    Args:
        pipeline_cls: Callable that takes ``config`` and returns an object
            with a ``run_full_pipeline(image)`` method.
        config: Configuration object handed to ``pipeline_cls``.
        image: Input passed to ``run_full_pipeline``.
        outfile: Destination path for the dumped profile stats.

    Raises:
        Whatever ``pipeline_cls`` or ``run_full_pipeline`` raises; in that
        case no stats are dumped or printed.
    """
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        pipeline = pipeline_cls(config)
        pipeline.run_full_pipeline(image)
    finally:
        # Always switch the profiler off, even when the pipeline fails;
        # otherwise it keeps recording everything that runs afterwards.
        profiler.disable()
    profiler.dump_stats(outfile)
    stats = pstats.Stats(profiler).strip_dirs().sort_stats('cumulative')
    stats.print_stats(15)
# Both profiling runs share a single 512x512 RGB version of the sample image.
sample_img = Image.open(sample)
sample_img = sample_img.convert('RGB')
sample_img = sample_img.resize((512, 512))

# Baseline first, then the improved pipeline, with identical settings; each
# run dumps its stats to its own .prof file.
profile_pipeline(
    LegacyPipeline,
    LegacyConfig(grid=32, out_w=512, out_h=512, tiles_cache_dir='Mosaic_Gnerator_Improved/tile_cache'),
    sample_img,
    'legacy.prof',
)
profile_pipeline(
    ImprovedPipeline,
    ImprovedConfig(grid=32, out_w=512, out_h=512, tiles_cache_dir='Mosaic_Gnerator_Improved/tile_cache'),
    sample_img,
    'improved.prof',
)


## 5. line_profiler Deep Dive
Instrument the slowest baseline functions to inspect line-level bottlenecks.


In [None]:
from line_profiler import LineProfiler

# Baseline pipeline with the same 512px / grid=32 settings as the cProfile run.
legacy_pipeline = LegacyPipeline(
    LegacyConfig(
        grid=32,
        out_w=512,
        out_h=512,
        tiles_cache_dir='Mosaic_Gnerator_Improved/tile_cache',
    )
)
legacy_generator = legacy_pipeline.mosaic_generator

# Register the two generator methods for line-level timing up front; the
# wrapped entry point below is added when it is passed to ``lp(...)``.
lp = LineProfiler(
    legacy_generator.analyze_grid_cells,
    legacy_generator.map_tiles_to_grid,
)
lp_wrapper = lp(legacy_pipeline.run_full_pipeline)
lp_wrapper(sample_img)
lp.print_stats()


## 6. Bottlenecks Identified
- Tile matching loops (baseline `map_tiles_to_grid`).
- Repeated tile downloads (baseline `_load_tiles_from_source`).
- Python grid mean computation (`utils.cell_means`).

The improved code path replaces each of these in turn: vectorized distance matrices for tile matching, a disk + memory tile cache for tile loading, and a block reshape for the grid cell means.
