# 🗺️ Classical IPM (Inverse Perspective Mapping)
## Transform Camera Images to Bird's Eye View

**Goal:** Understand geometric transformation from perspective to BEV

In [None]:
import sys
sys.path.append('..')

import numpy as np
import matplotlib.pyplot as plt
from nuscenes.nuscenes import NuScenes
from PIL import Image
import os

from src.models.ipm import InversePerspectiveMapping
from src.data.dataset import NuScenesMultiViewDataset

## 1. Load Data

In [None]:
# Collect the dataset settings in one place so they are easy to tweak,
# then build the multi-view dataset from them.
dataset_kwargs = dict(
    data_root='../data/nuscenes',
    version='v1.0-mini',
    split='train',
    image_size=(224, 400),  # presumably (height, width) — TODO confirm against the dataset class
)
dataset = NuScenesMultiViewDataset(**dataset_kwargs)

# Take the first sample; its leading 'images' dimension is reported as the
# number of cameras.
sample = dataset[0]
print(f"Loaded sample with {sample['images'].shape[0]} cameras")

## 2. Apply IPM to Front Camera

In [None]:
# Create IPM transformer.
# image_size repeats the value passed to the dataset in the load cell; keep
# the two in sync if either one changes.
ipm = InversePerspectiveMapping(
    image_size=(224, 400),
    bev_size=(200, 200),
    bev_range=(-25, 25, 0, 50)  # 25m left/right, 50m forward
)

# Get front camera (index 0)
# NOTE(review): assumes index 0 is the front camera — confirm against dataset.cameras.
front_image = sample['images'][0].numpy().transpose(1, 2, 0)  # (H, W, 3)
# Clip before casting: float values even slightly outside [0, 1] would
# otherwise wrap around when converted to uint8 and show up as speckle.
# NOTE(review): assumes the dataset returns images scaled to [0, 1] — if it
# applies mean/std normalisation, de-normalise here instead.
front_image = np.clip(front_image * 255, 0, 255).astype(np.uint8)
front_K = sample['intrinsics'][0].numpy()
front_extrinsics = sample['extrinsics'][0].numpy()

# Transform to BEV
bev_image = ipm.transform_image_to_bev(
    front_image,
    front_K,
    front_extrinsics
)

print(f"Input image: {front_image.shape}")
print(f"Output BEV: {bev_image.shape}")

## 3. Visualize: Camera vs BEV

In [None]:
# Metric extent of the BEV image as (x_min, x_max, y_min, y_max), so the axes
# read in meters as their labels claim. Must equal the bev_range passed to
# InversePerspectiveMapping.
# NOTE(review): assumes BEV columns run from x_min to x_max and rows from
# y_min to y_max — confirm against the IPM implementation.
bev_extent = (-25, 25, 0, 50)

# savefig does not create missing directories; make sure the target exists
# so the cell also works on a fresh checkout.
os.makedirs('../results/images', exist_ok=True)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Camera view
axes[0].imshow(front_image)
axes[0].set_title('Camera View (Perspective)', fontsize=14, fontweight='bold')
axes[0].axis('off')

# BEV view (tick values are meters thanks to `extent`)
axes[1].imshow(bev_image, origin='lower', extent=bev_extent)
axes[1].set_title('Bird\'s Eye View (IPM)', fontsize=14, fontweight='bold')
axes[1].set_xlabel('X (lateral, meters)', fontsize=12)
axes[1].set_ylabel('Y (forward, meters)', fontsize=12)

# Add grid
axes[1].grid(True, alpha=0.3, color='white', linewidth=0.5)

plt.tight_layout()
plt.savefig('../results/images/ipm_transformation.png', dpi=150, bbox_inches='tight')
plt.show()

print("✅ Saved to results/images/ipm_transformation.png")

## 4. Multi-Camera BEV Fusion

In [None]:
# Transform every available camera to BEV and show the results in a 2x3 grid.

# savefig does not create missing directories; make sure the target exists.
os.makedirs('../results/images', exist_ok=True)

fig, axes = plt.subplots(2, 3, figsize=(20, 12))

cameras = dataset.cameras

# Use the number of views actually present (capped at the grid size) instead
# of a hard-coded 6, so a sample with fewer cameras does not raise IndexError.
num_views = min(len(cameras), sample['images'].shape[0], axes.size)

for idx in range(num_views):
    # Get camera data
    img = sample['images'][idx].numpy().transpose(1, 2, 0)
    # Clip before casting so out-of-range floats do not wrap around in uint8.
    # NOTE(review): assumes images are scaled to [0, 1] — confirm.
    img = np.clip(img * 255, 0, 255).astype(np.uint8)
    K = sample['intrinsics'][idx].numpy()
    extrinsics = sample['extrinsics'][idx].numpy()

    # Transform to BEV
    bev = ipm.transform_image_to_bev(img, K, extrinsics)

    # Plot
    ax = axes[idx // 3, idx % 3]
    ax.imshow(bev, origin='lower')
    ax.set_title(f"{cameras[idx]} → BEV", fontsize=12, fontweight='bold')
    ax.axis('off')

# Blank any grid cells left without a camera.
for unused_ax in axes.ravel()[num_views:]:
    unused_ax.axis('off')

plt.suptitle(f'IPM Applied to All {num_views} Cameras', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('../results/images/ipm_all_cameras.png', dpi=150, bbox_inches='tight')
plt.show()

print("✅ Saved to results/images/ipm_all_cameras.png")

## 5. Analyze IPM Limitations

**What IPM does well:**
- ✅ Road surface transformation
- ✅ Lane markings
- ✅ Fast (no neural network)
- ✅ Interpretable (pure geometry)

**What IPM fails at:**
- ❌ 3D objects (cars, pedestrians)
- ❌ Elevated structures (bridges, signs)
- ❌ Non-flat terrain (hills, ramps)
- ❌ Occlusions

In [None]:
# Show the front-camera BEV on its own at a larger size so the distortion of
# raised objects is easier to see (the full BEV is shown; nothing is cropped).

# Metric extent (x_min, x_max, y_min, y_max) so the axes read in meters as
# labelled. Must equal the bev_range passed to InversePerspectiveMapping.
# NOTE(review): assumes BEV columns/rows map linearly onto this range — confirm.
bev_extent = (-25, 25, 0, 50)

# savefig does not create missing directories; make sure the target exists.
os.makedirs('../results/images', exist_ok=True)

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(bev_image, origin='lower', extent=bev_extent)
ax.set_title('IPM Result - Notice Cars are Distorted!', fontsize=14, fontweight='bold')
ax.set_xlabel('X (meters)')
ax.set_ylabel('Y (meters)')
ax.grid(True, alpha=0.3)
fig.savefig('../results/images/ipm_limitations.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n💡 Key Observation:")
print("   Cars appear 'stretched' and distorted in BEV")
print("   This is because IPM assumes everything is on the ground (Z=0)")
print("   But cars have height! They stick up above the ground.")
print("\n   This is why we need NEURAL methods (LSS) next!")

## ✅ Phase 2 Complete!

**What we learned:**
- ✅ IPM uses a homography (3×3 matrix) for transformation
- ✅ Works by assuming flat ground plane (Z=0)
- ✅ Fast and interpretable
- ✅ But fails for 3D objects

**Next: Implement LSS (Neural BEV Transformation)**
- Predict depth for each pixel
- Lift to 3D, splat to voxels, shoot to BEV
- Handle 3D objects correctly!