# Assignment 4: Augmented Reality with PyTorch3D

**Complete AR Pipeline Implementation**

## Setup (Run Once)

In [None]:
# Setup - Clear cache and install dependencies
!rm -rf compuer_vision_assignment4
!git clone https://github.com/Keval-7503/compuer_vision_assignment4.git
%cd compuer_vision_assignment4
!git pull
!pip install -q torch torchvision fvcore iopath
!pip install -q "git+https://github.com/facebookresearch/pytorch3d.git"

## Option 1: Run Demo Pipeline (Default)

In [None]:
# Run default demo
# The import resolves against the cloned repository, so the setup cell (which
# changes into that directory) must have been run first.
from run_ar_pipeline import run_ar_pipeline
# Called with no arguments; the return value is kept in `result` for inspection.
result = run_ar_pipeline()

---

**Pipeline complete!** Results are saved to the `results/` folder.

All AR processing happens in the backend code (`src/` folder).

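## Option 2: Use Your Own Image

**Note:** the cells in this section are listed in reverse. Run them in step order — Step 1 (upload), Step 2 (corners), Step 3 (dimensions), then Step 4 (pipeline) — because each step uses variables defined by the earlier ones.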

In [None]:
# Step 4: Run AR Pipeline
# Consumes the values produced by Steps 1-3, runs the custom pipeline, then
# re-loads the saved composite image from disk and displays it.
import cv2
import matplotlib.pyplot as plt

from run_ar_pipeline import run_ar_pipeline_custom

# Fail early with an actionable message when the earlier steps have not been
# run in this kernel (a raw NameError would only name the first missing one).
_required = ("image_path", "image_points_2d", "object_width", "object_height")
_missing = [name for name in _required if name not in globals()]
if _missing:
    raise NameError(
        "Run Steps 1-3 before Step 4; missing: " + ", ".join(_missing)
    )

# Single definition of the output location used for both loading and reporting.
result_path = 'results/custom_ar_result.png'

print("Running AR pipeline...")
ar_result, rendered, R, t, rmse = run_ar_pipeline_custom(
    image_path=image_path,
    image_points_2d=image_points_2d,
    object_width=object_width,
    object_height=object_height
)

# Display result
# cv2.imread returns None (it does not raise) when the file cannot be read.
result_img = cv2.imread(result_path)
if result_img is None:
    raise FileNotFoundError(f"Could not read pipeline output at '{result_path}'")
# OpenCV loads images as BGR; matplotlib expects RGB.
result_rgb = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(15, 10))
plt.imshow(result_rgb)
plt.title("Augmented Reality Result")
plt.axis('off')
plt.show()

print(f"\nCamera Pose RMSE: {rmse:.4f} pixels")
print(f"Saved to '{result_path}'")

In [None]:
# Step 3: Specify object dimensions
import math


def _read_positive_cm(prompt):
    """Prompt for a length in centimetres and return it as a float.

    Args:
        prompt: Text shown to the user by ``input``.

    Returns:
        The entered value as a finite float greater than zero.

    Raises:
        ValueError: If the entry is not a number, or is not a finite value > 0
            (``float`` itself accepts "nan", "inf" and negative numbers).
    """
    value = float(input(prompt))
    if not math.isfinite(value) or value <= 0:
        raise ValueError(f"Dimension must be a positive number of cm, got {value}")
    return value


print("Enter the real-world dimensions of your planar object:")
print("(e.g., A4 paper = 21cm x 29.7cm, Notebook = 20cm x 25cm)")
object_width_cm = _read_positive_cm("Width in cm: ")
object_height_cm = _read_positive_cm("Height in cm: ")

# Convert centimetres to metres, the unit reported below and handed to Step 4.
object_width = object_width_cm / 100
object_height = object_height_cm / 100
print(f"\nObject size: {object_width}m x {object_height}m")

In [None]:
# Step 2: Mark the 4 corners of your planar object
# Shows the uploaded image, reads four corner coordinates from the user, builds
# `image_points_2d` (4x2 float32) and draws the selection for visual checking.
import numpy as np
import cv2
import matplotlib.pyplot as plt

# `image_path` is produced by Step 1; give a clear hint if it has not been run.
if "image_path" not in globals():
    raise NameError("image_path is not defined - run Step 1 (upload) first")

# Load and display image
# cv2.imread returns None (it does not raise) when the file cannot be read.
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image at '{image_path}'")
# OpenCV loads images as BGR; matplotlib expects RGB.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
height, width = img.shape[:2]

print(f"Image size: {width} x {height}")
print("\nInstructions:")
print("Look at the image below and identify the 4 corners")
print("Order: Top-Left, Top-Right, Bottom-Right, Bottom-Left\n")

plt.figure(figsize=(12, 8))
plt.imshow(img_rgb)
plt.title("Identify the 4 corners of your planar object")
plt.grid(True, alpha=0.3)
plt.show()

# Input corners
print("Enter corner coordinates (x, y):")
corner_names = ("Top-Left", "Top-Right", "Bottom-Right", "Bottom-Left")
# Tuple elements are evaluated left to right, so X is asked before Y.
corners = [
    (int(input(f"{name} X: ")), int(input(f"{name} Y: ")))
    for name in corner_names
]
# Keep the individual coordinate names available in the notebook namespace.
(tl_x, tl_y), (tr_x, tr_y), (br_x, br_y), (bl_x, bl_y) = corners

image_points_2d = np.array(corners, dtype=np.float32)

# Show marked corners
# Drawing happens on a copy so `img_rgb` stays untouched. The copy is RGB, so
# (255, 0, 0) renders red and (0, 255, 0) renders green.
img_marked = img_rgb.copy()
for i, (x, y) in enumerate(corners):
    # Plain Python ints are passed to OpenCV, avoiding a float32 round-trip.
    cv2.circle(img_marked, (x, y), 10, (255, 0, 0), -1)
    cv2.putText(img_marked, str(i+1), (x+15, y),
                cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 0, 0), 3)

# Connect each corner to the next one, wrapping around to close the outline.
for i, pt1 in enumerate(corners):
    pt2 = corners[(i+1) % len(corners)]
    cv2.line(img_marked, pt1, pt2, (0, 255, 0), 3)

plt.figure(figsize=(12, 8))
plt.imshow(img_marked)
plt.title("Marked Corners")
plt.show()
print("Corners marked!")

In [None]:
# Step 1: Upload your image
# Colab-only: opens the browser upload dialog and stores the chosen file under
# a new name, exposing its location as `image_path` for the later steps.
from google.colab import files
import shutil

print("Upload an image with a flat planar object (book, paper, notebook, etc.)")
uploaded = files.upload()

# Nothing uploaded (e.g. the dialog was cancelled) used to surface as a bare
# IndexError from indexing an empty list; report it explicitly instead.
if not uploaded:
    raise RuntimeError("No file was uploaded - re-run this cell and choose an image.")

# Save uploaded file
# Only the first uploaded file is used; any additional uploads are ignored.
image_filename = next(iter(uploaded))
image_path = f"uploaded_{image_filename}"
shutil.move(image_filename, image_path)
print(f"\nImage saved as: {image_path}")