In [15]:
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from controlnet_aux import MidasDetector
import torch
from PIL import Image
import os
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load ControlNet Depth Model
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth",
    torch_dtype=torch.float16 if device=="cuda" else torch.float32
)

# Load Stable Diffusion Pipeline
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16 if device=="cuda" else torch.float32
).to(device)

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Load Midas Depth Estimator
depth_estimator = MidasDetector.from_pretrained("lllyasviel/ControlNet").to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [20]:
from PIL import Image
import os

# Load the original image
image_path = "output_20250423_194637.png"  # Change to your file path
image = Image.open(image_path)

# Create output directory
output_dir = "context"
os.makedirs(output_dir, exist_ok=True)

# Set grid size
rows, cols = 2, 3
width, height = image.size
cell_width = width // cols
cell_height = height // rows

# Split image and save
for row in range(rows):
    for col in range(cols):
        left = col * cell_width
        upper = row * cell_height
        right = left + cell_width
        lower = upper + cell_height
        cropped = image.crop((left, upper, right, lower))
        cropped.save(os.path.join(output_dir, f"nandi_angle_{row*cols + col + 1}.png"))

print("Images saved to:", output_dir)


Images saved to: context


In [21]:
input_folder = "D:\\college\\imp-doc\\sem6\\GENAI\\project\\3D-Reconstruction-of-Monuments\\context"
output_folder = "output_images"
os.makedirs(output_folder, exist_ok=True)

# Process all images
for filename in tqdm(os.listdir(input_folder)):
    if filename.endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(input_folder, filename)
        input_image = Image.open(image_path).convert("RGB")

        # Generate Depth Map
        depth_map = depth_estimator(input_image)
        depth_map = depth_map.resize(input_image.size)

        # Prompt for generation
        prompt = "highly detailed, same object but from a different angle, different perspective, realistic DSLR photo"

        # Generate new view
        output = pipe(prompt=prompt, image=depth_map, num_inference_steps=30).images[0]

        # Save Outputs
        output.save(os.path.join(output_folder, f"output_{filename}"))
        depth_map.save(os.path.join(output_folder, f"depth_{filename}"))

print("Done! Check the output_images folder.")

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

 17%|█▋        | 1/6 [00:49<04:06, 49.35s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

 33%|███▎      | 2/6 [01:29<02:56, 44.17s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

 50%|█████     | 3/6 [02:55<03:09, 63.23s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

 67%|██████▋   | 4/6 [03:31<01:44, 52.27s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

 83%|████████▎ | 5/6 [04:25<00:53, 53.05s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

100%|██████████| 6/6 [04:57<00:00, 49.63s/it]

Done! Check the output_images folder.





In [24]:
import os
import cv2
import numpy as np
import open3d as o3d

# Path to your output_images directory
input_folder = "D:\\college\\imp-doc\\sem6\\GENAI\\project\\3D-Reconstruction-of-Monuments\\output_images"

# Camera intrinsics
fx, fy = 615.426, 615.426
cx, cy = 312.500, 249.500
depth_scale = 5.0 / 254  # adjust if using mm, etc.

# Initialize a global point cloud
global_pcd = o3d.geometry.PointCloud()

# Iterate over all depth image files
for filename in sorted(os.listdir(input_folder)):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')) and "output" in filename:
        image_path = os.path.join(input_folder, filename)
        depth = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

        if depth is None:
            print(f"Error loading {filename}")
            continue

        if len(depth.shape) == 3:
            depth = cv2.cvtColor(depth, cv2.COLOR_BGR2GRAY)

        depth = cv2.bilateralFilter(depth, d=5, sigmaColor=75, sigmaSpace=75)

        points = []
        height, width = depth.shape

        for v in range(height):
            for u in range(width):
                d = depth[v, u]
                Z = d * depth_scale
                if Z < 0.1 or Z > 5.0:
                    continue
                X = (u - cx) * Z / fx
                Y = (v - cy) * Z / fy
                points.append([X, Y, Z])

        # Convert to Open3D point cloud
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(np.array(points))

        # Transform this cloud to simulate different camera angles
        # In real scenarios, you'd get these from camera extrinsics
        # For demo, simulate a rotating camera (example rotation)
        angle_rad = np.radians(10 * len(global_pcd.points) // 100000)  # example increment
        R = pcd.get_rotation_matrix_from_xyz((0, angle_rad, 0))
        pcd.rotate(R, center=(0, 0, 0))  # Simulate motion by rotating each point cloud

        # Merge with the global point cloud
        global_pcd += pcd

        print(f"Added {filename}, total points so far: {len(global_pcd.points)}")

# Clean the merged point cloud
global_pcd = global_pcd.voxel_down_sample(voxel_size=0.005)
global_pcd, _ = global_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)

# Visualize the full 3D reconstruction
o3d.visualization.draw_geometries([global_pcd])


Added output_nandi_angle_1.png, total points so far: 98622
Added output_nandi_angle_2.png, total points so far: 197037
Added output_nandi_angle_3.png, total points so far: 294979
Added output_nandi_angle_4.png, total points so far: 382966
Added output_nandi_angle_5.png, total points so far: 480132
Added output_nandi_angle_6.png, total points so far: 571800
