In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install torch torchvision opencv-python open3d timm

Collecting open3d
  Downloading open3d-0.19.0-cp311-cp311-manylinux_2_31_x86_64.whl.metadata (4.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (

In [3]:
!pip install open3d -q --no-cache-dir


In [5]:
import torch
import cv2
import numpy as np
import open3d as o3d
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from PIL import Image

def load_midas():
    model_type = "DPT_Large"

    midas = torch.hub.load("intel-isl/MiDaS", model_type, trust_repo=True)
    midas.eval()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    midas.to(device)

    if model_type in ["DPT_Large", "DPT_Hybrid"]:
        transform = torch.hub.load("intel-isl/MiDaS", "transforms", trust_repo=True).dpt_transform
    else:
        transform = torch.hub.load("intel-isl/MiDaS", "transforms", trust_repo=True).small_transform

    return midas, transform, device



In [6]:
def estimate_depth(image_path, midas, transform, device):
    from torchvision.transforms.functional import to_tensor

    img = Image.open(image_path).convert("RGB")
    img_np = np.array(img).astype(np.float32)

    sample = transform(img_np)
    if isinstance(sample, dict):
        input_tensor = sample["image"].to(device)
    else:
        input_tensor = sample.to(device)

    print(f"Input tensor shape: {input_tensor.shape}")

    if len(input_tensor.shape) == 3:
        input_tensor = input_tensor.unsqueeze(0)


    print(f"Input tensor shape after adding batch dimension: {input_tensor.shape}")

    with torch.no_grad():
        prediction = midas(input_tensor)  # Prediction should now work fine
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img_np.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    depth = prediction.cpu().numpy()
    return img_np, depth

In [7]:
def depth_to_point_cloud(img, depth):
    h, w = depth.shape
    fx = fy = 0.5 * w
    cx, cy = w // 2, h // 2

    i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
    z = depth
    x = (i - cx) * z / fx
    y = (j - cy) * z / fy

    xyz = np.stack((x, y, z), axis=2).reshape(-1, 3)
    colors = img.reshape(-1, 3) / 255.0

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(xyz)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    return pcd


def point_cloud_to_mesh(pcd):
    pcd.estimate_normals()
    poisson_mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=9)
    return poisson_mesh


def main(image_path):
    midas, transform, device = load_midas()
    img, depth = estimate_depth(image_path, midas, transform, device)

    pcd = depth_to_point_cloud(img, depth)
    o3d.io.write_point_cloud("/content/point_cloud.ply", pcd)
    print("Saved point cloud to 'point_cloud.ply'")


    mesh = point_cloud_to_mesh(pcd)
    mesh.compute_vertex_normals()
    o3d.io.write_triangle_mesh("/content/reconstructed_mesh.ply", mesh)
    print("Saved reconstructed mesh to 'reconstructed_mesh.ply'")

if __name__ == "__main__":
    main("/content/drive/MyDrive/ring_outputs/ring_0_0.png")  # Replace with your image path


Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master
Downloading: "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt" to /root/.cache/torch/hub/checkpoints/dpt_large_384.pt
100%|██████████| 1.28G/1.28G [00:10<00:00, 129MB/s]
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


Input tensor shape: torch.Size([1, 3, 384, 288])
Input tensor shape after adding batch dimension: torch.Size([1, 3, 384, 288])
Saved point cloud to 'point_cloud.ply'
Saved reconstructed mesh to 'reconstructed_mesh.ply'


In [8]:
import plotly.graph_objects as go

In [9]:
print("Load a ply point cloud, print it, and render it")
ply_point_cloud = o3d.data.PLYPointCloud()
pcd = o3d.io.read_point_cloud("/content/point_cloud.ply")
points = np.asarray(pcd.points)
print(pcd)
print(np.asarray(pcd.points))

Load a ply point cloud, print it, and render it
[Open3D INFO] Downloading https://github.com/isl-org/open3d_downloads/releases/download/20220201-data/fragment.ply
[Open3D INFO] Downloaded to /root/open3d_data/download/PLYPointCloud/fragment.ply
PointCloud with 2365 points.
[[-1.05298188 -1.35383385  1.07805288]
 [-1.0210931  -1.37847569  1.09767509]
 [-1.00941343 -1.43442961  1.14223099]
 ...
 [22.18056976 31.51975703 25.09906578]
 [23.29072464 31.44247827 25.03752899]
 [24.35169158 31.30931774 24.93149376]]


In [10]:
colors = None
if pcd.has_colors():
    colors = np.asarray(pcd.colors)
elif pcd.has_normals():
    colors = (0.5, 0.5, 0.5) + np.asarray(pcd.normals) * 0.5

In [11]:
fig = go.Figure(
  data=[
    go.Scatter3d(
      x=points[:,0], y=points[:,1], z=points[:,2],
      mode='markers',
      marker=dict(size=1, color=colors)
)
],
  layout=dict(
    scene=dict(
      xaxis=dict(visible=False),
      yaxis=dict(visible=False),
      zaxis=dict(visible=False)
)
)
)
fig.show()

In [12]:
print("Load a ply point cloud, print it, and render it")
ply_point_cloud = o3d.data.PLYPointCloud()
pcd = o3d.io.read_point_cloud("/content/reconstructed_mesh.ply")
points = np.asarray(pcd.points)
print(pcd)
print(np.asarray(pcd.points))

Load a ply point cloud, print it, and render it
PointCloud with 8135 points.
[[ -1.05260849  -1.42097855   0.24451637]
 [ -1.21109009  -1.22798157   0.24451637]
 [ -0.76000595  -1.49949265   0.24451637]
 ...
 [ 14.45536804  -8.54306412  28.67831612]
 [  9.77371216 -36.63298035  31.08900261]
 [ 12.1145401  -36.63298035  31.40366554]]


In [13]:
colors = None
if pcd.has_colors():
    colors = np.asarray(pcd.colors)
elif pcd.has_normals():
    colors = (0.5, 0.5, 0.5) + np.asarray(pcd.normals) * 0.5

In [14]:
fig = go.Figure(
  data=[
    go.Scatter3d(
      x=points[:,0], y=points[:,1], z=points[:,2],
      mode='markers',
      marker=dict(size=1, color=colors)
)
],
  layout=dict(
    scene=dict(
      xaxis=dict(visible=False),
      yaxis=dict(visible=False),
      zaxis=dict(visible=False)
)
)
)
fig.show()