In [3]:
!git clone https://github.com/pyimagesearch/TripoSR.git
import sys
sys.path.append('/content/TripoSR/tsr')
%cd TripoSR
!pip install -r requirements.txt -q

fatal: destination path 'TripoSR' already exists and is not an empty directory.
/content/TripoSR
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [2]:
!pip install onnxruntime



In [4]:
import torch
import os
import time
from PIL import Image
import numpy as np
from IPython.display import Video
from tsr.system import TSR
from tsr.utils import remove_background, resize_foreground, save_video
import pymeshlab as pymesh
import rembg

In [5]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [6]:
class Timer:
    """Stopwatch for named pipeline steps; reports elapsed time in milliseconds.

    When CUDA is available, ``torch.cuda.synchronize()`` is called before every
    timestamp is taken, so the measurement is not taken while GPU work is
    still pending.
    """

    def __init__(self):
        # Maps a step name to the timestamp recorded by start().
        self.items = {}
        self.time_scale = 1000.0  # seconds -> ms
        self.time_unit = "ms"

    def start(self, name: str) -> None:
        """Start (or restart) the timer for the step called ``name``."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        # perf_counter() is monotonic and high-resolution; time.time() can jump
        # when the system clock is adjusted, which corrupts interval readings.
        self.items[name] = time.perf_counter()

    def end(self, name: str) -> "float | None":
        """Stop the timer for ``name``, print the elapsed time and return it.

        Returns:
            The elapsed time in ``self.time_unit`` (milliseconds), or ``None``
            when ``start(name)`` was never called (nothing is printed then).
        """
        if name not in self.items:
            return None
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start_time = self.items.pop(name)
        elapsed = (time.perf_counter() - start_time) * self.time_scale
        print(f"{name} finished in {elapsed:.2f}{self.time_unit}.")
        return elapsed


# Shared stopwatch used by the cells below.
timer = Timer()

In [8]:
from google.colab import files

# Ask for an input image through the Colab upload widget.
uploaded = files.upload()
if not uploaded:
    # A cancelled/empty upload would otherwise surface as an opaque IndexError.
    raise ValueError("No file was uploaded; upload an image and re-run this cell.")

# The first key of `uploaded` is used as the path of the image to open.
uploaded_name = next(iter(uploaded))
original_image = Image.open(uploaded_name)

# Keep a 512x512 copy next to the repository's examples; `original_image`
# itself is left at its original size.
original_image.resize((512, 512)).save("examples/product.png")

Saving box.jpg to box (1).jpg


In [9]:
# Run configuration for the TripoSR pipeline.

# NOTE(review): not read by any cell visible in this notebook; the
# preprocessing cell works on `original_image` directly.
image_paths = "/content/TripoSR/examples/product.png"

# Use the first GPU when one is present, otherwise fall back to the CPU.
# Hard-coding "cuda:0" makes `model.to(device)` fail on CPU-only runtimes.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

pretrained_model_name_or_path = "stabilityai/TripoSR"
chunk_size = 8192  # forwarded to model.renderer.set_chunk_size()

# NOTE(review): the preprocessing cell removes the background unconditionally
# and never reads this flag.
no_remove_bg = True
foreground_ratio = 0.85  # forwarded to resize_foreground()

output_dir = "output/"
model_save_format = "obj"  # file extension of the exported mesh
render = True  # when True, views and a video are rendered for each image

# Normalise the output directory string and make sure the directory exists.
output_dir = output_dir.strip()
os.makedirs(output_dir, exist_ok=True)

In [10]:
!pip show pillow

Name: Pillow
Version: 10.1.0
Summary: Python Imaging Library (Fork)
Home-page: https://python-pillow.org
Author: Jeffrey A. Clark (Alex)
Author-email: aclark@aclark.net
License: HPND
Location: /usr/local/lib/python3.11/dist-packages
Requires: 
Required-by: bokeh, diffusers, dopamine_rl, fastai, gradio, imageio, matplotlib, PyMatting, rembg, scikit-image, sentence-transformers, torchtune, torchvision, wordcloud


In [11]:
# Load the pretrained TripoSR model, set the renderer chunk size and move the
# model to the selected device; the whole step is timed.
_init_step = "Initializing model"
timer.start(_init_step)

model = TSR.from_pretrained(
    pretrained_model_name_or_path, config_name="config.yaml", weight_name="model.ckpt"
)
model.renderer.set_chunk_size(chunk_size)
model.to(device)

timer.end(_init_step)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Initializing model finished in 15620.67ms.


In [13]:
# Preprocess the uploaded image: remove the background, resize the foreground,
# flatten any alpha channel onto mid-gray, and save the result as the model input.
timer.start("Processing images")
rembg_session = rembg.new_session()

# Background removal first (the result is expected to carry an alpha channel),
# then the foreground is resized using `foreground_ratio`.
image = resize_foreground(
    remove_background(original_image, rembg_session), foreground_ratio
)

if image.mode == "RGBA":
    # Blend RGB with a constant 0.5 gray, weighted by the alpha channel.
    rgba = np.array(image).astype(np.float32) / 255.0
    alpha = rgba[:, :, 3:4]
    blended = rgba[:, :, :3] * alpha + (1 - alpha) * 0.5
    image = Image.fromarray((blended * 255.0).astype(np.uint8))

# Store the model input under <output_dir>/0/input.png.
image_dir = os.path.join(output_dir, str(0))
os.makedirs(image_dir, exist_ok=True)
image.save(os.path.join(image_dir, "input.png"))

images = [image]
timer.end("Processing images")

Processing images finished in 2488.11ms.


In [14]:
# For every preprocessed image: run the model, optionally render 30 views plus
# a video, then extract and export the mesh. Each stage is timed.
total_images = len(images)
for i, image in enumerate(images):
    print(f"Running image {i + 1}/{total_images} ...")
    # All artefacts for this image live under <output_dir>/<index>/.
    sample_dir = os.path.join(output_dir, str(i))

    timer.start("Running model")
    with torch.no_grad():
        scene_codes = model([image], device=device)
    timer.end("Running model")

    if render:
        timer.start("Rendering")
        render_images = model.render(scene_codes, n_views=30, return_type="pil")
        frames = render_images[0]
        for view_idx, frame in enumerate(frames):
            frame.save(os.path.join(sample_dir, f"render_{view_idx:03d}.png"))
        save_video(frames, os.path.join(sample_dir, "render.mp4"), fps=30)
        timer.end("Rendering")

    timer.start("Exporting mesh")
    meshes = model.extract_mesh(scene_codes, has_vertex_color=False)
    mesh_file = os.path.join(sample_dir, f"mesh.{model_save_format}")
    meshes[0].export(mesh_file)
    timer.end("Exporting mesh")

print("Processing complete.")

Running image 1/1 ...
Running model finished in 2019.24ms.


Please either pass the dim explicitly or simply use torch.linalg.cross.
The default value of dim will change to agree with that of linalg.cross in a future release. (Triggered internally at /pytorch/aten/src/ATen/native/Cross.cpp:62.)
  right = F.normalize(torch.cross(lookat, up), dim=-1)


Rendering finished in 33781.73ms.
Exporting mesh finished in 3492.76ms.
Processing complete.


In [15]:
# Convert the mesh exported for the first image (index 0) to STL with pymeshlab.
# The input path is derived from the same `output_dir` / `model_save_format`
# settings the export step uses, instead of a hard-coded absolute path that
# silently goes stale when the configuration changes.
obj_file = os.path.abspath(
    os.path.join(output_dir, "0", f"mesh.{model_save_format}")
)

# Load the exported mesh
ms = pymesh.MeshSet()
ms.load_new_mesh(obj_file)
mesh = ms.current_mesh()  # handle to the loaded mesh, kept for interactive inspection

# Convert to .stl format (written to the current working directory)
stl_file = 'model.stl'
ms.save_current_mesh(stl_file)

In [16]:
# Display the rendered video for the first image inline, embedded in the cell output.
render_video_path = 'output/0/render.mp4'
Video(render_video_path, embed=True)