## SDXL_NBG

In [1]:
import os
import subprocess
import torch
import psutil
from diffusers import DiffusionPipeline
from transformers import pipeline
from PIL import Image
import time

import sys

# Install required libraries into the interpreter that runs this kernel.
# `sys.executable -m pip` avoids picking up a different `pip` from PATH, and the
# argument list avoids going through a shell.
# NOTE(review): the imports at the top of this cell execute before this install step,
# so on a fresh environment they fail first — consider moving the install into its
# own earlier cell.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "diffusers", "transformers", "accelerate", "safetensors", "psutil",
    ],
    check=True,
)

# Path to save final images without background
save_path = "/content/SDXL_NBG"

# Ensure the save directory exists
os.makedirs(save_path, exist_ok=True)

def get_memory_usage():
    """Return the current memory usage in megabytes.

    When CUDA is available the value is what ``torch.cuda.memory_allocated()``
    reports; otherwise it is the resident set size (RSS) of the current process
    as reported by ``psutil``.
    """
    bytes_per_mb = 1024 ** 2

    # No GPU: fall back to the RSS of this Python process
    if not torch.cuda.is_available():
        rss_bytes = psutil.Process(os.getpid()).memory_info().rss
        return rss_bytes / bytes_per_mb

    # GPU present: report memory allocated through torch on the CUDA device
    return torch.cuda.memory_allocated() / bytes_per_mb

def sdxl_bg_remove(prompt, save_path):
    """Generate an image for ``prompt`` with SDXL, run RMBG-1.4 on it and save a PNG.

    Both stages are timed and their memory deltas (via ``get_memory_usage``) are
    summed and printed. The output file is named after the prompt, with spaces and
    path separators replaced by underscores, and written into ``save_path``.

    Args:
        prompt: Text prompt passed to the SDXL pipeline.
        save_path: Directory to write the PNG into; created if missing.

    Returns:
        None. The result is the saved file plus the printed summary.
    """
    # Check if cuda is available and pick the matching device
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Half precision is only used on GPU; the CPU fallback runs in float32 because
    # half precision is poorly supported by CPU kernels. The fp16 weight files are
    # still the ones loaded, they are just cast up on CPU.
    dtype = torch.float16 if use_cuda else torch.float32

    # Load Stable Diffusion XL for image generation
    # NOTE(review): the pipeline is re-created on every call; hoist it out if many
    # prompts are processed in one session.
    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=dtype,
        use_safetensors=True,
        variant="fp16"
    )
    pipe.to(device)

    # Generate image from prompt and track time/memory (model loading excluded)
    start_time = time.perf_counter()
    start_mem = get_memory_usage()
    gen_image = pipe(prompt=prompt).images[0]
    sdxl_time = time.perf_counter() - start_time
    sdxl_memory = get_memory_usage() - start_mem

    # Background removal pipeline; unlike the SDXL stage, this measurement
    # includes constructing (loading) the segmentation pipeline.
    # NOTE(review): trust_remote_code=True executes code fetched from the model
    # repository — consider pinning a revision.
    start_time = time.perf_counter()
    start_mem = get_memory_usage()
    bg_removal_pipe = pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True, device=device)
    result = bg_removal_pipe(gen_image)
    # NOTE(review): if the pipeline returns an RGBA cut-out, converting to RGB
    # discards its alpha channel — confirm this is intended.
    final_image = result.convert('RGB')
    bg_removal_time = time.perf_counter() - start_time
    bg_removal_memory = get_memory_usage() - start_mem

    # Calculate total time and memory
    total_time = sdxl_time + bg_removal_time
    total_memory = sdxl_memory + bg_removal_memory

    # Print summarized output
    print(f"{prompt} - Total time: {total_time:.4f} seconds, Total memory: {total_memory:.2f} MB")

    # Build a file name from the prompt; path separators would otherwise be
    # interpreted as sub-directories by os.path.join / save.
    file_stem = prompt.replace(' ', '_')
    for separator in (os.sep, os.altsep):
        if separator:
            file_stem = file_stem.replace(separator, '_')

    # Save final image in the specified folder (created here so the function also
    # works with a directory other than the module-level default)
    os.makedirs(save_path, exist_ok=True)
    image_save_path = os.path.join(save_path, f"{file_stem}.png")
    final_image.save(image_save_path)
    print(f"Image saved to: {image_save_path}")

if __name__ == "__main__":
    # Prompts to render; add more entries to generate more images
    prompts = ["A black hat"]

    # One generation + background-removal pass per prompt, saved under save_path
    for prompt in prompts:
        print(f"Generating image for prompt: {prompt}")
        sdxl_bg_remove(prompt, save_path)


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

Generating image for prompt: A black hat


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Fetching 19 files:   0%|          | 0/19 [00:00<?, ?it/s]

text_encoder_2/config.json:   0%|          | 0.00/575 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/5.14G [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

MyConfig.py:   0%|          | 0.00/326 [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/briaai/RMBG-1.4:
- MyConfig.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


MyPipe.py:   0%|          | 0.00/2.92k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/briaai/RMBG-1.4:
- MyPipe.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


briarmbg.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/briaai/RMBG-1.4:
- briarmbg.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/176M [00:00<?, ?B/s]

A black hat - Total time: 14.9339 seconds, Total memory: 175.34 MB
Image saved to: /content/SDXL_NBG/A_black_hat.png


## Init code

In [2]:
%%capture
# Clone the repository
!git clone https://github.com/szymanowiczs/splatter-image.git

In [3]:
# Change directory to the cloned repo
%cd splatter-image

/content/splatter-image


In [4]:
%%capture
# Install the cloned repo's pinned requirements (requirements.txt in the current
# directory), then the extra packages this notebook imports directly.
!pip install -r requirements.txt
!pip install rembg
!pip install omegaconf

In [5]:
%%capture
# Clone the Gaussian rasterizer into the current directory; it is built in place
# by a later cell.
!git clone https://github.com/graphdeco-inria/diff-gaussian-rasterization

In [6]:
# Enter the rasterizer checkout; the kernel's working directory stays here for the
# remaining cells, so relative paths used later resolve against this directory.
%cd diff-gaussian-rasterization

/content/splatter-image/diff-gaussian-rasterization


In [7]:
%%capture
# Install the GLM development headers.
# NOTE(review): presumably required by the rasterizer build in the next cell — confirm.
!apt-get install -y libglm-dev

In [8]:
%%capture
# Build the extension in place inside the current checkout (no pip install), so it
# is importable only via the working directory or an explicit sys.path entry.
!python setup.py build_ext --inplace

In [9]:
import sys

# Make both checkouts importable. Absolute paths are used because a relative
# sys.path entry is resolved against the current working directory, and the
# earlier `%cd` already leaves the kernel inside diff-gaussian-rasterization,
# where a relative 'diff-gaussian-rasterization' entry would not exist.
for _repo_dir in (
    '/content/splatter-image',
    '/content/splatter-image/diff-gaussian-rasterization',
):
    # Skip entries already present so re-running the cell does not grow sys.path
    if _repo_dir not in sys.path:
        sys.path.append(_repo_dir)

In [10]:
# Install the GPU build of ONNX Runtime.
# NOTE(review): presumably so the rembg session can run on CUDA — confirm; the
# version is not pinned.
!pip install onnxruntime-gpu

Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime_gpu-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (291.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m291.5/291.5 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hIns

In [11]:
import torch
import torchvision
import numpy as np
import os
from omegaconf import OmegaConf
from PIL import Image

from utils.app_utils import (
    remove_background,
    resize_foreground,
    set_white_background,
    resize_to_128,
    to_tensor,
    get_source_camera_v2w_rmo_and_quats,
    get_target_cameras,
    export_to_obj
)

import imageio
import rembg
from huggingface_hub import hf_hub_download
from scene.gaussian_predictor import GaussianSplatPredictor
from scene.gaussian_predictor import GaussianSplatPredictor
from gaussian_renderer import render_predicted

## Load model

In [12]:
import os
import requests

# URL of the file in the space
url = "https://huggingface.co/spaces/Abdulrahman1989/TextTo3D/resolve/main/fused_text_to_3D.pth"

# Download and save the file locally.
# The body is streamed in chunks instead of being held in memory as one bytes
# object, and the timeout keeps a stalled connection from hanging the notebook.
with requests.get(url, stream=True, timeout=60) as response:
    if response.status_code == 200:
        # Write to a temporary name first so an interrupted download never leaves a
        # truncated file under the final checkpoint name.
        partial_path = "fused_text_to_3D.pth.part"
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
        os.replace(partial_path, "fused_text_to_3D.pth")
        print("File downloaded successfully.")
    else:
        # Best-effort: report the failure and let the notebook continue, as before
        print(f"Failed to download the file. Status code: {response.status_code}")

File downloaded successfully.


In [13]:
%%capture
# Load model configuration
# (%%capture silences this cell's output, including the repr of the final
# `model.to("cuda")` expression.)
from omegaconf import OmegaConf
import os

# Specify the path directly
config_path = "/content/splatter-image/gradio_config.yaml"  # Replace with the actual path
model_cfg = OmegaConf.load(config_path)

# Relative path: resolved against the kernel's current working directory, which
# must be the directory the download cell wrote the checkpoint into.
model_path = "fused_text_to_3D.pth"  # Local path after download
model = GaussianSplatPredictor(model_cfg)
# SECURITY NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code; only load checkpoints from a source you trust (this one is downloaded over
# HTTP earlier in the notebook).
# NOTE(review): "cuda" is hard-coded here and below, so this cell needs a GPU.
ckpt_loaded = torch.load(model_path, map_location="cuda")
# The checkpoint is a dict; only its "model_state_dict" entry is used.
model.load_state_dict(ckpt_loaded["model_state_dict"])
model.to("cuda")

## Inference

In [14]:
import torch
import rembg

@torch.no_grad()
def preprocess(input_image, preprocess_background=True, foreground_ratio=0.65):
    """Prepare an input image for the reconstruction model.

    Args:
        input_image: Image object exposing ``.convert`` and ``.mode`` (callers in this
            notebook pass a PIL image).
        preprocess_background: When True, convert to RGB, remove the background with
            rembg, resize the foreground by ``foreground_ratio`` and place it on a
            white background. When False, the image is used as is, except that an
            RGBA image is still placed on a white background.
        foreground_ratio: Forwarded to ``resize_foreground``; only used when
            ``preprocess_background`` is True.

    Returns:
        The result of ``resize_to_128`` applied to the prepared image.
    """
    if preprocess_background:
        # Create the rembg session only on the path that uses it; building it
        # unconditionally loads the segmentation model for nothing when
        # background preprocessing is disabled.
        rembg_session = rembg.new_session()

        image = input_image.convert("RGB")
        image = remove_background(image, rembg_session)
        image = resize_foreground(image, foreground_ratio)
        image = set_white_background(image)
    else:
        image = input_image
        # An alpha channel still has to be flattened onto white
        if image.mode == "RGBA":
            image = set_white_background(image)

    image = resize_to_128(image)

    return image

In [15]:
import pprint

@torch.no_grad()
def reconstruct_and_export(image, image_name):
    """
    Passes image through model, renders a camera loop and exports the reconstruction.

    Runs under ``torch.no_grad()``: this is inference only, so no autograd graph
    needs to be kept for the forward pass or the renders.

    Args:
        image: Input accepted by ``to_tensor`` (the caller in this notebook passes a
            NumPy array of the preprocessed image).
        image_name: Output path prefix; ``"_mesh.ply"`` and ``".mp4"`` are appended
            to it verbatim (any extension already in the prefix is kept).

    Returns:
        Tuple ``(ply_path, video_path)`` of the two files written.
    """
    # NOTE(review): device is hard-coded, so this function requires a GPU.
    device = "cuda"
    image_tensor = to_tensor(image).to(device)
    view_to_world_source, rot_transform_quats = get_source_camera_v2w_rmo_and_quats()
    view_to_world_source = view_to_world_source.to(device)
    rot_transform_quats = rot_transform_quats.to(device)

    # Two leading singleton dimensions are added to the image tensor.
    # Unactivated outputs are requested because the raw dict is what gets exported
    # below; activations are applied manually on a copy for rendering.
    reconstruction_unactivated = model(
        image_tensor.unsqueeze(0).unsqueeze(0),
        view_to_world_source,
        rot_transform_quats,
        None,
        activate_output=False
    )

    # Take the first entry of every output and apply the scaling/opacity activations
    reconstruction = {k: v[0].contiguous() for k, v in reconstruction_unactivated.items()}
    reconstruction["scaling"] = model.scaling_activation(reconstruction["scaling"])
    reconstruction["opacity"] = model.opacity_activation(reconstruction["opacity"])

    # Render images in a loop, one frame per target camera, on a white background
    world_view_transforms, full_proj_transforms, camera_centers = get_target_cameras()
    background = torch.tensor([1, 1, 1], dtype=torch.float32, device=device)
    loop_renders = []
    t_to_512 = torchvision.transforms.Resize(512, interpolation=torchvision.transforms.InterpolationMode.NEAREST)

    for r_idx in range(world_view_transforms.shape[0]):
        rendered_image = render_predicted(
            reconstruction,
            world_view_transforms[r_idx].to(device),
            full_proj_transforms[r_idx].to(device),
            camera_centers[r_idx].to(device),
            background,
            model_cfg,
            focals_pixels=None
        )["render"]
        rendered_image = t_to_512(rendered_image)
        # Scale to 0..255 and move the first axis last for the video writer
        # (assumes the render is channel-first with values in [0, 1] — TODO confirm)
        loop_renders.append(torch.clamp(rendered_image * 255, 0.0, 255.0).detach().permute(1, 2, 0).cpu().numpy().astype(np.uint8))

    imageio.mimsave(f"{image_name}.mp4", loop_renders, fps=25)

    # The unactivated reconstruction is what is exported
    export_to_obj(reconstruction_unactivated, f"{image_name}_mesh.ply")

    return f"{image_name}_mesh.ply", f"{image_name}.mp4"

In [16]:
import os
from PIL import Image
import numpy as np

def process_images_in_folder(folder_path):
    """Preprocess and reconstruct every image file found directly in ``folder_path``.

    Files are selected by extension (.png, .jpg, .jpeg, case-insensitive) and handled
    in sorted name order. For each one the image is preprocessed with background
    removal, passed to ``reconstruct_and_export``, and the output paths are printed.
    The full image path (extension included) is used as the output prefix, so the
    results are written next to the source image.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        None.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # os.listdir returns entries in arbitrary order; sort for reproducible runs
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(image_suffixes):  # Check for image files
            continue

        image_path = os.path.join(folder_path, filename)
        # A directory whose name happens to end in an image suffix is not an image
        if not os.path.isfile(image_path):
            continue
        print(f"Processing image: {image_path}")

        # Close the underlying file handle as soon as preprocessing has produced
        # its own image object
        with Image.open(image_path) as image:
            process_image = preprocess(image, preprocess_background=True, foreground_ratio=0.65)

        # Perform reconstruction and export results
        ply_out_path, loop_out_path = reconstruct_and_export(np.array(process_image), image_path)

        print(f"3D model saved to {ply_out_path}")
        print(f"Video render saved to {loop_out_path}")

# Usage: reconstruct every image in the folder the SDXL stage saves into
# (same directory as `save_path` in the first cell).
folder_path = '/content/SDXL_NBG'
process_images_in_folder(folder_path)

Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file '/root/.u2net/u2net.onnx'.


Processing image: /content/SDXL_NBG/A_black_hat.png


100%|████████████████████████████████████████| 176M/176M [00:00<00:00, 129GB/s]


*************** EP Error ***************
EP Error /onnxruntime_src/onnxruntime/python/onnxruntime_pybind_state.cc:507 void onnxruntime::python::RegisterTensorRTPluginsAsCustomOps(PySessionOptions&, const onnxruntime::ProviderOptions&) Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported.
 when using ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
Falling back to ['CUDAExecutionProvider', 'CPUExecutionProvider'] and retrying.
****************************************
3D model saved to /content/SDXL_NBG/A_black_hat.png_mesh.ply
Video render saved to /content/SDXL_NBG/A_black_hat.png.mp4
