Installations

In [None]:
# Set non-interactive mode
!export DEBIAN_FRONTEND=noninteractive  > /dev/null 2>&1

# Preconfigure the keyboard layout selections directly
!echo "keyboard-configuration keyboard-configuration/layoutcode select English (US)" | sudo debconf-set-selections  > /dev/null 2>&1
!echo "keyboard-configuration keyboard-configuration/variantcode select English (US)" | sudo debconf-set-selections  > /dev/null 2>&1

# Install keyboard-configuration (after preconfiguring it)
!sudo DEBIAN_FRONTEND=noninteractive apt-get install -y keyboard-configuration > /dev/null 2>&1

# Install cuda 11.8
!sudo DEBIAN_FRONTEND=noninteractive apt-get upgrade -y  > /dev/null 2>&1
!sudo DEBIAN_FRONTEND=noninteractive apt-get install -y cuda-11-8  > /dev/null 2>&1

# Configure environment variables for CUDA
import os
os.environ['CUDA_HOME'] = '/usr/local/cuda-11.8'
os.environ['PATH'] = '/usr/local/cuda-11.8/bin:' + os.environ['PATH']
os.environ['LD_LIBRARY_PATH'] = '/usr/local/cuda-11.8/lib64:' + os.environ.get('LD_LIBRARY_PATH', '')

In [None]:
# Swap the preinstalled PyTorch stack for builds pinned to CUDA 11.8.
# Step 1: remove the existing torch packages and purge pip's cache.
# NOTE: every command sends stdout and stderr to /dev/null, so a failed
# uninstall/install is not visible in this cell.
!pip uninstall -y torch torchvision torchaudio torchtext > /dev/null 2>&1
!pip cache purge > /dev/null 2>&1

# Step 2: install the pinned +cu118 wheels (and a pinned xformers) from the
# PyTorch cu118 wheel index.
!pip install torch==2.1.0+cu118 torchvision==0.16.0+cu118 torchaudio==2.1.0+cu118 --index-url https://download.pytorch.org/whl/cu118 > /dev/null 2>&1
!pip install xformers==0.0.22.post4 --index-url https://download.pytorch.org/whl/cu118 > /dev/null 2>&1

In [None]:
import torch
import torchvision
import torchaudio

# Sanity check of the installed PyTorch stack: one "<label>: <value>" line per
# item, covering package versions, CUDA availability and torch's CUDA version.
print("\n".join(
    f"{label}: {value}"
    for label, value in (
        ("Torch version", torch.__version__),
        ("Torchvision version", torchvision.__version__),
        ("Torchaudio version", torchaudio.__version__),
        ("CUDA available", torch.cuda.is_available()),
        ("CUDA version", torch.version.cuda),
    )
))

Torch version: 2.1.0+cu118
Torchvision version: 0.16.0+cu118
Torchaudio version: 2.1.0+cu118
CUDA available: True
CUDA version: 11.8


In [None]:
# Python packages installed straight from PyPI (unpinned).
# NOTE: every shell command in this cell discards stdout and stderr, so a failed
# install/clone/build is not visible here.
!pip install datasets  > /dev/null 2>&1
!pip install nerfacc  > /dev/null 2>&1
!pip install jedi > /dev/null 2>&1

from datasets import load_dataset
from PIL import Image
import os

# Fourier123: clone the repository, make it the working directory (%cd changes
# the kernel's cwd for all following cells), and install its requirements.
# NOTE: running this cell a second time would start from inside Fourier123 and
# clone/cd relative to that directory.
!git clone https://github.com/Ysz2022/Fourier123 > /dev/null 2>&1
%cd Fourier123
!pip install -r requirements.txt > /dev/null 2>&1

# nvdiffrast: clone inside the Fourier123 checkout, install via setup.py, step back out.
!git clone https://github.com/NVlabs/nvdiffrast.git > /dev/null 2>&1
%cd nvdiffrast
!python setup.py install > /dev/null 2>&1
%cd ..

# kiuikit: clone, install via setup.py, and import `kiui` while the working
# directory is still the kiuikit checkout, then step back out.
# NOTE(review): presumably the import is done here so it resolves against the
# checkout -- confirm.
!git clone https://github.com/ashawkey/kiuikit.git > /dev/null 2>&1
%cd kiuikit
!python setup.py install > /dev/null 2>&1
import kiui
%cd ..

/content/Fourier123
/content/Fourier123/nvdiffrast
/content/Fourier123
/content/Fourier123/kiuikit
/content/Fourier123


In [None]:
# simple knn
%cd simple-knn
!python setup.py install > /dev/null 2>&1

%cd build/lib.linux-x86_64-cpython-310

from simple_knn._C import distCUDA2

%cd ../../..

/content/Fourier123/simple-knn
/content/Fourier123/simple-knn/build/lib.linux-x86_64-cpython-310
/content/Fourier123


In [None]:
# diff-gaussian-rasterization: clone (with submodules) and enter the checkout.
# NOTE: stdout and stderr of the clone and the build are discarded, so failures
# only surface at the import below.
!git clone --recursive https://github.com/ashawkey/diff-gaussian-rasterization > /dev/null 2>&1
%cd diff-gaussian-rasterization


# Build the extension in place (next to the sources). CXXFLAGS defines
# _GLIBCXX_USE_CXX11_ABI=1 and passes -w to silence compiler warnings.
# NOTE(review): the ABI define presumably has to match how the installed torch
# wheel was built -- confirm.
!CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=1 -w" python setup.py build_ext --inplace > /dev/null 2>&1

# Imported while the working directory is still the checkout that holds the
# in-place build.
from diff_gaussian_rasterization import (
    GaussianRasterizationSettings,
    GaussianRasterizer,
)

# Back to the directory the cell started in.
%cd ..

/content/Fourier123/diff-gaussian-rasterization
/content/Fourier123


Pretrained LGM

In [None]:
!mkdir pretrained && cd pretrained
%cd pretrained
!wget -c https://huggingface.co/ashawkey/LGM/resolve/main/model_fp16_fixrot.safetensors > /dev/null
%cd ..

/content/Fourier123/pretrained
--2024-12-03 19:31:21--  https://huggingface.co/ashawkey/LGM/resolve/main/model_fp16_fixrot.safetensors
Resolving huggingface.co (huggingface.co)... 13.35.210.77, 13.35.210.66, 13.35.210.114, ...
Connecting to huggingface.co (huggingface.co)|13.35.210.77|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/81/5c/815ca7d304c775226c89dc3b5bdb666e68555d4bf881050ec3d3b414f2c9c57b/744d6324656342c64f871308e73db97f0eb51858d94329b30090e986a6d050ab?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model_fp16_fixrot.safetensors%3B+filename%3D%22model_fp16_fixrot.safetensors%22%3B&Expires=1733513482&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczMzUxMzQ4Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzgxLzVjLzgxNWNhN2QzMDRjNzc1MjI2Yzg5ZGMzYjViZGI2NjZlNjg1NTVkNGJmODgxMDUwZWMzZDNiNDE0ZjJjOWM1N2IvNzQ0ZDYzMjQ2NTYzNDJjNjRmODcxMzA4ZTczZGI5N2Y

Finetuning

In [None]:
from zero123 import Zero123Pipeline

class BrainScanToImagePipeline(Zero123Pipeline):
    """Zero123Pipeline variant that runs an input tensor through the VAE with its halves swapped.

    ``vae.decode`` is used as the "encoder" step and ``vae.encode`` as the
    "decoder" step; the weight dictionaries passed to the constructor are loaded
    into the module that plays the corresponding role.

    NOTE(review): assumes ``self.vae`` is created by ``Zero123Pipeline.__init__``
    and follows the diffusers ``AutoencoderKL`` interface, where ``decode(...)``
    returns an object exposing a ``.sample`` tensor and ``encode(...)`` returns an
    object exposing ``.latent_dist`` -- confirm against zero123.py.
    NOTE(review): because the roles are swapped, ``brain_scan`` must have whatever
    shape ``vae.decode`` accepts, and the returned "image" has whatever shape
    ``vae.encode`` produces -- verify this is what callers expect.
    """

    def __init__(self, *args, encoder_weights, decoder_weights, **kwargs):
        """Build the base pipeline, then load the supplied weights into the VAE.

        Args:
            *args, **kwargs: forwarded unchanged to ``Zero123Pipeline.__init__``.
            encoder_weights: state dict loaded (non-strictly) into ``self.vae.decoder``,
                the module that plays the encoder role here.
            decoder_weights: state dict loaded (non-strictly) into ``self.vae.encoder``,
                the module that plays the decoder role here.
        """
        super().__init__(*args, **kwargs)

        self._load_weights_loosely(self.vae.decoder, encoder_weights, "encoder_weights -> vae.decoder")
        self._load_weights_loosely(self.vae.encoder, decoder_weights, "decoder_weights -> vae.encoder")

    @staticmethod
    def _load_weights_loosely(module, weights, label):
        """Load ``weights`` into ``module`` with ``strict=False`` and warn if nothing matched."""
        import warnings

        outcome = module.load_state_dict(weights, strict=False)
        # With strict=False, keys the module does not know are skipped silently.
        # If every supplied key was skipped, the call loaded nothing at all,
        # which is worth surfacing to the user.
        if len(weights) > 0 and len(outcome.unexpected_keys) == len(weights):
            warnings.warn(
                f"{label}: none of the {len(weights)} supplied keys matched the target module; "
                "no weights were loaded"
            )

    def _encode_brain_scan(self, brain_scan):
        """Map ``brain_scan`` from [0, 1] to [-1, 1] and pass it through ``vae.decode``.

        Returns the ``.sample`` tensor of the decode output.
        """
        # Normalize the input if needed
        brain_scan = (brain_scan - 0.5) * 2.0  # Scale to [-1, 1]

        # Use the decoder as the encoder (swapped role). The decode output
        # exposes its tensor as `.sample`; it has no `.latent_dist`.
        latents = self.vae.decode(brain_scan).sample
        return latents

    def _decode_image(self, latents):
        """Pass ``latents`` through ``vae.encode``, sample, and rescale to [0, 1]."""
        # Use the encoder as the decoder (swapped role). The encode output
        # exposes a distribution as `.latent_dist`; sampling is done on that
        # distribution, not on the output object itself.
        images = self.vae.encode(latents).latent_dist.sample()

        # Rescale the output to [0, 1]
        images = (images / 2 + 0.5).clamp(0, 1)
        return images

    @torch.no_grad()
    def __call__(self, brain_scan, **kwargs):
        """Run both steps back to back without tracking gradients.

        Args:
            brain_scan: input tensor, scaled from [0, 1] to [-1, 1] before use.
            **kwargs: accepted for call compatibility; not used.

        Returns:
            The tensor produced by ``_decode_image``, clamped to [0, 1].
        """
        # Encode brain scan into latents
        latents = self._encode_brain_scan(brain_scan)

        # Decode latents into an image
        image = self._decode_image(latents)

        return image

In [None]:
from safetensors.torch import load_file

# Read the LGM checkpoint downloaded into ./pretrained.
state_dict = load_file('pretrained/model_fp16_fixrot.safetensors')

# Down-sampling half of the UNet: input conv + down blocks (key names are kept as-is).
encoder_weights = {
    name: tensor
    for name, tensor in state_dict.items()
    if name.startswith(('unet.down_blocks', 'unet.conv_in'))
}

# Up-sampling half of the UNet: up blocks + output conv (key names are kept as-is).
decoder_weights = {
    name: tensor
    for name, tensor in state_dict.items()
    if name.startswith(('unet.up_blocks', 'unet.conv_out'))
}

# Show which keys landed in each group.
print("Encoder Weights:", encoder_weights.keys())
print("Decoder Weights:", decoder_weights.keys())

Encoder Weights: dict_keys(['unet.conv_in.bias', 'unet.conv_in.weight', 'unet.down_blocks.0.downsample.bias', 'unet.down_blocks.0.downsample.weight', 'unet.down_blocks.0.nets.0.conv1.bias', 'unet.down_blocks.0.nets.0.conv1.weight', 'unet.down_blocks.0.nets.0.conv2.bias', 'unet.down_blocks.0.nets.0.conv2.weight', 'unet.down_blocks.0.nets.0.norm1.bias', 'unet.down_blocks.0.nets.0.norm1.weight', 'unet.down_blocks.0.nets.0.norm2.bias', 'unet.down_blocks.0.nets.0.norm2.weight', 'unet.down_blocks.0.nets.1.conv1.bias', 'unet.down_blocks.0.nets.1.conv1.weight', 'unet.down_blocks.0.nets.1.conv2.bias', 'unet.down_blocks.0.nets.1.conv2.weight', 'unet.down_blocks.0.nets.1.norm1.bias', 'unet.down_blocks.0.nets.1.norm1.weight', 'unet.down_blocks.0.nets.1.norm2.bias', 'unet.down_blocks.0.nets.1.norm2.weight', 'unet.down_blocks.1.downsample.bias', 'unet.down_blocks.1.downsample.weight', 'unet.down_blocks.1.nets.0.conv1.bias', 'unet.down_blocks.1.nets.0.conv1.weight', 'unet.down_blocks.1.nets.0.conv2.b

In [None]:
# Inspection aid: list every parameter name in the loaded checkpoint.
# `state_dict` comes from the cell that loads the safetensors file.
print(state_dict.keys())

dict_keys(['conv.bias', 'conv.weight', 'unet.conv_in.bias', 'unet.conv_in.weight', 'unet.conv_out.bias', 'unet.conv_out.weight', 'unet.down_blocks.0.downsample.bias', 'unet.down_blocks.0.downsample.weight', 'unet.down_blocks.0.nets.0.conv1.bias', 'unet.down_blocks.0.nets.0.conv1.weight', 'unet.down_blocks.0.nets.0.conv2.bias', 'unet.down_blocks.0.nets.0.conv2.weight', 'unet.down_blocks.0.nets.0.norm1.bias', 'unet.down_blocks.0.nets.0.norm1.weight', 'unet.down_blocks.0.nets.0.norm2.bias', 'unet.down_blocks.0.nets.0.norm2.weight', 'unet.down_blocks.0.nets.1.conv1.bias', 'unet.down_blocks.0.nets.1.conv1.weight', 'unet.down_blocks.0.nets.1.conv2.bias', 'unet.down_blocks.0.nets.1.conv2.weight', 'unet.down_blocks.0.nets.1.norm1.bias', 'unet.down_blocks.0.nets.1.norm1.weight', 'unet.down_blocks.0.nets.1.norm2.bias', 'unet.down_blocks.0.nets.1.norm2.weight', 'unet.down_blocks.1.downsample.bias', 'unet.down_blocks.1.downsample.weight', 'unet.down_blocks.1.nets.0.conv1.bias', 'unet.down_blocks.1

In [None]:
from main import GUI

In [None]:
# Placeholder: replace with the location of the .ply checkpoint to load.
path = '/path/to/your/model_checkpoint.ply'

# Launch main.py with the image config; {path} is substituted by IPython.
# NOTE(review): the other main.py invocation in this notebook passes overrides
# as key=value (input=..., save_path=..., load=...); confirm that main.py
# actually accepts `--load` and `--train` in this form.
!python main.py --config configs/image.yaml --load {path} --train

Inference and prediction

In [None]:
image = "Screenshot_rgba"
image_path = "data/"+image+".png"

!python process.py data/Screenshot.png --size 512
img = Image.open(image_path).convert("RGB")
img.save(image_path, format='PNG')

!python infer_lgm.py big --resume pretrained/model_fp16_fixrot.safetensors --workspace workspace --test_path {image_path} > /dev/null 2>&1

In [None]:
# Colab-only: mount Google Drive at /content/drive.
# The google.colab module is only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Add read permission to the input image; {image} is the base name set in the inference cell.
!chmod +r data/{image}.png

In [None]:
# Print the current working directory; the commands below use paths relative to it.
!pwd

/content/Fourier123


In [None]:
# Run main.py with configs/image.yaml on GPU 0 only (CUDA_VISIBLE_DEVICES=0).
# Overrides are passed as key=value: the input image, the save name
# ("output_name") and the .ply under workspace/ to load for this image.
!CUDA_VISIBLE_DEVICES=0 python main.py --config configs/image.yaml input={image_path} save_path=output_name load=workspace/{image}.ply

[INFO] load image from data/Screenshot_rgba.png...
Number of points at loading :  43219
[INFO] loading SD...
2024-10-28 00:53:19.724932: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-28 00:53:19.749797: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-28 00:53:19.757549: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
model_index.json: 100% 543/543 [00:00<00:00, 3.36MB/s]
Fetching 13 files:   0% 0/13 [00:00<?, ?it/s]
(…)ature_extractor/preprocessor_config.json: 100% 342/342 [00:00<00:00, 2.95MB/s]
Fetching 13 files:   8% 1/13 [00:00<00:02,  5.16it/s]
text_encoder/config

In [None]:
### 3D Gaussian visualization
# Runs see.py on GPU 0 with the same config, pointing it at the workspace
# directory and at logs/output_name_model.ply.
# NOTE(review): that .ply is presumably written by the main.py run that used
# save_path=output_name -- confirm.
!CUDA_VISIBLE_DEVICES=0 python see.py --config configs/image.yaml workspace=workspace load=logs/output_name_model.ply

Number of points at loading :  40168


In [None]:
### Extract glb mesh from ply
# Runs convert.py (with the `big` argument) on the .ply stored under workspace/
# for the current image. The captured log shows a NeRF fit followed by mesh
# fitting, so this step takes several minutes.
!python convert.py big --test_path workspace/{image}.ply

[2K[32m(  ●   )[0m [1;33mGridEncoder: Setting up CUDA (This may take a few minutes the first time)[0m
[1A[2KNumber of points at loading :  43219
[INFO] fitting nerf...
[2K[32m( ●    )[0m [1;33mNerfAcc: Setting up CUDA (This may take a few minutes the first time)[0m
MSE = 0.000844: 100% 512/512 [06:05<00:00,  1.40it/s]
[INFO] finished fitting nerf!
[INFO] marching cubes thresh: 10 (0.13568754494190216 ~ 364.51171875)
[INFO] mesh cleaning: (70102, 3) --> (39408, 3), (140024, 3) --> (78637, 3)
[INFO] mesh decimation: (39408, 3) --> (25066, 3), (78637, 3) --> (50000, 3)
[INFO] fitting mesh...
MSE = 0.003743:  25% 511/2048 [00:08<00:24, 62.06it/s][INFO] mesh cleaning: (25066, 3) --> (31070, 3), (50000, 3) --> (62003, 3)
[INFO] mesh decimation: (31070, 3) --> (25050, 3), (62003, 3) --> (50000, 3)
MSE = 0.003560:  50% 1018/2048 [00:20<00:16, 61.11it/s][INFO] mesh cleaning: (25050, 3) --> (29304, 3), (50000, 3) --> (58501, 3)
[INFO] mesh decimation: (29304, 3) --> (25039, 3), (5850

In [None]:
# List generated artifacts with human-readable sizes.
# `ls` prints an error for any of these directories that does not exist.
!ls -lh workspace/
!ls -lh logs/
!ls -lh output/

total 4.5M
-rw-r--r-- 1 root root 1.5M Oct 26 23:25 cifar10_image_0.gif
-rw-r--r-- 1 root root 1.5M Oct 27 00:50 cifar10_image_0.glb
-rw-r--r-- 1 root root 142K Oct 26 23:25 cifar10_image_0.mp4
-rw-r--r-- 1 root root 1.4M Oct 26 23:25 cifar10_image_0.ply
-rw-r--r-- 1 root root  68K Oct 27 00:47 output.mp4
total 596K
-rw-r--r-- 1 root root 596K Oct 27 00:46 output_name_model.ply
ls: cannot access 'output/': No such file or directory
