In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - ZipNeRF (PyTorch) Notebook

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_zipnerf.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_zipnerf.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_pytorch_zipnerf.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
Open in Vertex AI Workbench
    </a> (A Python-3 CPU notebook is recommended)
  </td>
</table>

**_NOTE_**: This notebook has been tested in the following environment:

* Python version = 3.9

## Overview

This notebook demonstrates a [PyTorch implementation](https://github.com/SuLvXiangXin/zipnerf-pytorch) of [Zip-NeRF: Anti-Aliased Grid-Based Neural Radiance Fields](https://jonbarron.info/zipnerf/) for rendering Neural Radiance Fields (NeRFs) more efficiently. It is primarily aimed at addressing some of the limitations of traditional NeRF techniques, which, while powerful for creating detailed 3D models from 2D images, can be computationally intensive and slow.

### Objective

In this tutorial, you learn how to:

- Use [COLMAP](https://colmap.github.io/) to perform Structure from Motion (SfM), a technique that estimates the three-dimensional structure of a scene from a series of two-dimensional images.
- Calibrate, train and render NeRF scenes using [Vertex AI custom jobs](https://cloud.google.com/vertex-ai/docs/samples/aiplatform-create-custom-job-sample).
- Render a video along a custom camera path using a series of keyframe photos.

This tutorial uses the following Google Cloud ML services and resources:

- Vertex AI Training
- Vertex AI Custom Job

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Setup

### Installation

Install the following packages required to execute this notebook.

In [None]:
# Upgrade pip itself, then install pinned versions of the packages below.
# NOTE(review): google-cloud-storage and the Python wget package are not
# imported by any cell visible in this notebook — confirm they are still needed.
! pip install --upgrade pip
! pip install google-cloud-aiplatform==1.38.1
! pip install google-cloud-storage==2.14.0
! pip install wget==3.2

### Before you begin

**NOTE**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands.

### Colab only
Run the following commands for Colab and skip this section if you are using Workbench.

In [None]:
import sys

if "google.colab" in sys.modules:
    ! pip3 install --upgrade google-cloud-aiplatform
    from google.colab import auth as google_auth

    google_auth.authenticate_user()
    # Install gdown for downloading example training images.
    ! pip3 install gdown

    # Restart the notebook kernel after installs.
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

### Setup Google Cloud project

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

1. [Enable the Vertex AI API and Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component).

1. [Create a Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets) for storing experiment outputs.

1. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk).

### Authenticate your Google Cloud account

Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below.

**1. Vertex AI Workbench**
* Do nothing as you are already authenticated.

**2. Local JupyterLab instance, uncomment and run:**

In [None]:
# ! gcloud auth login

### Set your project parameters

**If you don't know your project ID**, try the following:
* Run `gcloud config list`.
* Run `gcloud projects list`.
* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

Create a storage bucket to store intermediate artifacts such as datasets. Recommended bucket name pattern: `cloudnerf-{PROJECT_ID}-unique` (enter the name without the `gs://` prefix in the cell below).

In [None]:
import os

PROJECT_ID = ""  # @param {type:"string"}
REGION = "us-central1"  # @param {type: "string"}

# Enter the name of the bucket without gs://
BUCKET_NAME = ""  # @param {type:"string"}


# Set the project id.
! gcloud config set project {PROJECT_ID}

# Create the bucket if it doesn't already exist.
BUCKET_URI = os.path.join("gs://", BUCKET_NAME)
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project.

In [None]:
import os
import posixpath

from google.cloud import aiplatform

# GCS object paths always use "/" as the separator. posixpath.join keeps that
# true on every platform, whereas os.path.join inserts "\" on Windows.
staging_bucket = posixpath.join(BUCKET_URI, "zipnerf_staging")
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=staging_bucket)

### Define constants

In [None]:
# Container images used by the custom jobs below, one per pipeline stage.
# NOTE(review): all three reference the mutable `:latest` tag, so the image
# contents may differ between runs.

# The pre-built calibration docker image.
CALIBRATION_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-cloudnerf-calibrate:latest"
# The pre-built training docker image.
TRAINING_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-cloudnerf-train:latest"
# The pre-built rendering docker image.
RENDERING_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-cloudnerf-render:latest"

### Define common functions

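This section defines functions for:

- Custom job naming
- Resolving the browser link of a rendered MP4 video stored on Cloud Storage
- Writing a keyframe image file list to Cloud Storage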

In [None]:
import subprocess
from datetime import datetime
from typing import Any, List

# Lower-case file suffixes treated as images when building a keyframe list.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif", ".bmp")
# Prefix substituted for "gs://" to turn a GCS URI into a browser link.
GCS_API_ENDPOINT = "https://storage.cloud.google.com/"


def get_job_name_with_datetime(prefix: str) -> str:
    """Builds a job name by appending the current local time to `prefix`.

    Args:
        prefix: Leading part of the job name.

    Returns:
        `prefix` followed by `_YYYYMMDD_HHMMSS` for the moment of the call.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return "_".join((prefix, timestamp))


def get_mp4_video_link(mp4_rendering_path: str) -> str:
    """Returns a browser link for the first GCS object matching a path.

    Args:
        mp4_rendering_path: A `gs://` path (wildcards allowed) that is passed
            to `gsutil ls`.

    Returns:
        The first listed object URI with its `gs://` prefix replaced by
        `GCS_API_ENDPOINT`.

    Raises:
        FileNotFoundError: If `gsutil ls` fails or lists no objects, for
            example because the rendering job has not written a video yet.
    """
    # Pass an argument list instead of a shell string so that the wildcard in
    # the path reaches gsutil verbatim and is never expanded or interpreted by
    # a local shell.
    listing = subprocess.run(
        ["gsutil", "ls", mp4_rendering_path], capture_output=True, text=True
    )
    if listing.returncode != 0:
        raise FileNotFoundError(
            f"Could not list {mp4_rendering_path}: {listing.stderr.strip()}"
        )

    # Ignore blank lines so an empty listing is reported instead of yielding "".
    matches = [line.strip() for line in listing.stdout.splitlines() if line.strip()]
    if not matches:
        raise FileNotFoundError(f"No objects match {mp4_rendering_path}")

    return matches[0].replace("gs://", GCS_API_ENDPOINT)


def write_keyframe_list_to_gcs(
    bucket_path: str, output_gcs_file: str, max_files: int = 10
) -> List[Any]:
    """Writes the names of the first image files in a GCS folder to a GCS text file.

    Args:
        bucket_path: GCS folder that is listed with `gsutil ls`.
        output_gcs_file: GCS destination of the text file; it receives one
            image file name (without path) per line.
        max_files: Maximum number of image file names to write.

    Returns:
        The image file names that were written, or an empty list if listing
        the folder or copying the file failed (the error is printed).
    """
    import tempfile

    # Argument lists (no shell) keep the paths from being interpreted by a
    # local shell.
    listing = subprocess.run(
        ["gsutil", "ls", bucket_path], capture_output=True, text=True
    )
    if listing.returncode != 0:
        print("Error listing GCS bucket:", listing.stderr)
        return []

    # Keep only image objects, reduced to their bare file names.
    image_files = [
        os.path.basename(uri)
        for uri in listing.stdout.splitlines()
        if uri.lower().endswith(IMAGE_EXTENSIONS)
    ]
    keyframes = image_files[:max_files]

    # Stage the list in a throwaway directory so nothing is left behind in
    # (or overwritten in) the current working directory.
    with tempfile.TemporaryDirectory() as staging_dir:
        local_file = os.path.join(staging_dir, "out.txt")
        with open(local_file, "w") as file:
            file.writelines(name + "\n" for name in keyframes)

        upload = subprocess.run(
            ["gsutil", "cp", local_file, output_gcs_file],
            capture_output=True,
            text=True,
        )

    if upload.returncode != 0:
        print("Error copying keyframe list to GCS:", upload.stderr)
        return []

    return keyframes

## Prepare dataset
It is necessary to prepare the dataset and store it on Cloud Storage. The following example illustrates the process for the bicycle scene in the [mipnerf360](https://jonbarron.info/mipnerf360/) dataset. For the sake of convenience, each scene in the mipnerf360 has been provided as a separate dataset with its own unique download link. Mip-NeRF 360 dataset contains the following 7 scenes:

- [`bicycle`](http://storage.googleapis.com/gresearch/refraw360/bicycle.zip)
- [`bonsai`](http://storage.googleapis.com/gresearch/refraw360/bonsai.zip)
- [`counter`](http://storage.googleapis.com/gresearch/refraw360/counter.zip)
- [`garden`](http://storage.googleapis.com/gresearch/refraw360/garden.zip)
- [`kitchen`](http://storage.googleapis.com/gresearch/refraw360/kitchen.zip)
- [`room`](http://storage.googleapis.com/gresearch/refraw360/room.zip)
- [`stump`](http://storage.googleapis.com/gresearch/refraw360/stump.zip)

Each scene comes preprocessed with COLMAP information so the calibration step in the following section is optional.

In [None]:
import posixpath

# Local scratch directory that receives the downloaded scene archive.
local_mipnerf_data_directory = "./mipnerf360_dataset"  # @param {type:"string"}
# GCS destination for the dataset. posixpath.join keeps "/" as the separator
# on every platform, whereas os.path.join would insert "\" on Windows.
MIPNERF_DATA_GCS_PATH = posixpath.join(BUCKET_URI, "mipnerf360_dataset")

In [None]:
# Download the bicycle scene data to a local directory.
! rm -rf $local_mipnerf_data_directory
! mkdir -p $local_mipnerf_data_directory
! wget -P $local_mipnerf_data_directory http://storage.googleapis.com/gresearch/refraw360/bicycle.zip

In [None]:
# Unzip the mipnerf360 dataset.
# The archive is extracted next to the downloaded zip, inside the same local
# directory.
! unzip $local_mipnerf_data_directory/bicycle.zip -d $local_mipnerf_data_directory

In [None]:
# Move mipnerf360 data from local directory to Cloud Storage.
# This step takes a few minutes to finish.
# NOTE(review): the `*` glob also matches bicycle.zip, which is still in the
# local directory, so the archive is uploaded along with the extracted files.
! gsutil -m cp -R $local_mipnerf_data_directory/* $MIPNERF_DATA_GCS_PATH

## NERF pipeline

### Camera pose estimation
As mentioned above, all the scenes in the Mip-NeRF 360 dataset have been preprocessed with colmap information. However, in order to demonstrate how to run the pipeline end-to-end on your own data, we will use the `bicycle` scene to estimate the camera poses.

In [None]:
# Folder containing all the images of the bicycle scene.
# Also reused later as the source folder for the keyframe file list.
INPUT_IMAGES_FOLDER = f"{MIPNERF_DATA_GCS_PATH}/bicycle/images"  # @param {type:"string"}

# Folder for storing experiment outputs for calibration, training and rendering.
# Passed to every custom job below as `-gcs_experiment_path`.
OUTPUT_FOLDER = f"{MIPNERF_DATA_GCS_PATH}/exp/bicycle"  # @param {type:"string"}

Once data and experiment paths have been configured, run the custom job below.

The following parameters are required:

* `use_gpu`: Whether to use GPU or not.
* `gcs_dataset_path`: Path to image folder in GCS dataset.
* `gcs_experiment_path`: GCS path for storing experiment outputs.
* `camera`: Type of camera used. `OPENCV` for perspective, `OPENCV_FISHEYE` for fisheye.

The custom job will run on the images in the `gcs_dataset_path` folder and store the colmap outputs in the `gcs_experiment_path/data` folder.

On the scenes in this current dataset, this step takes about 30 minutes.


In [None]:
# This job will run colmap camera pose estimation.
# The job name is the "colmap" prefix plus the launch timestamp.
data_calibration_job_name = get_job_name_with_datetime("colmap")

# Worker pool spec.
# One replica on the machine type and accelerators configured below.
machine_type = "n1-highmem-64"
num_nodes = 1
gpu_type = "NVIDIA_TESLA_V100"
num_gpus = 8
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": machine_type,
            "accelerator_type": gpu_type,
            "accelerator_count": num_gpus,
        },
        "replica_count": num_nodes,
        "container_spec": {
            "image_uri": CALIBRATION_DOCKER_URI,
            # Flags forwarded to the calibration container; each flag is
            # followed by its value.
            "args": [
                "-use_gpu",
                "1",
                "-gcs_dataset_path",
                INPUT_IMAGES_FOLDER,
                "-gcs_experiment_path",
                OUTPUT_FOLDER,
                "-camera",
                "OPENCV",
            ],
        },
    }
]

# Define the Vertex AI custom job from the spec above.
data_calibration_custom_job = aiplatform.CustomJob(
    display_name=data_calibration_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=staging_bucket,
)

# Launch the calibration job.
data_calibration_custom_job.run()

### Training the ZipNeRF model
Once the Colmap pose calibration is completed, we can run training.

The following parameters are required:

* `gcs_experiment_path`: GCS path for loading processed dataset and storing experiment outputs.
* `gin_config_file`: Configuration file for ZipNeRF network. Current options are:
  * configs/360.gin: Configuration for 360 reconstruction.
  * configs/360_glo.gin: Configuration for 360 reconstruction with [generative latent optimization](https://www.researchgate.net/publication/318527851_Optimizing_the_Latent_Space_of_Generative_Networks).
* `factor`: A factor of the downsampled images in the preprocessing step that affects the resolution or detail level of the training pixel ground truth and rendered images. A factor of 2 is recommended for indoor scenes and a factor of 4 for outdoor scenes. **The factor used in training must be the same for rendering.**

The custom job will run on the images in the `gcs_experiment_path/data` colmap dataset and store the checkpoints in the `gcs_experiment_path/checkpoints` folder.

Depending on the configuration, this step could take up to 3 hours.

In [None]:
# This job will run zipnerf training.

# This is the nerf training job name. You will use it to load the checkpoints
# in the rendering job for the current run.
nerf_training_job_name = get_job_name_with_datetime("nerf_training")

# Both values are reused by the rendering cells further down, so training and
# rendering share the same config file and downsampling factor.
GIN_CONFIG_FILE = "configs/360.gin"  # @param {type:"string"}
FACTOR = "4"  # @param {type:"string"}

# Worker pool spec.
# One replica on the machine type and accelerators configured below.
machine_type = "n1-highmem-64"
num_nodes = 1
gpu_type = "NVIDIA_TESLA_V100"
num_gpus = 8
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": machine_type,
            "accelerator_type": gpu_type,
            "accelerator_count": num_gpus,
        },
        "replica_count": num_nodes,
        "container_spec": {
            "image_uri": TRAINING_DOCKER_URI,
            # Flags forwarded to the training container; each flag is
            # followed by its value.
            "args": [
                "-training_job_name",
                nerf_training_job_name,
                "-gcs_experiment_path",
                OUTPUT_FOLDER,
                "-gin_config_file",
                GIN_CONFIG_FILE,
                "-factor",
                FACTOR,
            ],
        },
    }
]

# Define the Vertex AI custom job from the spec above.
nerf_training_custom_job = aiplatform.CustomJob(
    display_name=nerf_training_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=staging_bucket,
)

# Launch the training job with web access enabled.
nerf_training_custom_job.run(enable_web_access=True)

### Rendering the ZipNeRF model (360)
Once the training is completed, we can run rendering.

The following parameters are required:

* `gcs_experiment_path`: GCS path for loading processed dataset and storing experiment outputs.
* `gin_config_file`: Configuration file for ZipNeRF network. Current options are:
  * configs/360.gin: Configuration for 360 reconstruction.
  * configs/360_glo.gin: Configuration for 360 reconstruction with [generative latent optimization](https://www.researchgate.net/publication/318527851_Optimizing_the_Latent_Space_of_Generative_Networks).
* `render_video_fps`: Frame rate of rendered video.
* `render_path_frames`: Number of frames to render for a path.
* `factor`: A factor of the downsampled images in the preprocessing step that affects the resolution or detail level of the training pixel ground truth and rendered images. A factor of 2 is recommended for indoor scenes and a factor of 4 for outdoor scenes. **The factor used in training must be the same for rendering.**

The custom job will load the checkpoints from the `gcs_experiment_path/checkpoints` folder and store the rendered video in the `gcs_experiment_path/render/<rendering_job_name>` folder.

In [None]:
# This job will run zipnerf rendering.
# The job name is also used below to locate the rendered video on GCS.
nerf_rendering_job_name = get_job_name_with_datetime("nerf_rendering")

# Kept as strings because they are passed verbatim as container arguments.
RENDER_PATH_FRAMES = "120"  # @param {type:"string"}
RENDER_VIDEO_FPS = "30"  # @param {type:"string"}

# Worker pool spec.
# One replica on the machine type and accelerators configured below.
machine_type = "n1-highmem-64"
num_nodes = 1
gpu_type = "NVIDIA_TESLA_V100"
num_gpus = 8
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": machine_type,
            "accelerator_type": gpu_type,
            "accelerator_count": num_gpus,
        },
        "replica_count": num_nodes,
        "container_spec": {
            "image_uri": RENDERING_DOCKER_URI,
            # Flags forwarded to the rendering container; each flag is
            # followed by its value. The training job name, config file and
            # factor are the ones set in the training cell.
            "args": [
                "-rendering_job_name",
                nerf_rendering_job_name,
                "-training_job_name",
                nerf_training_job_name,
                "-gcs_experiment_path",
                OUTPUT_FOLDER,
                "-gin_config_file",
                GIN_CONFIG_FILE,
                "-render_video_fps",
                RENDER_VIDEO_FPS,
                "-render_path_frames",
                RENDER_PATH_FRAMES,
                "-factor",
                FACTOR,
            ],
        },
    }
]

# Define the Vertex AI custom job from the spec above.
nerf_rendering_custom_job = aiplatform.CustomJob(
    display_name=nerf_rendering_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=staging_bucket,
)

# Launch the rendering job with web access enabled.
nerf_rendering_custom_job.run(enable_web_access=True)

#### Show rendered video from GCS

In [None]:
from IPython.display import Video

# Wildcard pattern for the color video expected under this rendering job's
# output folder.
MP4_RENDERING_PATH = f"{OUTPUT_FOLDER}/render/{nerf_rendering_job_name}/*color.mp4"
# Resolve the pattern to the first matching object and turn it into a browser link.
mp4_video_link = get_mp4_video_link(MP4_RENDERING_PATH)
# Last expression of the cell, so the video player is displayed as the output.
Video(mp4_video_link)

### Rendering the ZipNeRF model (custom camera trajectory)

#### Create keyframe file list for rendering custom camera trajectories.

To create a custom camera trajectory in a Neural Radiance Field (NeRF) model using images from the same dataset used for training, you can generate a keyframe file list where each keyframe corresponds to the name of an image file stored in a Google Cloud Storage (GCS) bucket. This section will guide you through creating this keyframe file list.

#### Step 1: Identifying keyframe images
First, identify the images within your dataset that you want to use as keyframes. These images should ideally represent the significant views or angles that you want your camera trajectory to include.

#### Step 2: Creating a list of image file names
Access Your GCS Bucket: Navigate to your GCS bucket where the dataset is stored.

Select Image Files: Choose the specific image files that you want to use as keyframes. Remember, these should be files used in training the NeRF model, as they will have corresponding camera parameters already defined.

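Compile File Names: Create a list of the file names (not the paths) of these selected images. Ensure that each file name is on a separate line. The cells below do this automatically: the `write_keyframe_list_to_gcs` helper defined earlier writes the names of the first `max_files` images in the input folder to a text file on GCS.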

In [None]:
# Name of the custom-trajectory rendering job. It is also embedded in the
# keyframe list file name and in the render output path used below.
nerf_custom_rendering_job_name = get_job_name_with_datetime("nerf_custom_rendering")

In [None]:
# Example usage.
# GCS location of the keyframe list; the job name keeps it unique per run.
KEYFRAME_IMAGE_FILELIST = (
    f"{OUTPUT_FOLDER}/keyframe_list_{nerf_custom_rendering_job_name}.txt"
)
max_files = 30  # Set this to the number of files you want
# Write the names of the first `max_files` images in the input folder to the
# keyframe list on GCS.
write_keyframe_list_to_gcs(
    INPUT_IMAGES_FOLDER, KEYFRAME_IMAGE_FILELIST, max_files=max_files
)

#### Run rendering

Once the training is completed, we can run rendering.

The following parameters are required:

* `gcs_experiment_path`: GCS path for loading processed dataset and storing experiment outputs.
* `gin_config_file`: Configuration file for ZipNeRF network. Current options are:
  * configs/360.gin: Configuration for 360 reconstruction.
  * configs/360_glo.gin: Configuration for 360 reconstruction with [generative latent optimization](https://www.researchgate.net/publication/318527851_Optimizing_the_Latent_Space_of_Generative_Networks).
* `render_video_fps`: Frame rate of rendered video.
* `factor`: A factor of the downsampled images in the preprocessing step that affects the resolution or detail level of the training pixel ground truth and rendered images. A factor of 2 is recommended for indoor scenes and a factor of 4 for outdoor scenes. **The factor used in training must be the same for rendering.**
* `gcs_keyframes_file`: GCS path of a text file listing image file names, one per line, used as keyframes for rendering the custom camera path.

With keyframes, an interpolated path is generated. This path represents a smoothly contoured spline that interconnects the specified keyframe camera poses. The process utilizes a configuration variable, `render_spline_n_interp`, which is preset to a default value of 30. As a result, the finalized interpolated path comprises a total of `render_spline_n_interp` * (n - 1) poses. In the specific scenario under discussion, the config.render_spline_n_interp is configured to 30. **With an input of 30 keyframes, the calculation yields a total of 30 * 29, amounting to 870 poses**.

In [None]:
# This job will run zipnerf rendering.
# It reuses the rendering image and adds the keyframe list written above.
# Worker pool spec.
# One replica on the machine type and accelerators configured below.
machine_type = "n1-highmem-64"
num_nodes = 1
gpu_type = "NVIDIA_TESLA_V100"
num_gpus = 8
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": machine_type,
            "accelerator_type": gpu_type,
            "accelerator_count": num_gpus,
        },
        "replica_count": num_nodes,
        "container_spec": {
            "image_uri": RENDERING_DOCKER_URI,
            # Flags forwarded to the rendering container; each flag is
            # followed by its value. RENDER_VIDEO_FPS and RENDER_PATH_FRAMES
            # come from the 360 rendering cell.
            "args": [
                "-rendering_job_name",
                nerf_custom_rendering_job_name,
                "-training_job_name",
                nerf_training_job_name,
                "-gcs_experiment_path",
                OUTPUT_FOLDER,
                "-gin_config_file",
                GIN_CONFIG_FILE,
                "-render_video_fps",
                RENDER_VIDEO_FPS,
                "-factor",
                FACTOR,
                "-gcs_keyframes_file",
                KEYFRAME_IMAGE_FILELIST,
                "-render_path_frames",
                RENDER_PATH_FRAMES,
            ],
        },
    }
]

# Define the Vertex AI custom job from the spec above.
nerf_custom_rendering_custom_job = aiplatform.CustomJob(
    display_name=nerf_custom_rendering_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=staging_bucket,
)

# Launch the custom-trajectory rendering job with web access enabled.
nerf_custom_rendering_custom_job.run(enable_web_access=True)

#### Show rendered video from GCS


In [None]:
from IPython.display import Video

# Wildcard pattern for the color video expected under the custom-trajectory
# rendering job's output folder.
MP4_RENDERING_PATH = (
    f"{OUTPUT_FOLDER}/render/{nerf_custom_rendering_job_name}/*color.mp4"
)
# Resolve the pattern to the first matching object and turn it into a browser link.
mp4_video_link = get_mp4_video_link(MP4_RENDERING_PATH)
# Last expression of the cell, so the video player is displayed as the output.
Video(mp4_video_link)

## Clean up

In [None]:
# Delete pose estimation, training and rendering custom jobs.
# Each job is deleted only if listing by its display name returns a result.
# NOTE(review): every job variable must exist, so this cell raises NameError
# if any of the job cells above was skipped.
if data_calibration_custom_job.list(
    filter=f'display_name="{data_calibration_job_name}"'
):
    data_calibration_custom_job.delete()
if nerf_training_custom_job.list(filter=f'display_name="{nerf_training_job_name}"'):
    nerf_training_custom_job.delete()
if nerf_rendering_custom_job.list(filter=f'display_name="{nerf_rendering_job_name}"'):
    nerf_rendering_custom_job.delete()
if nerf_custom_rendering_custom_job.list(
    filter=f'display_name="{nerf_custom_rendering_job_name}"'
):
    nerf_custom_rendering_custom_job.delete()