# Predict Inference (single GPU)

In [None]:
import os
from pathlib import Path

# Relative output location; it resolves against the working directory set below.
output_dir = Path("outputs")

# The rest of the notebook uses repository-relative paths, so the working
# directory must be the folder that contains the `cosmos_predict2` package.
# When it is not, the repository root is expected two levels above.
if not (Path.cwd() / "cosmos_predict2").is_dir():
    repo_root = Path.cwd().parent.parent
    # Validate *before* changing directory: a failed attempt then leaves the
    # process where it was, so re-running this cell does not keep climbing.
    # An explicit raise (unlike `assert`) is also not stripped under `python -O`.
    if not (repo_root / "cosmos_predict2").is_dir():
        raise RuntimeError(
            f"Working directory change failed: '{repo_root}' does not contain 'cosmos_predict2'."
        )
    os.chdir(repo_root)  # Change working directory to root

# Expose the working directory through PYTHONPATH (this overwrites any previous value).
os.environ["PYTHONPATH"] = os.getcwd()

To use a pre-trained checkpoint, just specify the model:

In [None]:
from cosmos_predict2.config import SetupArguments

# Options for running with the pre-trained weights: only the model name is
# given, no explicit checkpoint path.
pretrained_options = {
    "context_parallel_size": 1,
    "output_dir": output_dir,
    "model": "2B/pre-trained",
    "keep_going": True,
    "experiment": "predict2_lora_training_2b_cosmos_nemo_assets",
}

setup_args = SetupArguments(**pretrained_options)

Alternatively, to use a local checkpoint from post-training, specify the checkpoint path (this cell replaces the `setup_args` defined above):

In [None]:
from cosmos_predict2.config import SetupArguments

# Relative path to the local post-training checkpoint file.
checkpoint_path = "checkpoints/nvidia/Cosmos-Predict2.5-2B/consolidated/model.pt"

# Same options as the pre-trained configuration, plus the explicit checkpoint.
checkpoint_options = {
    "checkpoint_path": checkpoint_path,
    "context_parallel_size": 1,
    "output_dir": output_dir,
    "model": "2B/pre-trained",
    "keep_going": True,
    "experiment": "predict2_lora_training_2b_cosmos_nemo_assets",
}

setup_args = SetupArguments(**checkpoint_options)

In [None]:
from cosmos_predict2.inference import Inference

# Build the inference pipeline from `setup_args`. If both configuration cells
# were executed, the one that ran last is the one used here.
pipe = Inference(setup_args)

In [None]:
from cosmos_predict2.config import InferenceArguments

# Load the inference sample definition(s) from the JSON file.
inference_samples = InferenceArguments.from_files([Path("datasets/base/bus_terminal.json")])
print(f"inference_samples: {inference_samples}")

# Reuse the `output_dir` that was handed to SetupArguments instead of a second
# hard-coded "outputs" path, so the two locations cannot drift apart.
output_videos = pipe.generate(inference_samples, output_dir)

In [None]:
from IPython.display import Video, display

# Show every generated video inline; an empty or missing result displays nothing.
for video in output_videos or ():
    display(Video(video, embed=True))

In [None]:
# Same sample as the JSON-driven cell, but with the arguments spelled out inline.
asset_dir = "datasets"
prompt = "A nighttime city bus terminal gradually shifts from stillness to subtle movement. At first, multiple double-decker buses are parked under the glow of overhead lights, with a central bus labeled '87D' facing forward and stationary. As the video progresses, the bus in the middle moves ahead slowly, its headlights brightening the surrounding area and casting reflections onto adjacent vehicles. The motion creates space in the lineup, signaling activity within the otherwise quiet station. It then comes to a smooth stop, resuming its position in line. Overhead signage in Chinese characters remains illuminated, enhancing the vibrant, urban night scene."

# Keyword arguments for a single image-conditioned sample.
args = {
    "inference_type": "image2world",
    "name": "bus_terminal",
    "input_path": os.path.join(asset_dir, "base/bus_terminal.jpg"),
    "prompt": prompt,
}

inference_args = InferenceArguments(**args)

# Reuse the `output_dir` that was handed to SetupArguments instead of a second
# hard-coded "outputs" path, so the two locations cannot drift apart.
output_videos = pipe.generate([inference_args], output_dir)