In [None]:
import model_loader
import pipeline
import torch
from PIL import Image
from transformers import CLIPTokenizer

### Setting up Device

In [None]:
# Pick the compute device for the whole notebook. CPU is the default;
# accelerators are strictly opt-in through the two flags below.
DEVICE = "cpu"

ALLOW_CUDA = False
ALLOW_MPS = False

# The opt-in flag is checked first so that a disabled backend is never even
# queried (older torch builds may not ship `torch.backends.mps` at all).
if ALLOW_CUDA and torch.cuda.is_available():
    DEVICE = "cuda"
# `is_built()` only says the binary was compiled with MPS support; the
# backend is usable only when `is_available()` is true, so that is the
# condition that must hold before selecting "mps".
elif ALLOW_MPS and torch.backends.mps.is_available():
    DEVICE = "mps"
print(f"Using device: {DEVICE}")

### Load the models

In [None]:
# Location of the checkpoint file handed to the model loader.
model_file = "../data/v1-5-pruned-emaonly.ckpt"

# Tokenizer built from the local vocabulary and merges files.
tokenizer = CLIPTokenizer(
    "../data/tokenizer_vocab.json",
    merges_file="../data/tokenizer_merges.txt",
)

# Build the models from the checkpoint, passing along the selected DEVICE.
models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)

## Text to Image

In [3]:
# Text prompt forwarded to pipeline.generate as `prompt`.
prompt = "A cat playing football, in 8k resolution"
# Forwarded as `uncond_prompt`; presumably the unconditional/negative prompt
# used for classifier-free guidance -- TODO confirm against pipeline.generate.
uncond_prompt = ""

# Guidance settings, forwarded as `do_cfg` and `cfg_scale`.
cfg_scale = 7.5
do_cfg = True

## Image to Image

In [4]:
# Path of the optional starting image and the `strength` value forwarded to
# pipeline.generate (presumably how strongly the input image is altered --
# TODO confirm against pipeline.generate).
image_path = "../images/cat-image.jpg"
strength = 0.9

# No starting image is supplied by default; uncomment the second line to
# load the file at `image_path` and pass it to the pipeline instead.
input_image = None
# input_image = Image.open(image_path)

### Other Hyperparameters

In [5]:
# Fixed seed, forwarded to pipeline.generate as `seed`.
seed = 42

# Sampler choice and step count, forwarded as `sampler_name` and
# `n_inference_steps`.
sampler_name = "ddpm"
n_inference_steps = 50

### Generate

In [None]:
# Run the pipeline with the settings chosen in the cells above. Every argument
# is passed by keyword; they are grouped here by purpose.
# NOTE(review): `tokenizer` is created earlier in this notebook but is not
# passed here -- confirm whether pipeline.generate expects it.
output_image = pipeline.generate(
    # Loaded models and device placement.
    models=models,
    device=DEVICE,
    idle_device="cpu",
    # Text conditioning and guidance.
    prompt=prompt,
    uncond_prompt=uncond_prompt,
    do_cfg=do_cfg,
    cfg_scale=cfg_scale,
    # Optional starting image (None by default) and its strength.
    input_image=input_image,
    strength=strength,
    # Sampling configuration.
    sampler_name=sampler_name,
    n_inference_steps=n_inference_steps,
    seed=seed,
)