In [None]:
# installs
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
!pip install diffusers transformers accelerate safetensors
!pip install git+https://github.com/tencent-ailab/IP-Adapter.git

In [2]:
# imports
import torch
from diffusers import StableDiffusionXLPipeline, DDIMScheduler, AutoencoderKL
from PIL import Image
from ip_adapter import IPAdapter

In [None]:
# offloaded storage
from google.colab import drive
drive.mount('/content/drive')
!ln -s /content/drive/MyDrive/AutobioLoraFinetuning AutobioLoraFinetuning

In [None]:
# config
# Placeholder values: replace each one before running the cells below.

# base model
# Folder under /content that the checkpoint and image paths are built from.
BASE_DIR = "add-base-dir-here"
# Checkpoint file name, read from /content/{BASE_DIR}/checkpoints/.
MODEL_FILE = "add-model-file-here.safetensors"
# ip adapter
# Passed to pipeline.load_ip_adapter as the source, subfolder and weight_name.
IP_ADAPTER = "h94/IP-Adapter"
IP_ADAPTER_SUBFOLDER = "sdxl_models"
IP_ADAPTER_WEIGHTS = "ip-adapter_sdxl.bin"
# lora
# Passed to pipeline.load_lora_weights as the source and weight_name.
LORA_WEIGHTS = "hf-name/lora-name"
LORA_WEIGHTS_NAME = "lora-weights-name.safetensors"
# resources
# Reference image for the subject-generation cell, read from /content/{BASE_DIR}/.
PROFILE_IMAGE = "add-profile-image-here.png"
# Reference image for the inference cell, read from /content/{BASE_DIR}/images/.
GENERATED_PROFILE_IMAGE = "add-generated-profile-image-here.png"

In [None]:
# spin up the base model

# Build the pipeline from a single checkpoint file in half precision,
# then move it onto the GPU.
pipeline = StableDiffusionXLPipeline.from_single_file(
    f"/content/{BASE_DIR}/checkpoints/{MODEL_FILE}", torch_dtype=torch.float16
)
pipeline = pipeline.to("cuda")

In [None]:
# IP-Adapters
# Attach the IP-Adapter weights named in the config cell to the pipeline.
pipeline.load_ip_adapter(
    IP_ADAPTER, subfolder=IP_ADAPTER_SUBFOLDER, weight_name=IP_ADAPTER_WEIGHTS
)

In [272]:
# InstantStyle
# Per-block IP-Adapter scales keyed by "down"/"up" and block name.
# NOTE(review): this dict is not passed to the pipeline in this cell; the call
# below applies a flat 0.33 instead, and `scale` is reassigned to 1.0 in the
# subject-generation cell. Confirm whether set_ip_adapter_scale(scale) was intended.
scale = {
    "down": {"block_2": [0.0, 0.0]},
    "up": {"block_0": [0.0, 0.5, 0.0]},
}
# Flat scale currently in effect for the inference cell.
pipeline.set_ip_adapter_scale(0.33) # alternatively, 0.3 might work. TODO: make this dynamic based on the complexity of the prompt. Lower for more complex prompts.

In [None]:
# LoRAs
# Load the LoRA weights named in the config cell into the pipeline.
pipeline.load_lora_weights(LORA_WEIGHTS, weight_name=LORA_WEIGHTS_NAME)



In [None]:
# inference
from diffusers.utils import load_image

# for IP-adapter
# Reference image passed as ip_adapter_image in the pipeline call below.
# NOTE(review): no cell in this notebook writes GENERATED_PROFILE_IMAGE to disk;
# presumably it is saved by hand from the subject-generation cell. Confirm.
image = load_image(f"/content/{BASE_DIR}/images/{GENERATED_PROFILE_IMAGE}")

# Style suffixes tried so far; the pipeline call below selects one by index.
# NOTE(review): entries 4/5 and 6/7 are identical strings. Removing either would
# shift the indices used below, so they are left in place.
prompt_style_experiments = [
    "high contrast, minimalistic, colored black and grungy white, stark, graphic novel illustration, cross hatching",
    "monochrome, bright highlights, deep shadows, graphic novel illustration",
    "monochrome, journal entry sketch, graphic novel illustration",
    "highest quality, monochrome, professional sketch, personal, intimate, nostalgic",
    "highest quality, monochrome, professional sketch, personal, nostalgic, clean",
    "highest quality, monochrome, professional sketch, personal, nostalgic, clean",
    "highest quality, monochrome, professional sketch, clean, simple",
    "highest quality, monochrome, professional sketch, clean, simple", # can't decide about adding "stylized"
    "highest quality, professional sketch, monochrome", # can't decide about adding "stylized"
]

# Negative prompts tried so far; also selected by index in the pipeline call below.
negative_prompt_experiments = [
    "worst quality, low quality, error, glitch, mistake, busy, words, writing, photo, photo-realistic",
    "error, glitch, mistake",
]

def prompt_builder(content_prompt, style_prompt, age = -1):
  """Join the content and style prompts into one comma-separated prompt.

  Args:
    content_prompt: Description of the scene to draw.
    style_prompt: Style keywords appended at the end of the prompt.
    age: When greater than -1, an "age <age>" clause is inserted between the
      content and the style. The default of -1 leaves the clause out.

  Returns:
    The combined prompt string.
  """
  age_clause = f", age {age}" if age > -1 else ""
  return f"{content_prompt}{age_clause}, {style_prompt}"

# Run the pipeline for the chosen content/style pair; the first generated
# image is the cell's output.
pipeline(
    prompt=prompt_builder(
        "Getting drinks with my friends",
        prompt_style_experiments[5],
    ),
    negative_prompt=negative_prompt_experiments[1],
    num_inference_steps=50,
    ip_adapter_image=image,
).images[0]

In [None]:
# subject generation
from diffusers.utils import load_image

# found InstantStyle to be detrimental here, so one flat scale is applied
# NOTE(review): this presumably stays in effect on the pipeline until the scale
# is set again, so re-run the InstantStyle cell before going back to inference.
scale = 1.0
pipeline.set_ip_adapter_scale(scale)

# Reference image of the subject for the IP-Adapter.
image = load_image(f"/content/{BASE_DIR}/{PROFILE_IMAGE}")

# Generate a single portrait; the first image is the cell's output.
pipeline(
    ip_adapter_image=image,
    prompt="professional sketch portrait",  # found short prompt is better
    negative_prompt="error, glitch, mistake",
    num_images_per_prompt=1,
    num_inference_steps=50,
).images[0]