### Utils

In [2]:
from diffusers import AutoPipelineForText2Image, StableDiffusionPipeline
from datetime import datetime
import torch
import time
import gc
import os
from PIL import Image
from IPython.display import display
import subprocess

def print_gpu_usage(stage=""):
    """Print the current CUDA allocated and reserved memory, tagged with `stage`.

    Both figures are reported in GB, using 1e9 bytes per GB.
    """
    bytes_per_gb = 1e9
    # Query order matches the printed order: allocated first, then reserved.
    allocated_gb, reserved_gb = (
        query() / bytes_per_gb
        for query in (torch.cuda.memory_allocated, torch.cuda.memory_reserved)
    )
    print(f"[{stage}] Allocated: {allocated_gb:.2f} GB | Reserved: {reserved_gb:.2f} GB")

def get_nvidia_smi():
    """Print the text output of the `nvidia-smi` command.

    Best-effort: any exception raised while running or printing is caught and
    reported on stdout instead of propagating to the caller.
    """
    command = ['nvidia-smi']
    try:
        print(subprocess.check_output(command, encoding='utf-8'))
    except Exception as err:
        print("nvidia-smi 호출 실패:", err)


### Settings

In [3]:
# Model ids handed to `from_pretrained`, keyed by the short model version name.
model_dict = dict(
    sd15="runwayml/stable-diffusion-v1-5",
    sdxl="stabilityai/stable-diffusion-xl-base-1.0",
)
# Image size per model version; the test functions unpack it as (width, height).
default_image_size = dict(
    sd15=(512, 512),
    sdxl=(1024, 1024),
)


In [4]:
# Prompt shared by every test run; phrases are joined with ", " into one string.
positive_prompt = ", ".join([
    "a Japanese girl standing in the middle of a path covered with autumn leaves",
    "a fluffy white cat beside her",
    "soft warm sunlight",
    "cinematic scene",
    "highly detailed",
    "full body",
    "gentle expression",
    "calm atmosphere",
])
# Negative prompt passed alongside the positive one on every pipeline call.
negative_prompt = ", ".join([
    "blurry",
    "bad anatomy",
    "distorted",
    "low resolution",
    "extra limbs",
    "deformed cat",
])


### Model Test Definitions

#### No LoRA

In [5]:
def test_single_image(model_version="sdxl"):
    """Load a pipeline, generate one image, and report timing and GPU memory.

    Args:
        model_version: Key into ``model_dict`` and ``default_image_size``.
            ``"sd15"`` is loaded with ``StableDiffusionPipeline``; any other
            key is loaded with ``AutoPipelineForText2Image`` (fp16 variant).

    Raises:
        KeyError: If ``model_version`` is not a key of ``model_dict``.

    Side effects:
        Displays the image, saves it as ``single_{model_version}.png`` in the
        working directory, and prints memory figures and ``nvidia-smi`` output.
        The pipeline reference is dropped and the CUDA cache emptied before
        returning, including when an error is raised part-way through.
    """
    print(f"==== 테스트: 단일 이미지 생성 | 모델: {model_version} ====")
    model_id = model_dict[model_version]
    width, height = default_image_size[model_version]

    # Reset the allocator's peak counters so the peaks printed below belong to this run.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    # Bound up front so the cleanup in `finally` never hits an unbound local.
    pipe = image = None
    try:
        # The timer starts before loading, so the reported time includes model load.
        t0 = time.perf_counter()
        if model_version == "sd15":
            pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
        else:
            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")

        print_gpu_usage("모델 로딩 직후")

        image = pipe(prompt=positive_prompt, negative_prompt=negative_prompt, height=height, width=width).images[0]
        t1 = time.perf_counter()

        display(image)
        image.save(f"single_{model_version}.png")

        print_gpu_usage("이미지 생성 직후")
        print(f"⏱️ 생성 시간: {t1 - t0:.2f}초")
        print(f"📈 Peak Allocated: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
        print(f"📈 Peak Reserved : {torch.cuda.max_memory_reserved() / 1e9:.2f} GB")

        print("\n[nvidia-smi 출력]")
        get_nvidia_smi()
    finally:
        # Always drop our references and release cached blocks, so a failed run
        # does not skip the cleanup before the next test starts.
        del pipe, image
        gc.collect()
        torch.cuda.empty_cache()

    print('--------------------------')
    print('\n\n\n\n')



In [6]:
def test_multi_image(model_version="sdxl", count=5):
    """Load a pipeline once, generate `count` images, and report timing and GPU memory.

    Args:
        model_version: Key into ``model_dict`` and ``default_image_size``.
            ``"sd15"`` is loaded with ``StableDiffusionPipeline``; any other
            key is loaded with ``AutoPipelineForText2Image`` (fp16 variant).
        count: Number of images to generate sequentially with the same
            pipeline. ``0`` loads the model and reports statistics without
            generating anything.

    Raises:
        KeyError: If ``model_version`` is not a key of ``model_dict``.

    Side effects:
        Displays each image, saves it as ``multi_{model_version}_{i}.png``
        (``i`` starting at 1), and prints memory figures and ``nvidia-smi``
        output. The pipeline reference is dropped and the CUDA cache emptied
        before returning, including when an error is raised part-way through.
    """
    print(f"==== 테스트: {count}장 이미지 생성 | 모델: {model_version} ====")
    model_id = model_dict[model_version]
    width, height = default_image_size[model_version]

    # Reset the allocator's peak counters so the peaks printed below belong to this run.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    # Bound up front: with count == 0 the loop never assigns `image`, and the
    # cleanup below would otherwise fail on an unbound local.
    pipe = image = None
    try:
        # The timer starts before loading, so the total includes model load.
        t0 = time.perf_counter()
        if model_version == "sd15":
            pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
        else:
            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")

        print_gpu_usage("모델 로딩 직후")

        for i in range(count):
            print(f"→ 생성 중: {i+1}/{count}")
            image = pipe(prompt=positive_prompt, negative_prompt=negative_prompt, height=height, width=width).images[0]
            image.save(f"multi_{model_version}_{i+1}.png")
            display(image)

        t1 = time.perf_counter()
        print_gpu_usage("이미지 생성 직후")
        print(f"⏱️ 총 생성 시간: {t1 - t0:.2f}초")
        print(f"📈 Peak Allocated: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
        print(f"📈 Peak Reserved : {torch.cuda.max_memory_reserved() / 1e9:.2f} GB")

        print("\n[nvidia-smi 출력]")
        get_nvidia_smi()
    finally:
        # Always drop our references and release cached blocks, so a failed run
        # does not skip the cleanup before the next test starts.
        del pipe, image
        gc.collect()
        torch.cuda.empty_cache()

    print('--------------------------')
    print('\n\n\n\n')


#### With LoRA

In [None]:
# Ghibli LoRA source per model version: `lora_path` and `weight_name` are the
# two values the LoRA tests pass to `load_lora_weights`.
lora_config = dict(
    sd15=dict(
        lora_path="artificialguybr/studioghibli-redmond-1-5v-studio-ghibli-lora-for-liberteredmond-sd-1-5",
        weight_name="StudioGhibliRedmond-15V-LiberteRedmond-StdGBRedmAF-StudioGhibli.safetensors",
    ),
    sdxl=dict(
        lora_path="ntc-ai/SDXL-LoRA-slider.Studio-Ghibli-style",
        weight_name="Studio Ghibli style.safetensors",
    ),
)


In [None]:
def test_single_image_with_lora(model_version="sdxl"):
    """Load a pipeline, apply the Ghibli LoRA, generate one image, and report stats.

    Args:
        model_version: Key into ``model_dict``, ``default_image_size`` and
            ``lora_config``. ``"sd15"`` is loaded with
            ``StableDiffusionPipeline``; any other key is loaded with
            ``AutoPipelineForText2Image`` (fp16 variant).

    Raises:
        KeyError: If ``model_version`` is missing from one of the config dicts.

    Side effects:
        Displays the image, saves it as ``single_{model_version}_ghibli.png``,
        and prints memory figures and ``nvidia-smi`` output. The pipeline
        reference is dropped and the CUDA cache emptied before returning,
        including when an error is raised part-way through.
    """
    print(f"==== 테스트: 단일 이미지 생성 (Ghibli LoRA 적용) | 모델: {model_version} ====")
    model_id = model_dict[model_version]
    width, height = default_image_size[model_version]
    lora_path = lora_config[model_version]["lora_path"]
    weight_name = lora_config[model_version]["weight_name"]
    adapter_name = "ghibli"

    # Reset the allocator's peak counters so the peaks printed below belong to this run.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    # Bound up front so the cleanup in `finally` never hits an unbound local.
    pipe = image = None
    try:
        # The timer starts before loading, so the reported time includes model and LoRA load.
        t0 = time.perf_counter()
        if model_version == "sd15":
            pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
        else:
            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")

        # Name the adapter explicitly and activate it by that same name. When
        # `adapter_name` is omitted, diffusers registers the adapter under an
        # auto-generated `default_{i}` name, so selecting "default" would not
        # match the adapter that was just loaded.
        pipe.load_lora_weights(lora_path, weight_name=weight_name, adapter_name=adapter_name)
        pipe.set_adapters([adapter_name])

        print_gpu_usage("모델 로딩 + LoRA 적용 후")

        image = pipe(prompt=positive_prompt, negative_prompt=negative_prompt, height=height, width=width).images[0]
        t1 = time.perf_counter()

        display(image)
        image.save(f"single_{model_version}_ghibli.png")

        print_gpu_usage("이미지 생성 직후")
        print(f"⏱️ 생성 시간: {t1 - t0:.2f}초")
        print(f"📈 Peak Allocated: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
        print(f"📈 Peak Reserved : {torch.cuda.max_memory_reserved() / 1e9:.2f} GB")

        print("\n[nvidia-smi 출력]")
        get_nvidia_smi()
    finally:
        # Always drop our references and release cached blocks, so a failed run
        # does not skip the cleanup before the next test starts.
        del pipe, image
        gc.collect()
        torch.cuda.empty_cache()

    print('--------------------------')
    print('\n\n\n\n')


In [None]:
def test_multi_image_with_lora(model_version="sdxl", count=5):
    """Load a pipeline, apply the Ghibli LoRA, generate `count` images, and report stats.

    Args:
        model_version: Key into ``model_dict``, ``default_image_size`` and
            ``lora_config``. ``"sd15"`` is loaded with
            ``StableDiffusionPipeline``; any other key is loaded with
            ``AutoPipelineForText2Image`` (fp16 variant).
        count: Number of images to generate sequentially with the same
            pipeline. ``0`` loads the model and LoRA and reports statistics
            without generating anything.

    Raises:
        KeyError: If ``model_version`` is missing from one of the config dicts.

    Side effects:
        Displays each image, saves it as
        ``multi_{model_version}_ghibli_{i}.png`` (``i`` starting at 1), and
        prints memory figures and ``nvidia-smi`` output. The pipeline
        reference is dropped and the CUDA cache emptied before returning,
        including when an error is raised part-way through.
    """
    print(f"==== 테스트: {count}장 이미지 생성 (Ghibli LoRA 적용) | 모델: {model_version} ====")
    model_id = model_dict[model_version]
    width, height = default_image_size[model_version]
    lora_path = lora_config[model_version]["lora_path"]
    weight_name = lora_config[model_version]["weight_name"]
    adapter_name = "ghibli"

    # Reset the allocator's peak counters so the peaks printed below belong to this run.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    # Bound up front: with count == 0 the loop never assigns `image`, and the
    # cleanup below would otherwise fail on an unbound local.
    pipe = image = None
    try:
        # The timer starts before loading, so the total includes model and LoRA load.
        t0 = time.perf_counter()
        if model_version == "sd15":
            pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
        else:
            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")

        # Name the adapter explicitly and activate it by that same name. When
        # `adapter_name` is omitted, diffusers registers the adapter under an
        # auto-generated `default_{i}` name, so selecting "default" would not
        # match the adapter that was just loaded.
        pipe.load_lora_weights(lora_path, weight_name=weight_name, adapter_name=adapter_name)
        pipe.set_adapters([adapter_name])

        print_gpu_usage("모델 로딩 + LoRA 적용 후")

        for i in range(count):
            print(f"→ 생성 중: {i+1}/{count}")
            image = pipe(prompt=positive_prompt, negative_prompt=negative_prompt, height=height, width=width).images[0]
            image.save(f"multi_{model_version}_ghibli_{i+1}.png")
            display(image)

        t1 = time.perf_counter()
        print_gpu_usage("이미지 생성 직후")
        print(f"⏱️ 총 생성 시간: {t1 - t0:.2f}초")
        print(f"📈 Peak Allocated: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
        print(f"📈 Peak Reserved : {torch.cuda.max_memory_reserved() / 1e9:.2f} GB")

        print("\n[nvidia-smi 출력]")
        get_nvidia_smi()
    finally:
        # Always drop our references and release cached blocks, so a failed run
        # does not skip the cleanup before the next test starts.
        del pipe, image
        gc.collect()
        torch.cuda.empty_cache()

    print('--------------------------')
    print('\n\n\n\n')


### Single Test

In [6]:
# Single-image test, no LoRA: run each model version in turn.
for model_version in ("sd15", "sdxl"):
    test_single_image(model_version)


Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Single-image test with LoRA: SDXL runs first here, then SD 1.5.
for model_version in ("sdxl", "sd15"):
    test_single_image_with_lora(model_version)


### Multi Test

In [7]:
# Multi-image test, no LoRA: five images per model version.
for model_version in ("sd15", "sdxl"):
    test_multi_image(model_version, count=5)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Multi-image test with LoRA: five images per model version.
for model_version in ("sd15", "sdxl"):
    test_multi_image_with_lora(model_version, count=5)
