In [1]:
import time
# Wall-clock start of the notebook run; the launch cell subtracts this from
# its own end timestamp to print the total elapsed time.
g_start = time.time()

In [2]:
%%capture
# @title 1. 라이브러리 설치
%pip install -q fastapi uvicorn[standard]
%pip install -q pyngrok>=7.0.0 # diffusers>=0.27.0 transformers accelerate
%pip install -q controlnet_aux Pillow # torch>=2.0.0
%pip install -q safetensors pydantic huggingface_hub python-multipart
# %pip install -U diffusers transformers accelerate safetensors
%pip install -q wget
# %pip install -q peft requests

# # xformers 사용 시 (버전 호환 확인 필요)
# %pip install -q fastapi uvicorn[standard] pyngrok>=7.0.0  # 1) FastAPI / 서버용
# %pip install -q diffusers==0.33.1 transformers==4.51.3 accelerate==1.6.0  # 2) Diffusers + Transformers + Accelerate
# %pip install -q controlnet_aux Pillow safetensors pydantic huggingface_hub python-multipart # 3) Flux용 부가 기능들
# %pip install -q ninja wget  # 4) 기타 도구

# %pip uninstall -y xformers  # 5) 기존 xformers 제거
# %pip install -q git+https://github.com/facebookresearch/xformers.git  # 6) Colab(PyTorch 2.6.0+cu124) 환경에 맞춰 xFormers를 소스에서 빌드 설치
# # %pip install -q peft requests

print("✅ 라이브러리 설치 완료")

In [3]:
# Check library compatibility
# The jedi warning can be ignored (Colab reportedly does not install it by design)
!pip check

ipython 7.34.0 requires jedi, which is not installed.


In [4]:
# @title 2. Hugging Face login / Ngrok setup (Authtoken secret key)
import os
from google.colab import userdata
from huggingface_hub import login
from pyngrok import conf, ngrok

# Hugging Face, CivitAI and ngrok tokens (read from Colab Secrets)
hf_token = userdata.get("HF_TOKEN")        # Hugging Face Token
civitai_token = userdata.get("CIVITAI_TOKEN")   # CivitAI Token
ngrok_token = userdata.get("google_ngrok_authtoken")  # ngrok Token

# Fail fast, naming the missing secret(s), instead of continuing with a None
# token: the following steps pass these values straight to login(),
# ngrok.set_auth_token() and os.environ, none of which can use None.
_required_secrets = {
    "HF_TOKEN": hf_token,
    "CIVITAI_TOKEN": civitai_token,
    "google_ngrok_authtoken": ngrok_token,
}
_missing_secrets = [name for name, value in _required_secrets.items() if not value]
if _missing_secrets:
    raise RuntimeError(
        "Missing Colab secret(s): " + ", ".join(_missing_secrets)
    )

# Port setting
PORT = 8000

# Log in to Hugging Face
print("🔑 Hugging Face 로그인 중...")
login(token=hf_token)

# Register the ngrok auth token
ngrok.set_auth_token(ngrok_token)
print("✅ Ngrok Authtoken 설정 완료")

🔑 Hugging Face 로그인 중...
✅ Ngrok Authtoken 설정 완료


In [5]:
# @title 3. CivitAI Checkpoint 다운로드
import os
os.environ["CIVITAI_API_TOKEN"] = civitai_token

import wget
!wget -O illustration_juaner_flux.safetensors \
"https://civitai.com/api/download/models/1215918?type=Model&format=SafeTensor&size=full&token=$CIVITAI_API_TOKEN"

--2025-05-27 07:59:43--  https://civitai.com/api/download/models/1215918?type=Model&format=SafeTensor&size=full&token=<REDACTED>
Resolving civitai.com (civitai.com)... 172.67.12.143, 104.22.19.237, 104.22.18.237, ...
Connecting to civitai.com (civitai.com)|172.67.12.143|:443... connected.
HTTP request sent, awaiting response... 307 Temporary Redirect
Location: https://civitai-delivery-worker-prod.5ac0637cfd0766c97916cefa3764fbdf.r2.cloudflarestorage.com/model/686417/jGhibliV2FluxUltimate.FZyi.safetensors?X-Amz-Expires=86400&response-content-disposition=attachment%3B%20filename%3D%22IllustrationJuanerGhibli_v20.safetensors%22&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=e01358d793ad6966166af8b3064953ad/20250527/us-east-1/s3/aws4_request&X-Amz-Date=20250527T075943Z&X-Amz-SignedHeaders=host&X-Amz-Signature=25b9f1b92a6ee9c35a0a1b75d6c6e6212a2f6d39ee80f8b967bf28d3df17658d [following]
--2025-05-27 07:59:44--  https://civitai-delivery-worker-prod.5ac0637cfd0766c9791

In [6]:
# @title 4. Model Pipeline 정의 - model_loader.py
# 이 셀은 Model Pipeline을 생성하는 코드를 model_loader.py 파일로 저장합니다.

%%writefile model_loader.py
from diffusers import FluxPipeline, DiffusionPipeline, FluxTransformer2DModel
from transformers import T5EncoderModel

# Ghibli (Flux)
def load_ghibli_flux_model(dtype):
    """Build the FLUX.1-dev pipeline around the single-file Ghibli checkpoint.

    The transformer is read from ``illustration_juaner_flux.safetensors`` and
    the second text encoder from the ``text_encoder_2`` subfolder of the base
    repository; both are then handed to ``FluxPipeline.from_pretrained``.

    Args:
        dtype: torch dtype passed as ``torch_dtype`` to every loader call.

    Returns:
        The pipeline, with model CPU offload and attention slicing enabled.
    """
    base_repo = "black-forest-labs/FLUX.1-dev"
    checkpoint_file = "illustration_juaner_flux.safetensors"

    # Dict literals evaluate in order: transformer first, then the text encoder.
    overrides = {
        "transformer": FluxTransformer2DModel.from_single_file(
            checkpoint_file, torch_dtype=dtype
        ),
        "text_encoder_2": T5EncoderModel.from_pretrained(
            base_repo, subfolder="text_encoder_2", torch_dtype=dtype
        ),
    }

    flux_pipe = FluxPipeline.from_pretrained(base_repo, torch_dtype=dtype, **overrides)
    flux_pipe.enable_model_cpu_offload()
    flux_pipe.enable_attention_slicing()
    return flux_pipe

# AnythingXL (stable-diffusion-xl style)
def load_anything_xl_model(dtype):
    """Load the ``stabilityai/stable-diffusion-xl-base-1.0`` pipeline.

    Args:
        dtype: torch dtype passed as ``torch_dtype``; the ``fp16`` weight
            variant is always requested.

    Returns:
        The pipeline, with model CPU offload and attention slicing enabled.
    """
    load_options = {"torch_dtype": dtype, "variant": "fp16"}
    sdxl_pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", **load_options
    )
    sdxl_pipe.enable_model_cpu_offload()
    sdxl_pipe.enable_attention_slicing()
    return sdxl_pipe


Writing model_loader.py


In [7]:
# @title 5. FastAPI 서버 - main_server.py
# 이 셀은 FastAPI 서버 코드를 main_server.py 파일로 저장합니다.

%%writefile main_server.py
import os, io, gc, logging
from contextlib import asynccontextmanager
from typing import Optional, Any

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel
from PIL import Image
from pyngrok import ngrok


# --- GPU settings ---
# Run on CUDA when it is available, otherwise fall back to the CPU.
device: str = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# bfloat16 only on a CUDA device that reports support for it; float16 otherwise.
dtype: torch.dtype = torch.float16
if device == "cuda" and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16

# --- Configuration constants ---
PORT = 8000
IMAGE_WIDTH = IMAGE_HEIGHT = 1024
DEFAULT_GUIDANCE_SCALE = 3.5
DEFAULT_STEPS = 30

# --- Logging setup ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Utility functions ---
def load_pil_image(image_bytes: bytes) -> Image.Image:
    """Open encoded image bytes with PIL and return the image converted to RGB."""
    source_buffer = io.BytesIO(image_bytes)
    opened = Image.open(source_buffer)
    return opened.convert("RGB")

def image_to_bytes(image: Image.Image) -> bytes:
    """Encode a PIL image as PNG and return the resulting bytes."""
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format='PNG')
        # getvalue() returns the whole buffer regardless of the stream position.
        return png_buffer.getvalue()

def get_generator(seed: Optional[int] = None) -> torch.Generator:
    """Create a seeded ``torch.Generator`` on the module's selected device.

    Args:
        seed: Seed to use. When ``None`` a random seed is drawn with
            ``torch.randint`` and logged so the run can be reproduced.

    Returns:
        A generator created on ``device`` and seeded with ``seed``.
    """
    if seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    logger.info(f"Using seed: {seed}")
    # Use the module-level `device` rather than a hard-coded "cuda", which
    # raises on hosts where CUDA is not available.
    return torch.Generator(device=device).manual_seed(seed)

# -------------------------- Model setup --------------------------
# Import the model loading module
from model_loader import load_ghibli_flux_model, load_anything_xl_model

# --- Global pipeline handles ---
# Both stay None until load_models() has run; the text-to-image endpoint
# answers 503 while the requested one is still None.
ghibli_flux_pipe: Optional[Any] = None
anything_xl_pipe: Optional[Any] = None

# --- Model loading ---
def load_models() -> None:
    """Load both pipelines with the module-level dtype and store them in the globals."""
    global ghibli_flux_pipe, anything_xl_pipe
    ghibli_flux_pipe = load_ghibli_flux_model(dtype)
    anything_xl_pipe = load_anything_xl_model(dtype)

# --- Model teardown ---
# The pipelines must be released (del) in lifespan

# ----------------------------------------------------------------


# --- Server lifecycle ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the pipelines on startup and release them on shutdown.

    On startup the models are loaded and the public URL found in the
    ``NGROK_STATIC_URL`` environment variable (``None`` when unset) is stored
    on ``app.state.ngrok_url``. On shutdown the global pipeline references are
    dropped and the garbage collector and CUDA cache are flushed.
    """
    # Without this declaration, rebinding or deleting the names below would
    # make them function-local and shutdown would raise UnboundLocalError.
    global ghibli_flux_pipe, anything_xl_pipe

    logger.info("App starting...")
    load_models()

    public_url = os.environ.get("NGROK_STATIC_URL", None)
    app.state.ngrok_url = public_url
    logger.info(f"Ngrok (external) URL registered: {public_url}")

    try:
        yield
    finally:
        logger.info("App shutting down...")
        # Rebind to None (rather than `del`) so the module globals still exist
        # and the endpoint's `is None` checks keep working.
        ghibli_flux_pipe = None
        anything_xl_pipe = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# --- Create the FastAPI app ---
# `lifespan` loads the models on startup and releases them on shutdown.
app = FastAPI(lifespan=lifespan)

# --- Request model ---
# JSON body accepted by POST /generate/text-to-image.
class TextToImageRequest(BaseModel):
    # Pipeline selector; the endpoint accepts "ghibli-flux" or "anything-xl".
    model_name: str
    # Text prompt forwarded to the pipeline.
    prompt: str
    # Negative prompt forwarded to the pipeline; empty string by default.
    negative_prompt: Optional[str] = ""
    # Number of inference steps; defaults to DEFAULT_STEPS.
    num_inference_steps: int = DEFAULT_STEPS
    # Guidance scale; defaults to DEFAULT_GUIDANCE_SCALE.
    guidance_scale: float = DEFAULT_GUIDANCE_SCALE
    # Optional seed; when None, get_generator() draws a random one.
    seed: Optional[int] = None

# --- Root check ---
@app.get("/")
async def read_root():
    """Return a status payload: a message, the active device and the ngrok URL.

    ``ngrok_url`` is ``None`` when nothing has been stored on ``app.state``.
    """
    registered_url = getattr(app.state, "ngrok_url", None)
    status = {
        "message": "Image Model(text to image) API is running.",
        "device": device,
        "ngrok_url": registered_url,
    }
    return status

# --- Text -> image ---
@app.post("/generate/text-to-image", response_class=Response)
async def generate_text_to_image(request: TextToImageRequest):
    """Generate one image from a text prompt and return it as PNG.

    Args:
        request: Body selecting the pipeline (``model_name``) and carrying the
            prompt, negative prompt, step count, guidance scale and seed.

    Returns:
        A ``Response`` with media type ``image/png``.

    Raises:
        HTTPException: 503 when the selected pipeline is not loaded, 400 when
            ``model_name`` is not one of the supported values.
    """
    if request.model_name == "ghibli-flux":
        if ghibli_flux_pipe is None:
            raise HTTPException(status_code=503, detail="Ghibli-Flux pipeline not loaded.")
        pipe = ghibli_flux_pipe
    elif request.model_name == "anything-xl":
        if anything_xl_pipe is None:
            raise HTTPException(status_code=503, detail="Anything-XL pipeline not loaded.")
        pipe = anything_xl_pipe
    else:
        # The request model has no `mode` field; reading it raised
        # AttributeError and produced a 500 instead of this 400.
        raise HTTPException(status_code=400, detail=f"Invalid mode: {request.model_name}")

    generator = get_generator(request.seed)

    # Pre-bind so the cleanup below is valid even when the pipeline call raises;
    # otherwise an UnboundLocalError would mask the real error.
    result = None
    try:
        # NOTE(review): the pipeline call is synchronous inside an async
        # handler, so it appears to block the event loop while it runs.
        with torch.inference_mode():
            result = pipe(
                prompt=request.prompt,
                negative_prompt=request.negative_prompt,
                width=IMAGE_WIDTH,
                height=IMAGE_HEIGHT,
                num_inference_steps=request.num_inference_steps,
                guidance_scale=request.guidance_scale,
                generator=generator,
            )
            image_bytes = image_to_bytes(result.images[0])
    finally:
        # Drop the pipeline output before collecting and emptying the CUDA cache.
        result = None
        gc.collect()
        torch.cuda.empty_cache()

    return Response(content=image_bytes, media_type="image/png")

# --- Run the server ---
# When executed as a script, serve the app on all interfaces at PORT with
# auto-reload disabled.
if __name__ == "__main__":
    logger.info("Starting Uvicorn server...")
    uvicorn.run("main_server:app", host="0.0.0.0", port=PORT, reload=False)


Writing main_server.py


In [8]:
# @title Fin.
import subprocess
from pyngrok import ngrok

# ngrok 프로세스 종료
try:
    ngrok.kill()
    print("✅ ngrok 프로세스 종료 완료")
except Exception as e:
    print(f"⚠️ ngrok 종료 중 오류 (무시 가능): {e}")

# uvicorn 서버 프로세스 종료
subprocess.run(['pkill', '-f', 'uvicorn main_server:app'], stderr=subprocess.DEVNULL)
subprocess.run(['pkill', '-f', 'ngrok'], stderr=subprocess.DEVNULL)

import gc
import torch

torch.cuda.empty_cache()
gc.collect()

print("🧹 GPU 메모리 및 캐시 정리 완료")


!ps -ef | grep -E "main_server.py|ngrok" | grep -v grep



import subprocess

# main_server.py 종료
subprocess.run(['pkill', '-f', 'main_server.py'])
print("✅ FastAPI 서버 프로세스 종료 완료")

!ps -ef | grep -E "main_server.py" | grep -v grep


✅ ngrok 프로세스 종료 완료
🧹 GPU 메모리 및 캐시 정리 완료
✅ FastAPI 서버 프로세스 종료 완료


In [9]:
# @title 🚀 ngrok 연결 및 FastAPI 서버 실행
# 환경 변수 설정
%env PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

import time
import os
import subprocess
import logging
from pyngrok import ngrok

# --- 로깅 설정 ---
logging.basicConfig(level=logging.INFO)
pyngrok_logger = logging.getLogger("pyngrok")
pyngrok_logger.setLevel(logging.INFO)

# --- 설정값 ---
PORT = 8000
LOG_FILE = "uvicorn_server.log"
STATIC_NGROK_DOMAIN = "clam-talented-promptly.ngrok-free.app"  # 사용자 고정 도메인
MAX_WAIT_SECONDS = 300  # 최대 대기 시간
READY_KEYWORD = "Uvicorn running on"

# --- ngrok 및 서버 종료 ---
print("🛠 기존 ngrok / 서버 종료 시도...")
try:
    ngrok.kill()
    print("✅ ngrok 종료 완료")
except Exception as e:
    print(f"⚠️ ngrok 종료 중 오류 (무시 가능): {e}")

subprocess.run(['pkill', '-f', 'uvicorn main_server:app'], stderr=subprocess.DEVNULL)
subprocess.run(['pkill', '-f', f'ngrok.*http.*{PORT}'], stderr=subprocess.DEVNULL)
subprocess.run(['pkill', '-f', '/root/.config/ngrok/ngrok'], stderr=subprocess.DEVNULL)
time.sleep(3)

# --- ngrok 고정 도메인 연결 ---
print(f"🌐 ngrok 고정 도메인 연결 시도: {STATIC_NGROK_DOMAIN}...")
public_url = ngrok.connect(
    addr=PORT,
    proto="http",
    domain=STATIC_NGROK_DOMAIN
).public_url
print(f"✅ ngrok 연결 완료: {public_url}")

# ✅ ngrok URL을 환경 변수로 등록 (main_server.py에서 사용함)
os.environ["NGROK_STATIC_URL"] = public_url

# --- FastAPI 서버 백그라운드 실행 ---
print(f"🚀 FastAPI 서버 실행 중... 로그 파일: {LOG_FILE}")
if os.path.exists(LOG_FILE):
    os.remove(LOG_FILE)

subprocess.Popen(
    f"nohup python main_server.py > {LOG_FILE} 2>&1 &",
    shell=True
)
time.sleep(5)  # 초기 부팅 대기

# --- 서버 준비 대기 ---
print(f"⏳ 모델 로딩 대기 중... ('{READY_KEYWORD}' 감지)")
ready_detected = False

for i in range(MAX_WAIT_SECONDS):
    print(f"{i+1}", end=" ", flush=True)
    if (i + 1) % 30 == 0:
        print()

    if (i + 1) % 5 == 0 and os.path.exists(LOG_FILE):
        try:
            with open(LOG_FILE, 'r') as f:
                if READY_KEYWORD in f.read():
                    print("\n✅ 서버 준비 완료!")
                    ready_detected = True
                    break
        except Exception as e:
            print(f"\n⚠️ 로그 확인 오류: {e}")

    time.sleep(1)

if not ready_detected:
    print("\n⚠️ 최대 대기 시간 초과: 모델 로딩 실패 가능성 있음")

# --- 안정화 대기 ---
print("⌛ 안정화를 위해 10초 대기 중...")
time.sleep(10)

# --- 결과 출력 ---
print(f"\n🎯 서버 실행 완료! 외부 접속 URL:")
print(f"🔗 {public_url}")

print("\n📋 현재 실행 중인 프로세스:")
!ps -ef | grep -E "main_server.py|ngrok" | grep -v grep

g_end = time.time()
print(f"\nTotal time : {(g_end - g_start):.2f} seconds")


env: PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
🛠 기존 ngrok / 서버 종료 시도...
✅ ngrok 종료 완료


INFO:pyngrok.ngrok:Opening tunnel named: http-8000-edf0a590-8e40-4bcc-a40c-4d4603db2c36
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:05+0000 lvl=info msg="no configuration paths supplied"
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:05+0000 lvl=info msg="using configuration at default config path" path=/root/.config/ngrok/ngrok.yml
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:05+0000 lvl=info msg="open config file" path=/root/.config/ngrok/ngrok.yml err=nil
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:05+0000 lvl=info msg="starting web service" obj=web addr=127.0.0.1:4040 allow_hosts=[]


🌐 ngrok 고정 도메인 연결 시도: clam-talented-promptly.ngrok-free.app...


INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg="client session established" obj=tunnels.session
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg="tunnel session started" obj=tunnels.session
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg=start pg=/api/tunnels id=09d7b73e5be89387
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg=end pg=/api/tunnels id=09d7b73e5be89387 status=200 dur=300.023µs
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg=start pg=/api/tunnels id=a32c1be786dbd981
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg=end pg=/api/tunnels id=a32c1be786dbd981 status=200 dur=112.174µs
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg=start pg=/api/tunnels id=e7cad0985e1cd2d8
INFO:pyngrok.process.ngrok:t=2025-05-27T08:03:06+0000 lvl=info msg="started tunnel" obj=tunnels name=http-8000-edf0a590-8e40-4bcc-a40c-4d4603db2c36 addr=http://localhost:8000 url=ht

✅ ngrok 연결 완료: https://clam-talented-promptly.ngrok-free.app
🚀 FastAPI 서버 실행 중... 로그 파일: uvicorn_server.log
⏳ 모델 로딩 대기 중... ('Uvicorn running on' 감지)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 
✅ 서버 준비 완료!
⌛ 안정화를 위해 10초 대기 중...

🎯 서버 실행 완료! 외부 접속 URL:
🔗 https://clam-talented-promptly.ngrok-free.app

📋 현재 실행 중인 프로세스:
root        3922    2408  0 08:03 ?        00:00:00 /root/.config/ngrok/ngrok start --none --log stdout
root        3951       1 99 08:03 ?        00:02:32 python3 main_server.py

Total time : 398.92 seconds


In [10]:
# Print the GPU status table reported by nvidia-smi.
!nvidia-smi

Tue May 27 08:04:45 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   33C    P0             46W /  400W |       5MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [11]:
# @title [Legacy version - uses to('cuda')] Write the FastAPI application code (creates main_server.py)
'''
# 이 셀은 FastAPI 서버 코드를 main_server.py 파일로 저장합니다.
%%writefile main_server.py
import os
import io
import logging
import gc
from contextlib import asynccontextmanager
from typing import Optional, Any

import torch
import uvicorn
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from fastapi.responses import Response
from pydantic import BaseModel
from PIL import Image
from pyngrok import ngrok

# Diffusers - Flux + ControlNet
from diffusers import FluxPipeline, FluxControlNetModel, FluxControlNetPipeline
from controlnet_aux import CannyDetector

# IP-Adapter
from pipeline_flux_ipa import FluxPipeline as FluxPipelineIP
from transformer_flux import FluxTransformer2DModel
from infer_flux_ipa_siglip import resize_img, IPAdapter

# --- Configuration ---
BASE_MODEL_ID = "black-forest-labs/FLUX.1-dev"
CONTROLNET_MODEL_ID = "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro-2.0"
IMAGE_ENCODER_PATH = "google/siglip-so400m-patch14-384"
IPADAPTER_PATH = "./ip-adapter.bin"

DEFAULT_STEPS = 30
DEFAULT_GUIDANCE_SCALE = 3.5
DEFAULT_CONTROLNET_SCALE = 0.7
DEFAULT_IPADAPTER_SCALE = 0.7
IMAGE_WIDTH = 1024
IMAGE_HEIGHT = 1024
PORT = 8000

# --- Global State ---
base_pipe: Optional[FluxPipeline] = None
controlnet_pipe: Optional[FluxControlNetPipeline] = None
controlnet_preprocessor: Optional[Any] = None
pipe_ip: Optional[FluxPipelineIP] = None
ip_model: Optional[IPAdapter] = None
device: Optional[str] = None

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Helper Functions ---
def load_pil_image(image_bytes: bytes) -> Image.Image:
    return Image.open(io.BytesIO(image_bytes)).convert("RGB")

def image_to_bytes(image: Image.Image) -> bytes:
    byte_arr = io.BytesIO()
    image.save(byte_arr, format='PNG')
    byte_arr.seek(0)
    return byte_arr.getvalue()

def get_generator(seed: Optional[int] = None) -> torch.Generator:
    if seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    logger.info(f"Using seed: {seed}")
    return torch.Generator(device=device).manual_seed(seed)

async def prepare_reference_image(uploaded_image: UploadFile) -> Image.Image:
    image = load_pil_image(await uploaded_image.read())
    return resize_img(image)

def prepare_control_image(uploaded_image: UploadFile) -> Image.Image:
    if controlnet_preprocessor is None:
        raise RuntimeError("ControlNet preprocessor not loaded.")
    image = load_pil_image(uploaded_image.file.read())
    control_image = controlnet_preprocessor(image)
    return control_image

# --- Model Loading ---
def load_models():
    global base_pipe, controlnet_pipe, controlnet_preprocessor, pipe_ip, ip_model, device

    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Using device: {device}")
    dtype = torch.bfloat16 if device == "cuda" and torch.cuda.is_bf16_supported() else torch.float16

    colab_log_flag = 'O'
    try:
        logger.info("Loading Flux base model...")
        base_pipe = FluxPipeline.from_pretrained(BASE_MODEL_ID, torch_dtype=dtype)
        base_pipe.to("cpu")
        base_pipe.enable_model_cpu_offload()
        base_pipe.enable_attention_slicing()

        logger.info("Loading Flux ControlNet model...")
        controlnet_model = FluxControlNetModel.from_pretrained(CONTROLNET_MODEL_ID, torch_dtype=dtype)
        controlnet_pipe = FluxControlNetPipeline.from_pretrained(BASE_MODEL_ID, controlnet=controlnet_model, torch_dtype=dtype)
        controlnet_pipe.to("cpu")
        controlnet_pipe.enable_model_cpu_offload()
        controlnet_pipe.enable_attention_slicing()

        logger.info("Loading Canny Preprocessor...")
        controlnet_preprocessor = CannyDetector()

        logger.info("Loading Flux IP-Adapter model...")
        transformer = FluxTransformer2DModel.from_pretrained(BASE_MODEL_ID, subfolder="transformer", torch_dtype=dtype)
        pipe_ip = FluxPipelineIP.from_pretrained(BASE_MODEL_ID, transformer=transformer, torch_dtype=dtype)
        ip_model = IPAdapter(
            pipe_ip,
            IMAGE_ENCODER_PATH,
            IPADAPTER_PATH,
            device=device,
            num_tokens=128
        )
        pipe_ip.to("cpu")
        pipe_ip.enable_model_cpu_offload()
        pipe_ip.enable_attention_slicing()

    except Exception as e:
        logger.exception("Fatal error during model loading")
        colab_log_flag = 'X'
        raise RuntimeError(f"Failed to load models: {e}")

    logger.info("✅ All models loaded.")
    print(colab_log_flag)

# --- FastAPI Setup ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    logger.info("Application startup...")
    load_models()
    ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN")
    if ngrok_auth_token:
        public_url = ngrok.connect(PORT, "http")
        logger.info(f"Ngrok tunnel active at: {public_url}")
        app.state.ngrok_url = public_url
    else:
        app.state.ngrok_url = None
    yield
    logger.info("Application shutdown...")
    ngrok.kill()
    global base_pipe, controlnet_pipe, pipe_ip
    del base_pipe, controlnet_pipe, pipe_ip
    if device == "cuda":
        torch.cuda.empty_cache()

app = FastAPI(lifespan=lifespan, title="Flux Unified API")

# --- Pydantic Model ---
class TextToImageRequest(BaseModel):
    prompt: str
    negative_prompt: Optional[str] = ""
    num_inference_steps: int = DEFAULT_STEPS
    guidance_scale: float = DEFAULT_GUIDANCE_SCALE
    seed: Optional[int] = None

# --- API Endpoints ---
@app.get("/")
async def read_root():
    return {
        "message": "Flux Unified API running.",
        "device": device,
        "ngrok_url": app.state.ngrok_url if hasattr(app.state, "ngrok_url") else None
    }

@app.post("/generate/text-to-image", response_class=Response)
async def generate_text_to_image(request: TextToImageRequest):
    if base_pipe is None:
        raise HTTPException(status_code=503, detail="Base pipeline not ready.")
    base_pipe.to("cuda")
    generator = get_generator(request.seed)

    try:
        with torch.inference_mode():
            result = base_pipe(
                prompt=request.prompt,
                negative_prompt=request.negative_prompt,
                width=IMAGE_WIDTH,
                height=IMAGE_HEIGHT,
                num_inference_steps=request.num_inference_steps,
                guidance_scale=request.guidance_scale,
                generator=generator,
            )
        output_image = result.images[0]
        img_bytes = image_to_bytes(output_image)
        return Response(content=img_bytes, media_type="image/png")
    finally:
        base_pipe.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

@app.post("/generate/image-to-image", response_class=Response)
async def generate_image_to_image(
    prompt: str = Form(...),
    negative_prompt: Optional[str] = Form(""),
    controlnet_scale: float = Form(DEFAULT_CONTROLNET_SCALE),
    num_inference_steps: int = Form(DEFAULT_STEPS),
    guidance_scale: float = Form(DEFAULT_GUIDANCE_SCALE),
    seed: Optional[int] = Form(None),
    image: UploadFile = File(...)
):
    if controlnet_pipe is None:
        raise HTTPException(status_code=503, detail="ControlNet pipeline not ready.")

    controlnet_pipe.to("cuda")
    control_image = prepare_control_image(image)
    generator = get_generator(seed)

    try:
        with torch.inference_mode():
            result = controlnet_pipe(
                prompt=prompt,
                image=control_image,
                width=IMAGE_WIDTH,
                height=IMAGE_HEIGHT,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                controlnet_conditioning_scale=controlnet_scale,
                generator=generator,
            )
        output_image = result.images[0]
        img_bytes = image_to_bytes(output_image)
        return Response(content=img_bytes, media_type="image/png")
    finally:
        controlnet_pipe.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

@app.post("/generate/ip-adapter-image", response_class=Response)
async def generate_ip_adapter_image(
    prompt: str = Form(...),
    ipadapter_scale: float = Form(DEFAULT_IPADAPTER_SCALE),
    num_inference_steps: int = Form(DEFAULT_STEPS),
    seed: Optional[int] = Form(None),
    image: UploadFile = File(...)
):
    if ip_model is None:
        raise HTTPException(status_code=503, detail="IP-Adapter not ready.")

    pipe_ip.to("cuda")
    reference_image = await prepare_reference_image(image)

    try:
        with torch.inference_mode():
            output_images = ip_model.generate(
                pil_image=reference_image,
                prompt=prompt,
                scale=ipadapter_scale,
                width=IMAGE_WIDTH,
                height=IMAGE_HEIGHT,
                seed=seed,
            )
        output_image = output_images[0]
        img_bytes = image_to_bytes(output_image)
        return Response(content=img_bytes, media_type="image/png")
    finally:
        pipe_ip.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

# --- Main ---
if __name__ == "__main__":
    logger.info("Starting Uvicorn server...")
    uvicorn.run("main_server:app", host="0.0.0.0", port=PORT, reload=False)
'''

'\n# 이 셀은 FastAPI 서버 코드를 main_server.py 파일로 저장합니다.\n%%writefile main_server.py\nimport os\nimport io\nimport logging\nimport gc\nfrom contextlib import asynccontextmanager\nfrom typing import Optional, Any\n\nimport torch\nimport uvicorn\nfrom fastapi import FastAPI, HTTPException, UploadFile, File, Form\nfrom fastapi.responses import Response\nfrom pydantic import BaseModel\nfrom PIL import Image\nfrom pyngrok import ngrok\n\n# Diffusers - Flux + ControlNet\nfrom diffusers import FluxPipeline, FluxControlNetModel, FluxControlNetPipeline\nfrom controlnet_aux import CannyDetector\n\n# IP-Adapter\nfrom pipeline_flux_ipa import FluxPipeline as FluxPipelineIP\nfrom transformer_flux import FluxTransformer2DModel\nfrom infer_flux_ipa_siglip import resize_img, IPAdapter\n\n# --- Configuration ---\nBASE_MODEL_ID = "black-forest-labs/FLUX.1-dev"\nCONTROLNET_MODEL_ID = "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro-2.0"\nIMAGE_ENCODER_PATH = "google/siglip-so400m-patch14-384"\nIPADAPTER_PATH = ".