## Import

In [2]:
from tqdm import tqdm
import random
import os
import zipfile
import json
import torch
import pandas as pd
import numpy as np
from PIL import Image
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from controlnet_aux import CannyDetector
import open_clip
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)


In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("Using device:", device)

Using device: cuda


## Hyperparameter Setting

In [4]:
# Notebook-wide settings: output directory for submission artifacts and the RNG seed.
CFG = dict(
    SUB_DIR='./submission',
    SEED=42,
)

## Fix Random Seed

In [5]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Seed value applied to every random number generator below.
    """
    # Stored as a string because environment values must be str.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Global generators of the standard library and NumPy.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch: the default generator, then the CUDA generator explicitly.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # cuDNN: disable benchmark mode and enable the deterministic flag.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

seed_everything(CFG['SEED']) # Fix the seed

## Load Pre-trained Model (Stable-Diffusion-V1-5)

In [6]:
# Half precision is only used on CUDA; on the CPU fallback the weights are loaded as
# float32, because many CPU kernels are missing or very slow for float16.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

# ControlNet checkpoint conditioned on Canny edge maps.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=model_dtype
).to(device)

# Stable Diffusion v1.5 pipeline with the ControlNet attached.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=model_dtype
).to(device)

# Replace the default scheduler with UniPC, built from the existing scheduler's config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

Fetching 15 files: 100%|██████████| 15/15 [01:06<00:00,  4.44s/it]
Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00,  4.55it/s]


## Pre-Processing Input Image (ControlNet)

In [8]:
# Detector instances are cached per type so repeated calls do not rebuild them
# (for "hed" that would mean reloading the pretrained annotator on every image).
_DETECTOR_CACHE = {}


def preprocess_for_controlnet(image: Image.Image, detector_type: str = "canny") -> Image.Image:
    """Turn an input image into a ControlNet conditioning image.

    Args:
        image: Source PIL image.
        detector_type: Detector to apply, either "canny" or "hed".

    Returns:
        The detector output wrapped in a PIL image.

    Raises:
        ValueError: If ``detector_type`` is neither "canny" nor "hed".
    """
    # Validate first so an unsupported type never touches any detector code.
    if detector_type not in ("canny", "hed"):
        raise ValueError("Unsupported detector_type. Choose 'canny' or 'hed'.")

    detector = _DETECTOR_CACHE.get(detector_type)
    if detector is None:
        if detector_type == "canny":
            detector = CannyDetector()
        else:
            # HEDdetector is not in the notebook's import cell; without this local
            # import the "hed" branch fails with a NameError.
            from controlnet_aux import HEDdetector
            detector = HEDdetector.from_pretrained('lllyasviel/Annotator').to(device)
        _DETECTOR_CACHE[detector_type] = detector

    # The detectors are called on a NumPy array, as in the original code path.
    control_image_np = detector(np.array(image))
    return Image.fromarray(control_image_np)

## Inference

In [13]:
# Directory that holds test.csv and the ./test/ image folder. Every relative path used
# later in the notebook (including the submission directory) is resolved against it.
# Set the COLORIZATION_DATA_DIR environment variable to run on another machine; the
# fallback is the original hard-coded local path (tagged "pcrl" in the original cell).
DATA_DIR = os.environ.get('COLORIZATION_DATA_DIR', r'C:\Users\user\Desktop\연구\6. Colorization')
os.chdir(DATA_DIR)

test_df = pd.read_csv('./test.csv')
test_df

Unnamed: 0,ID,input_img_path,caption
0,TEST_001,./test/input_image/TEST_001.png,what is the item of furniture that is to the l...
1,TEST_002,./test/input_image/TEST_002.png,person wearing white hat. do you see any pans ...
2,TEST_003,./test/input_image/TEST_003.png,in this image i can see few persons standing. ...
3,TEST_004,./test/input_image/TEST_004.png,a girl lying on a bench. dark skin man sitting...
4,TEST_005,./test/input_image/TEST_005.png,dog standing on a car. steering wheel in the t...
...,...,...,...
195,TEST_196,./test/input_image/TEST_196.png,woman and child wearing ski goggles. two hands...
196,TEST_197,./test/input_image/TEST_197.png,the bear is white. black nose of bear. green g...
197,TEST_198,./test/input_image/TEST_198.png,man wearing a white shirt. what is the vehicle...
198,TEST_199,./test/input_image/TEST_199.png,set of wedding invitations and cards laid out ...


In [14]:
# Colorize every test image with the ControlNet pipeline; results are kept in memory
# and written to disk later, in the submission section.
out_imgs = []
out_img_names = []
for img_id, img_path, caption in zip(test_df['ID'], test_df['input_img_path'], test_df['caption']):
    # Open inside a context manager so the file handle is released immediately;
    # convert() returns an independent in-memory copy that outlives the handle.
    with Image.open(img_path) as src_img:
        input_img = src_img.convert("RGB")

    control_image = preprocess_for_controlnet(input_img, detector_type="canny")

    # NOTE: the fixed prefix counts against CLIP's 77-token limit, so the tail of long
    # captions is truncated (see the warnings in this cell's output).
    full_prompt = f"realistic, high quality, detailed, Do not change the structure. Only Colorize. {caption}"
    output_img = pipe(
            prompt=full_prompt,
            image=control_image, # image preprocessed for ControlNet
            guidance_scale=7.5, # fidelity to the text prompt
            num_inference_steps=50, # number of generation steps
        ).images[0]

    out_imgs.append(output_img)
    out_img_names.append(img_id)
print('✅ Test 데이터셋에 대한 모든 이미지 생성 완료.')

Token indices sequence length is longer than the specified maximum sequence length for this model (78 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['.']
100%|██████████| 50/50 [00:06<00:00,  8.32it/s]
100%|██████████| 50/50 [00:05<00:00,  9.22it/s]
100%|██████████| 50/50 [00:05<00:00,  9.23it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['bench .']
100%|██████████| 50/50 [00:05<00:00,  9.20it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['light of an suv .']
100%|██████████| 50/50 [00:05<00:00,  9.10it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['. a light blue t - shirt .']
100%|██████████| 50/50 [00:05<00:00,  9.01it/s]
100%|██████████| 50/50 [00:05<00:00,  9.11it/s]
T

✅ Test 데이터셋에 대한 모든 이미지 생성 완료.


## Submission

In [15]:
# Create the directory that will hold the inference results
os.makedirs(CFG['SUB_DIR'], exist_ok=True)

In [16]:
# **Important** Embedding-extraction model used to evaluate the inferred images
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms("ViT-L-14", pretrained="openai") # The model name must match exactly.

In [17]:
# Move the CLIP model to the selected device and switch it to inference (eval) mode.
clip_model.to(device)
clip_model.eval()

# For the evaluation submission, extract a ViT-L-14 embedding (feature) from every generated image.
feat_imgs = []
for output_img, img_id in tqdm(zip(out_imgs, out_img_names), total=len(out_imgs)):
    # Save the generated image next to the embedding CSV.
    path_out_img = CFG['SUB_DIR'] + '/' + img_id + '.png'
    output_img.save(path_out_img)

    # Build the evaluation embedding on the same device as the model
    # (the original hard-coded .cuda(), which breaks the CPU fallback).
    clip_input = clip_preprocess(output_img).unsqueeze(0).to(device)
    with torch.no_grad():
        feat_img = clip_model.encode_image(clip_input)
        feat_img /= feat_img.norm(dim=-1, keepdim=True) # L2 normalisation is required

    # Flatten to a 1-D NumPy vector on the CPU.
    feat_img = feat_img.detach().cpu().numpy().reshape(-1)
    feat_imgs.append(feat_img)

200it [00:14, 13.35it/s]


In [18]:
# Turn the collected feature vectors into a 2-D array, one row per image.
feat_imgs = np.array(feat_imgs)

# One column per embedding dimension: vec_0, vec_1, ...
n_dims = feat_imgs.shape[1]
vec_columns = [f'vec_{i}' for i in range(n_dims)]

# Assemble the submission table and put the image IDs in the first column.
feat_submission = pd.DataFrame(data=feat_imgs, columns=vec_columns)
feat_submission.insert(loc=0, column='ID', value=out_img_names)

In [19]:
# Write the embedding table into the submission directory, without the DataFrame index.
feat_submission.to_csv(CFG['SUB_DIR']+'/embed_submission.csv', index=False)

## 리더보드 제출을 위한 ZIP 파일 생성

In [20]:
# Path of the final submission archive (ZIP).
# The archive must be flat: it may not contain any directories (folders).
zip_path = './submission.zip'

# Create the zip file
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
    for entry_name in os.listdir(CFG['SUB_DIR']):
        # Skip hidden entries
        if entry_name.startswith('.'):
            continue

        # Skip anything that is not a regular file
        entry_path = os.path.join(CFG['SUB_DIR'], entry_name)
        if not os.path.isfile(entry_path):
            continue

        # Store under the bare file name so the archive has no folder prefix
        archive.write(entry_path, arcname=entry_name)

print(f"✅ 압축 완료: {zip_path}")

✅ 압축 완료: ./submission.zip
