In [None]:
!pip install gradio faiss-cpu transformers torch pillow


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.meta

## 경로 및 데이터 로드

In [None]:
import numpy as np
import json

# Asset locations (all under the demo folder on the mounted Google Drive).
root_dir = "/content/drive/MyDrive/aiku-pokemon-demo"
image_folder = f"{root_dir}/aiku-pokemon/pokemonimage"
metadata_path = f"{root_dir}/aiku-pokemon/pokemon_metadata_full.json"
sbert_emb_path = f"{root_dir}/pokemon-embedding-1/krsbert_text_embeddings.npy"
clip_emb_path  = f"{root_dir}/pokemon-embedding-1/clip_image_embeddings.npy"
sbert_index_path = f"{root_dir}/pokemon-embedding-1/sbert.index"
clip_index_path  = f"{root_dir}/pokemon-embedding-1/clip.index"

# Metadata: iterated as a sequence of entries, re-keyed by each entry's 'id'.
with open(metadata_path, encoding="utf-8") as meta_file:
    metadata = json.load(meta_file)
id2meta = dict((entry['id'], entry) for entry in metadata)

# Precomputed embeddings: each .npy file wraps a single pickled mapping (id -> vector),
# which `.item()` unwraps.
# SECURITY NOTE: allow_pickle=True executes arbitrary code embedded in the file —
# only load .npy files that you produced yourself.
sbert_text_embeddings = np.load(sbert_emb_path, allow_pickle=True).item()
clip_img_embeddings   = np.load(clip_emb_path, allow_pickle=True).item()

# Position -> id lookup table, in the key order of the SBERT embedding mapping.
id_list = list(sbert_text_embeddings)


## 모델 로딩 (한국어 SBERT/CLIP)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel, AutoProcessor, AutoModelForZeroShotImageClassification

# Hugging Face Hub checkpoints used by the demo.
SBERT_MODEL_ID = "snunlp/KR-SBERT-V40K-klueNLI-augSTS"
CLIP_MODEL_ID = "Bingsu/clip-vit-large-patch14-ko"

# Korean SBERT: tokenizer + encoder used for text-to-text matching.
sbert_tokenizer = AutoTokenizer.from_pretrained(SBERT_MODEL_ID)
sbert_model = AutoModel.from_pretrained(SBERT_MODEL_ID)

# Korean CLIP: processor + model used for text-to-image matching.
clip_processor = AutoProcessor.from_pretrained(CLIP_MODEL_ID)
clip_model = AutoModelForZeroShotImageClassification.from_pretrained(CLIP_MODEL_ID)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/336k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/967k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/707 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/467M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.21M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/870k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.88k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

## 텍스트 임베딩 함수 구현

In [None]:
def sbert_encode(text):
    """Embed `text` with the SBERT encoder using attention-masked mean pooling.

    Args:
        text: Input passed straight to `sbert_tokenizer` (padded and truncated).

    Returns:
        numpy array holding the pooled embedding of the first sequence in the batch.
    """
    tokens = sbert_tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        hidden = sbert_model(**tokens).last_hidden_state

    # Broadcast the attention mask over the hidden dimension so padded positions
    # contribute nothing to the sum.
    weights = tokens['attention_mask'].unsqueeze(-1).expand(hidden.size()).float()
    token_sum = (hidden * weights).sum(dim=1)
    # Clamp the token count to avoid dividing by zero on a fully masked row.
    token_count = weights.sum(dim=1).clamp(min=1e-9)
    return (token_sum / token_count)[0].cpu().numpy()

def clip_text_encode(text):
    """Embed `text` with the CLIP text encoder.

    The query is truncated before encoding (as `sbert_encode` already does), so a long
    user input cannot exceed the text encoder's position-embedding table and raise
    inside the model.

    Args:
        text: A single query string.

    Returns:
        numpy array holding the CLIP text feature vector for `text`.
    """
    # Prefer the limit declared by the loaded model; when the config does not expose
    # it, max_length=None lets the tokenizer fall back to its own model_max_length.
    text_cfg = getattr(clip_model.config, "text_config", None)
    max_len = getattr(text_cfg, "max_position_embeddings", None)

    inputs = clip_processor(
        text=[text],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_len,
    )
    with torch.no_grad():
        text_emb = clip_model.get_text_features(**inputs)
    return text_emb[0].cpu().numpy()


## FAISS 인덱스 불러오기

In [None]:
import faiss

# Load the prebuilt FAISS indexes from disk.
# NOTE(review): pokemon_search maps result rows of BOTH indexes to ids through
# `id_list` — presumably both were built in that order; confirm against the
# notebook that created them.
sbert_index = faiss.read_index(sbert_index_path)
clip_index  = faiss.read_index(clip_index_path)

## 검색 함수 (Gradio용)

In [None]:
from PIL import Image

def _unit_normalize(vec):
    """Scale `vec` to unit L2 norm; a zero vector is returned unchanged (avoids NaN)."""
    norm = np.linalg.norm(vec)
    return vec / norm if norm > 0 else vec


def _load_pokemon_image(pid):
    """Load `{image_folder}/{pid}.png` as RGBA, or a transparent placeholder on failure."""
    img_path = f"{image_folder}/{pid}.png"
    try:
        # The context manager releases the file handle; convert() returns a new image
        # that stays valid after the file is closed.
        with Image.open(img_path) as src:
            return src.convert("RGBA")
    except Exception:
        # Best-effort: a missing or unreadable image must not break the search result.
        # (Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.)
        return Image.new("RGBA", (256, 256), (255, 255, 255, 0))


def pokemon_search(user_query, topn=5, alpha=0.3, beta=0.7, k=50):
    """Search Pokémon by free-text description, combining SBERT and CLIP similarity.

    Candidates are the union of the top-`k` hits from the SBERT (text) index and the
    CLIP (image) index; each candidate is re-scored as
    `alpha * sbert_score + beta * clip_score` and the best `topn` are returned.

    Args:
        user_query: Description typed by the user. Blank/None yields empty results.
        topn: Number of results to return.
        alpha: Weight of the SBERT text-to-text score.
        beta: Weight of the CLIP text-to-image score.
        k: Number of nearest neighbours fetched from each FAISS index.

    Returns:
        Tuple `(images, names, descriptions)`: a list of RGBA PIL images, a
        newline-joined string of "name (id) - score" lines, and a blank-line-joined
        string of descriptions.

    Notes:
        - Assumes rows of both FAISS indexes follow `id_list` order — TODO confirm.
        - Scores are dot products of the normalized query with the stored vectors;
          they are cosine similarities only if the stored vectors are unit-norm —
          TODO confirm.
    """
    if not user_query or not user_query.strip():
        return [], "", ""

    # 1-2. Encode the query with both models and L2-normalize.
    sbert_query_emb = _unit_normalize(sbert_encode(user_query))
    clip_query_emb = _unit_normalize(clip_text_encode(user_query))

    # 3. Top-k candidates from each index.
    _, sbert_top_idx = sbert_index.search(sbert_query_emb[np.newaxis, :].astype(np.float32), k)
    _, clip_top_idx = clip_index.search(clip_query_emb[np.newaxis, :].astype(np.float32), k)
    # FAISS pads missing neighbours with -1 when the index holds fewer than k vectors;
    # without this filter id_list[-1] would silently inject the last Pokémon.
    candidate_idx = {int(i) for i in sbert_top_idx[0] if i >= 0}
    candidate_idx |= {int(i) for i in clip_top_idx[0] if i >= 0}
    candidate_ids = [id_list[idx] for idx in sorted(candidate_idx)]

    # 4. Weighted combination of both similarities for every candidate.
    final_scores = {
        pid: alpha * np.dot(sbert_query_emb, sbert_text_embeddings[pid])
        + beta * np.dot(clip_query_emb, clip_img_embeddings[pid])
        for pid in candidate_ids
    }

    # 5. Keep the top N and assemble the outputs for the UI.
    topn_results = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)[:topn]
    images, names, descs = [], [], []
    for pid, score in topn_results:
        info = id2meta[pid]
        images.append(_load_pokemon_image(pid))
        names.append(f"{info['name']} ({pid}) - 점수: {score:.4f}")
        descs.append(info.get("full_description_ko", ""))
    return images, "\n".join(names), "\n\n".join(descs)


## Gradio 인터페이스 정의 및 실행

In [None]:
import gradio as gr

# Monochrome base theme with a pink accent colour.
custom_theme = gr.themes.Monochrome(
    primary_hue="pink"
)

# Single-textbox UI: the query goes to pokemon_search (topn/alpha/beta keep their
# defaults) and its three return values fill the gallery and the two text boxes.
demo = gr.Interface(
    fn=pokemon_search,
    inputs=gr.Textbox(label="포켓몬을 묘사해보세요!", placeholder="예: 보라색 풍선, 솜사탕, 분홍색..."),
    outputs=[
        gr.Gallery(label="Top 5 포켓몬 이미지", columns=5, height="auto"),
        gr.Textbox(label="포켓몬 이름 + 점수", lines=7),
        gr.Textbox(label="포켓몬 설명", lines=7),
    ],
    title="포켓몬 외형 설명 기반 검색기",
    description="포켓몬의 외형/특징을 한국어로 입력하면, 가장 비슷한 포켓몬 Top 5를 이미지와 함께 보여줍니다.",
    # The theme was previously built but never applied to the interface.
    theme=custom_theme,
)

# share=True publishes a temporary public URL — anyone with the link can query the demo.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://12c9b43468d498776c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


