# 모델 불러오기

In [1]:

# Print a fixed greeting (presumably a quick check that the kernel runs cells).
print("hello world")

hello world


In [2]:
import onnxruntime as ort

# Load the ONNX model on the CPU execution provider (the path needs to be adjusted).
session = ort.InferenceSession("static_model.onnx", providers=["CPUExecutionProvider"])

# Look up the name of the model's first input (it can differ from model to model).
input_name = session.get_inputs()[0].name
print("Input name:", input_name)


Input name: pixel_values


참고 자료 (PyTorch CLIP → ONNX 변환으로 추론 속도 높이기): https://www.kaggle.com/code/ivanpan/pytorch-clip-onnx-to-speed-up-inference

## 이미지 전처리 함수 (float32 변환, [-1, 1] 정규화)

In [13]:
import numpy as np
from PIL import Image
def preprocess_image(path, size=(224, 224), mean=0.5, std=0.5):
    """Load an image file and turn it into a normalised NCHW float32 batch.

    The image is converted to RGB, resized to ``size``, scaled to [0, 1],
    normalised as ``(x - mean) / std`` and rearranged from HWC into a
    single-item CHW batch. With the default ``mean``/``std`` of 0.5 the
    values end up in [-1, 1].

    Args:
        path: Location of the image file, passed to ``Image.open``.
        size: Target ``(width, height)`` handed to ``Image.resize``.
        mean: Scalar or per-channel (length-3) value subtracted after scaling.
        std: Scalar or per-channel (length-3) divisor applied after ``mean``.

    Returns:
        ``np.ndarray`` of dtype float32 with shape ``(1, 3, height, width)``.
    """
    # The context manager releases the underlying file as soon as the pixels
    # have been read instead of leaving that to garbage collection.
    with Image.open(path) as img:
        resized = img.convert("RGB").resize(size)
        pixels = np.array(resized).astype(np.float32) / 255.0   # HWC, [0, 1]

    # Keep the statistics in float32 so the result is not promoted to float64.
    mean_arr = np.asarray(mean, dtype=np.float32)
    std_arr = np.asarray(std, dtype=np.float32)
    normalised = (pixels - mean_arr) / std_arr                  # broadcasts over the channel axis

    chw = np.transpose(normalised, (2, 0, 1))                   # (HWC) -> (CHW)
    return np.expand_dims(chw, axis=0)                          # (1, C, H, W)

## CLIP 임베딩 추출 함수

In [4]:
# 3. CLIP embedding extraction
def get_clip_embedding(img_tensor):
    """Run the ONNX session on one preprocessed tensor and return its first output.

    ``img_tensor`` is fed under the module-level ``input_name`` and every
    length-1 axis of the first output is squeezed away.
    """
    feed = {input_name: img_tensor}
    outputs = session.run(None, feed)
    first_output = outputs[0]
    return first_output.squeeze()  # shape noted in the notebook: (512,) or (1024,)

## 이미지 데이터 셋 로드

In [5]:
import os
# Show the current working directory and whether the relative path
# "sample_image_set" exists when resolved from it.
print("현재 작업 디렉토리:", os.getcwd())
print("존재 여부:", os.path.exists("sample_image_set"))

현재 작업 디렉토리: c:\Users\hnn07\Documents\clip_embeddings
존재 여부: True


In [None]:
import os
import glob
IMAGE_DIR = "sample_image_set/"

image_paths = []
labels = []

# os.listdir() and glob.glob() return entries in arbitrary order, so both are
# sorted to keep the sample order (and everything derived from it) reproducible.
for class_dir in sorted(os.listdir(IMAGE_DIR)):
    class_path = os.path.join(IMAGE_DIR, class_dir)
    if not os.path.isdir(class_path):
        continue  # only sub-folders define classes

    # Escape the folder part so names containing glob metacharacters such as
    # "[" or "*" are matched literally.
    pattern = os.path.join(glob.escape(class_path), "*.*")
    for img_path in sorted(glob.glob(pattern)):
        if not os.path.isfile(img_path):
            continue  # "*.*" also matches directories whose name contains a dot
        image_paths.append(img_path)
        labels.append(class_dir)  # the folder name doubles as the class name

## 임베딩 추출 함수

In [7]:
def get_clip_embedding(img_tensor):
    """Return the squeezed first output of ``session`` for ``img_tensor``.

    Behaves the same as the definition in the earlier cell; because this one
    is defined later, it is the one in effect when cells run top to bottom.
    """
    # Feed the tensor under the model's input name and take the first output.
    return session.run(None, {input_name: img_tensor})[0].squeeze()  # (512,) or (1024,)


## 이미지별 임베딩 추출

In [14]:
# 5. Extract one embedding per image
embeddings = []
valid_labels = []


for path, label in zip(image_paths, labels):
    try:
        img_tensor = preprocess_image(path)
        emb = get_clip_embedding(img_tensor)
    except Exception as e:
        # Best effort: report the failing file and carry on with the next one.
        print(f"[ERROR] {path}: {e}")
    else:
        # Record the label only for images that produced an embedding, so the
        # two lists stay aligned index by index.
        embeddings.append(emb)
        valid_labels.append(label)

# Convert both lists to arrays for the downstream cells.
embeddings = np.array(embeddings)
valid_labels = np.array(valid_labels)

In [24]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
# t-SNE requires perplexity < n_samples, so cap the value of 30 for small
# image sets and fail early with a clear message when there is too little data.
n_samples = len(embeddings)
if n_samples < 2:
    raise ValueError(f"t-SNE needs at least 2 embeddings, got {n_samples}")
tsne = TSNE(n_components=2, perplexity=min(30, n_samples - 1), random_state=42)
reduced = tsne.fit_transform(embeddings)

# Visualisation
plt.figure(figsize=(12, 8))
# Sorted so every class keeps the same index (and colour) on every run;
# plain set iteration order is not stable for strings.
unique_labels = sorted(set(valid_labels))
color_map = {label: idx for idx, label in enumerate(unique_labels)}
palette = plt.cm.tab10
# tab10 only has palette.N entries, so class indices wrap around beyond that.
colors = [color_map[lbl] % palette.N for lbl in valid_labels]

# Pin vmin/vmax so class index i always maps to palette entry i. Without them
# scatter rescales the indices to the data range, and the point colours only
# agree with the legend when there are exactly palette.N classes.
scatter = plt.scatter(reduced[:, 0], reduced[:, 1], c=colors, cmap=palette,
                      vmin=-0.5, vmax=palette.N - 0.5, s=40)
plt.title("t-SNE of CLIP Image Embeddings (float32 model)", fontsize=14)
plt.colorbar(scatter, ticks=range(min(len(unique_labels), palette.N)), label="Class")
plt.grid(True)

# Legend: one marker per class, looked up by integer index so it uses exactly
# the palette entry the scatter points use.
handles = [plt.Line2D([], [], marker='o', linestyle='', label=lbl,
                      color=palette(color_map[lbl] % palette.N)) for lbl in unique_labels]
plt.legend(handles=handles, title="Class")
plt.show()

ModuleNotFoundError: No module named 'matplotlib'