# Запуск сервиса подбора аналогов из Jupyter

Ниже — ячейки, которые:
- устанавливают зависимости,
- поднимают FastAPI-сервис с endpoint `/predict`,
- запускают Gradio UI для проверки,
- содержат пример запроса через requests.


In [None]:
%%bash
# Print interpreter and pip versions first so a failed install can be diagnosed.
python -V
pip -V
# Pinned versions keep the notebook reproducible.
# NOTE(review): `pip` here is whatever is first on the shell PATH; confirm it targets the kernel's environment.
pip install --quiet fastapi==0.115.2 uvicorn==0.30.6 pydantic==2.9.2 httpx==0.27.2 tenacity==9.0.0 orjson==3.10.7 gradio==3.50.2 scikit-learn==1.3.2 pandas==2.2.3 numpy==1.26.4 openpyxl==3.1.5


In [None]:
import threading, time, os, json
from typing import List, Dict, Any

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn

# ---- Minimal services (extractor + matcher + catalog loader) ----

# Request body of POST /predict. All three fields are required strings; the
# handler joins them into one text that the extractor scans for known tokens.
class PredictRequest(BaseModel):
    name: str  # free-text product name
    manufacturer: str  # manufacturer / brand name
    article: str  # article (SKU) of the product to find analogs for

# Response body of POST /predict.
class PredictResponse(BaseModel):
    analogs: List[str]  # catalog article ids, best match first

# Weight of every parameter that takes part in scoring; a larger weight means a
# mismatch on that parameter costs more.
CRITICAL_PARAMETERS: Dict[str, float] = {
    "напряжение": 3.0,
    "ток": 3.0,
    "частота": 1.0,
    "класс_защиты": 2.0,
}

def calculate_match_score(target_char: Dict[str, Any], candidate_char: Dict[str, Any]) -> float:
    """Return the weighted share of critical parameters on which both sides agree.

    A parameter counts as matched only when it is present in both mappings and
    the two values are equal after ``str()``, ``strip()`` and ``lower()``.
    The denominator is the sum of all weights in ``CRITICAL_PARAMETERS``, so a
    parameter missing from either side lowers the score.

    Returns:
        A float in ``[0.0, 1.0]``; ``0.0`` when the total weight is not positive.
    """
    def _canonical(value: Any) -> str:
        return str(value).strip().lower()

    matched_weight = 0.0
    total_weight = 0.0
    for name, weight in CRITICAL_PARAMETERS.items():
        total_weight += weight
        if name not in target_char or name not in candidate_char:
            continue
        if _canonical(target_char[name]) == _canonical(candidate_char[name]):
            matched_weight += weight

    if total_weight > 0:
        return matched_weight / total_weight
    return 0.0

# Very simple in-notebook catalog
# Each entry has an "article" id and a "characteristics" mapping; the /predict
# handler scores these characteristics against the extracted target ones.
CATALOG: List[Dict[str, Any]] = [
    {"article": "SE-001", "characteristics": {"напряжение": "220В", "ток": "10А", "класс_защиты": "IP20"}},
    {"article": "SE-002", "characteristics": {"напряжение": "380В", "ток": "16А", "класс_защиты": "IP54"}},
    {"article": "SE-003", "characteristics": {"напряжение": "220В", "ток": "16А", "класс_защиты": "IP20"}},
]

class CharacteristicExtractor:
    """Heuristic stand-in for a real characteristic extractor.

    Scans the product name, manufacturer and article for a few known tokens
    and maps them to characteristic values. ``cache`` is initialised for
    callers that may rely on the attribute, but ``extract`` neither reads nor
    writes it.
    """

    def __init__(self):
        self.cache: Dict[str, Dict[str, Any]] = {}

    def extract(self, product_name: str, manufacturer: str, article: str) -> Dict[str, Any]:
        """Return the characteristics recognised in the given product fields.

        Args:
            product_name: free-text product name.
            manufacturer: manufacturer name.
            article: product article.

        Returns:
            Mapping of parameter name to value; empty when nothing is
            recognised. When several values of one parameter are present,
            the rule listed last wins.
        """
        # Minimal heuristic extractor (stub). Real impl would call GigaChat.
        import re  # local import keeps the cell free of extra top-level imports

        text = f"{product_name} {manufacturer} {article}".lower()

        # Numeric tokens must not be part of a longer number: a bare substring
        # test would read "220" out of article "A2201" or "10a" out of "110A".
        # The amp suffix accepts Latin "a" and Cyrillic "а" (U+0430).
        rules = (
            (r"(?<!\d)220(?!\d)", "напряжение", "220В"),
            (r"(?<!\d)380(?!\d)", "напряжение", "380В"),
            (r"(?<!\d)10[a\u0430]", "ток", "10А"),
            (r"(?<!\d)16[a\u0430]", "ток", "16А"),
            (r"ip54", "класс_защиты", "IP54"),
            (r"ip20", "класс_защиты", "IP20"),
        )

        char = {}
        for pattern, param, value in rules:
            if re.search(pattern, text):
                char[param] = value
        return char

# Single extractor instance shared by all requests.
extractor = CharacteristicExtractor()

# Application object; the route decorators that follow register endpoints on it.
app = FastAPI(title="Notebook Analog Matcher")

@app.get("/health")
def health():
    # Liveness probe: returns a constant payload and touches no other state.
    return {"status": "ok"}

@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest) -> PredictResponse:
    # Reject requests in which any of the three fields is empty.
    if not all((req.name, req.manufacturer, req.article)):
        raise HTTPException(status_code=400, detail="Missing required fields")

    wanted = extractor.extract(req.name, req.manufacturer, req.article)

    # Rank the whole catalog by match score, best first. The sort is stable, so
    # entries with equal scores keep their catalog order.
    ranked = sorted(
        CATALOG,
        key=lambda item: calculate_match_score(wanted, item.get("characteristics", {})),
        reverse=True,
    )

    # Only the six best article ids are returned.
    return PredictResponse(analogs=[item["article"] for item in ranked[:6]])

# ---- Background server thread ----

def run_server():
    """Run uvicorn for ``app`` on port 8000; blocks until the server stops.

    Host ``0.0.0.0`` makes the server listen on every network interface,
    not only on localhost.
    """
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="warning")

def _wait_for_server(thread, host, port, timeout=10.0):
    """Return True once ``host:port`` accepts TCP connections.

    Gives up and returns False when ``thread`` has died or ``timeout``
    seconds have passed.
    """
    import socket  # local import: only this helper needs it

    deadline = time.monotonic() + timeout
    while thread.is_alive() and time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=0.5):
                return True
        except OSError:
            time.sleep(0.1)
    return False


# Start the server once per kernel session: re-running the cell must not spawn
# a second uvicorn instance on the same port.
if "_server_thread" not in globals():
    _server_thread = threading.Thread(target=run_server, daemon=True)
    _server_thread.start()

# Poll for readiness instead of sleeping a fixed second, and report success
# only when the server is actually reachable.
if _wait_for_server(_server_thread, "127.0.0.1", 8000):
    print("FastAPI server started on http://localhost:8000")
else:
    print("FastAPI server failed to start on http://localhost:8000 (is port 8000 already in use?)")


In [None]:
import requests

# Smoke test: send one sample product to the local /predict endpoint.
url = "http://localhost:8000/predict"
payload = {
    "name": "Автоматический выключатель 220V 16A IP20",
    "manufacturer": "EKF",
    "article": "A1234"
}

# The timeout keeps the cell from hanging when the server is unreachable.
r = requests.post(url, json=payload, timeout=10)
# Last expression of the cell: the notebook shows (status code, decoded JSON body).
r.status_code, r.json()


In [None]:
import gradio as gr


def client_predict(name, manufacturer, article):
    """Call the local ``/predict`` endpoint and format the answer for the UI.

    Args:
        name: product name.
        manufacturer: manufacturer name.
        article: product article.

    Returns:
        The returned article ids joined with newlines, or a string starting
        with ``"Ошибка: "`` when the request fails or the server answers with
        an error status.
    """
    import requests
    url = "http://localhost:8000/predict"
    payload = {"name": name, "manufacturer": manufacturer, "article": article}
    try:
        r = requests.post(url, json=payload, timeout=10)
        # An error response (e.g. 400 for an empty field) has no "analogs" key;
        # show its status and body instead of an empty result.
        if not r.ok:
            return f"Ошибка: HTTP {r.status_code}: {r.text}"
        data = r.json()
        return "\n".join(data.get("analogs", []))
    except Exception as e:
        # UI boundary: show any failure as text rather than crash the callback.
        return f"Ошибка: {e}"

# Small form for trying /predict by hand: three inputs, one button, one output.
with gr.Blocks(title="Analog Matcher — Notebook UI") as demo:
    gr.Markdown("### Поиск аналогов через локальный API")
    with gr.Row():
        # Defaults mirror the sample payload used in the requests example.
        name = gr.Textbox(label="Наименование", value="Автоматический выключатель 220V 16A IP20")
        manufacturer = gr.Textbox(label="Производитель", value="EKF")
        article = gr.Textbox(label="Артикул", value="A1234")
    btn = gr.Button("Выполнить /predict")
    out = gr.Textbox(label="Ответ (артикулы)")
    # The button passes the three textbox values to client_predict and writes its result to `out`.
    btn.click(client_predict, inputs=[name, manufacturer, article], outputs=out)

# share=False: no public link is requested.
demo.launch(share=False)


In [2]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Caption ``image`` with BLIP.

    Preprocesses the image into PyTorch tensors on ``device``, runs
    ``blip_model.generate`` without gradient tracking and returns the first
    generated sequence decoded with special tokens removed.
    """
    encoded = blip_processor(image, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        generated = blip_model.generate(**encoded)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with ``clip_model.get_text_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(text=[text], return_tensors="pt", padding=True)
    batch = batch.to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**batch)
    vectors = text_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_clip_image_embedding(image):
    """Embed ``image`` with ``clip_model.get_image_features``.

    Returns the features as a float32 NumPy array to which
    ``faiss.normalize_L2`` has been applied.
    """
    batch = clip_processor(images=image, return_tensors="pt")
    batch = batch.to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**batch)
    vectors = image_features.cpu().numpy().astype("float32")
    # The return value is ignored: the array is normalised in place.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Search ``index`` for ``embedding`` and fetch the matching dataset images.

    Args:
        embedding: query passed unchanged to ``index.search``.
        index: object whose ``search(embedding, top_k)`` returns a
            ``(distances, ids)`` pair; only the first row of ``ids`` is used.
        top_k: number of neighbours to request.

    Returns:
        List of ``(image, caption)`` tuples, the caption being ``"ID: <row>"``.
        Ids that cannot be resolved to a dataset row are skipped.
    """
    _distances, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS reports a missing neighbour as -1. A negative index would wrap
        # around to the end of the dataset and return an unrelated image.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
            results.append((item["image"], f"ID: {row}"))
        except IndexError:
            # The id lies beyond the dataset; skip it.
            continue
    return results

def search_similar_images(image: Image.Image):
    """Caption ``image`` and look up similar entries by caption and by image.

    Returns:
        ``(caption, text_results, image_results)``, in the order of the three
        interface outputs. When ``image`` is ``None`` (the form was submitted
        without an upload) an empty caption and two empty lists are returned
        instead of failing inside preprocessing.
    """
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# One-function UI: a PIL image in; a caption and two galleries out, in the
# order returned by search_similar_images.
demo = gr.Interface(
    fn=search_similar_images,
    inputs=gr.Image(label="Загрузите изображение", type="pil"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(label="🔍 Похожие по описанию (CLIP)", height="auto", columns=2),
        gr.Gallery(label="🎨 Похожие по изображению (CLIP)", height="auto", columns=2)
    ],
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description="Загрузите изображение. Модель BLIP сгенерирует описание, а CLIP найдёт похожие картины по тексту и изображению."
)

# Serve the interface with default launch settings.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# Training split of WikiArt; search hits are turned into images by row index.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")
# Prefer CUDA, then Apple MPS, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# BLIP produces the caption for an uploaded image.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device).eval()

# CLIP embeds both the caption and the image for similarity search.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
# NOTE(review): presumably index ids equal row positions in wikiart_dataset; confirm against the index builder.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Generate a text caption for ``image`` with the BLIP model.

    The image is preprocessed by ``blip_processor``, token ids are produced
    by ``blip_model.generate`` with gradient tracking disabled, and the first
    generated sequence is decoded with special tokens stripped.

    Args:
        image: Image to describe.

    Returns:
        The decoded caption.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        generated = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with the CLIP text encoder for a FAISS lookup.

    Args:
        text: A single query string.

    Returns:
        A float32 NumPy array of the text features, L2-normalized in place
        by ``faiss.normalize_L2`` (presumably one row for the single input
        text -- confirm against the index dimensionality).
    """
    # truncation=True caps the token sequence at the tokenizer's maximum
    # length; without it, text longer than CLIP's context window makes the
    # text encoder fail instead of returning an embedding.
    inputs = clip_processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        features = clip_model.get_text_features(**inputs)
    features = features.cpu().numpy().astype("float32")
    # normalize_L2 works in place on the array.
    faiss.normalize_L2(features)
    return features

def get_clip_image_embedding(image):
    """Embed ``image`` with the CLIP image encoder for a FAISS lookup.

    Args:
        image: Image input accepted by ``clip_processor``.

    Returns:
        A float32 NumPy array of the image features, L2-normalized in place
        by ``faiss.normalize_L2``.
    """
    pixel_inputs = clip_processor(images=image, return_tensors="pt")
    pixel_inputs = pixel_inputs.to(device)
    with torch.no_grad():
        raw_features = clip_model.get_image_features(**pixel_inputs)
    vectors = raw_features.cpu().numpy().astype("float32")
    # normalize_L2 mutates its argument, so the same array is returned.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Look up the nearest neighbours of ``embedding`` and fetch their images.

    Args:
        embedding: Query vectors passed to ``index.search``; only the hits
            for the first query row are used.
        index: Object exposing ``search(embedding, top_k)`` that returns
            ``(distances, ids)``, e.g. a FAISS index.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(image, caption)`` tuples in the order returned by the
        index, where the caption is ``"ID: <row>"``.
    """
    _, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS pads the id array with -1 when it finds fewer than top_k
        # neighbours. A negative position does not raise IndexError on the
        # dataset; it is treated as an offset from the end and would return
        # an unrelated row, so drop it explicitly.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
        except IndexError:
            # The index references a row the loaded dataset does not have.
            continue
        results.append((item["image"], f"ID: {row}"))
    return results

def search_similar_images(image: "Image.Image"):
    """Caption ``image`` and find similar artworks by caption and by image.

    Args:
        image: Query image, or ``None`` when the Gradio form is submitted
            without an upload.

    Returns:
        A ``(caption, text_results, image_results)`` tuple: the generated
        caption, the matches for the caption embedding in ``text_index``,
        and the matches for the image embedding in ``image_index``. When
        ``image`` is ``None`` the result is ``("", [], [])``.
    """
    # Gradio passes None when no image was uploaded; return empty outputs
    # instead of failing inside the BLIP/CLIP preprocessing.
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# Gradio UI: one image input; the outputs are the BLIP caption and two
# galleries holding the caption-based and image-based search results.
demo = gr.Interface(
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description=(
        "Загрузите изображение. Модель BLIP сгенерирует описание, "
        "а CLIP найдёт похожие картины по тексту и изображению."
    ),
    fn=search_similar_images,
    inputs=gr.Image(type="pil", label="Загрузите изображение"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(columns=2, height="auto", label="🔍 Похожие по описанию (CLIP)"),
        gr.Gallery(columns=2, height="auto", label="🎨 Похожие по изображению (CLIP)"),
    ],
)

# Start serving the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# WikiArt training split; search hits are looked up in it by row position.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# BLIP captioning model and its processor; the model runs in eval mode.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
blip_model = blip_model.to(device).eval()

# CLIP model and its processor, used for both text and image embeddings.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Generate a text caption for ``image`` with the BLIP model.

    The image is preprocessed by ``blip_processor``, token ids are produced
    by ``blip_model.generate`` with gradient tracking disabled, and the first
    generated sequence is decoded with special tokens stripped.

    Args:
        image: Image to describe.

    Returns:
        The decoded caption.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        generated = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with the CLIP text encoder for a FAISS lookup.

    Args:
        text: A single query string.

    Returns:
        A float32 NumPy array of the text features, L2-normalized in place
        by ``faiss.normalize_L2`` (presumably one row for the single input
        text -- confirm against the index dimensionality).
    """
    # truncation=True caps the token sequence at the tokenizer's maximum
    # length; without it, text longer than CLIP's context window makes the
    # text encoder fail instead of returning an embedding.
    inputs = clip_processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        features = clip_model.get_text_features(**inputs)
    features = features.cpu().numpy().astype("float32")
    # normalize_L2 works in place on the array.
    faiss.normalize_L2(features)
    return features

def get_clip_image_embedding(image):
    """Embed ``image`` with the CLIP image encoder for a FAISS lookup.

    Args:
        image: Image input accepted by ``clip_processor``.

    Returns:
        A float32 NumPy array of the image features, L2-normalized in place
        by ``faiss.normalize_L2``.
    """
    pixel_inputs = clip_processor(images=image, return_tensors="pt")
    pixel_inputs = pixel_inputs.to(device)
    with torch.no_grad():
        raw_features = clip_model.get_image_features(**pixel_inputs)
    vectors = raw_features.cpu().numpy().astype("float32")
    # normalize_L2 mutates its argument, so the same array is returned.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Look up the nearest neighbours of ``embedding`` and fetch their images.

    Args:
        embedding: Query vectors passed to ``index.search``; only the hits
            for the first query row are used.
        index: Object exposing ``search(embedding, top_k)`` that returns
            ``(distances, ids)``, e.g. a FAISS index.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(image, caption)`` tuples in the order returned by the
        index, where the caption is ``"ID: <row>"``.
    """
    _, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS pads the id array with -1 when it finds fewer than top_k
        # neighbours. A negative position does not raise IndexError on the
        # dataset; it is treated as an offset from the end and would return
        # an unrelated row, so drop it explicitly.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
        except IndexError:
            # The index references a row the loaded dataset does not have.
            continue
        results.append((item["image"], f"ID: {row}"))
    return results

def search_similar_images(image: "Image.Image"):
    """Caption ``image`` and find similar artworks by caption and by image.

    Args:
        image: Query image, or ``None`` when the Gradio form is submitted
            without an upload.

    Returns:
        A ``(caption, text_results, image_results)`` tuple: the generated
        caption, the matches for the caption embedding in ``text_index``,
        and the matches for the image embedding in ``image_index``. When
        ``image`` is ``None`` the result is ``("", [], [])``.
    """
    # Gradio passes None when no image was uploaded; return empty outputs
    # instead of failing inside the BLIP/CLIP preprocessing.
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# Gradio UI: one image input; the outputs are the BLIP caption and two
# galleries holding the caption-based and image-based search results.
demo = gr.Interface(
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description=(
        "Загрузите изображение. Модель BLIP сгенерирует описание, "
        "а CLIP найдёт похожие картины по тексту и изображению."
    ),
    fn=search_similar_images,
    inputs=gr.Image(type="pil", label="Загрузите изображение"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(columns=2, height="auto", label="🔍 Похожие по описанию (CLIP)"),
        gr.Gallery(columns=2, height="auto", label="🎨 Похожие по изображению (CLIP)"),
    ],
)

# Start serving the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# WikiArt training split; search hits are looked up in it by row position.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# BLIP captioning model and its processor; the model runs in eval mode.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
blip_model = blip_model.to(device).eval()

# CLIP model and its processor, used for both text and image embeddings.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Generate a text caption for ``image`` with the BLIP model.

    The image is preprocessed by ``blip_processor``, token ids are produced
    by ``blip_model.generate`` with gradient tracking disabled, and the first
    generated sequence is decoded with special tokens stripped.

    Args:
        image: Image to describe.

    Returns:
        The decoded caption.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        generated = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with the CLIP text encoder for a FAISS lookup.

    Args:
        text: A single query string.

    Returns:
        A float32 NumPy array of the text features, L2-normalized in place
        by ``faiss.normalize_L2`` (presumably one row for the single input
        text -- confirm against the index dimensionality).
    """
    # truncation=True caps the token sequence at the tokenizer's maximum
    # length; without it, text longer than CLIP's context window makes the
    # text encoder fail instead of returning an embedding.
    inputs = clip_processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        features = clip_model.get_text_features(**inputs)
    features = features.cpu().numpy().astype("float32")
    # normalize_L2 works in place on the array.
    faiss.normalize_L2(features)
    return features

def get_clip_image_embedding(image):
    """Embed ``image`` with the CLIP image encoder for a FAISS lookup.

    Args:
        image: Image input accepted by ``clip_processor``.

    Returns:
        A float32 NumPy array of the image features, L2-normalized in place
        by ``faiss.normalize_L2``.
    """
    pixel_inputs = clip_processor(images=image, return_tensors="pt")
    pixel_inputs = pixel_inputs.to(device)
    with torch.no_grad():
        raw_features = clip_model.get_image_features(**pixel_inputs)
    vectors = raw_features.cpu().numpy().astype("float32")
    # normalize_L2 mutates its argument, so the same array is returned.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Look up the nearest neighbours of ``embedding`` and fetch their images.

    Args:
        embedding: Query vectors passed to ``index.search``; only the hits
            for the first query row are used.
        index: Object exposing ``search(embedding, top_k)`` that returns
            ``(distances, ids)``, e.g. a FAISS index.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(image, caption)`` tuples in the order returned by the
        index, where the caption is ``"ID: <row>"``.
    """
    _, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS pads the id array with -1 when it finds fewer than top_k
        # neighbours. A negative position does not raise IndexError on the
        # dataset; it is treated as an offset from the end and would return
        # an unrelated row, so drop it explicitly.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
        except IndexError:
            # The index references a row the loaded dataset does not have.
            continue
        results.append((item["image"], f"ID: {row}"))
    return results

def search_similar_images(image: "Image.Image"):
    """Caption ``image`` and find similar artworks by caption and by image.

    Args:
        image: Query image, or ``None`` when the Gradio form is submitted
            without an upload.

    Returns:
        A ``(caption, text_results, image_results)`` tuple: the generated
        caption, the matches for the caption embedding in ``text_index``,
        and the matches for the image embedding in ``image_index``. When
        ``image`` is ``None`` the result is ``("", [], [])``.
    """
    # Gradio passes None when no image was uploaded; return empty outputs
    # instead of failing inside the BLIP/CLIP preprocessing.
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# Gradio UI: one image input; the outputs are the BLIP caption and two
# galleries holding the caption-based and image-based search results.
demo = gr.Interface(
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description=(
        "Загрузите изображение. Модель BLIP сгенерирует описание, "
        "а CLIP найдёт похожие картины по тексту и изображению."
    ),
    fn=search_similar_images,
    inputs=gr.Image(type="pil", label="Загрузите изображение"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(columns=2, height="auto", label="🔍 Похожие по описанию (CLIP)"),
        gr.Gallery(columns=2, height="auto", label="🎨 Похожие по изображению (CLIP)"),
    ],
)

# Start serving the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# WikiArt training split; search hits are looked up in it by row position.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# BLIP captioning model and its processor; the model runs in eval mode.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
blip_model = blip_model.to(device).eval()

# CLIP model and its processor, used for both text and image embeddings.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Generate a text caption for ``image`` with the BLIP model.

    The image is preprocessed by ``blip_processor``, token ids are produced
    by ``blip_model.generate`` with gradient tracking disabled, and the first
    generated sequence is decoded with special tokens stripped.

    Args:
        image: Image to describe.

    Returns:
        The decoded caption.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        generated = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with the CLIP text encoder for a FAISS lookup.

    Args:
        text: A single query string.

    Returns:
        A float32 NumPy array of the text features, L2-normalized in place
        by ``faiss.normalize_L2`` (presumably one row for the single input
        text -- confirm against the index dimensionality).
    """
    # truncation=True caps the token sequence at the tokenizer's maximum
    # length; without it, text longer than CLIP's context window makes the
    # text encoder fail instead of returning an embedding.
    inputs = clip_processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        features = clip_model.get_text_features(**inputs)
    features = features.cpu().numpy().astype("float32")
    # normalize_L2 works in place on the array.
    faiss.normalize_L2(features)
    return features

def get_clip_image_embedding(image):
    """Embed ``image`` with the CLIP image encoder for a FAISS lookup.

    Args:
        image: Image input accepted by ``clip_processor``.

    Returns:
        A float32 NumPy array of the image features, L2-normalized in place
        by ``faiss.normalize_L2``.
    """
    pixel_inputs = clip_processor(images=image, return_tensors="pt")
    pixel_inputs = pixel_inputs.to(device)
    with torch.no_grad():
        raw_features = clip_model.get_image_features(**pixel_inputs)
    vectors = raw_features.cpu().numpy().astype("float32")
    # normalize_L2 mutates its argument, so the same array is returned.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Look up the nearest neighbours of ``embedding`` and fetch their images.

    Args:
        embedding: Query vectors passed to ``index.search``; only the hits
            for the first query row are used.
        index: Object exposing ``search(embedding, top_k)`` that returns
            ``(distances, ids)``, e.g. a FAISS index.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(image, caption)`` tuples in the order returned by the
        index, where the caption is ``"ID: <row>"``.
    """
    _, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS pads the id array with -1 when it finds fewer than top_k
        # neighbours. A negative position does not raise IndexError on the
        # dataset; it is treated as an offset from the end and would return
        # an unrelated row, so drop it explicitly.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
        except IndexError:
            # The index references a row the loaded dataset does not have.
            continue
        results.append((item["image"], f"ID: {row}"))
    return results

def search_similar_images(image: "Image.Image"):
    """Caption ``image`` and find similar artworks by caption and by image.

    Args:
        image: Query image, or ``None`` when the Gradio form is submitted
            without an upload.

    Returns:
        A ``(caption, text_results, image_results)`` tuple: the generated
        caption, the matches for the caption embedding in ``text_index``,
        and the matches for the image embedding in ``image_index``. When
        ``image`` is ``None`` the result is ``("", [], [])``.
    """
    # Gradio passes None when no image was uploaded; return empty outputs
    # instead of failing inside the BLIP/CLIP preprocessing.
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# Gradio UI: one image input; the outputs are the BLIP caption and two
# galleries holding the caption-based and image-based search results.
demo = gr.Interface(
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description=(
        "Загрузите изображение. Модель BLIP сгенерирует описание, "
        "а CLIP найдёт похожие картины по тексту и изображению."
    ),
    fn=search_similar_images,
    inputs=gr.Image(type="pil", label="Загрузите изображение"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(columns=2, height="auto", label="🔍 Похожие по описанию (CLIP)"),
        gr.Gallery(columns=2, height="auto", label="🎨 Похожие по изображению (CLIP)"),
    ],
)

# Start serving the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# WikiArt training split; search hits are looked up in it by row position.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# BLIP captioning model and its processor; the model runs in eval mode.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
blip_model = blip_model.to(device).eval()

# CLIP model and its processor, used for both text and image embeddings.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Generate a text caption for ``image`` with the BLIP model.

    The image is preprocessed by ``blip_processor``, token ids are produced
    by ``blip_model.generate`` with gradient tracking disabled, and the first
    generated sequence is decoded with special tokens stripped.

    Args:
        image: Image to describe.

    Returns:
        The decoded caption.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        generated = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with the CLIP text encoder for a FAISS lookup.

    Args:
        text: A single query string.

    Returns:
        A float32 NumPy array of the text features, L2-normalized in place
        by ``faiss.normalize_L2`` (presumably one row for the single input
        text -- confirm against the index dimensionality).
    """
    # truncation=True caps the token sequence at the tokenizer's maximum
    # length; without it, text longer than CLIP's context window makes the
    # text encoder fail instead of returning an embedding.
    inputs = clip_processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        features = clip_model.get_text_features(**inputs)
    features = features.cpu().numpy().astype("float32")
    # normalize_L2 works in place on the array.
    faiss.normalize_L2(features)
    return features

def get_clip_image_embedding(image):
    """Embed ``image`` with the CLIP image encoder for a FAISS lookup.

    Args:
        image: Image input accepted by ``clip_processor``.

    Returns:
        A float32 NumPy array of the image features, L2-normalized in place
        by ``faiss.normalize_L2``.
    """
    pixel_inputs = clip_processor(images=image, return_tensors="pt")
    pixel_inputs = pixel_inputs.to(device)
    with torch.no_grad():
        raw_features = clip_model.get_image_features(**pixel_inputs)
    vectors = raw_features.cpu().numpy().astype("float32")
    # normalize_L2 mutates its argument, so the same array is returned.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Look up the nearest neighbours of ``embedding`` and fetch their images.

    Args:
        embedding: Query vectors passed to ``index.search``; only the hits
            for the first query row are used.
        index: Object exposing ``search(embedding, top_k)`` that returns
            ``(distances, ids)``, e.g. a FAISS index.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(image, caption)`` tuples in the order returned by the
        index, where the caption is ``"ID: <row>"``.
    """
    _, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS pads the id array with -1 when it finds fewer than top_k
        # neighbours. A negative position does not raise IndexError on the
        # dataset; it is treated as an offset from the end and would return
        # an unrelated row, so drop it explicitly.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
        except IndexError:
            # The index references a row the loaded dataset does not have.
            continue
        results.append((item["image"], f"ID: {row}"))
    return results

def search_similar_images(image: "Image.Image"):
    """Caption ``image`` and find similar artworks by caption and by image.

    Args:
        image: Query image, or ``None`` when the Gradio form is submitted
            without an upload.

    Returns:
        A ``(caption, text_results, image_results)`` tuple: the generated
        caption, the matches for the caption embedding in ``text_index``,
        and the matches for the image embedding in ``image_index``. When
        ``image`` is ``None`` the result is ``("", [], [])``.
    """
    # Gradio passes None when no image was uploaded; return empty outputs
    # instead of failing inside the BLIP/CLIP preprocessing.
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# Gradio UI: one image input; the outputs are the BLIP caption and two
# galleries holding the caption-based and image-based search results.
demo = gr.Interface(
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description=(
        "Загрузите изображение. Модель BLIP сгенерирует описание, "
        "а CLIP найдёт похожие картины по тексту и изображению."
    ),
    fn=search_similar_images,
    inputs=gr.Image(type="pil", label="Загрузите изображение"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(columns=2, height="auto", label="🔍 Похожие по описанию (CLIP)"),
        gr.Gallery(columns=2, height="auto", label="🎨 Похожие по изображению (CLIP)"),
    ],
)

# Start serving the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [None]:
import gradio as gr
from PIL import Image
import torch
import numpy as np
import faiss

from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    CLIPProcessor,
    CLIPModel
)
from datasets import load_dataset

# WikiArt training split; search hits are looked up in it by row position.
wikiart_dataset = load_dataset("huggan/wikiart", split="train")

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# BLIP captioning model and its processor; the model runs in eval mode.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
blip_model = blip_model.to(device).eval()

# CLIP model and its processor, used for both text and image embeddings.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device).eval()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prebuilt FAISS indexes, read from paths relative to the working directory.
image_index = faiss.read_index("../create_index/image_index.faiss")
text_index = faiss.read_index("../create_index/text_index.faiss")

def generate_caption(image: Image.Image):
    """Generate a text caption for ``image`` with the BLIP model.

    The image is preprocessed by ``blip_processor``, token ids are produced
    by ``blip_model.generate`` with gradient tracking disabled, and the first
    generated sequence is decoded with special tokens stripped.

    Args:
        image: Image to describe.

    Returns:
        The decoded caption.
    """
    model_inputs = blip_processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        generated = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated[0], skip_special_tokens=True)

def get_clip_text_embedding(text):
    """Embed ``text`` with the CLIP text encoder for a FAISS lookup.

    Args:
        text: A single query string.

    Returns:
        A float32 NumPy array of the text features, L2-normalized in place
        by ``faiss.normalize_L2`` (presumably one row for the single input
        text -- confirm against the index dimensionality).
    """
    # truncation=True caps the token sequence at the tokenizer's maximum
    # length; without it, text longer than CLIP's context window makes the
    # text encoder fail instead of returning an embedding.
    inputs = clip_processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    ).to(device)
    with torch.no_grad():
        features = clip_model.get_text_features(**inputs)
    features = features.cpu().numpy().astype("float32")
    # normalize_L2 works in place on the array.
    faiss.normalize_L2(features)
    return features

def get_clip_image_embedding(image):
    """Embed ``image`` with the CLIP image encoder for a FAISS lookup.

    Args:
        image: Image input accepted by ``clip_processor``.

    Returns:
        A float32 NumPy array of the image features, L2-normalized in place
        by ``faiss.normalize_L2``.
    """
    pixel_inputs = clip_processor(images=image, return_tensors="pt")
    pixel_inputs = pixel_inputs.to(device)
    with torch.no_grad():
        raw_features = clip_model.get_image_features(**pixel_inputs)
    vectors = raw_features.cpu().numpy().astype("float32")
    # normalize_L2 mutates its argument, so the same array is returned.
    faiss.normalize_L2(vectors)
    return vectors

def get_results_with_images(embedding, index, top_k=2):
    """Look up the nearest neighbours of ``embedding`` and fetch their images.

    Args:
        embedding: Query vectors passed to ``index.search``; only the hits
            for the first query row are used.
        index: Object exposing ``search(embedding, top_k)`` that returns
            ``(distances, ids)``, e.g. a FAISS index.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(image, caption)`` tuples in the order returned by the
        index, where the caption is ``"ID: <row>"``.
    """
    _, neighbour_ids = index.search(embedding, top_k)
    results = []
    for raw_id in neighbour_ids[0]:
        row = int(raw_id)
        # FAISS pads the id array with -1 when it finds fewer than top_k
        # neighbours. A negative position does not raise IndexError on the
        # dataset; it is treated as an offset from the end and would return
        # an unrelated row, so drop it explicitly.
        if row < 0:
            continue
        try:
            item = wikiart_dataset[row]
        except IndexError:
            # The index references a row the loaded dataset does not have.
            continue
        results.append((item["image"], f"ID: {row}"))
    return results

def search_similar_images(image: "Image.Image"):
    """Caption ``image`` and find similar artworks by caption and by image.

    Args:
        image: Query image, or ``None`` when the Gradio form is submitted
            without an upload.

    Returns:
        A ``(caption, text_results, image_results)`` tuple: the generated
        caption, the matches for the caption embedding in ``text_index``,
        and the matches for the image embedding in ``image_index``. When
        ``image`` is ``None`` the result is ``("", [], [])``.
    """
    # Gradio passes None when no image was uploaded; return empty outputs
    # instead of failing inside the BLIP/CLIP preprocessing.
    if image is None:
        return "", [], []

    caption = generate_caption(image)

    text_emb = get_clip_text_embedding(caption)
    image_emb = get_clip_image_embedding(image)

    text_results = get_results_with_images(text_emb, text_index)
    image_results = get_results_with_images(image_emb, image_index)

    return caption, text_results, image_results

# Gradio UI: one image input; the outputs are the BLIP caption and two
# galleries holding the caption-based and image-based search results.
demo = gr.Interface(
    title="🎨 Semantic WikiArt Search (BLIP + CLIP)",
    description=(
        "Загрузите изображение. Модель BLIP сгенерирует описание, "
        "а CLIP найдёт похожие картины по тексту и изображению."
    ),
    fn=search_similar_images,
    inputs=gr.Image(type="pil", label="Загрузите изображение"),
    outputs=[
        gr.Textbox(label="📜 Сгенерированное описание"),
        gr.Gallery(columns=2, height="auto", label="🔍 Похожие по описанию (CLIP)"),
        gr.Gallery(columns=2, height="auto", label="🎨 Похожие по изображению (CLIP)"),
    ],
)

# Start serving the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.


