# Инференс

In [14]:
import os
import json
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from transformers import pipeline
from ultralytics import YOLOWorld
import torch
from IPython.display import clear_output
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = YOLOWorld("yolov8l-worldv2.pt").to(device)
image_classifier = pipeline(
    task="zero-shot-image-classification", model="google/siglip-base-patch16-224"
)

DATA_CSV = pd.read_csv("/probnik/purple/metadata/test_data.csv")
DATA_DIR = "/probnik/purple/test_data"

categories = {
    "столы": ["table", "desk", "dining table", "coffee table"],
    "стулья": ["chair", "armchair", "dining chair", "office chair"],
    "сумки": ["suitcase", "backpack", "bag", "handbag", "wallet"],
    "одежда для девочек": [
        "kids clothes(female)",
        "female clothes",
        "woman clothes",
        "clothes",
    ],
}

category2english = {
    "столы": "table",
    "стулья": "chair",
    "сумки": "bags",
    "одежда для девочек": "girls clothes",
}

colors = [
    "white",
    "black",
    "blue",
    "brown",
    "burgundy",
    "gold",
    "gray",
    "green",
    "light blue",
    "multicolored",
    "orange",
    "pink",
    "purple",
    "red",
    "silver",
    "turquoise",
    "yellow",
]

index_classname_english = {
    "chernyi": "black",
    "belyi": "white",
    "korichnevyi": "brown",
    "raznocvetnyi": "multicolored",
    "bezhevyi": "beige",
    "sinii": "blue",
    "rozovyi": "pink",
    "seryi": "gray",
    "zelenyi": "green",
    "krasnyi": "red",
    "goluboi": "light blue",
    "fioletovyi": "purple",
    "bordovyi": "burgundy",
    "zheltyi": "yellow",
    "oranzhevyi": "orange",
    "serebristyi": "silver",
    "zolotoi": "gold",
    "biryuzovyi": "turquoise",
}

english_to_russian = {
    "black": "chernyi",
    "white": "belyi",
    "brown": "korichnevyi",
    "multicolored": "raznocvetnyi",
    "beige": "bezhevyi",
    "blue": "sinii",
    "pink": "rozovyi",
    "gray": "seryi",
    "green": "zelenyi",
    "red": "krasnyi",
    "light blue": "goluboi",
    "purple": "fioletovyi",
    "burgundy": "bordovyi",
    "yellow": "zheltyi",
    "orange": "oranzhevyi",
    "silver": "serebristyi",
    "gold": "zolotoi",
    "turquoise": "biryuzovyi",
}


def build_label_to_color(colors, category, category2english):
    return {f"{color} {category2english[category]}": color for color in colors}


results_data = []

for filename in os.listdir(DATA_DIR):
    img_path = os.path.join(DATA_DIR, filename)
    img_name = Path(img_path).stem

    category = DATA_CSV.loc[DATA_CSV["id"] == int(img_name), "category"].item()

    model.set_classes(categories[category])
    results = model.predict(img_path)

    image = Image.open(img_path)

    if len(results[0].boxes.data) > 0:
        coords = results[0].boxes.data[0].cpu().numpy()
        x_min, y_min, x_max, y_max = map(int, coords[:4])
        cropped = image.crop((x_min, y_min, x_max, y_max))
    else:
        cropped = image

    candidate_labels = [f"{color} {category2english[category]}" for color in colors]
    label_to_color = build_label_to_color(colors, category, category2english)

    outputs = image_classifier(cropped, candidate_labels=candidate_labels)

    scores = np.array([item["score"] for item in outputs])
    logits = torch.tensor(scores)
    normalized_logits = logits / torch.max(logits)
    probabilities = F.softmax(normalized_logits, dim=0).numpy()

    results_with_probs = []
    for item, prob in zip(outputs, probabilities):
        new_item = item.copy()
        new_item["prob"] = float(prob)
        results_with_probs.append(new_item)

    proba_dict = {
        label_to_color.get(item["label"], item["label"]): item["prob"]
        for item in results_with_probs
    }
    proba_dict_russian = {
        english_to_russian.get(color, color): prob for color, prob in proba_dict.items()
    }

    max_item = max(results_with_probs, key=lambda x: x["prob"])
    predicted_color = label_to_color.get(max_item["label"], max_item["label"])
    predicted_color_russian = english_to_russian[predicted_color]

    row = {
        "id": img_name,
        "category": category,
        "predict_proba": json.dumps(proba_dict_russian, ensure_ascii=False),
        "predict_color": predicted_color_russian,
    }
    results_data.append(row)
clear_output()
df = pd.DataFrame(
    results_data, columns=["id", "category", "predict_proba", "predict_color"]
)
df.to_csv("submission.csv", index=False)

# Ноут для подгрузки одного фото. Подразумевается, что фото будет подаваться вместе с категорией. 

In [15]:
import numpy as np
from pathlib import Path
from PIL import Image
from transformers import pipeline
from ultralytics import YOLOWorld
import torch
import time

IMAGE_PATH = "/probnik/purple/test_data/36508368250.jpg"
USER_CATEGORY = "столы"

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = YOLOWorld("yolov8l-worldv2.pt").to(device)
image_classifier = pipeline(
    task="zero-shot-image-classification", model="google/siglip-base-patch16-224"
)

categories = {
    "столы": ["table", "desk", "dining table", "coffee table"],
    "стулья": ["chair", "armchair", "dining chair", "office chair"],
    "сумки": ["suitcase", "backpack", "bag", "handbag", "wallet"],
    "одежда для девочек": [
        "kids clothes(female)",
        "female clothes",
        "woman clothes",
        "clothes",
    ],
}

category2english = {
    "столы": "table",
    "стулья": "chair",
    "сумки": "bags",
    "одежда для девочек": "girls clothes",
}

colors = [
    "white",
    "black",
    "blue",
    "brown",
    "burgundy",
    "gold",
    "gray",
    "green",
    "light blue",
    "multicolored",
    "orange",
    "pink",
    "purple",
    "red",
    "silver",
    "turquoise",
    "yellow",
]

index_classname_english = {
    "chernyi": "black",
    "belyi": "white",
    "korichnevyi": "brown",
    "raznocvetnyi": "multicolored",
    "bezhevyi": "beige",
    "sinii": "blue",
    "rozovyi": "pink",
    "seryi": "gray",
    "zelenyi": "green",
    "krasnyi": "red",
    "goluboi": "light blue",
    "fioletovyi": "purple",
    "bordovyi": "burgundy",
    "zheltyi": "yellow",
    "oranzhevyi": "orange",
    "serebristyi": "silver",
    "zolotoi": "gold",
    "biryuzovyi": "turquoise",
}

english_to_russian = {
    "black": "chernyi",
    "white": "belyi",
    "brown": "korichnevyi",
    "multicolored": "raznocvetnyi",
    "beige": "bezhevyi",
    "blue": "sinii",
    "pink": "rozovyi",
    "gray": "seryi",
    "green": "zelenyi",
    "red": "krasnyi",
    "light blue": "goluboi",
    "purple": "fioletovyi",
    "burgundy": "bordovyi",
    "yellow": "zheltyi",
    "orange": "oranzhevyi",
    "silver": "serebristyi",
    "gold": "zolotoi",
    "turquoise": "biryuzovyi",
}


def build_label_to_color(colors, category, category2english):
    return {f"{color} {category2english[category]}": color for color in colors}


if USER_CATEGORY not in categories:
    raise ValueError(
        f"Категория {USER_CATEGORY} не поддерживается. Используйте одну из: {list(categories.keys())}"
    )
model.set_classes(categories[USER_CATEGORY])
results = model.predict(IMAGE_PATH)

image = Image.open(IMAGE_PATH)

if len(results[0].boxes.data) > 0:
    coords = results[0].boxes.data[0].cpu().numpy()
    x_min, y_min, x_max, y_max = map(int, coords[:4])
    cropped = image.crop((x_min, y_min, x_max, y_max))
else:
    cropped = image

candidate_labels = [f"{color} {category2english[USER_CATEGORY]}" for color in colors]
label_to_color = build_label_to_color(colors, USER_CATEGORY, category2english)

outputs = image_classifier(cropped, candidate_labels=candidate_labels)
scores = np.array([item["score"] for item in outputs])
logits = torch.tensor(scores)
normalized_logits = logits / torch.max(logits)
probabilities = F.softmax(normalized_logits, dim=0).numpy()
results_with_probs = []
for item, prob in zip(outputs, probabilities):
    new_item = item.copy()
    new_item["prob"] = float(prob)
    results_with_probs.append(new_item)

proba_dict = {
    label_to_color.get(item["label"], item["label"]): item["prob"]
    for item in results_with_probs
}
proba_dict_russian = {
    english_to_russian.get(color, color): prob for color, prob in proba_dict.items()
}

max_item = max(results_with_probs, key=lambda x: x["prob"])
predicted_color = label_to_color.get(max_item["label"], max_item["label"])
predicted_color_russian = english_to_russian[predicted_color]

top5 = sorted(proba_dict_russian.items(), key=lambda x: x[1], reverse=True)[:5]
clear_output()
print("Предсказанный цвет:", predicted_color_russian)
print("Топ-5 цветов по вероятности:")
for color, prob in top5:
    print(f"{color}: {prob:.4f}")

Предсказанный цвет: korichnevyi
Топ-5 цветов по вероятности:
korichnevyi: 0.1371
zheltyi: 0.0682
zelenyi: 0.0595
raznocvetnyi: 0.0571
oranzhevyi: 0.0547
