## 1. Настройка среды и загрузка библиотек

In [None]:
import os
import glob
from PIL import Image
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import torch
import torchvision.transforms as T
import torchvision.models as models
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"Используется устройство: {DEVICE}")

## 2. Подготовка данных

In [None]:
# Directory that is expected to contain the PNG images to index.
IMAGE_DIR = "images_data"

# Validate with an explicit exception instead of `assert`: assert statements
# are removed when Python runs with -O, and a bare assert gives no hint about
# what is missing. `isdir` also rejects a regular file with the same name.
if not os.path.isdir(IMAGE_DIR):
    raise FileNotFoundError(f"Каталог с изображениями не найден: {IMAGE_DIR}")

# Keep only the base names of the PNG files; sorting makes the processing
# order reproducible between runs.
image_files = sorted(
    os.path.basename(path) for path in glob.glob(os.path.join(IMAGE_DIR, "*.png"))
)
print(f"Найдено {len(image_files)} изображений.")
if image_files:
    print(f"Примеры имен файлов: {image_files[:5]}")
else:
    print(f"Изображения не найдены в {IMAGE_DIR}. Проверьте путь и содержимое архива.")

## 3. Извлечение признаков

In [None]:
# Load ResNet50 with the IMAGENET1K_V2 pretrained weights.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

# Keep every child module except the last one (the classification layer),
# so that a forward pass yields features instead of class scores.
feature_layers = list(model.children())[:-1]
model = torch.nn.Sequential(*feature_layers)

# Inference only: switch to evaluation mode and move the weights to DEVICE.
model.to(DEVICE)
model.eval()

print("Модель ResNet50 загружена и готова к извлечению признаков.")


In [None]:
# Per-channel mean and standard deviation used to normalise the RGB tensor.
# NOTE(review): these look like the standard ImageNet statistics expected by
# the pretrained weights; confirm if the backbone is ever swapped.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Resize, take a 224x224 centre crop, convert to a tensor, then normalise.
_preprocess_steps = [
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
preprocess = T.Compose(_preprocess_steps)

In [None]:
def get_embedding(image_path, model, preprocess_fn, device):
    """Compute a feature vector for a single image file.

    Args:
        image_path: Path of the image to open with PIL.
        model: Callable network applied to a batch holding one image.
        preprocess_fn: Callable turning an RGB PIL image into a tensor.
        device: Torch device the batch is moved to before the forward pass.

    Returns:
        The model output with all size-1 dimensions squeezed out, as a NumPy
        array. If any step fails, the error is printed and a float32 zero
        vector of length 2048 is returned so batch processing can continue.
    """
    try:
        # The context manager closes the underlying file deterministically
        # instead of leaving it to garbage collection.
        with Image.open(image_path) as img:
            img_t = preprocess_fn(img.convert('RGB'))
        batch_t = torch.unsqueeze(img_t, 0).to(device)

        # No gradients are needed for feature extraction.
        with torch.no_grad():
            embedding = model(batch_t)

        return embedding.squeeze().cpu().numpy()
    except Exception as e:
        # Deliberate best-effort: one unreadable image must not abort the run.
        print(f"Ошибка при обработке {image_path}: {e}")

        # float32 matches PyTorch's default output dtype, so stacking the
        # placeholder with real embeddings does not upcast the whole array.
        return np.zeros(2048, dtype=np.float32)

In [None]:
# Maps each image file name to the feature vector returned by get_embedding.
embeddings = {}
if image_files:
    print("Начало извлечения признаков...")
    for image_name in tqdm(image_files, desc="Извлечение признаков"):
        embeddings[image_name] = get_embedding(
            os.path.join(IMAGE_DIR, image_name), model, preprocess, DEVICE
        )
    print(f"Извлечено {len(embeddings)} эмбеддингов.")
    if embeddings:
        # Report the shape of the first stored embedding as a sanity check.
        sample_name = next(iter(embeddings))
        print(f"Размерность эмбеддинга для {sample_name}: {embeddings[sample_name].shape}")
else:
    print("Список файлов изображений пуст. Пропуск извлечения признаков.")


In [None]:
# Fix the file order once so that row/column i of the similarity matrix
# always refers to ordered_filenames[i].
ordered_filenames = list(embeddings)
if ordered_filenames:
    all_embeddings_np = np.array([embeddings[fn] for fn in ordered_filenames])
    # Pairwise cosine similarity between every pair of embeddings.
    similarity_matrix = cosine_similarity(all_embeddings_np)
else:
    # With no embeddings np.array([]) is 1-D and cosine_similarity raises
    # ValueError; use empty placeholders so the notebook keeps running when
    # no images were found.
    all_embeddings_np = np.empty((0, 0))
    similarity_matrix = np.empty((0, 0))

In [None]:
# Number of recommendations kept for each query image.
K = 6
results = []
for row, query_filename in tqdm(enumerate(ordered_filenames), total=len(ordered_filenames)):
    # Candidate indices ordered from most to least similar to the query.
    by_similarity = np.argsort(similarity_matrix[row])[::-1]

    # Drop the query image itself, then keep at most K best matches.
    neighbours = [
        ordered_filenames[j]
        for j in by_similarity
        if ordered_filenames[j] != query_filename
    ][:K]

    # One record per query: its file name and the space-separated ranking.
    results.append({"filename": query_filename, "ranking": " ".join(neighbours)})