In [7]:
!pip install faiss-gpu scikit-image



In [8]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

Looking in indexes: https://download.pytorch.org/whl/cu124


In [9]:
import glob
import math
import os
import pickle
from concurrent.futures import ThreadPoolExecutor, as_completed

import shutil
import cv2
import faiss
import imagehash
import numpy as np
import torch
from PIL import Image
from PIL import ImageOps
from sklearn.preprocessing import MinMaxScaler
from torchvision import models, transforms

In [10]:
# ImageNet-pretrained ResNet-50 used as a frozen feature extractor.
# `weights=` replaces the `pretrained=True` flag, which has been deprecated
# since torchvision 0.13; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` used to select, so the loaded parameters are unchanged.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Switch to evaluation mode so layers such as batch norm use their stored
# statistics instead of updating them during the forward pass.
model.eval()

# Input pipeline applied to every PIL image before it is fed to `model`:
# resize the shorter side to 256, take the central 224x224 crop, convert to a
# tensor and normalize each channel with the mean/std constants listed below.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])


def neural_hash(img):
    """Run one image through the module-level `model` and return its output.

    Args:
        img: image accepted by the module-level `preprocess` pipeline
            (callers in this file pass an RGB PIL image).

    Returns:
        1-D numpy array holding the flattened model output for a batch
        containing only this image.
    """
    # The network expects a batch dimension, so wrap the single tensor.
    single_image_batch = preprocess(img).unsqueeze(0)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        prediction = model(single_image_batch)

    as_array = prediction.numpy()
    return as_array.flatten()



def calculate_hashes_and_features(image_path):
    """Compute all per-image feature blocks for one image file.

    The image is loaded as RGB and described by several perceptual hashes
    (of the image itself and of mirrored, flipped, rotated and inverted
    variants) plus the output of `neural_hash`.

    Args:
        image_path: path of the image file to read.

    Returns:
        A list of ten 1-D arrays in this fixed order: whash, phash,
        colorhash, mirrored phash, flipped phash, phash of the 90/180/270
        degree rotations, inverted phash, neural output. Returns None if any
        step raises; the error is printed and not propagated.
    """
    try:
        # Open inside a context manager so the underlying file handle is
        # released deterministically; convert() yields an independent
        # in-memory RGB image that stays valid after the file is closed.
        with Image.open(image_path) as source:
            img = source.convert('RGB')

        # Hashes of the unmodified image
        phash = imagehash.phash(img, hash_size=8, highfreq_factor=4)
        whash = imagehash.whash(img, hash_size=8, mode='db2')
        colorhash = imagehash.colorhash(img, binbits=8)

        neural = neural_hash(img)

        # Horizontal mirror (left <-> right)
        mirror_hash = imagehash.phash(ImageOps.mirror(img))

        # Vertical flip (top <-> bottom)
        flip_hash = imagehash.phash(ImageOps.flip(img))

        # Rotations by 90, 180 and 270 degrees; expand=True grows the canvas
        # so the rotated image is not cropped.
        rotate90_hash = imagehash.phash(img.rotate(90, expand=True))
        rotate180_hash = imagehash.phash(img.rotate(180, expand=True))
        rotate270_hash = imagehash.phash(img.rotate(270, expand=True))

        # Negative (inverted colors)
        inverted_hash = imagehash.phash(ImageOps.invert(img))

        # Flatten every hash into a 1-D array. The order of this list defines
        # the column layout of the saved dataset, so it must not change.
        features = [
            whash.hash.flatten(),
            phash.hash.flatten(),
            colorhash.hash.flatten(),
            mirror_hash.hash.flatten(),
            flip_hash.hash.flatten(),
            rotate90_hash.hash.flatten(),
            rotate180_hash.hash.flatten(),
            rotate270_hash.hash.flatten(),
            inverted_hash.hash.flatten(),
            neural
        ]

        return features
    except Exception as e:
        # Best effort: one unreadable image must not abort the whole run.
        print(f"Ошибка при обработке {image_path}: {e}")
        return None


def process_image(image_path):
    """Build the single concatenated feature vector for one image.

    Args:
        image_path: path of the image file to process.

    Returns:
        Tuple `(features, image_path)` where `features` is the concatenation
        of all blocks returned by `calculate_hashes_and_features`, or None
        when that function produced nothing.
    """
    feature_blocks = calculate_hashes_and_features(image_path)
    if not feature_blocks:
        return None
    return np.concatenate(feature_blocks), image_path


def process_directory(directory_path, max_threads=40):
    """Compute feature vectors for every `*.jpg` file under a directory.

    The directory is searched recursively and images are processed
    concurrently in a thread pool. Results are collected in completion
    order, so the row order is not tied to the glob order; `paths[i]` always
    corresponds to row `i` of the returned array.

    Args:
        directory_path: root directory to search.
        max_threads: size of the worker thread pool (default 40, the value
            that was previously hard-coded).

    Returns:
        Tuple `(features, paths)`: a numpy array built from the per-image
        feature vectors and the list of matching image paths. Images for
        which `process_image` returned nothing are skipped. With no
        successfully processed images the array is empty.
    """
    image_paths = glob.glob(os.path.join(directory_path, '**', '*.jpg'), recursive=True)
    total = len(image_paths)
    hashes = []
    paths = []

    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        # Submit everything up front; only the futures themselves are needed
        # because each result already carries its own image path.
        futures = [executor.submit(process_image, img_path) for img_path in image_paths]

        for future in as_completed(futures):
            result = future.result()
            if result:
                combined_hash, image_path = result
                hashes.append(combined_hash)
                paths.append(image_path)
                # Progress line: number done, total/done ratio, current file.
                done = len(paths)
                print(done, total / done, image_path)

    return np.array(hashes), paths


def create_faiss_index(data):
    """Create a flat L2 FAISS index populated with the rows of `data`.

    Args:
        data: 2-D numpy array; its second axis length is used as the index
            dimensionality.

    Returns:
        A `faiss.IndexFlatL2` containing every row of `data` cast to float32.
    """
    n_features = data.shape[1]
    flat_index = faiss.IndexFlatL2(n_features)  # L2 (Euclidean) distance
    vectors = data.astype('float32')
    flat_index.add(vectors)
    return flat_index

def remove_dir(path):
    """Delete a directory tree, reporting the outcome on stdout.

    Any exception raised during removal is caught and printed rather than
    propagated, so the caller never sees a failure.

    Args:
        path: directory to remove together with everything inside it.
    """
    try:
        shutil.rmtree(path)
        message = f"Директория '{path}' и все её содержимое успешно удалены."
    except Exception as err:
        message = f"Ошибка при удалении директории: {err}"
    print(message)

# Example usage: build the feature matrix for a directory of images, fit a
# scaler on it, and persist the scaler, the data, the paths and a FAISS index.
if __name__ == "__main__":
    directory = '../dataset_new_test/'
    output_dir = '../main/current_faiss'

    dataset, paths = process_directory(directory)

    # Fit a fresh min-max scaler on the features and normalize them with it.
    scaler = MinMaxScaler()
    dataset = scaler.fit_transform(dataset)

    # Start from a clean output directory. remove_dir() swallows its errors,
    # so the directory may still exist afterwards, and the parent directory
    # may not exist at all; makedirs(exist_ok=True) copes with both cases
    # where a plain os.mkdir() would raise.
    remove_dir(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # The fitted scaler is saved so the same normalization can be reused later.
    with open(os.path.join(output_dir, 'minmax_scaler.pkl'), 'wb') as file:
        pickle.dump(scaler, file)

    np.save(os.path.join(output_dir, 'image_dataset.npy'), dataset)
    np.save(os.path.join(output_dir, 'path_dataset.npy'), paths)
    print(f"Количество обработанных изображений: {len(dataset)}")

    # Build the FAISS index over the normalized features
    index = create_faiss_index(dataset)
    print("Индекс FAISS создан и обучен.")

    # Persist the index next to the data it was built from
    faiss.write_index(index, os.path.join(output_dir, 'image_index.faiss'))



1 7401.0 ../dataset_new_test/10c29e82-b992-4a56-a342-99b04044a303_54.166666666666664.jpg
2 3700.5 ../dataset_new_test/5317f402-8ef3-42c5-8133-cf559e32feab_0.03333333333333333.jpg
3 2467.0 ../dataset_new_test/0e1850d4-7eb0-4d0f-9d5b-a50628f4d0fb_1.1.jpg
4 1850.25 ../dataset_new_test/30c3ee7a-37ec-4975-8335-8324a81e2afd_11.133333333333333.jpg
5 1480.2 ../dataset_new_test/4fbbfca7-70c1-44dd-9ff8-71d4fb98df69_1.5333333333333334.jpg
6 1233.5 ../dataset_new_test/50de0457-2a74-4e7d-b670-a1b5552c0c52_10.3.jpg
7 1057.2857142857142 ../dataset_new_test/e866c207-3dec-494d-8dc5-5f7999e65d6b_2.7.jpg
8 925.125 ../dataset_new_test/7b55554b-80ae-4d00-b551-ad90b2d82dd2_49.2.jpg
9 822.3333333333334 ../dataset_new_test/366875f3-ec6b-4061-ad9c-f16a8d107aa7_37.43333333333333.jpg
10 740.1 ../dataset_new_test/cf02616a-b0d6-43fe-b532-1457a864d656_6.333333333333333.jpg
11 672.8181818181819 ../dataset_new_test/2b5fa967-ccab-4a13-8344-800f01fb3d67_46.833333333333336.jpg
12 616.75 ../dataset_new_test/27c17fe0-ea06