In [2]:
import mlflow
# from faiss_service import VectorDBService
from routes import get_embedding_from_image
import numpy as np
import os
import time
from typing import List
from facenet_pytorch import MTCNN, InceptionResnetV1

# Устанавливаем URI для MLflow, чтобы использовать локальный сервер
mlflow.set_tracking_uri('http://127.0.0.1:5000')

# Устанавливаем название эксперимента
mlflow.set_experiment('Face_Recognition_Experiment 2')  # Название эксперимента

2025/04/04 00:05:20 INFO mlflow.tracking.fluent: Experiment with name 'Face_Recognition_Experiment 2' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/727893366955203987', creation_time=1743714320297, experiment_id='727893366955203987', last_update_time=1743714320297, lifecycle_stage='active', name='Face_Recognition_Experiment 2', tags={}>

In [3]:
mtcnn = MTCNN()
resnet = InceptionResnetV1(pretrained='casia-webface').eval()

In [4]:
def extract_embeddings_from_folder(folder_path: str) -> List[np.ndarray]:
    embeddings = []
    labels = []

    # Получаем список всех папок в folder_path
    subfolders = [f.path for f in os.scandir(folder_path) if f.is_dir()]

    # Ограничиваемся только первыми тремя папками
    subfolders = subfolders[:3]

    for folder in subfolders:
        # Получаем список файлов в папке и выбираем первый файл изображения
        image_files = [f for f in os.listdir(folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

        # Если папка содержит изображения, выбираем первое
        if image_files:
            image_path = os.path.join(folder, image_files[0])

            with open(image_path, 'rb') as f:
                image_bytes = f.read()
                embedding = get_embedding_from_image(image_bytes)

                # Используем имя папки как метку
                label = os.path.basename(folder)

                embeddings.append(embedding)
                labels.append(label)

    print('Закончили извлечение эмбеддингов')
    return np.array(embeddings), labels

In [5]:
import faiss
import numpy as np
import os
import logging
from typing import List

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class VectorDBService:
    """
    Class for working with a FAISS database using cosine similarity.
    """

    def __init__(self, index_file_path: str = "faiss_index.index", dim: int = 512):
        """
        Initializes the FAISS database.

        :param index_file_path: Path to the file for saving the index.
        :param dim: Dimensionality of the embeddings.
        """
        logger.info(f"Index path: {index_file_path}")
        self.index_file_path = index_file_path
        self.dim = dim
        self.index = None
        self.load_index()

    def load_index(self):
        """
        Loads the index from a file if it exists.
        If the index file does not exist, a new index is created.
        """
        if os.path.exists(self.index_file_path):
            try:
                self.index = faiss.read_index(self.index_file_path)
                if self.index.metric_type != faiss.METRIC_INNER_PRODUCT:
                    logger.warning("Loaded index is not using inner product metric! Rebuilding...")
                    self.index = faiss.IndexFlatIP(self.dim)
                logger.info(f"Index loaded from {self.index_file_path}")
            except Exception as e:
                logger.error(f"Failed to load index: {str(e)}")
                self.index = faiss.IndexFlatIP(self.dim)
        else:
            self.index = faiss.IndexFlatIP(self.dim)

    def save_index(self):
        """
        Saves the index to a file.
        """
        try:
            faiss.write_index(self.index, self.index_file_path)
            logger.info(f"Index saved to {self.index_file_path}")
        except Exception as e:
            logger.error(f"Error saving index: {str(e)}")

    def add_embeddings(self, embeddings: np.ndarray):
        """
        Adds embeddings to FAISS.

        :param embeddings: Embedding vectors to be added.
        """
        try:
            embeddings = embeddings.astype(np.float32)
            faiss.normalize_L2(embeddings)  # Normalize embeddings
            self.index.add(embeddings)
            logger.info(f"Added {embeddings.shape[0]} embeddings to FAISS.")
            self.save_index()
        except Exception as e:
            logger.error(f"Error adding embeddings to FAISS: {str(e)}")

    def search_embedding(self, embedding: np.ndarray, k: int = 1, threshold: float = 0.6) -> List[int]:
        """
        Searches for the most similar embeddings in FAISS using cosine similarity.

        :param embedding: Embedding to search for.
        :param k: Number of nearest neighbors.
        :param threshold: Distance threshold (values closer to 0 are more similar).
        :return: List of indices of found embeddings that meet the threshold.
        """
        try:
            embedding = embedding.astype(np.float32).reshape(1, -1)
            faiss.normalize_L2(embedding)

            distances, indices = self.index.search(embedding, k)

            # Convert similarity score (inner product) to cosine distance (1 - similarity)
            cosine_distances = 1 - distances

            logger.info("Cosine Distances: %s, Indices: %s", cosine_distances, indices)

            # Filter by threshold (distances should be <= threshold for similarity)
            valid_indices = [idx for dist, idx in zip(cosine_distances[0], indices[0]) if dist <= threshold]

            if valid_indices:
                logger.info(f"Found {len(valid_indices)} nearest neighbors: {valid_indices}")
                return valid_indices
            else:
                logger.info("No embeddings found within the threshold.")
                return [-1]  # If no embedding passes the threshold, return -1

        except Exception as e:
            logger.error(f"Error searching in FAISS: {str(e)}")
            return []

    def remove_embedding_by_embedding(self, embedding: np.ndarray):
        """
        Removes an embedding from FAISS if it exists.

        :param embedding: Embedding vector to remove.
        """
        try:
            if self.index.ntotal == 0:
                logger.error("FAISS index is empty. Nothing to remove.")
                return

            embedding = embedding.astype(np.float32).reshape(1, -1)
            faiss.normalize_L2(embedding)

            _, indices = self.index.search(embedding, 1)  # Find the closest match
            idx = indices[0][0]

            if idx == -1:
                logger.error("Embedding not found in FAISS. No changes made.")
                return

            # Get all embeddings, remove the found one
            embeddings = self.index.reconstruct_n(0, self.index.ntotal)
            embeddings = np.delete(embeddings, idx, axis=0)

            # Rebuild index with remaining embeddings
            self.index = faiss.IndexFlatIP(self.dim)
            faiss.normalize_L2(embeddings)  # Normalize before re-adding
            self.index.add(embeddings)

            logger.info(f"Removed embedding at index {idx}.")
            self.save_index()

        except Exception as e:
            logger.error(f"Error removing embedding: {str(e)}")

In [6]:
data_folder = "Original Images"  # Папка с изображениями для тренировки

# Извлечение эмбеддингов из папки данных и добавление их в индекс
train_embeddings, labels = extract_embeddings_from_folder(data_folder)

Закончили извлечение эмбеддингов


In [7]:
labels

['Akshay Kumar', 'Alexandra Daddario', 'Alia Bhatt']

In [8]:
len(train_embeddings)

3

In [9]:
# Создание индекса FAISS
index_service = VectorDBService()
index_service.add_embeddings(train_embeddings)

INFO:__main__:Index path: faiss_index.index
INFO:__main__:Added 3 embeddings to FAISS.
INFO:__main__:Index saved to faiss_index.index


In [10]:
# Пороги для поиска
thresholds = [0.2, 0.5, 0.7]  # Пример пороговых значений

In [11]:
import gc
gc.collect()  # Сборщик мусора для освобождения памяти

1555

# baseline

In [50]:
import os
import time
import numpy as np
import mlflow
import matplotlib.pyplot as plt
from typing import List

def evaluate_accuracy_and_speed(query_folder: str, index_service, labels: List[str], thresholds: List[float], k=5):
    correct = {threshold: 0 for threshold in thresholds}
    total_queries = {threshold: 0 for threshold in thresholds}
    times = {threshold: [] for threshold in thresholds}
    accuracy_progress = {threshold: [] for threshold in thresholds}

    subfolders = [f.path for f in os.scandir(query_folder) if f.is_dir()][:3]
    
    with mlflow.start_run():
        for threshold in thresholds:
            with mlflow.start_run(nested=True):
                print(f'Оценка с порогом: {threshold}')
                mlflow.log_param("threshold", threshold)
                
                for folder in subfolders:
                    for image_file in os.listdir(folder):
                        image_path = os.path.join(folder, image_file)
                        
                        if image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                            with open(image_path, 'rb') as f:
                                image_bytes = f.read()
                                query_embedding = get_embedding_from_image(image_bytes)

                                start_time = time.time()
                                found_indices = index_service.search_embedding(query_embedding, k=k, threshold=threshold)
                                elapsed_time = time.time() - start_time
                                times[threshold].append(elapsed_time)

                                found_labels = [labels[i] for i in found_indices]
                                if image_file.split('_')[0] in found_labels:
                                    correct[threshold] += 1
                                total_queries[threshold] += 1

                                accuracy_progress[threshold].append(correct[threshold])
                
                # Нормализация точности на общее количество итераций
                normalized_accuracy = [acc / total_queries[threshold] for acc in accuracy_progress[threshold]]
                
                # Логирование точности на каждой итерации
                for iteration in range(len(normalized_accuracy)):
                    mlflow.log_metric(f'accuracy_{threshold}', normalized_accuracy[iteration], step=iteration)
                
                # Построение и сохранение графика нормализованной точности
                plt.figure()
                plt.plot(normalized_accuracy, label=f'Threshold {threshold}')
                plt.xlabel("Iterations")
                plt.ylabel("Normalized Accuracy")
                plt.legend()
                plt.title(f"Normalized Accuracy Progress (Threshold {threshold})")
                plot_path = f"normalized_accuracy_progress_{threshold}.png"
                plt.savefig(plot_path)
                plt.close()
                mlflow.log_artifact(plot_path)

        accuracy = {threshold: correct[threshold] / total_queries[threshold] if total_queries[threshold] else 0 for threshold in thresholds}
        average_time = {threshold: np.mean(times[threshold]) for threshold in thresholds}
        
        for threshold in thresholds:
            mlflow.log_metric(f"accuracy_{threshold}_final", accuracy[threshold] * 100)
            mlflow.log_metric(f"average_search_time_{threshold}", average_time[threshold])

    return accuracy, average_time


In [51]:
evaluate_accuracy_and_speed(data_folder, index_service, labels, thresholds=thresholds, k=1)

Оценка с порогом: 0.2


INFO:__main__:Cosine Distances: [[-1.1920929e-07]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.42853034]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.46904683]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.4453746]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.6369537]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.5609802]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.4646327]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.5242552]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.43620944]], Indices: [[0

🏃 View run spiffy-elk-328 at: http://127.0.0.1:5000/#/experiments/968677774182020642/runs/c7a09ca2ba554689afeb8141dfd91e43
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/968677774182020642
Оценка с порогом: 0.5


INFO:__main__:Cosine Distances: [[-1.1920929e-07]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.42853034]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.46904683]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.4453746]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.6369537]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.5609802]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.4646327]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.5242552]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.43620944]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: 

🏃 View run luxuriant-chimp-41 at: http://127.0.0.1:5000/#/experiments/968677774182020642/runs/cbbb560552a14d48b825139383c11e5c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/968677774182020642
Оценка с порогом: 0.7


INFO:__main__:Cosine Distances: [[-1.1920929e-07]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.42853034]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.46904683]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.4453746]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.6369537]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.5609802]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.4646327]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.5242552]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.43620944]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distance

🏃 View run resilient-frog-188 at: http://127.0.0.1:5000/#/experiments/968677774182020642/runs/dec439eb4c2d421cbd25be7b95d6fb5e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/968677774182020642
🏃 View run overjoyed-chimp-472 at: http://127.0.0.1:5000/#/experiments/968677774182020642/runs/2982e7a87ee048228b4d2b6f2c3634f0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/968677774182020642


({0.2: 0.37104072398190047, 0.5: 0.7782805429864253, 0.7: 0.9864253393665159},
 {0.2: 0.0020625170539407167,
  0.5: 0.002133338160104881,
  0.7: 0.0023433644307684575})

# shuffle

In [12]:
import os
import shutil
import random

In [15]:
source_folder = 'Original Images'  # Путь к вашей папке с подпапками
shuffled_folder = 'Shuffled'    # Путь для новой папки перемешанных изображений

if not os.path.exists(shuffled_folder):
        os.makedirs(shuffled_folder)

# Получаем список всех подпапок (папок с лицами)
subfolders = [f.path for f in os.scandir(source_folder) if f.is_dir()][:3]

# Список для хранения изображений
image_files = []

# Проходим по всем подпапкам и собираем все изображения
for subfolder in subfolders:
    for image_file in os.listdir(subfolder):
        if image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
            # Сохраняем путь к изображению и его имя
            image_files.append(os.path.join(subfolder, image_file))

# Перемешиваем список изображений
random.shuffle(image_files)

In [16]:
for index, image_path in enumerate(image_files):
    # Получаем имя файла из пути
    image_name = os.path.basename(image_path)
    # Строим новый путь для файла в целевой папке
    target_path = os.path.join(shuffled_folder, f"{index + 1}_{image_name}")  # Добавим индекс для уникальности
    # Копируем файл
    shutil.copy(image_path, target_path)

print(f'Все изображения успешно перемешаны и скопированы в {shuffled_folder}')


Все изображения успешно перемешаны и скопированы в Shuffled


In [89]:
import os
import time
import numpy as np
import mlflow
import matplotlib.pyplot as plt
from typing import List

def evaluate_accuracy_and_speed(query_folder: str, index_service, labels: List[str], thresholds: List[float], k=5):
    correct = {threshold: 0 for threshold in thresholds}
    total_queries = {threshold: 0 for threshold in thresholds}
    times = {threshold: [] for threshold in thresholds}
    accuracy_progress = {threshold: [] for threshold in thresholds}
    labels_n = [i for i in enumerate(labels)]
    with mlflow.start_run():
        for threshold in thresholds:
            with mlflow.start_run(nested=True):
                print(f'Оценка с порогом: {threshold}')
                mlflow.log_param("threshold", threshold)
                for image in os.listdir(query_folder):
                    image_file = os.path.join(shuffled_folder, image)
                    with open(image_path, 'rb') as f:
                        image_bytes = f.read()
                        query_embedding = get_embedding_from_image(image_bytes)

                        start_time = time.time()
                        found_indices = index_service.search_embedding(query_embedding, k=k, threshold=threshold)
                        elapsed_time = time.time() - start_time
                        times[threshold].append(elapsed_time)

                        if labels_n[found_indices[0]][1] in image_file:
                            correct[threshold] += 1
                        total_queries[threshold] += 1

                        accuracy_progress[threshold].append(correct[threshold])
                
                # Нормализация точности на общее количество итераций
                normalized_accuracy = [acc / total_queries[threshold] for acc in accuracy_progress[threshold]]
                
                # Логирование точности на каждой итерации
                for iteration in range(len(normalized_accuracy)):
                    mlflow.log_metric(f'accuracy', normalized_accuracy[iteration], step=iteration)
                
                # Построение и сохранение графика нормализованной точности
                plt.figure()
                plt.plot(normalized_accuracy, label=f'Threshold {threshold}')
                plt.xlabel("Iterations")
                plt.ylabel("Normalized Accuracy")
                plt.legend()
                plt.title(f"Normalized Accuracy Progress (Threshold {threshold})")
                plot_path = f"normalized_accuracy_progress_{threshold}.png"
                plt.savefig(plot_path)
                plt.close()
                mlflow.log_artifact(plot_path)

        accuracy = {threshold: correct[threshold] / total_queries[threshold] if total_queries[threshold] else 0 for threshold in thresholds}
        average_time = {threshold: np.mean(times[threshold]) for threshold in thresholds}
        
        for threshold in thresholds:
            mlflow.log_metric(f"accuracy_{threshold}_final", accuracy[threshold] * 100)
            mlflow.log_metric(f"average_search_time_{threshold}", average_time[threshold])

    return accuracy, average_time

In [None]:
evaluate_accuracy_and_speed(shuffled_folder, index_service, labels, thresholds=thresholds, k=1)

In [110]:
import os
import time
import mlflow
import matplotlib.pyplot as plt

# Примерная структура, функции get_embedding_from_image, index_service.search_embedding и т. д. предполагаются определенными где-то еще.

def run_query_experiment(query_folder, thresholds, labels, get_embedding_from_image, index_service):
    # Словари для хранения результатов
    correct = {threshold: 0 for threshold in thresholds}
    total_queries = {threshold: 0 for threshold in thresholds}
    times = {threshold: [] for threshold in thresholds}
    accuracy_progress = {threshold: [] for threshold in thresholds}
    
    labels_n = [i for i in enumerate(labels)]

    # Запуск эксперимента в MLflow
    with mlflow.start_run():
        mlflow.log_param("query_folder", query_folder)
        mlflow.log_param("thresholds", thresholds)

        # Для каждого порога точности
        for threshold in thresholds:
            mlflow.start_run(nested=True)  # Вложенный эксперимент для каждого порога
            mlflow.log_param("threshold", threshold)

            # Инициализация переменных для накопленной точности
            accumulated_correct = 0
            accumulated_total = 0

            # Для каждого изображения в папке query_folder
            for image in os.listdir(query_folder):
                image_file = os.path.join(query_folder, image)
                with open(image_file, 'rb') as f:
                    image_bytes = f.read()
                    query_embedding = get_embedding_from_image(image_bytes)

                    start_time = time.time()
                    found_indices = index_service.search_embedding(query_embedding, k=1, threshold=threshold)
                    elapsed_time = time.time() - start_time

                    # Сохраняем время
                    times[threshold].append(elapsed_time)
                    
                    # Проверка правильности
                    if labels_n[found_indices[0]][1] in image_file:
                        accumulated_correct += 1
                    accumulated_total += 1

                    # Накопленная точность
                    accumulated_accuracy = accumulated_correct / accumulated_total
                    accuracy_progress[threshold].append(accumulated_accuracy)

            # Нормализованная точность (весь список, а не одно значение на итерацию)
            normalized_accuracy = [acc / accumulated_total for acc in accuracy_progress[threshold]]

            # Логируем список нормализованной точности
            for iteration in range(len(normalized_accuracy)):
                if iteration == 0:
                    acc = normalized_accuracy[iteration]
                else:
                    acc = normalized_accuracy[iteration] + normalized_accuracy[iteration-1]
                    normalized_accuracy[iteration] = acc
                mlflow.log_metric(f'accuracy', acc, step=iteration)

            # Логируем график точности
            plt.figure()
            plt.plot(normalized_accuracy)
            plt.title(f"Normalized Accuracy Progress (Threshold {threshold})")
            plt.xlabel('Iterations')
            plt.ylabel('Normalized Accuracy')
            plt.savefig(f"accuracy_progress_{threshold}.png")
            mlflow.log_artifact(f"accuracy_progress_{threshold}.png")
            plt.close()

            mlflow.end_run()  # Закрытие вложенного эксперимента

        # Логируем данные по времени
        for threshold, elapsed_times in times.items():
            mlflow.log_metric(f"time_{threshold}", sum(elapsed_times) / len(elapsed_times))

    return correct, total_queries, times, accuracy_progress

In [113]:
result = run_query_experiment(query_folder='Shuffled', thresholds=[0.2, 0.5, 0.7], labels=labels, get_embedding_from_image=get_embedding_from_image, index_service=index_service)

INFO:__main__:Cosine Distances: [[0.2890327]], Indices: [[1]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.43563533]], Indices: [[1]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.37183]], Indices: [[2]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.44898403]], Indices: [[0]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.35572666]], Indices: [[2]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.45631003]], Indices: [[1]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.33735454]], Indices: [[2]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.4313804]], Indices: [[1]]
INFO:__main__:No embeddings found within the threshold.
INFO:__main__:Cosine Distances: [[0.409549]], Indices

🏃 View run fearless-auk-530 at: http://127.0.0.1:5000/#/experiments/727893366955203987/runs/3cd33f0ea8924bf287928b3a99e5e703
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/727893366955203987


INFO:__main__:Cosine Distances: [[0.2890327]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.43563533]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.37183]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.44898403]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.35572666]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.45631003]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.33735454]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.4313804]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.409549]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0

🏃 View run ambitious-wolf-990 at: http://127.0.0.1:5000/#/experiments/727893366955203987/runs/f04b1041bb7f4f4ab6291c0bd695b381
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/727893366955203987


INFO:__main__:Cosine Distances: [[0.2890327]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.43563533]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.37183]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.44898403]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.35572666]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.45631003]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.33735454]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.4313804]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.409549]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0

🏃 View run gifted-crab-926 at: http://127.0.0.1:5000/#/experiments/727893366955203987/runs/957d901ef79f48b6a6840fbf416582c5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/727893366955203987
🏃 View run monumental-snail-668 at: http://127.0.0.1:5000/#/experiments/727893366955203987/runs/62b1886959244d40b26d4e61d277ffb8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/727893366955203987


In [112]:
mlflow.end_run()

🏃 View run secretive-squid-254 at: http://127.0.0.1:5000/#/experiments/727893366955203987/runs/5771295aea2242688f060d225763e8b0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/727893366955203987


RestException: INVALID_PARAMETER_VALUE: The run 5771295aea2242688f060d225763e8b0 must be in 'active' lifecycle_stage.

In [None]:
query_folder = 'Shuffled'
threshold = 0.5
correct = {threshold: 0 for threshold in thresholds}
total_queries = {threshold: 0 for threshold in thresholds}
times = {threshold: [] for threshold in thresholds}
accuracy_progress = {threshold: [] for threshold in thresholds}
labels_n = [i for i in enumerate(labels)]
for image in os.listdir(query_folder):
    image_file = os.path.join(shuffled_folder, image)
    with open(image_file, 'rb') as f:
        image_bytes = f.read()
        query_embedding = get_embedding_from_image(image_bytes)

        start_time = time.time()
        found_indices = index_service.search_embedding(query_embedding, k=1, threshold=0.2)
        elapsed_time = time.time() - start_time
        times[threshold].append(elapsed_time)
        if labels_n[found_indices[0]][1] in image_file:
            correct[threshold] += 1
        total_queries[threshold] += 1

INFO:__main__:Cosine Distances: [[0.2890327]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.43563533]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.37183]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.44898403]], Indices: [[0]]
INFO:__main__:Found 1 nearest neighbors: [0]
INFO:__main__:Cosine Distances: [[0.35572666]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.45631003]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.33735454]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0.4313804]], Indices: [[1]]
INFO:__main__:Found 1 nearest neighbors: [1]
INFO:__main__:Cosine Distances: [[0.409549]], Indices: [[2]]
INFO:__main__:Found 1 nearest neighbors: [2]
INFO:__main__:Cosine Distances: [[0