# Qdrant Vector Database Integration

Dieses Notebook demonstriert die Integration mit Qdrant für die Speicherung und Suche von Embeddings.

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
import numpy as np
import json
from pathlib import Path
import yaml
import uuid

# Load the connection settings from the YAML config file.
# The encoding is passed explicitly so that decoding does not depend on the
# platform's locale default.
with open('../config/qdrant_config.yaml', 'r', encoding='utf-8') as f:
    qdrant_config = yaml.safe_load(f)

# Create the Qdrant client from the configured host and port.
client = QdrantClient(
    host=qdrant_config['host'],
    port=qdrant_config['port']
)

# Name of the collection that the functions below read from and write to.
COLLECTION_NAME = qdrant_config['collection_name']

## Qdrant Collection erstellen

In [None]:
def create_collection(embedding_size=512):
    """Recreate the collection named by ``COLLECTION_NAME``.

    Calls ``client.recreate_collection`` with a vector configuration that
    uses cosine distance.

    Args:
        embedding_size: Dimension of the vectors the collection will hold.

    Returns:
        None. A success or failure message is printed; any exception raised
        while building the configuration or talking to Qdrant is reported
        rather than propagated.
    """
    try:
        vector_settings = VectorParams(
            size=embedding_size,
            distance=Distance.COSINE,
        )
        client.recreate_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=vector_settings,
        )
        print(f"Collection '{COLLECTION_NAME}' erfolgreich erstellt.")
    except Exception as error:
        print(f"Fehler beim Erstellen der Collection: {error}")

# Collection erstellen (auskommentiert für Demo)
# create_collection()

## Embeddings in Qdrant hochladen

In [None]:
def upload_embeddings_to_qdrant(embeddings_file='../data/processed/embeddings.json', batch_size=256):
    """Upload the embeddings stored in a JSON file to the Qdrant collection.

    The file must contain a JSON array of objects, each providing the keys
    ``combined_embedding``, ``image_path``, ``category`` and ``split``.
    Every entry becomes one point whose id is a freshly generated random
    UUID, so a repeated run does not reuse the ids of an earlier upload.
    The entry's position in the file is stored in the payload as ``index``.

    Points are sent in slices of ``batch_size`` so that no single request
    carries the whole dataset. An empty file results in no request at all.

    Args:
        embeddings_file: Path to the JSON file with the embeddings.
        batch_size: Maximum number of points per upsert request.

    Returns:
        None. A success or failure message is printed. Upload errors are
        reported rather than propagated; batches sent before the failing
        one stay in the collection.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If an entry lacks one of the required keys.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(embeddings_file, 'r', encoding='utf-8') as f:
        embeddings_data = json.load(f)

    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            vector=data['combined_embedding'],
            payload={
                'image_path': data['image_path'],
                'category': data['category'],
                'split': data['split'],
                'index': i,
            },
        )
        for i, data in enumerate(embeddings_data)
    ]

    try:
        # One request per slice keeps each payload bounded in size.
        for start in range(0, len(points), batch_size):
            client.upsert(
                collection_name=COLLECTION_NAME,
                points=points[start:start + batch_size],
            )
        print(f"{len(points)} Embeddings erfolgreich hochgeladen.")
    except Exception as e:
        print(f"Fehler beim Hochladen: {e}")

# Embeddings hochladen (auskommentiert für Demo)
# upload_embeddings_to_qdrant()

## Ähnlichkeitssuche durchführen

In [None]:
def search_similar_images(query_embedding, top_k=5):
    """Look up the stored images closest to a query embedding.

    Args:
        query_embedding: Vector passed to ``client.search`` as the query.
        top_k: Maximum number of hits to request.

    Returns:
        A list of dicts with the keys ``score``, ``image_path``,
        ``category`` and ``split``, one per hit, in the order returned by
        the client. If anything goes wrong (including a payload that lacks
        one of these keys), the error is printed and an empty list is
        returned.
    """
    try:
        matches = client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_embedding,
            limit=top_k,
        )
        return [
            {
                'score': match.score,
                'image_path': match.payload['image_path'],
                'category': match.payload['category'],
                'split': match.payload['split'],
            }
            for match in matches
        ]
    except Exception as error:
        print(f"Fehler bei der Suche: {error}")
        return []

# Beispiel für Ähnlichkeitssuche
def demo_similarity_search():
    """Run a sample similarity search and print the hits.

    A random 512-dimensional vector stands in for a real embedding. The
    three best matches returned by ``search_similar_images`` are printed
    as a numbered list with path, score and category.
    """
    # Random placeholder vector; there is no real query image in this demo.
    query_vector = np.random.rand(512).tolist()
    matches = search_similar_images(query_vector, top_k=3)

    print("Top 3 ähnliche Bilder:")
    for rank, match in enumerate(matches, start=1):
        path = match['image_path']
        score = match['score']
        category = match['category']
        print(f"{rank}. {path} (Score: {score:.3f}, Kategorie: {category})")

# Demo ausführen (auskommentiert)
# demo_similarity_search()

## Collection-Statistiken

In [None]:
def get_collection_info():
    """Print basic statistics about the collection named by ``COLLECTION_NAME``.

    Prints the collection name, the point count, and the size and distance
    of the vector configuration as reported by ``client.get_collection``.

    Returns:
        None. If the lookup or any attribute access fails, the error is
        printed instead of being propagated.
    """
    try:
        details = client.get_collection(COLLECTION_NAME)
        print(f"Collection: {COLLECTION_NAME}")
        print(f"Anzahl Vektoren: {details.points_count}")
        # Resolved only after the count is printed, so a failure in this
        # lookup still leaves the first two lines on screen.
        vector_params = details.config.params.vectors
        print(f"Vektor-Dimension: {vector_params.size}")
        print(f"Distanz-Metrik: {vector_params.distance}")
    except Exception as error:
        print(f"Collection nicht gefunden oder Fehler: {error}")

# Collection-Info anzeigen (auskommentiert)
# get_collection_info()