In [1]:
from enums import CollectionNames, EmbeddingModels
from qdrant_client.models import Filter, FieldCondition, MatchValue

from retrieval_util import (
    get_qdrant_client,
    get_embeddings_function,
    retrieve_with_score_without_filters,
    retrieve_with_score_with_filters,
    retrieve_with_score_with_self_query_retriever,
    create_llm_for_self_query_retriever
)

import os
import json
import logging
from ranx import Qrels, Run, evaluate, compare

os.environ[
    "TOKENIZERS_PARALLELISM"] = "false"  # Verhindert Fehler beim Erzeugen von Embeddings (Tritt sporadisch auf, konnte ich nicht gezielt reproduzieren)
logging.basicConfig(level=logging.INFO)

In [2]:
qdrant_url = "http://localhost:6333"

In [3]:
# Infloat-Modell für die Embeddings der Fragen
embeddings = get_embeddings_function(model_name=EmbeddingModels.INFLOAT.value)

  from .autonotebook import tqdm as notebook_tqdm
INFO:datasets:PyTorch version 2.6.0 available.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: intfloat/multilingual-e5-large


In [4]:
token_client = get_qdrant_client(embeddings=embeddings, collection_name=CollectionNames.INFLOAT_TOKEN_BASED.value,
                                 qdrant_url=qdrant_url)

INFO:httpx:HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks "HTTP/1.1 200 OK"


In [5]:
recursive_client = get_qdrant_client(embeddings=embeddings, collection_name=CollectionNames.INFLOAT_RECURSIVE.value,
                                     qdrant_url=qdrant_url)

INFO:httpx:HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks "HTTP/1.1 200 OK"


In [6]:
fragen = {
    1: "Wann kann ich mich für den Master in Elektrotechnik in Meschede einschreiben?",
    2: "Welche Literatur ist relevant für das Modul Mathematik im Bachelorstudiengang Elektrotechnik in Soest?",
    3: "Welche Fachgebiete sind an der FH vertreten?",
    4: "Ist eine Beurlaubung während des Studiums möglich?",
    5: "Welche Studienmodelle werden angeboten?",
    6: "Wie viele Seiten muss ich in meiner Bachelorarbeit in Wirtschaftsinformatik in Hagen schreiben?",
    7: "Wie sind Portfolioprüfungen im Bachelorstudiengang Elektrotechnik in Soest aufgebaut?",
    8: "Was muss ich beachten, wenn ich bei einer Prüfung krank bin?",
    9: "Was sind die Inhalte im Modul IT-Sicherheit im Bachelorstudiengang Elektrotechnik in Hagen?",
    10: "Wie lange dauern die Klausuren im Bachelorstudiengang Wirtschaftsinformatik in Hagen?"
}

In [7]:
def save_results_to_file(file_path, results_dict):
    try:
        with open(file_path, "w") as file:
            json.dump(results_dict, file, indent=4)
        print(f"Ergebnisse wurden erfolgreich in {file_path} gespeichert.")
    except Exception as e:
        print(f"Fehler beim Speichern der Datei: {e}")

# Abfragen der Dokumente ohne einen Filter zu nutzen

Beginnt mit der Collection, welche mit dem infloat-Modell und dem SentenceTransformersTokenTextSplitter (Chunk-Größe=500Token, Overlap=50Token) erstellt wurde.

In [64]:
def retrieve_all_scores_no_filters(client, fragen_dict):
    ergebnisse = {}

    for q_id, frage in fragen_dict.items():
        docs = retrieve_with_score_without_filters(client=client, max_documents=10, query=frage)

        # Dictionary für die Dokumente mit ihren Scores
        doc_scores = {f"d_{doc.metadata["id"]}": score for doc, score in docs}

        # Speichern im Hauptdictionary unter q_x
        ergebnisse[f"q_{q_id}"] = doc_scores

    return ergebnisse

In [9]:
run_without_filters = retrieve_all_scores_no_filters(client=token_client, fragen_dict=fragen)

INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HT

In [41]:
print(run_without_filters)
print(json.dumps(run_without_filters, indent=4))
run_without_filters_path = "./token-based-chunks/run_no_filters.json"
save_results_to_file(run_without_filters_path, run_without_filters)

{'q_1': {'d_1021': 0.8931459, 'd_1020': 0.8903258, 'd_787': 0.8899049, 'd_788': 0.88495636, 'd_1024': 0.8829783, 'd_793': 0.8752167, 'd_1023': 0.8701733, 'd_1026': 0.8666712, 'd_1055': 0.86644125, 'd_789': 0.86356777}, 'q_2': {'d_1557': 0.8314544, 'd_1487': 0.8202493, 'd_1556': 0.81963754, 'd_2': 0.80611885, 'd_1558': 0.80551267, 'd_540': 0.7993765, 'd_542': 0.7968593, 'd_6': 0.79636025, 'd_543': 0.7953263, 'd_539': 0.79418266}, 'q_3': {'d_240': 0.85005033, 'd_1025': 0.8214303, 'd_1481': 0.82099384, 'd_265': 0.8202428, 'd_1183': 0.8172393, 'd_793': 0.8165607, 'd_1561': 0.8115437, 'd_61': 0.8095254, 'd_563': 0.80919445, 'd_31': 0.80904526}, 'q_4': {'d_1499': 0.8608073, 'd_1500': 0.8334881, 'd_1555': 0.8237432, 'd_1183': 0.82201755, 'd_1554': 0.82075715, 'd_240': 0.81968564, 'd_1498': 0.8190165, 'd_1502': 0.8187066, 'd_1025': 0.81730103, 'd_1553': 0.8160151}, 'q_5': {'d_1482': 0.85370076, 'd_1486': 0.8361856, 'd_1480': 0.83425295, 'd_1483': 0.8339882, 'd_1431': 0.81845486, 'd_268': 0.815

In [11]:
qrels_dict = {
    "q_1": {
        "d_1021": 10,
        "d_1022": 1
    },
    "q_2": {
        "d_1301": 10
    },
    "q_3": {
        "d_1480": 10,
        "d_1481": 10
    },
    "q_4": {
        "d_1498": 10,
        "d_1499": 10,
        "d_1500": 1
    },
    "q_5": {
        "d_1482": 10,
        "d_1483": 10,
        "d_1484": 10,
        "d_1485": 10
    },
    "q_6": {
        "d_18": 10
    },
    "q_7": {
        "d_1192": 10,
        "d_1193": 8
    },
    "q_8": {
        "d_1553": 10,
        "d_1554": 10,
        "d_1555": 10
    },
    "q_9": {
        "d_704": 10
    },
    "q_10": {
        "d_14": 10,
        "d_15": 10
    }
}

In [42]:
qrels_path = "./token-based-chunks/qrels.json"
save_results_to_file(qrels_path, qrels_dict)

Ergebnisse wurden erfolgreich in ./token-based-chunks/qrels.json gespeichert.


Ich nutze die Metriken, die beim Retrieval im MTEB auch genutzt werden:
1. nDCG@k
2. MRR@k
3. MAP@k,
4. precision@k
5. recall@k

Hauptmetrik im MTEB war immer nDCG@10 für Evaluation der Retrieval-Fähigkeiten eines Embedding Modells

Zusätzlich nehme ich die Hits@k und HitRate@k auf.

In [13]:
qrels = Qrels(qrels_dict)

In [14]:
run = Run(run_without_filters)

In [15]:
score_dict_10 = evaluate(qrels, run,
                         ["ndcg@10", "mrr@10", "map@10", "precision@10", "recall@10", "hits@10", "hit_rate@10"])

In [43]:
print(score_dict_10)
scores_no_filters_path = "./token-based-chunks/scores_no_filters.json"
save_results_to_file(scores_no_filters_path, score_dict_10)

{'ndcg@10': np.float64(0.4597542719325121), 'mrr@10': np.float64(0.5333333333333333), 'map@10': np.float64(0.3801190476190476), 'precision@10': np.float64(0.14), 'recall@10': np.float64(0.45), 'hits@10': np.float64(1.4), 'hit_rate@10': np.float64(0.6)}
Ergebnisse wurden erfolgreich in ./token-based-chunks/scores_no_filters.json gespeichert.


# Abfrage der Dokumente mit idealen Filtern (händisch angelegt in dem Wissen, wo relevante Informationen stehen)

Beginnt mit der Collection, welche mit dem infloat-Modell und dem SentenceTransformersTokenTextSplitter (Chunk-Größe=500Token, Overlap=50Token) erstellt wurde.

In [17]:
def create_field_condition(field, value):
    return FieldCondition(
        key=f"metadata.{field}",
        match=MatchValue(value=value)
    )


filters = {
    1: Filter(must=[
        create_field_condition("studiengang", "Elektrotechnik"),
        create_field_condition("standort", "Meschede"),
        create_field_condition("abschluss", "Master")
    ]),
    2: Filter(must=[
        create_field_condition("standort", "Soest"),
        create_field_condition("studiengang", "Elektrotechnik"),
        create_field_condition("abschluss", "Soest")
    ]),
    3: Filter(must=[
        create_field_condition("studiengang", "Alle"),
        create_field_condition("standort", "Alle"),
        create_field_condition("abschluss", "Alle")
    ]),
    4: Filter(must=[
        create_field_condition("studiengang", "Alle"),
        create_field_condition("standort", "Alle"),
        create_field_condition("abschluss", "Alle")
    ]),
    5: Filter(must=[
        create_field_condition("studiengang", "Alle"),
        create_field_condition("standort", "Alle"),
        create_field_condition("abschluss", "Alle")
    ]),
    6: Filter(must=[
        create_field_condition("studiengang", "Wirtschaftsinformatik"),
        create_field_condition("standort", "Hagen"),
        create_field_condition("abschluss", "Bachelor")
    ]),
    7: Filter(must=[
        create_field_condition("studiengang", "Elektrotechnik"),
        create_field_condition("standort", "Soest"),
        create_field_condition("abschluss", "Bachelor")
    ]),
    8: Filter(must=[
        create_field_condition("studiengang", "Alle"),
        create_field_condition("standort", "Alle"),
        create_field_condition("abschluss", "Alle")
    ]),
    9: Filter(must=[
        create_field_condition("studiengang", "Elektrotechnik"),
        create_field_condition("standort", "Hagen"),
        create_field_condition("abschluss", "Bachelor")
    ]),
    10: Filter(must=[
        create_field_condition("studiengang", "Wirtschaftsinformatik"),
        create_field_condition("standort", "Hagen"),
        create_field_condition("abschluss", "Bachelor")
    ]),
}

In [18]:
def retrieve_all_scores_optimal_filters(client, fragen_dict, filter_dict):
    ergebnisse = {}

    for q_id, frage in fragen_dict.items():
        docs = retrieve_with_score_with_filters(client=client, max_documents=10, filters=filter_dict[q_id], query=frage)

        # Dictionary für die Dokumente mit ihren Scores
        doc_scores = {f"d_{doc.metadata["id"]}": score for doc, score in docs}

        # Speichern im Hauptdictionary unter q_x
        ergebnisse[f"q_{q_id}"] = doc_scores

    return ergebnisse

In [19]:
run_with_optimal_filters = retrieve_all_scores_optimal_filters(client=token_client, fragen_dict=fragen,
                                                               filter_dict=filters)

INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HT

In [44]:
run_with_filter = Run(run_with_optimal_filters)
run_with_filter_path = "./token-based-chunks/run_with_optimal_filters.json"
save_results_to_file(run_with_filter_path, run_with_optimal_filters)

Ergebnisse wurden erfolgreich in ./token-based-chunks/run_with_optimal_filters.json gespeichert.


In [26]:
score_optimal_filters_dict_10 = evaluate(qrels, run_with_filter,
                                         ["ndcg@10", "mrr@10", "map@10", "precision@10", "recall@10", "hits@10",
                                          "hit_rate@10"])

In [45]:
score_with_optimal_filters_path = "./token-based-chunks/scores_with_optimal_filters.json"
save_results_to_file(score_with_optimal_filters_path, score_optimal_filters_dict_10)

Ergebnisse wurden erfolgreich in ./token-based-chunks/scores_with_optimal_filters.json gespeichert.


# Abfrage der Dokumente mit dem SelfQueryRetriever aus LangChain

Beginnt mit der Collection, welche mit dem infloat-Modell und dem SentenceTransformersTokenTextSplitter (Chunk-Größe=500Token, Overlap=50Token) erstellt wurde.

In [29]:
llm = create_llm_for_self_query_retriever()

In [30]:
search_kwargs = {"k": 10}

In [73]:
def retrieve_all_scores_self_query_retriever(client, llm, search_kwargs, fragen_dict):
    ergebnisse = {}

    for q_id, frage in fragen_dict.items():
        docs = retrieve_with_score_with_self_query_retriever(
            client=client, llm=llm, search_kwargs=search_kwargs, query=frage
        )

        # Dictionary für die Dokumente mit ihren Scores
        doc_scores = {f"d_{doc.metadata["id"]}": score for doc, score in docs}

        # Speichern im Hauptdictionary unter q_x
        ergebnisse[f"q_{q_id}"] = doc_scores

    return ergebnisse

In [32]:
run_with_self_query_retriever = retrieve_all_scores_self_query_retriever(client=token_client, llm=llm, search_kwargs=search_kwargs,
                                                    fragen_dict=fragen)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:langchain.retrievers.self_query.base:Generated Query: query='Master in Elektrotechnik' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='studiengang', value='Elektrotechnik'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='abschluss', value='Master'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='standort', value='Meschede')]) limit=None
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_token_based_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:langchain.retrievers.self_query.base:Generated Query: query='Standort Hagen' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='standort', value='Hagen') limit=None
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingu

In [46]:
run_with_self_query_retriever_path = "./token-based-chunks/run_with_self_query_retriever.json"
save_results_to_file(run_with_self_query_retriever_path, run_with_self_query_retriever)

Ergebnisse wurden erfolgreich in ./token-based-chunks/run_with_self_query_retriever.json gespeichert.


In [34]:
run_self_query = Run(run_with_self_query_retriever)

In [35]:
score_self_query_dict_10 = evaluate(qrels, run_self_query,
                                    ["ndcg@10", "mrr@10", "map@10", "precision@10", "recall@10", "hits@10",
                                     "hit_rate@10"])

In [47]:
scores_with_self_query_retriever_path = "./token-based-chunks/scores_with_self_query_retriever.json"
save_results_to_file(scores_with_self_query_retriever_path, score_self_query_dict_10)

Ergebnisse wurden erfolgreich in ./token-based-chunks/scores_with_self_query_retriever.json gespeichert.


In [38]:
report = compare(
    qrels=qrels,
    runs=[run, run_with_filter, run_self_query],
    metrics=["ndcg@10", "mrr@10", "map@10", "precision@10", "recall@10", "hits@10", "hit_rate@10"],
    max_p=0.01
)

In [39]:
print(report)

#    Model      NDCG@10    MRR@10    MAP@10    P@10    Recall@10    Hits@10    Hit Rate@10
---  -------  ---------  --------  --------  ------  -----------  ---------  -------------
a    run_1        0.46      0.533     0.38     0.14        0.45         1.4            0.6
b    run_2        0.53      0.6       0.455    0.17        0.575        1.7            0.6
c    run_3        0.447     0.533     0.368    0.14        0.475        1.4            0.6


# Ab hier mit der Collection mit den Recursive-Chunks

# Abfrage ohne Filter

In [57]:
qrels_dict = {
    "q_1": {
        "d_1371": 10,
        "d_1370": 1
    },
    "q_2": {
        "d_1866": 10
    },
    "q_3": {
        "d_2292": 10,
        "d_2293": 2
    },
    "q_4": {
        "d_2313": 10,
        "d_2314": 10,
        "d_2315": 2
    },
    "q_5": {
        "d_2295": 10,
        "d_2296": 10,
        "d_2297": 10,
        "d_2298": 10
    },
    "q_6": {
        "d_25": 10
    },
    "q_7": {
        "d_1586": 10
    },
    "q_8": {
        "d_2388": 10,
        "d_2389": 10,
        "d_2390": 10
    },
    "q_9": {
        "d_946": 10
    },
    "q_10": {
        "d_19": 10
    }
}

In [59]:
qrels = Qrels(qrels_dict)

In [60]:
qrels_path = "./recursive-chunks/qrels.json"
save_results_to_file(qrels_path, qrels_dict)

Ergebnisse wurden erfolgreich in ./recursive-chunks/qrels.json gespeichert.


In [78]:
query_without_filters_path = "./recursive-chunks/run_no_filters.json"
query_no_filters = retrieve_all_scores_no_filters(client=recursive_client, fragen_dict=fragen)
save_results_to_file(query_without_filters_path, query_no_filters)

INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"

Ergebnisse wurden erfolgreich in ./recursive-chunks/run_no_filters.json gespeichert.


In [66]:
run = Run(query_no_filters)

In [67]:
scores_no_filters_path = "./recursive-chunks/scores_no_filters.json"
score_dict_10 = evaluate(qrels, run,
                         ["ndcg@10", "mrr@10", "map@10", "precision@10", "recall@10", "hits@10", "hit_rate@10"])
save_results_to_file(scores_no_filters_path, score_dict_10)

Ergebnisse wurden erfolgreich in ./recursive-chunks/scores_no_filters.json gespeichert.


In [68]:
query_with_optimal_filters_path = "./recursive-chunks/run_with_optimal_filters.json"
query_optimal_filters = retrieve_all_scores_optimal_filters(client=recursive_client, fragen_dict=fragen, filter_dict=filters)
save_results_to_file(query_with_optimal_filters_path, query_optimal_filters)

INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"

Ergebnisse wurden erfolgreich in ./recursive-chunks/run_with_optimal_filters.json gespeichert.


In [69]:
run_optimal_filters = Run(query_optimal_filters)

In [70]:
scores_optimal_filters_path = "./recursive-chunks/scores_with_optimal_filters.json"
scores_with_optimal_filters_dict = evaluate(qrels, run_optimal_filters, ["ndcg@10", "mrr@10", "map@10", "precision@10", "recall@10", "hits@10", "hit_rate@10"])
save_results_to_file(scores_optimal_filters_path, scores_with_optimal_filters_dict)

Ergebnisse wurden erfolgreich in ./recursive-chunks/scores_with_optimal_filters.json gespeichert.


In [74]:
query_with_self_query_retriever_path = "./recursive-chunks/run_with_self_query_retriever.json"
query_self_query_retriever = retrieve_all_scores_self_query_retriever(client=recursive_client, llm=llm, search_kwargs=search_kwargs, fragen_dict=fragen)
save_results_to_file(query_with_self_query_retriever_path, query_self_query_retriever)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:langchain.retrievers.self_query.base:Generated Query: query='Master in Elektrotechnik' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='studiengang', value='Elektrotechnik'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='abschluss', value='Master'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='standort', value='Meschede')]) limit=None
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual-e5-large_recursive_chunks/points/query "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:langchain.retrievers.self_query.base:Generated Query: query='Standort Hagen' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='standort', value='Hagen') limit=None
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/intfloat_multilingual

Ergebnisse wurden erfolgreich in ./recursive-chunks/run_with_self_query_retriever.json gespeichert.


In [76]:
run_self_query_retriever = Run(query_self_query_retriever)

In [77]:
scores_self_query_retriever_path = "./recursive-chunks/scores_with_self_query_retriever.json"
scores_with_self_query_retriever_dict = evaluate(qrels, run_self_query_retriever, ["ndcg@10", "mrr@10", "map@10", "precision@10", "recall@10", "hits@10", "hit_rate@10"])
save_results_to_file(scores_self_query_retriever_path, scores_with_self_query_retriever_dict)

Ergebnisse wurden erfolgreich in ./recursive-chunks/scores_with_self_query_retriever.json gespeichert.
