This notebook was built to create the "HybridRAG" baseline described in section 4.2 of the Master Thesis.

In [None]:
import pickle, json
from neo4j import GraphDatabase
import config.EnvLoader as el

URI = "neo4j://localhost"
AUTH = ("neo4j", el.NEO4J_PWD)

# Open the file in binary mode 
with open('embedded_questions.pkl', 'rb') as file: 
	q_diz = pickle.load(file)

with open('Dataset.json') as f:
    ds = json.load(f)

In [None]:
results = {}
for obs in ds:
    # Extract chunks from graph traverser
    found_chunks = []
    for key, value in obs["observations"].items():
        if value.get("graph_traverser", 0) == 1:
            found_chunks.append(key)
    found_chunks = [int(x) for x in found_chunks]
    # Se il graph traverser ne ha trovati più di 3, prendiamo solo quelli con similarity più alta
    if len(found_chunks) > 3:
        with GraphDatabase.driver(URI, auth=AUTH) as driver:
            retrieved_chunks, _, _ = driver.execute_query(
                """WITH $int_keys AS list
                CALL db.index.vector.queryNodes("vector", 1000, $embedding)
                YIELD node, score
                WHERE ID(node) IN list
                RETURN ID(node), score
                ORDER BY score DESC
                LIMIT 3""",
                int_keys=found_chunks, embedding=obs["embedding"]
            )
        found_chunks = [x["ID(node)"] for x in retrieved_chunks]
    # Estrae i chunk mancanti, senza riprendere quelli già estratti
    with GraphDatabase.driver(URI, auth=AUTH) as driver:
            retrieved_chunks, _, _ = driver.execute_query(
                """WITH $int_keys AS list
                CALL db.index.vector.queryNodes("vector", 100, $embedding)
                YIELD node, score
                WHERE NOT ID(node) IN list
                RETURN ID(node), score
                ORDER BY score DESC
                LIMIT $n""",
                int_keys=found_chunks, embedding=obs["embedding"], n = 5 - len(found_chunks)
            )
    found_chunks += [x["ID(node)"] for x in retrieved_chunks]
    results[obs["question"]] = found_chunks

In [80]:
max([len(v) for k,v in results.items()])

5

In [81]:
with open('concatenated_results.json', 'w') as f:
    json.dump(results, f)

In [None]:
precision_lst = []
recall_lst = []
detection_lst = []

for q, retrieved_chunks in results.items():
    # Get max answers
    with GraphDatabase.driver(URI, auth=AUTH) as driver:
        answers, _, _ = driver.execute_query(
            """MATCH (a:Chunk {is_answer_of: $q})
            RETURN ID(a)""",
            q=q
        )
    max_answers = len(answers)
    retrieved_answers = len([x for x in answers if x["ID(a)"] in retrieved_chunks])
    precision_lst.append(retrieved_answers/5)
    recall_lst.append(retrieved_answers/max_answers)
    detection_lst.append(1 if retrieved_answers > 0 else 0)

In [85]:
length = len(results)
print(f"Average Precision: {round(sum(precision_lst)/length,3)}")
print(f"Average Recall: {round(sum(recall_lst)/length,3)}")
print(f"Average Detection: {round(sum(detection_lst)/length,3)}")

Average Precision: 0.188
Average Recall: 0.695
Average Detection: 0.776


## MultiHop dataset

In [91]:
import json

with open('Dataset_multihop.json') as f:
    ds = json.load(f)

In [96]:
def compute_results(param):
    n_chunks = int(param/2)
    results = {}
    for obs in ds:
        found_chunks = []
        # Extract chunks from graph traverser
        for key, value in obs["observations"].items():
            if value.get("graph_traverser", 0) == 1:
                found_chunks.append(key)
        found_chunks = [int(x) for x in found_chunks]
        # Se il graph traverser ne ha trovati più di 3, prendiamo solo quelli con similarity più alta
        if len(found_chunks) > 3:
            with GraphDatabase.driver(URI, auth=AUTH) as driver:
                retrieved_chunks, _, _ = driver.execute_query(
                    """WITH $int_keys AS list
                    CALL db.index.vector.queryNodes("vector", 1000, $embedding)
                    YIELD node, score
                    WHERE ID(node) IN list
                    RETURN ID(node), score
                    ORDER BY score DESC
                    LIMIT $limit""",
                    int_keys=found_chunks, embedding=obs["embedding"], limit = n_chunks
                )
            found_chunks = [x["ID(node)"] for x in retrieved_chunks]
        # Estrae i chunk mancanti, senza riprendere quelli già estratti
        with GraphDatabase.driver(URI, auth=AUTH) as driver:
                retrieved_chunks, _, _ = driver.execute_query(
                    """WITH $int_keys AS list
                    CALL db.index.vector.queryNodes("vector", 100, $embedding)
                    YIELD node, score
                    WHERE NOT ID(node) IN list
                    RETURN ID(node), score
                    ORDER BY score DESC
                    LIMIT $n""",
                    int_keys=found_chunks, embedding=obs["embedding"], n = param - len(found_chunks)
                )
        found_chunks += [x["ID(node)"] for x in retrieved_chunks]
        results[obs["question"]] = found_chunks
    return results

In [None]:
final_results = {}
for x in [4,6,8]:
    results = compute_results(x)
    precision_lst = []
    recall_lst = []
    detection_lst = []

    for obs in ds:
        retrieved_chunks = results[obs["question"]]
        # Get max answers
        answers = [int(k) for k, v in obs["observations"].items() if v.get("is_answer", 0) == 1]
        max_answers = len(answers)
        retrieved_answers = len([x for x in answers if x in retrieved_chunks])
        precision_lst.append(retrieved_answers/6)
        recall_lst.append(retrieved_answers/max_answers)
        detection_lst.append(1 if retrieved_answers == max_answers else 0)
    length = len(results)
    final_results[x] = [round(sum(precision_lst)/length,3), round(sum(recall_lst)/length,3), round(sum(detection_lst)/length,3)]

In [99]:
for x in [4,6,8]:
    print(f"Results for {str(x)} chunks.")
    print(f"Average Precision: {final_results[x][0]}")
    print(f"Average Recall: {final_results[x][1]}")
    print(f"Average Detection: {final_results[x][2]}")
    print("")

Results for 4 chunks.
Average Precision: 0.125
Average Recall: 0.381
Average Detection: 0.11

Results for 6 chunks.
Average Precision: 0.143
Average Recall: 0.432
Average Detection: 0.17

Results for 8 chunks.
Average Precision: 0.155
Average Recall: 0.467
Average Detection: 0.2

