In [None]:
import os
import sys
import time
from ast import literal_eval
from typing import Any, Dict, List, Union


sys.path.append(os.path.join(os.getcwd(), ".."))


import pandas as pd
from pprint import pprint
from dotenv import load_dotenv
from langchain_core.embeddings import Embeddings
from langchain_neo4j import Neo4jGraph
from langchain_huggingface import HuggingFaceEmbeddings
from ragas.dataset_schema import EvaluationDataset, EvaluationResult
from src.grag import run_hybrid_cypher_workflow, evaluate_retriever


load_dotenv()

True

# **Preparation**

In [None]:
# Output directory for per-experiment result files and location of the test set.
OUTPUT_PATH = os.path.join("results", "hybrid_cypher_retriever")
DATASET_PATH = os.path.join("data", "Dataset Testing.xlsx")

os.makedirs(OUTPUT_PATH, exist_ok=True)

df: pd.DataFrame = pd.read_excel(DATASET_PATH)

# Keep only rows whose `is_valid` cell is truthy. `reference_contexts_1` is
# parsed with literal_eval, so each cell must hold a Python literal
# (presumably a list of context strings — TODO confirm against the sheet).
dataset: List[Dict[str, Any]] = [
    {
        "user_input": str(row["user_input"]),
        "reference": str(row["reference"]),
        "reference_contexts": literal_eval(row["reference_contexts_1"]),
    }
    for _row_label, row in df.iterrows()
    if row["is_valid"]
]

evaluation_dataset = EvaluationDataset.from_list(dataset)

print(len(evaluation_dataset))

100


# **Evaluation**

In [None]:
def save_experiment_result(
    evaluation_result: EvaluationResult,
    experiment_name: str
) -> None:
    """Persist one experiment's evaluation result as a JSON file.

    The result is converted with ``to_pandas()`` and written in
    ``orient="records"`` form to ``<OUTPUT_PATH>/<experiment_name>.json``.
    ``OUTPUT_PATH`` is the notebook-level constant; this function does not
    create the directory itself.

    Args:
        evaluation_result: Result object to save.
        experiment_name: File stem of the output file. It is used verbatim,
            so the caller must pass a name that is valid as a file name.
    """
    result_frame = evaluation_result.to_pandas()
    target_file = os.path.join(OUTPUT_PATH, f"{experiment_name}.json")
    result_frame.to_json(target_file, orient="records")


def run_test_case(
    neo4j_graph_name: str,
    neo4j_graph: Neo4jGraph,
    embedding_model_name: str,
    embedding_model: Embeddings,
    neo4j_graph_config: Dict[str, str],
    k_values: Union[range, List[int]] = range(1, 16),
) -> List[Dict[str, Any]]:
    """Run and evaluate the hybrid Cypher retriever once per value of ``k``.

    For every ``k`` in ``k_values`` the retrieval workflow is executed with
    ``top_k_initial_article=k``, the retrieved dataset is scored with
    ``evaluate_retriever``, and the scores are written to disk through
    ``save_experiment_result`` before the next ``k`` starts.

    Note:
        The workflow always runs on the notebook-level ``evaluation_dataset``
        variable, so the preparation cell must have been executed first.

    Args:
        neo4j_graph_name: Database label used in the experiment name and in
            the returned ``args``.
        neo4j_graph: Graph connection handed to the retrieval workflow.
        embedding_model_name: Model label used in the experiment name and in
            the returned ``args``.
        embedding_model: Embedding model handed to the retrieval workflow.
        neo4j_graph_config: Configuration mapping forwarded to the workflow
            as ``neo4j_config``.
        k_values: Values of ``k`` to sweep, in order. Defaults to 1 through
            15, which is the sweep this function previously hard-coded.

    Returns:
        One dict per ``k`` with the keys ``experiment_name``, ``args``
        (``database``, ``embedding_model``, ``k``) and ``evaluation_result``.
    """
    test_result: List[Dict[str, Any]] = []

    for k in k_values:
        # Model names such as "org/model" contain "/", which would otherwise
        # be treated as a directory separator when the result file is saved.
        experiment_name = f"{neo4j_graph_name}_{embedding_model_name}_init-{k}".replace(
            "/", "-"
        )

        # The workflow returns a pair; only the first element (the dataset to
        # evaluate) is needed here.
        evaluation_dataset_hybrid_cypher, _ = run_hybrid_cypher_workflow(
            evaluation_dataset,
            experiment_name,
            embedder_model=embedding_model,
            neo4j_graph=neo4j_graph,
            neo4j_config=neo4j_graph_config,
            top_k_initial_article=k,
        )

        evaluation_result = evaluate_retriever(
            evaluation_dataset_hybrid_cypher,
            experiment_name=experiment_name,
        )

        # Checkpoint: persist this k's scores right away so that a failure at
        # a later k does not lose the results computed so far.
        save_experiment_result(evaluation_result, experiment_name=experiment_name)

        test_result.append(
            {
                "experiment_name": experiment_name,
                "args": {
                    "database": neo4j_graph_name,
                    "embedding_model": embedding_model_name,
                    "k": k,
                },
                "evaluation_result": evaluation_result,
            }
        )

    return test_result

In [4]:
# Neo4j connection settings are read from the process environment
# (load_dotenv() is called in the imports cell); a missing variable raises
# KeyError.
URI, USERNAME, PASSWORD = (
    os.environ["NEO4J_HOST"],
    os.environ["NEO4J_USERNAME"],
    os.environ["NEO4J_PASSWORD"],
)

# Retriever configuration shared by all test cases. DATABASE_NAME starts as
# None and is filled in by each test case before it runs.
neo4j_config = dict(
    DATABASE_NAME=None,
    ARTICLE_VECTOR_INDEX_NAME="effective_vector_index",
    ARTICLE_FULLTEXT_INDEX_NAME="effective_fulltext_index",
    DEFINITION_VECTOR_INDEX_NAME="definition_vector_index",
    DEFINITION_FULLTEXT_INDEX_NAME="definition_fulltext_index",
)

## **Test Case 1**

- Database: `db-small`
- Embedding model: `all-MiniLM-L6-v2`

In [5]:
# Test case 1: database and embedding model under test.
DATABASE_NAME = "db-small"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Point the shared retriever config at this test case's database.
neo4j_config.update(DATABASE_NAME=DATABASE_NAME)

neo4j_graph = Neo4jGraph(
    url=URI,
    username=USERNAME,
    password=PASSWORD,
    database=DATABASE_NAME,
    enhanced_schema=True,
)
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [6]:
# Sweep k for test case 1; every argument is passed by keyword so the pairing
# of names and objects is explicit.
test_result_1 = run_test_case(
    neo4j_graph_name=DATABASE_NAME,
    neo4j_graph=neo4j_graph,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    embedding_model=embedding_model,
    neo4j_graph_config=neo4j_config,
)

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-1`: 100%|██████████| 100/100 [00:08<00:00, 11.65it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-2`: 100%|██████████| 100/100 [00:07<00:00, 13.51it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-3`: 100%|██████████| 100/100 [00:07<00:00, 14.14it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-4`: 100%|██████████| 100/100 [00:07<00:00, 13.62it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-5`: 100%|██████████| 100/100 [00:07<00:00, 14.11it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-6`: 100%|██████████| 100/100 [00:07<00:00, 14.13it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-7`: 100%|██████████| 100/100 [00:07<00:00, 14.07it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-8`: 100%|██████████| 100/100 [00:07<00:00, 13.70it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-9`: 100%|██████████| 100/100 [00:07<00:00, 13.62it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-10`: 100%|██████████| 100/100 [00:07<00:00, 13.47it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-11`: 100%|██████████| 100/100 [00:07<00:00, 14.18it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-12`: 100%|██████████| 100/100 [00:07<00:00, 14.18it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-13`: 100%|██████████| 100/100 [00:07<00:00, 13.25it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-14`: 100%|██████████| 100/100 [00:07<00:00, 12.80it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-small_all-MiniLM-L6-v2_init-15`: 100%|██████████| 100/100 [00:07<00:00, 12.77it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

In [7]:
# Pretty-print the per-k results collected for test case 1.
pprint(test_result_1)

[{'args': {'database': 'db-small',
           'embedding_model': 'all-MiniLM-L6-v2',
           'k': 1},
  'evaluation_result': {'precision': 0.0353, 'recall': 0.0775},
  'experiment_name': 'db-small_all-MiniLM-L6-v2_init-1'},
 {'args': {'database': 'db-small',
           'embedding_model': 'all-MiniLM-L6-v2',
           'k': 2},
  'evaluation_result': {'precision': 0.0759, 'recall': 0.2459},
  'experiment_name': 'db-small_all-MiniLM-L6-v2_init-2'},
 {'args': {'database': 'db-small',
           'embedding_model': 'all-MiniLM-L6-v2',
           'k': 3},
  'evaluation_result': {'precision': 0.0717, 'recall': 0.2847},
  'experiment_name': 'db-small_all-MiniLM-L6-v2_init-3'},
 {'args': {'database': 'db-small',
           'embedding_model': 'all-MiniLM-L6-v2',
           'k': 4},
  'evaluation_result': {'precision': 0.0626, 'recall': 0.3073},
  'experiment_name': 'db-small_all-MiniLM-L6-v2_init-4'},
 {'args': {'database': 'db-small',
           'embedding_model': 'all-MiniLM-L6-v2',
       

## **Test Case 2**

- Database: `db-large`
- Embedding model: `intfloat/multilingual-e5-large`

In [8]:
# Test case 2: database and embedding model under test.
DATABASE_NAME = "db-large"
EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large"

# Point the shared retriever config at this test case's database.
neo4j_config.update(DATABASE_NAME=DATABASE_NAME)

neo4j_graph = Neo4jGraph(
    url=URI,
    username=USERNAME,
    password=PASSWORD,
    database=DATABASE_NAME,
    enhanced_schema=True,
)
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [9]:
# Sweep k for test case 2; every argument is passed by keyword so the pairing
# of names and objects is explicit.
test_result_2 = run_test_case(
    neo4j_graph_name=DATABASE_NAME,
    neo4j_graph=neo4j_graph,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    embedding_model=embedding_model,
    neo4j_graph_config=neo4j_config,
)

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-1`: 100%|██████████| 100/100 [00:09<00:00, 10.44it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-2`: 100%|██████████| 100/100 [00:08<00:00, 11.37it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-3`: 100%|██████████| 100/100 [00:08<00:00, 11.29it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-4`: 100%|██████████| 100/100 [00:08<00:00, 11.44it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-5`: 100%|██████████| 100/100 [00:08<00:00, 11.24it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-6`: 100%|██████████| 100/100 [00:10<00:00,  9.62it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-7`: 100%|██████████| 100/100 [00:09<00:00, 10.92it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-8`: 100%|██████████| 100/100 [00:09<00:00, 10.86it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-9`: 100%|██████████| 100/100 [00:09<00:00, 10.42it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-10`: 100%|██████████| 100/100 [00:09<00:00, 10.53it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-11`: 100%|██████████| 100/100 [00:09<00:00, 10.02it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-12`: 100%|██████████| 100/100 [00:09<00:00, 10.26it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-13`: 100%|██████████| 100/100 [00:09<00:00, 10.25it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-14`: 100%|██████████| 100/100 [00:09<00:00, 10.08it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-large_intfloat-multilingual-e5-large_init-15`: 100%|██████████| 100/100 [00:11<00:00,  9.07it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

In [10]:
# Pretty-print the per-k results collected for test case 2.
pprint(test_result_2)

[{'args': {'database': 'db-large',
           'embedding_model': 'intfloat/multilingual-e5-large',
           'k': 1},
  'evaluation_result': {'precision': 0.0437, 'recall': 0.0950},
  'experiment_name': 'db-large_intfloat-multilingual-e5-large_init-1'},
 {'args': {'database': 'db-large',
           'embedding_model': 'intfloat/multilingual-e5-large',
           'k': 2},
  'evaluation_result': {'precision': 0.1015, 'recall': 0.3276},
  'experiment_name': 'db-large_intfloat-multilingual-e5-large_init-2'},
 {'args': {'database': 'db-large',
           'embedding_model': 'intfloat/multilingual-e5-large',
           'k': 3},
  'evaluation_result': {'precision': 0.0755, 'recall': 0.3094},
  'experiment_name': 'db-large_intfloat-multilingual-e5-large_init-3'},
 {'args': {'database': 'db-large',
           'embedding_model': 'intfloat/multilingual-e5-large',
           'k': 4},
  'evaluation_result': {'precision': 0.0776, 'recall': 0.3500},
  'experiment_name': 'db-large_intfloat-multilingual

## **Test Case 3**

- Database: `db-domain-specific`
- Embedding model: `archi-ai/Indo-LegalBERT`

In [11]:
# Test case 3: database and embedding model under test.
DATABASE_NAME = "db-domain-specific"
EMBEDDING_MODEL_NAME = "archi-ai/Indo-LegalBERT"

# Point the shared retriever config at this test case's database.
neo4j_config.update(DATABASE_NAME=DATABASE_NAME)

neo4j_graph = Neo4jGraph(
    url=URI,
    username=USERNAME,
    password=PASSWORD,
    database=DATABASE_NAME,
    enhanced_schema=True,
)

# NOTE(review): loading this checkpoint logs that no sentence-transformers
# model exists under this name, so one is created with mean pooling, and that
# the BERT pooler weights are newly initialized. Keep this in mind when
# comparing its scores with the other test cases.
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

No sentence-transformers model found with name archi-ai/Indo-LegalBERT. Creating a new one with mean pooling.
Some weights of BertModel were not initialized from the model checkpoint at archi-ai/Indo-LegalBERT and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# Sweep k for test case 3; every argument is passed by keyword so the pairing
# of names and objects is explicit.
test_result_3 = run_test_case(
    neo4j_graph_name=DATABASE_NAME,
    neo4j_graph=neo4j_graph,
    embedding_model_name=EMBEDDING_MODEL_NAME,
    embedding_model=embedding_model,
    neo4j_graph_config=neo4j_config,
)

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-1`: 100%|██████████| 100/100 [00:08<00:00, 11.26it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-2`: 100%|██████████| 100/100 [00:08<00:00, 11.60it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-3`: 100%|██████████| 100/100 [00:08<00:00, 11.68it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-4`: 100%|██████████| 100/100 [00:08<00:00, 12.03it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-5`: 100%|██████████| 100/100 [00:08<00:00, 11.26it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-6`: 100%|██████████| 100/100 [00:08<00:00, 11.61it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-7`: 100%|██████████| 100/100 [00:08<00:00, 11.56it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-8`: 100%|██████████| 100/100 [00:08<00:00, 11.22it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-9`: 100%|██████████| 100/100 [00:09<00:00, 10.71it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-10`: 100%|██████████| 100/100 [00:09<00:00, 10.74it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-11`: 100%|██████████| 100/100 [00:09<00:00, 10.63it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-12`: 100%|██████████| 100/100 [00:09<00:00, 10.33it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-13`: 100%|██████████| 100/100 [00:09<00:00, 10.13it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-14`: 100%|██████████| 100/100 [00:09<00:00, 10.01it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Running hybrid_cypher_retriever: `db-domain-specific_archi-ai-Indo-LegalBERT_init-15`: 100%|██████████| 100/100 [00:10<00:00,  9.94it/s]


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

In [13]:
# Pretty-print the per-k results collected for test case 3.
pprint(test_result_3)

[{'args': {'database': 'db-domain-specific',
           'embedding_model': 'archi-ai/Indo-LegalBERT',
           'k': 1},
  'evaluation_result': {'precision': 0.0310, 'recall': 0.0558},
  'experiment_name': 'db-domain-specific_archi-ai-Indo-LegalBERT_init-1'},
 {'args': {'database': 'db-domain-specific',
           'embedding_model': 'archi-ai/Indo-LegalBERT',
           'k': 2},
  'evaluation_result': {'precision': 0.0711, 'recall': 0.2309},
  'experiment_name': 'db-domain-specific_archi-ai-Indo-LegalBERT_init-2'},
 {'args': {'database': 'db-domain-specific',
           'embedding_model': 'archi-ai/Indo-LegalBERT',
           'k': 3},
  'evaluation_result': {'precision': 0.0708, 'recall': 0.2830},
  'experiment_name': 'db-domain-specific_archi-ai-Indo-LegalBERT_init-3'},
 {'args': {'database': 'db-domain-specific',
           'embedding_model': 'archi-ai/Indo-LegalBERT',
           'k': 4},
  'evaluation_result': {'precision': 0.0693, 'recall': 0.3255},
  'experiment_name': 'db-domain