In [None]:
import os
import sys
import time
from ast import literal_eval
from typing import Any, Dict, List, Union


# Put the parent of the current working directory on the import path so the
# `from src.grag import ...` line below can resolve (assumes the notebook is
# launched from a direct subdirectory of the project root -- TODO confirm).
sys.path.append(os.path.join(os.getcwd(), ".."))


import pandas as pd
from pprint import pprint
from dotenv import load_dotenv
from langchain_neo4j import Neo4jGraph
from langchain_ollama import ChatOllama
from langchain_anthropic import ChatAnthropic
from langchain_huggingface import HuggingFaceEmbeddings
from ragas.dataset_schema import EvaluationDataset, EvaluationResult
from src.grag import run_text2cypher_workflow, evaluate_retriever


# Load variables from a local `.env` file into the process environment; the
# cells below read the Neo4j and Anthropic credentials through `os.environ`.
load_dotenv()

True

# **Preparation**

In [2]:
# Name of the Neo4j database this notebook connects to.
DATABASE_NAME = "db-large"

# Connection settings come from the environment; a missing variable raises
# KeyError right here.
URI, USERNAME, PASSWORD = (
    os.environ[variable]
    for variable in ("NEO4J_HOST", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

# Graph handle shared by every experiment in this notebook.
neo4j_graph = Neo4jGraph(
    database=DATABASE_NAME,
    url=URI,
    username=USERNAME,
    password=PASSWORD,
    enhanced_schema=True,
)

In [None]:
# Where experiment outputs are written and where the test spreadsheet lives.
OUTPUT_PATH = os.path.join("results", "text2cypher_retriever")
DATASET_PATH = os.path.join("data", "Dataset Testing.xlsx")

os.makedirs(OUTPUT_PATH, exist_ok=True)

df = pd.read_excel(DATASET_PATH)

# Build one sample per spreadsheet row whose `is_valid` cell is truthy.
# `reference_contexts_1` holds a Python literal stored as text, hence
# `literal_eval`.
dataset = [
    {
        "user_input": str(row["user_input"]),
        "reference": str(row["reference"]),
        "reference_contexts": literal_eval(row["reference_contexts_1"]),
        # Alternative reference column, kept for quick switching:
        # "reference_contexts": literal_eval(row["reference_contexts_2"]),
    }
    for _, row in df.iterrows()
    if row["is_valid"]
]

evaluation_dataset = EvaluationDataset.from_list(dataset)

print(f"Jumlah soal: {len(evaluation_dataset)}")

Jumlah soal: 100


# **Evaluation**

In [None]:
def print_result_summary(result: Dict[str, Any]) -> None:
    """Print each entry of ``result`` as ``key: value``, one per line.

    The ``"cypher_result"`` entry is left out of the printout.

    Args:
        result: Mapping to summarise, e.g. the dict returned by
            ``run_test_case``.
    """
    for name in result:
        if name == "cypher_result":
            continue
        print(f"{name}: {result[name]}")


def save_experiment_dataset_or_result(
    dataset: Union[EvaluationDataset, EvaluationResult],
    generated_cypher_results: List[str],
    experiment_name: str
) -> None:
    """Write a dataset or evaluation result to ``<experiment_name>.json``.

    The object is converted with ``to_pandas()``, a ``cypher_result`` column
    is set from ``generated_cypher_results``, and the frame is serialised as a
    JSON list of records inside ``OUTPUT_PATH``. An existing file with the
    same experiment name is overwritten.

    Args:
        dataset: Object exposing ``to_pandas()``.
        generated_cypher_results: Values for the ``cypher_result`` column.
        experiment_name: File stem of the JSON file to write.
    """
    frame = dataset.to_pandas()
    frame["cypher_result"] = generated_cypher_results

    target_file = os.path.join(OUTPUT_PATH, f"{experiment_name}.json")
    frame.to_json(target_file, orient="records")


def run_test_case(test_case: Dict[str, Any]) -> Dict[str, Any]:
    """Run one text2cypher experiment, persist its outputs, and summarise it.

    The notebook-level ``evaluation_dataset`` and ``neo4j_graph`` are handed to
    ``run_text2cypher_workflow``; the completed dataset it returns is then
    scored with ``evaluate_retriever``. Results are written to disk after each
    of the two stages.

    Args:
        test_case: Mapping with the keys ``"llm_model_name"`` (used in the
            experiment name), ``"llm_model"`` (passed as ``cypher_llm``) and
            ``"embedding_model"`` (passed as ``embedder_model``). A falsy
            embedding model such as ``None`` labels the run ``"zero-shot"``;
            anything else labels it ``"few-shot"``.

    Returns:
        A dict with the keys ``"experiment_name"``, ``"args"``,
        ``"evaluation_result"`` and ``"cypher_result"``.
    """
    prompt_type = "few-shot" if test_case["embedding_model"] else "zero-shot"

    # Single quotes inside the f-string: reusing the enclosing double quotes is
    # a SyntaxError on Python versions before 3.12 (PEP 701).
    experiment_name = f"{test_case['llm_model_name']}_{prompt_type}"

    text2cypher_workflow_result = run_text2cypher_workflow(
        evaluation_dataset,
        experiment_name,
        neo4j_graph=neo4j_graph,
        cypher_llm=test_case["llm_model"],
        embedder_model=test_case["embedding_model"],
        verbose=True,
    )

    evaluation_dataset_completed, generated_cypher_results = (
        text2cypher_workflow_result
    )

    # Checkpoint 1: keep the workflow output on disk before evaluation starts.
    save_experiment_dataset_or_result(
        evaluation_dataset_completed,
        generated_cypher_results,
        experiment_name=experiment_name
    )

    evaluation_result = evaluate_retriever(
        evaluation_dataset_completed,
        experiment_name=experiment_name,
    )

    # Checkpoint 2: same experiment name, so this replaces the checkpoint-1
    # file with the evaluated result.
    save_experiment_dataset_or_result(
        evaluation_result,
        generated_cypher_results,
        experiment_name=experiment_name
    )

    return {
        "experiment_name": experiment_name,
        "args": {"llm": test_case["llm_model_name"], "prompt_type": prompt_type},
        "evaluation_result": evaluation_result,
        "cypher_result": generated_cypher_results,
    }

In [None]:
# Model identifiers, grouped so they are easy to swap.
EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large"
CLAUDE_LLM_MODEL_NAME = "claude-3-5-haiku-20241022"
LLAMA_LLM_MODEL_NAME = "llama3.1:8b-instruct-q4_K_M"

# Embedder handed to the few-shot test cases.
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Both chat models use temperature 0.0 and a 4096-token generation cap.
claude_llm = ChatAnthropic(
    model_name=CLAUDE_LLM_MODEL_NAME,
    temperature=0.0,
    max_tokens_to_sample=4096,
    timeout=None,
    api_key=os.environ["ANTHROPIC_API_KEY"],
)

llama_llm = ChatOllama(
    model=LLAMA_LLM_MODEL_NAME,
    temperature=0.0,
    num_ctx=32768,
    num_predict=4096,
)

In [None]:
# Experiment grid: every LLM is run without an embedder (zero-shot) and then
# with one (few-shot). Resulting order, relied on by the cells below:
#   [0] Claude zero-shot, [1] Claude few-shot,
#   [2] Llama zero-shot,  [3] Llama few-shot
test_cases = [
    {
        "llm_model_name": llm_name,
        "llm_model": llm,
        "embedding_model": embedder,
    }
    for llm_name, llm in (
        (CLAUDE_LLM_MODEL_NAME, claude_llm),
        (LLAMA_LLM_MODEL_NAME, llama_llm),
    )
    for embedder in (None, embedding_model)
]



## **Test Case 1**

- Claude
- Zero-Shot

In [None]:
# Claude, zero-shot: first entry of `test_cases` (no embedding model).
test_result_1 = run_test_case(test_cases[0])

# Print every field of the result except the `cypher_result` entry.
print_result_summary(test_result_1)

## **Test Case 2**

- Claude
- Few-Shot

In [None]:
# Claude, few-shot: second entry of `test_cases` (embedding model supplied).
test_result_2 = run_test_case(test_cases[1])

# Print every field of the result except the `cypher_result` entry.
print_result_summary(test_result_2)

## **Test Case 3**

- Llama
- Zero-Shot

In [None]:
# Llama, zero-shot: third entry of `test_cases` (no embedding model).
test_result_3 = run_test_case(test_cases[2])

# Print every field of the result except the `cypher_result` entry.
print_result_summary(test_result_3)

## **Test Case 4**

- Llama
- Few-Shot

In [None]:
# Llama, few-shot: fourth entry of `test_cases` (embedding model supplied).
test_result_4 = run_test_case(test_cases[3])

# Print every field of the result except the `cypher_result` entry.
print_result_summary(test_result_4)