In [3]:
import json
import math
import os

from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision


# Read the raw evaluation log export from disk and parse it as JSON.
with open("logs.json", mode="r", encoding="utf-8") as log_file:
    log_data = json.loads(log_file.read())


# Flatten the nested log structure into one record per logged item, keyed with
# the column names that are later handed to the evaluator.
# NOTE(review): input messages are read through the "context" key while
# expectedOutput entries are read through "content" — confirm that this
# asymmetry matches the actual log schema.
flat_items = []
for log_block in log_data:
    for entry in log_block["items"]:
        messages = entry["input"]
        user_parts = [msg["context"] for msg in messages if msg["role"] == "user"]
        system_parts = [msg["context"] for msg in messages if msg["role"] == "system"]
        expected_parts = [out.get("content", "") for out in entry.get("expectedOutput", [])]
        expected_text = " ".join(expected_parts)

        record = {
            "id": entry["id"],
            # All user-role text, space-joined, forms the question.
            "question": " ".join(user_parts),
            "response": expected_text,
            # The expected output doubles as the reference answer.
            "reference": expected_text,
            # All system-role text is merged into a single context string.
            "retrieved_contexts": [" ".join(system_parts)],
        }
        flat_items.append(record)


# Wrap the flattened records in a `datasets.Dataset`, the tabular input passed
# to the evaluator below.
ds = Dataset.from_list(flat_items)

# --- Evaluate ---
# Score every record on the three selected ragas metrics.
# raise_exceptions=True is requested so that a failing metric call surfaces as
# an error rather than being skipped (see the ragas `evaluate` docs for the
# exact semantics).
results = evaluate(
    ds,
    raise_exceptions=True,
    metrics=[
        faithfulness,
        answer_relevancy,
        context_precision,
    ],
)


# Convert the evaluation result into a table with one row per evaluated record.
score_df = results.to_pandas()

# Ids are re-attached purely by position, so the score table must line up 1:1
# with flat_items; fail loudly instead of silently mislabelling rows.
assert len(score_df) == len(flat_items), (
    f"score table has {len(score_df)} rows but {len(flat_items)} items were evaluated"
)


def _json_score(value):
    """Round a metric score to 3 decimals for export.

    A NaN score is mapped to None so that it is written as JSON ``null``;
    ``json.dump`` would otherwise emit the bare token ``NaN``, which strict
    JSON parsers reject.
    """
    return None if math.isnan(value) else round(value, 3)


# One output record per evaluated item: its id plus the three rounded scores.
output_json = [
    {
        "id": item["id"],
        "faithfulness": _json_score(row.faithfulness),
        "answer_relevancy": _json_score(row.answer_relevancy),
        "context_precision": _json_score(row.context_precision),
    }
    for item, row in zip(flat_items, score_df.itertuples(index=False))
]

with open("ragas_output.json", "w", encoding="utf-8") as file:
    json.dump(output_json, file, indent=2)

print("RAGAs evaluation complete. Results saved to 'ragas_output.json'.")


Evaluating:   0%|          | 0/30 [00:00<?, ?it/s]

✅ RAGAs evaluation complete. Results saved to 'ragas_output.json'.


In [5]:
import json
import pandas as pd

# Re-read the JSON scores written by the evaluation cell.
with open("ragas_output.json", mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())

# The loaded records become a table, which is then written out as CSV;
# index=False keeps the DataFrame index out of the file.
df = pd.DataFrame(data)
df.to_csv(path_or_buf="ragas_output.csv", index=False)

print("Saved as ragas_output.csv")


Saved as ragas_output.csv
