In [None]:
import os
import sys

import json
import pandas as pd
from weaviate_rag.rag_system import GraphRAGSystem
import ollama
from ragas import evaluate
from ragas.metrics import Faithfulness, AnswerRelevancy, ContextRecall
import re
from ragas import EvaluationDataset

In [None]:
# Load the ground-truth evaluation records (question/answer pairs) from JSON.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# into a configurable constant so the notebook runs on other machines.
# The encoding is stated explicitly because JSON is UTF-8 by specification,
# while open() would otherwise fall back to the platform's locale encoding.
with open('/Users/alexlecu/PycharmProjects/LLMKGraph/backend/evaluation/data/grok_evaluation_datasets/1_Hop_OpenEnded_Questions.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Tabular view of the records: keep only the question and reference answer,
# and add two placeholder columns (all None) intended to hold the retrieved
# passages and the RAG responses once the system has been queried.
df = pd.DataFrame(data)[['question', 'answer']].assign(
    contexts=None,
    generated_answer=None,
)

In [None]:
# Split the loaded records into two parallel lists: the question text and the
# reference ("ground truth") answer, kept in the same order as `data`.
questions = []
ground_truths = []
for record in data:
    questions.append(record['question'])
    ground_truths.append(record['answer'])

In [None]:
def retrieve(user_input):
    """Run the graph RAG analysis for one query and wrap the result in a list.

    Args:
        user_input: The query handed to the GraphRAGSystem constructor.

    Returns:
        A single-element list holding whatever GraphRAGSystem.analyze()
        returned for this query.
    """
    return [GraphRAGSystem(user_input).analyze()]

In [None]:
# Retrieve context once per question; contexts[i] is the single-element list
# that retrieve() returned for questions[i].
contexts = list(map(retrieve, questions))

In [None]:
def generate_answer(question, context, model):
    """Ask an Ollama chat model to answer `question` using the retrieved `context`.

    Args:
        question: The user question; sent unchanged as the user message.
        context: Retrieved supporting material embedded in the system prompt.
            A list or tuple of passages (the shape `retrieve` returns) is
            joined into plain text, one passage per paragraph; any other
            value is interpolated as-is.
        model: Name of the Ollama model to query (e.g. "llama3.2").

    Returns:
        The value found at response['message']['content'] in the chat reply.
    """
    # Interpolating a list directly would embed its Python repr (brackets,
    # quotes, escaped newlines) in the prompt, so flatten passages to text first.
    if isinstance(context, (list, tuple)):
        context_text = "\n\n".join(str(passage) for passage in context)
    else:
        context_text = context

    system_prompt = f"""
    You are a trusted medical research assistant specializing in age-related macular degeneration (AMD). Your task is to provide concise, accurate, and factual answers about AMD research, limited to one sentence, based on the following additional relevant data:
    
    {context_text}
    
    Please adhere to these guidelines when formulating your response:

    1. Be Concise:
    Provide a direct answer in one sentence, focusing on the most essential information without unnecessary elaboration.
    2. Express Uncertainty Transparently:
    If the available information is insufficient to answer confidently, acknowledge this and specify what additional data or details would be needed to provide a more complete response.
    3. Maintain Accuracy and Integrity:
    Base your answer solely on verified data and the provided context. Do not fabricate any information or references.
    4. Communicate Professionally:
    Present your response in a clear, well-organized, and professional manner, ensuring complex information is accessible and easy to understand.
    
    Begin your response below.
    """

    response = ollama.chat(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question}
        ],
        stream=False,  # request the whole reply in one response object
        options={
            "temperature": 0.1,
            "top_k": 50,
            "top_p": 0.9
        }
    )
    return response['message']['content']

In [None]:
# Generate one answer per (question, context) pair with the "llama3.2" model;
# answers[i] corresponds to questions[i].
answers = []
for question, context in zip(questions, contexts):
    answers.append(generate_answer(question, context, "llama3.2"))

In [None]:
def remove_think_tags(response):
    """Return `response` with every complete `<think>...</think>` span deleted.

    Matching is non-greedy and spans newlines (DOTALL), so each block is
    removed individually. Text outside the blocks, including surrounding
    whitespace, is left untouched, as is an opening tag with no closing tag.

    Args:
        response: The raw model output string.

    Returns:
        The string with all `<think>...</think>` blocks removed.
    """
    think_block = re.compile(r'<think>.*?</think>', re.DOTALL)
    return think_block.sub('', response)

In [None]:
# Strip any <think>...</think> blocks from each generated answer.
# Note: this rebinds `answers` to the cleaned list.
answers = list(map(remove_think_tags, answers))

In [None]:
from datasets import Dataset

# Assemble the evaluation table from the four parallel lists, one row per
# question: the query, the generated answer, the reference answer, and the
# retrieved context list.
evaluation_dataset = Dataset.from_dict(
    dict(
        user_input=questions,
        response=answers,
        reference=ground_truths,
        retrieved_contexts=contexts,
    )
)

In [None]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# evaluator LLM created later in the notebook; confirm against the .env file.
load_dotenv()

In [None]:
from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper

# Create the LangChain OpenAI chat model ("gpt-4o-mini") for the evaluation.
# NOTE(review): credentials are not passed here, so they are presumably read
# from the environment; confirm.
llm = ChatOpenAI(model="gpt-4o-mini")
# Wrap the LangChain model in the RAGAS adapter; this wrapped object is what
# gets passed to evaluate() as `llm`.
evaluator_llm = LangchainLLMWrapper(llm)

In [None]:
from ragas import evaluate
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

# Score the dataset with three RAGAS metrics (context recall, faithfulness,
# factual correctness); evaluator_llm is supplied as the LLM for the run.
result = evaluate(
    dataset=evaluation_dataset,
    metrics=[
        LLMContextRecall(),
        Faithfulness(),
        FactualCorrectness(),
    ],
    llm=evaluator_llm,
)


In [None]:
# Display the object returned by evaluate() as this cell's output.
result

In [None]:
# Collect the raw inputs and outputs of the run, one row per question, and
# save them as a CSV in the current working directory (no index column).
evaluation_data = pd.DataFrame(
    dict(
        question=questions,
        ground_truth=ground_truths,
        context=contexts,
        generated_answer=answers,
    )
)

evaluation_data.to_csv('ragas_1hop_evaluation_data.csv', index=False)