# OpenEnded_Questions_Evaluation

In [1]:
import os
import sys

# Use the current working directory as the base
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import json
import pandas as pd
from weaviate_rag.rag_system import KGRAGSystem
import ollama
from ragas import evaluate
from ragas.metrics import Faithfulness, AnswerRelevancy, ContextRecall
import re
from ragas import EvaluationDataset

  from .autonotebook import tqdm as notebook_tqdm
INFO:datasets:PyTorch version 2.5.1 available.
INFO:datasets:TensorFlow version 2.18.0 available.


In [2]:
# Load your JSON data
with open('/Users/alexlecu/PycharmProjects/LLMKGraph/backend/evaluation/data/grok_evaluation_dataset_mini/OpenEnded_Questions_mini.json', 'r') as f:
    data = json.load(f)

# Convert to DataFrame for RAGAS
df = pd.DataFrame(data)
df = df[['question', 'answer']]

# Later, add retrieved contexts and generated answers after querying your RAG
df['contexts'] = None  # Will be filled with retrieved passages
df['generated_answer'] = None  # Will be filled with RAG responses

In [3]:
def retrieve(user_input):
    rag_system = KGRAGSystem()
    kg_result = rag_system.query(user_input)

    return [kg_result["context"]]

In [4]:
def generate_answer(question, context, model):
    system_prompt = f"""
    You are a trusted medical research assistant specializing in age-related macular degeneration (AMD). Your task is to provide thorough, accurate, and detailed answers about AMD research based on the following additional relevant data:
    
    {context}
    
    Please adhere to these guidelines when formulating your response:
    
    1. Express Uncertainty Transparently:
    If the available information is insufficient to answer confidently, acknowledge this and specify what additional data or details would be needed to provide a more complete response.
    2. Maintain Accuracy and Integrity:
    Base your answer solely on verified data and the provided context. Do not fabricate any information or references.
    3. Communicate Professionally:
    Present your response in a clear, well-organized, and professional manner, ensuring complex information is accessible and easy to understand.
    
    Begin your response below.
    """
    
    response = ollama.chat(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question}
        ],
        stream=False
    )
    return response['message']['content']

In [5]:
def remove_think_tags(response):
    cleaned_content = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL)

    return cleaned_content

In [6]:
dataset = []

for index, row in df.iterrows():
    context = retrieve(row["question"])
    response = remove_think_tags(generate_answer(row["question"], context, "deepseek-r1"))

    dataset.append(
        {
            "user_input":row["question"],
            "retrieved_contexts":context,
            "response":response,
            "reference":row["answer"]
        }
    )

INFO:httpx:HTTP Request: GET http://localhost:8080/v1/.well-known/openid-configuration "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/meta "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://pypi.org/pypi/weaviate-client/json "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/.well-known/openid-configuration "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/meta "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://pypi.org/pypi/weaviate-client/json "HTTP/1.1 200 OK"
INFO:KGRAG:Starting hybrid search for: What is age-related macular degeneration (AMD)?
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/.well-known/openid-configuration "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/meta "HTTP/1.1 200 OK"
INFO:ht

In [7]:
print(dataset)

[{'user_input': 'What is age-related macular degeneration (AMD)?', 'retrieved_contexts': ['Age Related Macular Degeneration (DISEASE) can cause Blindness (SYMPTOM). Age Related Macular Degeneration (DISEASE) can cause Choroidal Neovascular Membrane Formation (PROGRESSION). Age Related Macular Degeneration (DISEASE) presents with Ill Defined Choroidal Neovascularization (DISEASE). Age Related Macular Degeneration (DISEASE) presents with Well Defined Choroidal Neovascularization (BIOMARKER). Age Related Macular Degeneration (DISEASE) can cause Legal Blindness (SYMPTOM). Manipulate Mechanisms (TREATMENT) treats Age Related Macular Degeneration (DISEASE). Medical History (TREATMENT) diagnoses Age Related Macular Degeneration (DISEASE). Physical Examination (TREATMENT) diagnoses Age Related Macular Degeneration (DISEASE). Eye Examination (TREATMENT) diagnoses Age Related Macular Degeneration (DISEASE). Study (TREATMENT) improves Age Related Macular Degeneration (DISEASE). Early Age Related 

In [11]:
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [12]:
responses = []
references = []
similarity_scores = []

In [13]:
for entry in dataset:

    embeddings = model.encode([entry["reference"], entry["response"]], convert_to_tensor=True)
    similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
    similarity_scores.append(similarity)
    
    responses.append(entry["response"])
    references.append(entry["reference"])
    print(entry["response"].replace("\n\n", "") + " " + str(entry["reference"]))

Batches: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/it]


**Age-Related Macular Degeneration (AMD)** is a leading cause of vision loss affecting millions of people globally. It primarily affects older adults and typically occurs in both eyes symmetrically.### **Symptoms**
- **Blindness**: As a major symptom, AMD can lead to total or partial blindness.
- **Choroidal Neovascular Membrane Formation (CNVM)**: This is the most common form of AMD-related pathology. It causes ill-defined choroidodermatous neovascularization (CNV), with well-defined CNV being a biomarker for disease progression.### **Progression**
- **Early Age-Related Macular Degeneration (EARMD)**:
  - Presenters: Soft druses, retinal pigment abnormalities.
  - Symptom: Risk of developing visual loss.
  - Management: Treatable and reversible with early intervention.- **Late Age-Related Macular Degeneration (LA-RMD) or Neovascular Age-Related Macular Degeneration (NAMD)**:
  - Severe symptoms: Blindness, irreversible vision loss.
  - Complications: Pigment epithelial detachments as 

Batches: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 121.24it/s]


**Impact of Age-Related Macular Degeneration (AMD) on Vision**AMD significantly impacts vision through various stages and symptoms:1. **Early AMD**: Characterized by mild vision issues such as difficulty seeing colors and edge details. This stage often does not cause significant impairment.2. **Late AMD**: Leads to severe vision loss, potentially resulting in almost total blindness if untreated. This indicates a clear progression with age.3. **Some Impairment of Vision**: Suggests better-than-late-stage vision but may include partial central and peripheral vision loss, indicating AMD's varied impact.4. **Severely Impaired Vision**: Causes almost complete vision loss, highlighting AMD's severe effects when left untreated.**Risk Factors:**
- **Age**: Affects Some Impairment of Vision, with AMD worsening over time.
- **Choroidal Neovascularization (CNV)**: A related condition that can distort vision by creating dark patches and reducing brightness in the center. AMD often contributes to C

Batches: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.07it/s]


The two main types of age-related macular degeneration (AMD) are:1. **Exudative AMD**: Characterized by the presence of exudates, which can be serous or hemorrhagic, leading to detachment of the retinal pigment epithelium. This type is often associated with late-stage disease and treated with anti-VEGF agents like bevacizumab and ranibizumab.2. **Neovascular AMD**: This type involves the development of新生血管 (new blood vessels) beneath the retina, leading to sub-retinal membranes, edema, scarring, significant visual loss, and blindness. It is treated with anti-VEGF agents like Triamcinolone Acetonide, Verteporfin, Lucentis, among others.These two types represent distinct pathways of AMD progression and treatment approaches based on the data provided. The two main types are dry AMD, characterized by gradual thinning of the macula and drusen deposits, and wet AMD, involving abnormal blood vessel growth under the retina.


Batches: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.50it/s]


The causes of dry age-related macular degeneration (AMD) are not explicitly detailed in the provided data. However, based on general knowledge, several potential factors may contribute:1. **Genetic Factors**: A family history of AMD significantly increases the risk, making genetics a known contributing factor.2. **Early Changes**: The presence of drusen deposits and pigment clumping observed as biomarkers may indicate early stages of AMD progression, suggesting they might be precursors to disease development.3. **Environmental Influences**: Although not directly mentioned in the data, factors such as smoking, exposure to environmental pollutants, or genetic predispositions could play a role in AMD development.4. **Geographic Atrophy**: This initial sign of AMD might serve as an indicator of underlying genetic or early environmental factors contributing to disease progression.In summary, while specific causes for dry AMD aren't detailed here, known risk factors include genetics, early v

Batches: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.12it/s]

Wet AMD, as described in the context, primarily involves changes such as thickened drusen deposits around the macula and retinal involvement. However, based solely on the provided data, specific causes are not explicitly outlined. Generally, wet AMD is associated with these pathophysiological changes rather than detailed biological mechanisms.To summarize:- **Description of Wet AMD**: Involves drusen deposits, retinal involvement, and visual symptoms like loss of vision.
- **Biological Connection**: Linked to processes such as increased drusen thickness, which may contribute to visual decline.
- **Recommendation for Further Information**: Suggest consulting an expert or referring to existing studies for detailed causes.This response synthesizes the provided data while acknowledging the limitations in specific cause details. Wet AMD is caused by the growth of abnormal blood vessels under the retina that leak fluid or blood, damaging the macula and leading to rapid vision loss.





In [15]:
average_similarity = sum(similarity_scores) / len(similarity_scores)

In [16]:
print("\nEvaluation Summary:")
print(f"Average Similarity Score: {average_similarity:.3f} (Range: 0 to 1)")
print(f"Total Questions Evaluated: {len(dataset)}")


Evaluation Summary:
Average Similarity Score: 0.705 (Range: 0 to 1)
Total Questions Evaluated: 5


In [19]:
# Log incorrect answers for analysis
print("\nResponses for Manual Review (Similarity < 0.7):")
for i, (reference, response, item, score) in enumerate(zip(references, responses, dataset, similarity_scores)):
    if score < 0.7:
        print(f"Question {i+1}: {item['user_input']}")
        print(f"Ground Truth: {reference}")
        print(f"RAG Predicted: {response}")
        print(f"Similarity Score: {score:.3f}")
        print("-" * 50)


Responses for Manual Review (Similarity < 0.7):
Question 1: What is age-related macular degeneration (AMD)?
Ground Truth: AMD is an eye disease that blurs central vision due to damage to the macula, the part of the retina responsible for sharp, detailed vision, primarily affecting older adults.
RAG Predicted: 

**Age-Related Macular Degeneration (AMD)** is a leading cause of vision loss affecting millions of people globally. It primarily affects older adults and typically occurs in both eyes symmetrically.

### **Symptoms**
- **Blindness**: As a major symptom, AMD can lead to total or partial blindness.
- **Choroidal Neovascular Membrane Formation (CNVM)**: This is the most common form of AMD-related pathology. It causes ill-defined choroidodermatous neovascularization (CNV), with well-defined CNV being a biomarker for disease progression.

### **Progression**
- **Early Age-Related Macular Degeneration (EARMD)**:
  - Presenters: Soft druses, retinal pigment abnormalities.
  - Symptom: