# GraphRAG Relevance, Factuality and Synthesis Evaluation

## Step 0: Environment setup

In [1]:
from dotenv import load_dotenv
import os
from langchain_neo4j import Neo4jGraph
from libs import create_vector_index
import pandas as pd
from conn import connect2Googlesheet,retrieval_rel_docs, get_concatenate_df, apply_metric
from libs import context_builder, chunk_finder, enhanced_chunk_finder
# Load variables from the .env file (without override=True, variables already set in the environment are kept)
load_dotenv()

True

In [2]:
# Open a Neo4j connection using the URL and credentials read from the
# NEO4J_URL / NEO4J_USERNAME / NEO4J_PASSWORD environment variables.
try:
    graph = Neo4jGraph(
        url=os.getenv("NEO4J_URL"),
        username=os.getenv("NEO4J_USERNAME"),
        password=os.getenv("NEO4J_PASSWORD"),
    )
except ValueError as connection_error:
    # Only ValueError is handled here; in that case `graph` is left unassigned
    # and the failure is reported instead of raised.
    print(f"Could not connect to Neo4j database: {connection_error}")
else:
    print("Connected to Neo4j database successfully.")

Connected to Neo4j database successfully.


## Step 1: Create vector index

In [3]:
#create_vector_index(graph, "entities")

## Step 2: Load questions from Google Sheet

In [4]:
# Open the spreadsheet through the project's Google Sheets helper.
spreadsheet = connect2Googlesheet()

# The relevance questions live in the worksheet selected by this index.
RELEVANCE_WORKSHEET_INDEX = 2
worksheet = spreadsheet.get_worksheet(RELEVANCE_WORKSHEET_INDEX)

# Fetch every record (a list of dictionaries) and load it into a DataFrame.
data = worksheet.get_all_records()
df_MedQ = pd.DataFrame(data)

# Preview the first rows as the cell output.
df_MedQ.head()

Unnamed: 0,condition,number,docs,Question,Mahmud's Note,status,comments,Unnamed: 8
0,ARDS,1,ACURASYS,Does early administration of neuromuscular blo...,Like,,,
1,ARDS,2,ACURASYS,Do patients with severe ARDS being treated wit...,Replace,fixed,,
2,ARDS,3,ROSE,"In patients with moderate to severe ARDS, does...",Maybe this question: In patients with moderate...,fixed,,
3,ARDS,4,ROSE,Do patients with moderate-to-severe ARDS have ...,Local question (not sure if this is the aim of...,fixed,Wrong concept since PEEP by itself is mandator...,Does the use of neuromuscular blockers in pati...
4,ARDS,5,FACTT,"Among patients with ALI/ARDS, does a conservat...",Local question (not sure if this is the aim of...,fixed,Check if studies defined conservative by CVP <...,


## Step 3: Relevance check for top K questions

In [5]:
def retrieval_rel_docs(graph, questions, top_k=5):
    """Retrieve chunks for the first ``top_k`` questions and tabulate them.

    NOTE: this definition shadows the ``retrieval_rel_docs`` name imported
    from ``conn`` in the notebook's import cell.

    Parameters
    ----------
    graph
        Graph handle passed unchanged to ``enhanced_chunk_finder``.
    questions : pandas.DataFrame
        Frame with a ``'Question'`` column; only its first ``top_k`` rows
        are processed.
    top_k : int, default 5
        Number of leading rows of ``questions`` to process.

    Returns
    -------
    pandas.DataFrame
        One row per retrieved chunk with the columns ``'Question number'``,
        ``'Question'``, ``'Retrieved FileName'``, ``'Chunk Text'``,
        ``'Page Number'``, ``'Position'`` and ``'Similarity'``. The question
        number is the row's index label plus one.
    """
    column_order = [
        'Question number', 'Question', 'Retrieved FileName', 'Chunk Text',
        'Page Number', 'Position', 'Similarity',
    ]

    records = []
    for row_label, record in questions.head(top_k).iterrows():
        # Numbering assumes the frame's index labels are zero-based integers.
        question_number = row_label + 1
        question_text = record['Question']

        # The helper returns a pair; only its second element (the chunks) is
        # used here. Each chunk must unpack into exactly five fields.
        _, chunks = enhanced_chunk_finder(graph, question_text)
        records.extend(
            {
                'Question number': question_number,
                'Question': question_text,
                'Retrieved FileName': file_name,
                'Chunk Text': chunk_text,
                'Page Number': page_number,
                'Position': position,
                'Similarity': similarity,
            }
            for file_name, chunk_text, page_number, position, similarity in chunks
        )

    # Passing the column list keeps the layout fixed even when nothing was retrieved.
    return pd.DataFrame(records, columns=column_order)
#retrieval_rel_docs (graph, questions, top_k=1)

In [6]:
# Lift pandas' display limits so the full chunk text and every row/column are shown.
for display_option in ('display.max_colwidth', 'display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Number of leading questions from the sheet to run through retrieval.
topk = 10
results_df = retrieval_rel_docs(graph, df_MedQ, top_k=topk)
results_df
# results_df.to_csv('./outputs/retrieved_docs_results.csv', index=False)

Unnamed: 0,Question number,Question,Retrieved FileName,Chunk Text,Page Number,Position,Similarity
0,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,SMDLTSWCI.pdf,"and interactive patient, a lower incidence of delirium, fewer days on ven- tilator, and an earlier ICU discharge [4, 16–27]. However, previous studies reported dexmedetomidine was more applicable for light to moderate sedation than deep seda- tion despite the use of the maximum dose of dexmedeto- midine (1.4 µg/kg/h) [19, 20, 23, 28] and was associated",2,20,0.824856
1,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,TOF-ARDS.pdf,: mechanical ventilation; NMBAs: neuromuscular-blocking agents; PaO2: arterial partial pressure of oxygen; PBW: predicted body weight; PEEP: positive end-expiratory pressure; Pplat: end- inspiratory plateau pressure; RASS: Richmond Agitation-Sedation Scale; RCTs: randomized controlled trials; SAPS 2: Simplified Acute Severity Score 2; SOFA: Sepsis-related,8,98,0.809941
2,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,PESDCOMV.pdf,"renal replacement therapy, and neuromuscular blockade. Clinical outcomes, including ICU discharge, ventilator days, and survival status, were also recorded. To determine the impact of sedation depth, the cohort was divided into two groups (the light and deep sedation). The primary outcome of this study was time to extubation, ICU length of stay, and in- hospital mortality. Time to extubation was defined as the number of days from initiation",4,51,0.810721
3,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,ACURASYS.pdf,"rium group during the first 48 hours after enrollment. However, the two groups did not dif- fer significantly with respect to the number of pa- tients given at least one open-label cisatracuri­um bolus during the entire ICU stay after enrollment (Table 8 in the Supplementary Appendix). The re- quired dose of sedatives or analgesics was similar in the two groups during the first week of the study (Table 9 in the Supplementary Appendix). Safety Bradycardia developed during the cisatracurium infusion in one patient. No other side effects were reported. Discussion Treatment with the neuromuscular blocking agent cisatracurium for 48 hours early in the course of severe ARDS improved the adjusted 90-day sur- vival rate, increased the numbers of ventilator- free days and days outside the ICU, and decreased the incidence of barotrauma during the first 90 days",7,47,0.812738
4,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,SMDLTSWCI.pdf,"11, 14, 15]. Unlike other sedatives, dexmedetomidine—a highly selective central alpha-2 adrenergic agonist with both analgesic and sedative effects, notable for its ability to provide light sedation, analgesia, and physiologic-like sleep, as well as its minimal effect on respiratory drive— has been shown to result in a more awake and interactive patient, a lower incidence of delirium, fewer days on ven- tilator",2,19,0.824856
5,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,ESCPARDS.pdf,"osteroids in patients with persistent ARDS. Methods We randomly assigned 180 patients with ARDS of at least seven days’ duration to receive either methylprednisolone or placebo in a double-blind fashion. The primary end point was mortality at 60 days. Secondary end points included the number of ventilator-free days and organ-failure–free days, biochemical markers of inflamma- tion and fibroproliferation, and infectious complications. Results At 60",1,6,0.806533
6,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,FMWSCPARDS.pdf,"significantly lower cumulative fluid balance by 5,074 mL over 7 days than FACTT Liberal. In subjects without baseline shock, in whom the fluid protocol was applied throughout the duration of the study, management with FACTT Lite resulted in an equivalent cumulative fluid balance to FACTT Conservative. FACTT Lite had similar clinical outcomes of ventilator-free days, ICU-free days, and mortality as FACTT Conservative and significantly greater ventilator-",6,57,0.858627
7,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,TOF-ARDS.pdf,breviations ARDS: acute respiratory distress syndrome; ECCO2R: extra-corporeal ­CO2 removal; ECMO: extra-corporeal membrane oxygenation; FiO2: fraction of inspired oxygen; ICU: intensive care unit; ICUAW: intensive care unit-acquired weakness; iNO: inhaled nitric oxide; MV: mechanical ventilation; NMBAs: neuromuscular-blocking agents; PaO2,8,97,0.809941
8,2,Do patients with severe ARDS being treated with neuromuscular blocking agents have increased muscle weakness?,NBSARDS.pdf,"exploiting the increased clinical use of NMB in patients with moderate to severe ARDS and testing the potential benefit of therapeutic hypothermia in patients with ARDS who were already receiving NMB. Our Cooling to Help Injured Lungs (CHILL) pilot study supported the feasibil- ity of this strategy.18 However, successful completion of such a clinical trial depends on the continued practice of using NMB to man- age patients with ARDS. Although NMB has been",2,13,0.873604
9,2,Do patients with severe ARDS being treated with neuromuscular blocking agents have increased muscle weakness?,ACURASYS.pdf,"3% (95% CI, 26.5 to 40.9) with placebo (P = 0.05). The rate of ICU-acquired paresis did not differ significantly between the two groups. Conclusions In patients with severe ARDS, early administration of a neuromuscular blocking agent improved the adjusted 90-day survival and increased the time off the ventilator without increasing muscle weakness. (Funded by Assistance Publique–Hôpitaux de Marseille and the Programme Hospitalier de Recherche Clinique Régional 2004-26 of the French Ministry of Health; ClinicalTrials.gov number, NCT00299650.) The New England Journal of Medicine is produced by NEJM Group, a division of the Massachusetts Medical Society. Downloaded from nejm.org on January 15, 2025. For personal use only. No other uses without permission. Copyright © 2010 Massachusetts Medical Society.",1,8,0.808846


In [7]:
# old results
# old_results = pd.read_csv('./outputs/relevant_docs.csv')
# old_results