# GraphRAG Relevance, Factuality and Synthesis Evaluation

## Step 0: Environment setup

In [1]:
from dotenv import load_dotenv
import os
from langchain_neo4j import Neo4jGraph
from libs import create_vector_index
import pandas as pd
from conn import connect2Googlesheet,retrieval_rel_docs, get_concatenate_df, apply_metric
from libs import context_builder, chunk_finder, enhanced_chunk_finder
# Force reload of the .env file: override=True lets values from .env replace
# variables that are already set in this process (e.g. from an earlier run of
# this cell), so edits to .env take effect without restarting the kernel.
load_dotenv(override=True)

True

In [2]:
# Connect to Neo4j database using the NEO4J_URL / NEO4J_USERNAME /
# NEO4J_PASSWORD environment variables.
try:
    graph = Neo4jGraph(
        url=os.getenv("NEO4J_URL"),
        username=os.getenv("NEO4J_USERNAME"),
        password=os.getenv("NEO4J_PASSWORD")
    )
    print("Connected to Neo4j database successfully.")
except ValueError as e:
    print(f"Could not connect to Neo4j database: {e}")
    # Re-raise so execution stops here: later cells use `graph`, and carrying
    # on would only postpone the failure to an unrelated-looking NameError.
    raise

Connected to Neo4j database successfully.


## Step 1: Create vector index

In [3]:
# Index creation is left disabled; uncomment to build the "entities" vector index.
# create_vector_index(graph, "entities")

## Step 2: Load questions from Google Sheets

In [4]:
# Open the Google spreadsheet
spreadsheet = connect2Googlesheet()

# The "relevance" questions are read from the worksheet at index 2
worksheet = spreadsheet.get_worksheet(2)

# Read every record (a list of dictionaries) and load it into a DataFrame
data = worksheet.get_all_records()
df_MedQ = pd.DataFrame(data=data)

# Preview the first rows
df_MedQ.head()

Unnamed: 0,condition,number,docs,Question,Mahmud's Note,status,comments,Unnamed: 8
0,ARDS,1,ACURASYS,Does early administration of neuromuscular blo...,Like,,,
1,ARDS,2,ACURASYS,Do patients with severe ARDS being treated wit...,Replace,fixed,,
2,ARDS,3,ROSE,"In patients with moderate to severe ARDS, does...",Maybe this question: In patients with moderate...,fixed,,
3,ARDS,4,ROSE,Do patients with moderate-to-severe ARDS have ...,Local question (not sure if this is the aim of...,fixed,Wrong concept since PEEP by itself is mandator...,Does the use of neuromuscular blockers in pati...
4,ARDS,5,FACTT,"Among patients with ALI/ARDS, does a conservat...",Local question (not sure if this is the aim of...,fixed,Check if studies defined conservative by CVP <...,


## Step 3: Relevance check for top K questions

In [5]:
def retrieval_rel_docs(graph, questions, top_k=5, finder=None):
    """Run document retrieval for the first ``top_k`` questions.

    Args:
        graph: Graph handle, passed unchanged as the first argument to
            ``finder``.
        questions: pandas DataFrame with a 'Question' column holding the
            question text.
        top_k: Number of leading rows of ``questions`` to process
            (``questions.head(top_k)``).
        finder: Optional callable ``finder(graph, question)`` whose return
            value is stored in the 'Retrieved' column. Defaults to
            ``enhanced_chunk_finder``.

    Returns:
        A DataFrame with the columns 'Question number', 'Question' and
        'Retrieved', one row per processed question. 'Question number' is the
        1-based position of the question among the processed rows.

    Raises:
        KeyError: If a processed row has no 'Question' entry.
    """
    if finder is None:
        # Resolved at call time so the default follows the notebook's import.
        finder = enhanced_chunk_finder

    top_k_questions = questions.head(top_k)

    results = []
    # Number by position rather than by index label: labels are not guaranteed
    # to be consecutive integers (filtered, re-indexed or string-indexed frames).
    for question_number, (_, row) in enumerate(top_k_questions.iterrows(), start=1):
        question = row['Question']
        results.append({
            'Question number': question_number,
            'Question': question,
            'Retrieved': finder(graph, question),
        })

    # Explicit columns keep the schema stable even when no rows were processed.
    return pd.DataFrame(results, columns=['Question number', 'Question', 'Retrieved'])
#retrieval_rel_docs (graph, questions, top_k=1)

In [6]:
# Lift pandas' display limits so the full text of every cell, row and column is shown
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

# Run retrieval for (up to) the first `topk` questions and display the result
topk = 25
results_df = retrieval_rel_docs(graph, df_MedQ, top_k=topk)
results_df
# results_df.to_csv('./outputs/retrieved_docs_results.csv', index=False)

Unnamed: 0,Question number,Question,Retrieved
0,1,Does early administration of neuromuscular blocking agents increases the ventilator free days?,"[ACURASYS.pdf, PESDCOMV.pdf, FMWSCPARDS.pdf]"
1,2,Do patients with severe ARDS being treated with neuromuscular blocking agents have increased muscle weakness?,"[LSPA.pdf, ETSDMV.pdf]"
2,3,"In patients with moderate to severe ARDS, does early use of continuous neuromuscular blockade improve mortality?","[ROSE.pdf, ENB.pdf, NBSARDS.pdf]"
3,4,Do patients with moderate-to-severe ARDS have a significance difference in mortality rate beween patients who recieved an early and continous cisatracurium infusion than those with usual care approach with lighter sedation targets?,"[LSPA.pdf, ENB.pdf, ROSE.pdf, TOF-ARDS.pdf, NBSARDS.pdf]"
4,5,"Among patients with ALI/ARDS, does a conservative fluid management strategy improves lung function and decrease ventilator days compared to liberal strategy?","[ENB.pdf, OSCILLATE.pdf, ROSE.pdf, ETSDMV.pdf, FACTT.pdf, TOF-ARDS.pdf, ACURASYS.pdf, FMWSCPARDS.pdf]"
5,6,Does a conservative fluid management strategy result in a significant difference in mortality as the primary outcome?,"[PAC in FACTT.pdf, FACTT.pdf]"
6,7,Does mechanical ventilation with a lower tidal volume result in decreased mortality with acute lung injury and acute respiratory distress syndrome?,"[OSCILLATE.pdf, ARDSNet.pdf]"
7,8,Does mechanical ventilation with a lower tidal volume result increased ventilator free days with patients with acute lung injury and acute respiratory distress syndrome?,"[ARDSNet.pdf, ARDSSRDRFMS.pdf, OSCILLATE.pdf, ROSE.pdf, ACURASYS.pdf]"
8,9,Prone positioning sessions in patients with severe ARDS significantly decreased mortality?,"[ESCPARDS.pdf, LSPA.pdf, CEIIUPPSARDS.pdf, ETSDMV.pdf]"
9,10,Does early application of high frequency oscillatory ventilation compared with ventilation strategy of low tidal voulme decrease mortality?,"[OSCILLATE.pdf, BMIMSARDS.pdf]"


In [None]:
# Old results: read the previously saved table from disk and display it
old_results = pd.read_csv(filepath_or_buffer='./outputs/relevant_docs.csv')
old_results

Unnamed: 0.1,Unnamed: 0,Question,Docs
0,0,Relevace Q1,"ROSE, ACURASYS"
1,1,Relevace Q2,"ROSE, ACURASYS"
2,2,Relevace Q3,ROSE
3,3,Relevace Q4,ROSE
4,4,Relevace Q5,FACTT
5,5,Relevace Q6,FACTT
6,6,Relevace Q7,ARDSNet
7,7,Relevace Q8,ARDSNet
8,8,Relevace Q9,PROSEVA
9,9,Relevace Q10,OSCILLATE
