# Get the Top 5 Chunks to Check Paper's Stance

## Step 0: Environment Setup

In [7]:
from dotenv import load_dotenv # type: ignore
import os
from langchain_neo4j import Neo4jGraph # type: ignore
from libs import create_vector_index
import pandas as pd # type: ignore
from conn import connect2Googlesheet
from annolibs import get_all_chunks_per_paper, compare_embeddings
load_dotenv()

True

In [8]:
# Connect to Neo4j database
try:
    graph = Neo4jGraph(
        url=os.getenv("NEO4J_URL"),
        username=os.getenv("NEO4J_USERNAME"),
        password=os.getenv("NEO4J_PASSWORD")
    )
    print("Connected to Neo4j database successfully.")
except ValueError as e:
    print(f"Could not connect to Neo4j database: {e}")

# Check if the entities index exists
index_name = "entities"
query = "SHOW INDEXES YIELD name, type WHERE type = 'VECTOR' AND name = $index_name"

result = graph.query(query, params={"index_name": index_name})
if result:
    print("The 'entities' index already exists.")
else:
    create_vector_index(graph, "entities")

Connected to Neo4j database successfully.
The 'entities' index already exists.


## Step 1: Load Questions from Google Sheet

In [9]:
spreadsheet = connect2Googlesheet()

# Select the worksheet: relevance
worksheet = spreadsheet.get_worksheet(2)  

# Get all records as a list of dictionaries
data = worksheet.get_all_records()

# Convert to Pandas DataFrame
df_Paper = pd.DataFrame(data)
df_Paper.head()

Unnamed: 0,condition,number,docs,Question,Mahmud's Note,status,comments,Unnamed: 8
0,ARDS,1,ACURASYS,Does early administration of neuromuscular blocking agents increases the ventilator free days?,Like,,,
1,ARDS,2,ACURASYS,Do patients with severe ARDS being treated with neuromuscular blocking agents have increased muscle weakness?,Replace,fixed,,
2,ARDS,3,ROSE,"In patients with moderate to severe ARDS, does early use of continuous neuromuscular blockade improve mortality?","Maybe this question: In patients with moderate to severe ARDS, does early use of continuous neuromuscular blockade improve mortality?",fixed,,
3,ARDS,4,ROSE,Do patients with moderate-to-severe ARDS have a significance difference in mortality rate beween patients who recieved an early and continous cisatracurium infusion than those with usual care approach with lighter sedation targets?,Local question (not sure if this is the aim of your project) It will be nice as second step after proving the general summarization is working but focusing in general summarization would be priority in my opinion so you can have meanigful tool.,fixed,Wrong concept since PEEP by itself is mandatory component in ventilator.,"Does the use of neuromuscular blockers in patients with moderate-to-severe ARDS impact cardiovascular stability, particularly in terms of vasopressor requirements and hemodynamic effects, compared to sedation strategy without routine neuromuscular blockade?"
4,ARDS,5,FACTT,"Among patients with ALI/ARDS, does a conservative fluid management strategy improves lung function and decrease ventilator days compared to liberal strategy?","Local question (not sure if this is the aim of your project) consider (WikiJournal): In patients with ALI/ARDS that are intubated and receiving positive pressure ventilation, how does the conservative compare to the liberal fluid management strategy in reducing mortality?",fixed,Check if studies defined conservative by CVP < 4 or elese just dont mention how much the CVP (i prefer the last approach),


## Step 3: Get the chunks from each paper

In [10]:
# Usage example
#papers = sorted(list(set([doc.strip() for doc in df_Paper['docs'].tolist()])))

# Filter papers with 'Delirium' in condition
delirium_papers = df_Paper[df_Paper['condition'].str.contains('Delirium', case=False, na=False)]

# Display the filtered papers
print(f"Found {len(delirium_papers)} papers related to Delirium:")
display(delirium_papers)

# Get unique paper names
delirium_paper_names = delirium_papers['docs'].str.strip().unique()
print(f"Number of unique paper names: {len(delirium_paper_names)}")
delirium_papers_chunks = get_all_chunks_per_paper(graph, delirium_paper_names)

Found 27 papers related to Delirium:


Unnamed: 0,condition,number,docs,Question,Mahmud's Note,status,comments,Unnamed: 8
23,Delirium,24,AID-ICU,Does treatment with haloperidol lead to a significantly greater number of days alive and out of the hospital at 90 days than placebo when encountering patients in ICU with delirium?,,,Can you check if that outcome used in the articles?,yes it was
24,Delirium,25,MIND-USA,"Does the use of ziprasidone as compared with placebo in patients with acute respiratory failure, shock, hypoactive or hyperactive delirium in the ICU alter the duration of delirium?",,,,
25,Delirium,26,SPICE-III,Is there more adverse events in ICU patients who recieved early dexmedetomidine?,,,,
26,Delirium,27,SPICE-III,"Among patients undergoing mechanical ventilation in the ICU, does early dexmedetomidine for sedation result in a mortality rate similar to that of the usual-care group?",,,"Make it two questions. one about is it enough as mono-therapy and second question about mortality (also u can make it broad and not necessarly 90 days, just mortality)",noted
27,Delirium,28,SEDCOM,Was there a difference between dexmedetomidine and midazolam in time at targeted sedation level in mechanically ventilated ICU patients?,,,,
28,Delirium,29,SEDCOM,Is bradycardia an adverse effect in critically ill patients receiving dexmedetomidine?,,,"Make it direct questions ""What is the side effects of dexa?",We are trying to make it yes and no questions
29,Delirium,30,SEDCOM,"At comparable sedation levels, did dexmedetomidine-treated patients spend less time on the ventilator, experience less delirium, and develop less tachycardia and hypertension?",,,consider making it multiple questions,
35,Delirium/Sepsis,36,DPSMVAS,is there a difference in outcomes in patients who receive dexmedetomidine than propofol among mechanically ventilated adults with sepsis?,,,,
36,ARDS/Delirium,37,ETSDMV,,,,,
37,ARDS/Delirium,38,SMDLTSWCI,,,,,


Number of unique paper names: 24
Found 100 chunks in paper AID-ICU
Found 100 chunks in paper MIND-USA
Found 100 chunks in paper SPICE-III
Found 100 chunks in paper SEDCOM
Found 0 chunks in paper DPSMVAS
Found 100 chunks in paper ETSDMV
Found 98 chunks in paper SMDLTSWCI
Found 100 chunks in paper EDvsLSMENDS
Found 100 chunks in paper DDMLSMVICP
Found 100 chunks in paper HDDSICU
Found 100 chunks in paper ESDDVCIP
Found 69 chunks in paper RDSCI
Found 100 chunks in paper DIICU
Found 100 chunks in paper RAEATICU
Found 100 chunks in paper AMTD
Found 100 chunks in paper AUDDICU
Found 100 chunks in paper AUPTICUPP
Found 100 chunks in paper RUSGATICUD
Found 94 chunks in paper ATD
Found 100 chunks in paper DPD
Found 100 chunks in paper ND
Found 59 chunks in paper PDHP
Found 99 chunks in paper ASD
Found 100 chunks in paper FDSD


In [11]:
# check if the expected number of papers match the actual number of papers
# Get list of expected papers
expected_papers = delirium_paper_names.tolist()

# Get list of actual papers from chunks_of_paper directory
actual_papers = [f.replace('chunks_of_', '').replace('.csv', '') 
                 for f in os.listdir('./chunks_of_paper') 
                 if f.endswith('.csv')]

# Find missing papers
missing_papers = set(expected_papers) - set(actual_papers)

print("Missing papers:")
for paper in missing_papers:
    print(f"- {paper}")

Missing papers:
- DPSMVAS


## Step 4: Compare Question Embedding and Paper Chunk Embeddings

In [12]:
# Set pandas display options to show the full text content
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
test_question = df_Paper['Question'].iloc[0]
paper_name = "ACURASYS"  # without .pdf extension
top5chunks = compare_embeddings(
    question=test_question,
    paper=paper_name,
    top_k=5
)
top5chunks

No chunk data found for paper: ACURASYS
