# Get the Top 5 Chunks to Check Paper's Stance

## Step 0: Environment Setup

In [None]:
from dotenv import load_dotenv # type: ignore
import os
from langchain_neo4j import Neo4jGraph # type: ignore
from libs import create_vector_index
import pandas as pd # type: ignore
from conn import connect2Googlesheet
from annolibs import get_all_chunks_per_paper, compare_embeddings
# Read key/value pairs from a local .env file into the process environment so the
# os.getenv("NEO4J_...") lookups used for the database connection can find them.
# NOTE(review): presumably the .env file sits next to this notebook or in a parent
# directory — confirm where the credentials are expected to live.
load_dotenv()

True

In [2]:
# Connect to the Neo4j database using credentials taken from the environment.
try:
    graph = Neo4jGraph(
        url=os.getenv("NEO4J_URL"),
        username=os.getenv("NEO4J_USERNAME"),
        password=os.getenv("NEO4J_PASSWORD")
    )
    print("Connected to Neo4j database successfully.")
except ValueError as e:
    print(f"Could not connect to Neo4j database: {e}")
    # Stop the cell here. Continuing would either hit a NameError on `graph`
    # (fresh kernel) or silently reuse a stale connection from an earlier run.
    raise

# Make sure the vector index exists, creating it only when it is missing.
index_name = "entities"
query = "SHOW INDEXES YIELD name, type WHERE type = 'VECTOR' AND name = $index_name"

# The index name is passed as a query parameter rather than formatted into the Cypher text.
result = graph.query(query, params={"index_name": index_name})
if result:
    print(f"The '{index_name}' index already exists.")
else:
    # Use the same variable as the lookup so the checked and created names cannot diverge.
    create_vector_index(graph, index_name)

Connected to Neo4j database successfully.
The 'entities' index already exists.


## Step 1: Load Questions from Google Sheet

In [3]:
# Index of the worksheet to read (the one labelled "relevance" in the original note).
WORKSHEET_INDEX = 2

# Open the spreadsheet and pick the worksheet by its index.
spreadsheet = connect2Googlesheet()
worksheet = spreadsheet.get_worksheet(WORKSHEET_INDEX)

# Fetch every record from the worksheet and wrap the result in a DataFrame.
data = worksheet.get_all_records()
df_Paper = pd.DataFrame(data)

# Preview the first rows as the cell output.
df_Paper.head()

Unnamed: 0,condition,number,docs,Question,Mahmud's Note,status,comments,Unnamed: 8
0,ARDS,1,ACURASYS,Does early administration of neuromuscular blo...,Like,,,
1,ARDS,2,ACURASYS,Do patients with severe ARDS being treated wit...,Replace,fixed,,
2,ARDS,3,ROSE,"In patients with moderate to severe ARDS, does...",Maybe this question: In patients with moderate...,fixed,,
3,ARDS,4,ROSE,Do patients with moderate-to-severe ARDS have ...,Local question (not sure if this is the aim of...,fixed,Wrong concept since PEEP by itself is mandator...,Does the use of neuromuscular blockers in pati...
4,ARDS,5,FACTT,"Among patients with ALI/ARDS, does a conservat...",Local question (not sure if this is the aim of...,fixed,Check if studies defined conservative by CVP <...,


## Step 3: Get the chunks from each paper

In [9]:
# Build the alphabetically sorted list of distinct paper names from the 'docs' column,
# trimming surrounding whitespace so differently padded spellings collapse to one entry.
unique_docs = {name.strip() for name in df_Paper['docs'].tolist()}
papers = sorted(unique_docs)

# Fetch the chunks for every paper through the project helper.
# NOTE(review): judging by the cell output, the helper reports a chunk count and
# saves one CSV per paper under ./chunks_of_paper/ — confirm against annolibs.
paper_dfs = get_all_chunks_per_paper(graph, papers)


[#C06A]  _: <CONNECTION> error: Failed to read from defunct connection ResolvedIPv4Address(('34.28.184.63', 7687)) (ResolvedIPv4Address(('34.28.184.63', 7687))): OSError('No data')
Unable to retrieve routing information
Transaction failed and will be retried in 1.0502174141839158s (Unable to retrieve routing information)
[#C06C]  _: <CONNECTION> error: Failed to read from defunct connection IPv4Address(('si-983b61ab-4110.production-orch-0696.neo4j.io', 7687)) (ResolvedIPv4Address(('34.28.184.63', 7687))): OSError('No data')
Transaction failed and will be retried in 1.796375018516084s (Failed to read from defunct connection IPv4Address(('si-983b61ab-4110.production-orch-0696.neo4j.io', 7687)) (ResolvedIPv4Address(('34.28.184.63', 7687))))


Found 100 chunks in paper ACURASYS
Saved ./chunks_of_paper/chunks_of_ACURASYS.csv
Found 100 chunks in paper ADRENAL
Saved ./chunks_of_paper/chunks_of_ADRENAL.csv
Found 100 chunks in paper AID-ICU
Saved ./chunks_of_paper/chunks_of_AID-ICU.csv
Found 100 chunks in paper ALBIOS
Saved ./chunks_of_paper/chunks_of_ALBIOS.csv
Found 100 chunks in paper AMTD
Saved ./chunks_of_paper/chunks_of_AMTD.csv
Found 100 chunks in paper ANNANE
Saved ./chunks_of_paper/chunks_of_ANNANE.csv
Found 98 chunks in paper APROCCHSS
Saved ./chunks_of_paper/chunks_of_APROCCHSS.csv
Found 100 chunks in paper APV
Saved ./chunks_of_paper/chunks_of_APV.csv
Found 100 chunks in paper ARDSNet
Saved ./chunks_of_paper/chunks_of_ARDSNet.csv
Found 100 chunks in paper ARDSSRDRFMS
Saved ./chunks_of_paper/chunks_of_ARDSSRDRFMS.csv
Found 100 chunks in paper ARISE
Saved ./chunks_of_paper/chunks_of_ARISE.csv
Found 99 chunks in paper ASD
Saved ./chunks_of_paper/chunks_of_ASD.csv
Found 94 chunks in paper ATD
Saved ./chunks_of_paper/chunk

## Step 4: Compare Question Embedding and Paper Chunk Embeddings

In [8]:
# Show full cell text and every row/column when the result frame is rendered.
# NOTE: pd.set_option is global, so these settings also apply to every later cell.
for option in ('display.max_colwidth', 'display.max_rows', 'display.max_columns'):
    pd.set_option(option, None)

# Take the question and its paper from the SAME sheet row so they cannot drift apart
# if the sheet is reordered or edited (the paper used to be hard-coded separately).
test_row = df_Paper.iloc[0]
test_question = test_row['Question']
paper_name = test_row['docs'].strip()  # paper name without the .pdf extension

# Ask the project helper for the five best-matching chunks of that paper.
top5chunks = compare_embeddings(
    question=test_question,
    paper=paper_name,
    top_k=5
)
top5chunks

Unnamed: 0,paper_name,position,chunk_text,similarity_score
63,ACURASYS.pdf,64,"Neuromuscular Blocking Agents in ARDS n engl j med 363;12 nejm.org september 16, 2010 1111 without the aid of a ventilator, for a period of at least 48 hours for weaning from the ventilator to be considered successful. The number of ventilator- free days was considered to be zero for patients who were weaned from mechanical ventilation but who died before day 28 or",0.7262
34,ACURASYS.pdf,35,"Neuromuscular Blocking Agents in ARDS n engl j med 363;12 nejm.org september 16, 2010 1109 sure were adjusted as in the Prospective, Random- ized, Multi-Center Trial of 12 ml/kg Tidal Volume Positive Pressure Ventilation for Treatment of Acute Lung Injury and Acute Respiratory Distress Syndrome (ARMA).14 An open-label, rapid, intravenous injection",0.6263
10,ACURASYS.pdf,11,"mechanical ventilation for the acute respiratory distress syn- drome (ARDS), neuromuscular blocking agents may improve oxygenation and de- crease ventilator-induced lung injury but may also cause muscle weakness. We evalu- ated clinical outcomes after 2 days of therapy with neuromuscular blocking agents in patients with early, severe ARDS. Methods In this multicenter, double-blind trial, 340 patients presenting to the intensive care unit (ICU) with an onset",0.6136
22,ACURASYS.pdf,23,"did not receive a blocking agent; P = 0.06). However, this study was not designed or powered to evaluate mortality. Thus, the benefits and risks of adjunctive therapy with neuromuscu- lar blocking agents in patients with ARDS who were receiving lung-protective mechanical venti- lation14 require further evaluation. We conducted a multicenter, randomized, pla- cebo-controlled, double-blind trial to determine",0.612
16,ACURASYS.pdf,17,"�= 0.05). The rate of ICU-acquired paresis did not differ significantly between the two groups. Conclusions In patients with severe ARDS, early administration of a neuromuscular blocking agent improved the adjusted 90-day survival and increased the time off the ventilator without increasing muscle weakness. (Funded by Assistance Publique–Hôpitaux de Marseille and the Programme Hospitalier de Recherche Clinique Rég",0.5906
