# Get the Top 5 Chunks to Check Paper's Stance

## Step 0: Environment Setup

In [1]:
from dotenv import load_dotenv # type: ignore
import os
from langchain_neo4j import Neo4jGraph # type: ignore
from libs import create_vector_index
import pandas as pd # type: ignore
from conn import connect2Googlesheet
from annolibs import get_all_chunks_per_paper, compare_embeddings
from openai import OpenAI
load_dotenv()

True

In [2]:
# Connect to Neo4j database
try:
    graph = Neo4jGraph(
        url=os.getenv("NEO4J_URL"),
        username=os.getenv("NEO4J_USERNAME"),
        password=os.getenv("NEO4J_PASSWORD")
    )
    print("Connected to Neo4j database successfully.")
except ValueError as e:
    print(f"Could not connect to Neo4j database: {e}")

# Check if the entities index exists
index_name = "entities"
query = "SHOW INDEXES YIELD name, type WHERE type = 'VECTOR' AND name = $index_name"

result = graph.query(query, params={"index_name": index_name})
if result:
    print("The 'entities' index already exists.")
else:
    create_vector_index(graph, "entities")

Connected to Neo4j database successfully.
The 'entities' index already exists.


## Step 1: Load Questions from Google Sheet

In [3]:
spreadsheet = connect2Googlesheet()

# Select the worksheet: relevance
worksheet = spreadsheet.get_worksheet(2)  

# Get all records as a list of dictionaries
data = worksheet.get_all_records()

# Convert to Pandas DataFrame
df_Paper = pd.DataFrame(data)
df_Paper.head()

Unnamed: 0,condition,number,docs,Question,Mahmud's Note,status,comments,Unnamed: 8
0,ARDS,1,ACURASYS,Does early administration of neuromuscular blo...,Like,,,
1,ARDS,2,ACURASYS,Do patients with severe ARDS being treated wit...,Replace,fixed,,
2,ARDS,3,ROSE,"In patients with moderate to severe ARDS, does...",Maybe this question: In patients with moderate...,fixed,,
3,ARDS,4,ROSE,Do patients with moderate-to-severe ARDS have ...,Local question (not sure if this is the aim of...,fixed,Wrong concept since PEEP by itself is mandator...,Does the use of neuromuscular blockers in pati...
4,ARDS,5,FACTT,"Among patients with ALI/ARDS, does a conservat...",Local question (not sure if this is the aim of...,fixed,Check if studies defined conservative by CVP <...,


## Step 3: Get the chunks from each paper

In [7]:
# Filter papers with 'Delirium' in condition
delirium_papers = df_Paper[df_Paper['condition'].str.contains('Delirium', case=False, na=False)]

# Display the filtered papers
print(f"Found {len(delirium_papers)} papers related to Delirium:")
# display(delirium_papers)

# Get unique paper names
delirium_paper_names = delirium_papers['docs'].str.strip().unique()
print(f"Number of unique paper names: {len(delirium_paper_names)}")
delirium_papers_chunks = get_all_chunks_per_paper(graph, delirium_paper_names)

Found 27 papers related to Delirium:
Number of unique paper names: 24
Found 100 chunks in paper AID-ICU
Found 100 chunks in paper MIND-USA
Found 100 chunks in paper SPICE-III
Found 100 chunks in paper SEDCOM
Found 0 chunks in paper DPSMVAS
Found 100 chunks in paper ETSDMV
Found 98 chunks in paper SMDLTSWCI
Found 100 chunks in paper EDvsLSMENDS
Found 100 chunks in paper DDMLSMVICP
Found 100 chunks in paper HDDSICU
Found 100 chunks in paper ESDDVCIP
Found 69 chunks in paper RDSCI
Found 100 chunks in paper DIICU
Found 100 chunks in paper RAEATICU
Found 100 chunks in paper AMTD
Found 100 chunks in paper AUDDICU
Found 100 chunks in paper AUPTICUPP
Found 100 chunks in paper RUSGATICUD
Found 94 chunks in paper ATD
Found 100 chunks in paper DPD
Found 100 chunks in paper ND
Found 59 chunks in paper PDHP
Found 99 chunks in paper ASD
Found 100 chunks in paper FDSD


In [5]:
# check if the expected number of papers match the actual number of papers
# Get list of expected papers
expected_papers = delirium_paper_names.tolist()

# Get list of actual papers from chunks_of_paper directory
actual_papers = [f.replace('chunks_of_', '').replace('.csv', '') 
                 for f in os.listdir('./chunks_of_paper') 
                 if f.endswith('.csv')]

# Find missing papers
missing_papers = set(expected_papers) - set(actual_papers)

print("Missing papers:")
for paper in missing_papers:
    print(f"- {paper}")

Missing papers:
- DPSMVAS


## Step 4: Compare Question Embedding and Paper Chunk Embeddings

In [6]:
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not found in .env file")

client = OpenAI(api_key=api_key)

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

idx = 35 # index of the question. Change this to test different questions
test_question = df_Paper['Question'].iloc[idx]
print(f"Question {idx+1}: {test_question}")

for i, paper in enumerate(delirium_paper_names):
    print(f"\nPaper {i+1}: {paper}")
    paper_name = str(paper)  # without .pdf extension
    top5chunks = compare_embeddings(
        question=test_question,
        paper=paper_name,
        top_k=5
    )
    
    prompt = f"Does the following question have any relevance to the text chunks below? Keep your answer short\n\nQuestion: {test_question}\n\nText Chunks:\n" + \
         '\n'.join([f"{i+1}. {text}" for i, text in enumerate(top5chunks['chunk_text'])])
    
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200
        )
    except Exception as e:
        print(f"Error: {e}")
        continue

    output = response.choices[0].message.content.strip()
    display(output)

Question 36: is there a difference in outcomes in patients who receive dexmedetomidine than propofol among mechanically ventilated adults with sepsis?

Paper 1: AID-ICU


'No, there is no mention of dexmedetomidine in the text chunks provided.'


Paper 2: MIND-USA


'No.'


Paper 3: SPICE-III


'Yes, the question is relevant as it pertains to comparing outcomes in patients receiving dexmedetomidine versus propofol among mechanically ventilated adults with sepsis.'


Paper 4: SEDCOM


'Yes, there is relevance to the question in the text chunks provided, as they discuss a study comparing dexmedetomidine to midazolam for sedation of critically ill patients.'


Paper 5: DPSMVAS
No chunk data found for paper: DPSMVAS


'No, the text chunks do not address outcomes in patients who receive dexmedetomidine versus propofol among mechanically ventilated adults with sepsis.'


Paper 6: ETSDMV


'Yes, the question of whether there is a difference in outcomes between dexmedetomidine and propofol in mechanically ventilated adults with sepsis is relevant to the text chunks provided.'


Paper 7: SMDLTSWCI


'Yes, the question is relevant as it pertains to comparing outcomes of patients who receive dexmedetomidine versus propofol among mechanically ventilated adults with sepsis.'


Paper 8: EDvsLSMENDS


'Yes, the question is relevant to the text chunks as they discuss the outcomes of patients with sepsis who receive dexmedetomidine compared to propofol.'


Paper 9: DDMLSMVICP


'Yes, it is relevant because the text includes information about the SEDCOM trial, which compared dexmedetomidine with other sedatives.'


Paper 10: HDDSICU


'Yes, the question is relevant to the text chunks as they discuss the use of dexmedetomidine in mechanically ventilated patients and the outcomes of different doses on sedation and other parameters.'


Paper 11: ESDDVCIP


'Yes, the question is relevant as the text chunks discuss the outcomes of using dexmedetomidine in mechanically ventilated adults, including differences in mortality rates based on age and operative status.'


Paper 12: RDSCI


'Yes, the question is relevant to the text chunks as they mention studies comparing outcomes of patients with sepsis who receive dexmedetomidine versus propofol.'


Paper 13: DIICU


'Yes, it does have relevance as it compares outcomes in patients sedated with dexmedetomidine to those sedated with propofol.'


Paper 14: RAEATICU


'Yes, the question is relevant as it involves comparing outcomes in patients who receive dexmedetomidine versus propofol among mechanically ventilated adults with sepsis.'


Paper 15: AMTD


'No.'


Paper 16: AUDDICU


'No.'


Paper 17: AUPTICUPP


'No.'


Paper 18: RUSGATICUD


'No, the question is not relevant to the text chunks provided.'


Paper 19: ATD


'No, there is no relevance to the question in the provided text chunks.'


Paper 20: DPD


'No, the question is about outcomes in patients receiving different medications for sepsis, while the text chunks are about delirium and catatonia in hospitalized patients.'


Paper 21: ND


'No.'


Paper 22: PDHP


'No, the question is not relevant to the text chunks provided.'


Paper 23: ASD


'No, the question is not relevant to the text chunks provided.'


Paper 24: FDSD


'Yes, the question is relevant as it involves comparing outcomes in patients who receive different medications (dexmedetomidine vs. propofol) among mechanically ventilated adults with sepsis, which is related to the text discussing delirium in adult in-patients.'