In [1]:
import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
import json



## Load Data (Pubmed pdf CSV containing title,abstrct and the name of the pdf stored)

In [2]:
data = pd.read_csv('pubmed_pdfs.csv')
data = data[data.titles != 'Not Found']
data=data[data.pdfs != 'No pdf']
len(data)

300

In [3]:
# Basic text cleaning function 
def clean_abstracts(text):
    return text.replace("Abstract\n","") 
data.abstracts = data.abstracts.apply(lambda x: clean_abstracts(x))

## Creating a Class for the Meidcal Literauture Review System. 

In [5]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

class MedicalLiteratureReview:
    def __init__(self, temperature=0, format='json'):
        # Initialize the ChatOllama model with the given parameters
        self.llm = None
        self.prompt = None
        self.chain=None
        self.responses = []
        self.temperature = temperature
        self.format = format
        # self.num_gpu = num_gpu
        # self.num_predict=num_predict


    def setup_llm(self,model_name = 'llama3'):  
        self.llm = ChatOllama(
            model=model_name,
            temperature=self.temperature,
            format=self.format
        )
    
    def setup_chain(self):
        # Initialize the ChatPromptTemplate with the given message format
        system_prompt = '''You are a Medical Literature Review Expert. Based on the given title and abstract, answer the following question correctly and provide reasons to support your answer. The output should be in JSON format with the following fields:\n- \"answer\": Yes, No, or Unknown\n- \"reason\": A brief explanation for your decision'''
        self.prompt = ChatPromptTemplate.from_messages(
            [
                ("system", "Title: {title}\nAbstract: {abstract}\n" + system_prompt),
                ("human", "\nQuestion: {input}"),
            ]
        )

        self.chain = self.prompt | self.llm
    

    def ask_questions(self, title, abstract, questions):
        self.responses=[]
        for question in questions: 
            # Process the request through the chain
            response = self.chain.invoke({"title": title,"abstract": abstract,"input": question})
            self.responses.append(response.content)
        # Return the response
        return self.responses

review_expert = MedicalLiteratureReview()
review_expert.setup_llm()
review_expert.setup_chain()

In [None]:
# Import necessary classes from langchain_core and langchain_ollama libraries
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

# Define a class for conducting medical literature reviews
class MedicalLiteratureReview:
    def __init__(self, temperature=0, format='json'):
        # Initialize the class with default temperature and output format
        # The temperature controls the randomness of the model's responses
        # The format specifies the output format (e.g., 'json')
        
        # Initialize the ChatOllama model and other necessary attributes as None
        self.llm = None
        self.prompt = None
        self.chain = None
        self.responses = []  # To store responses from the model
        self.temperature = temperature  # Temperature setting for the model
        self.format = format  # Output format (e.g., JSON)
        
        # Placeholder for additional attributes such as num_gpu, num_predict if needed
        # self.num_gpu = num_gpu
        # self.num_predict = num_predict

    # Method to set up the language model (LLM) using ChatOllama
    def setup_llm(self, model_name='llama3'):
        # Initialize the ChatOllama model with the specified model name
        # This sets up the LLM with the desired model, temperature, and output format
        self.llm = ChatOllama(
            model=model_name,
            temperature=self.temperature,
            format=self.format
        )

    # Method to set up the prompt template and the processing chain
    def setup_chain(self):
        # Define a system prompt that instructs the model to act as a Medical Literature Review Expert
        # The system prompt includes instructions on how to format the output in JSON
        system_prompt = '''You are a Medical Literature Review Expert. Based on the given title and abstract, answer the following question correctly and provide reasons to support your answer. The output should be in JSON format with the following fields:\n- "answer": Yes, No, or Unknown\n- "reason": A brief explanation for your decision'''

        # Create a ChatPromptTemplate from the defined system prompt and human input
        self.prompt = ChatPromptTemplate.from_messages(
            [
                # The system message provides the title, abstract, and instructions for the model
                ("system", "Title: {title}\nAbstract: {abstract}\n" + system_prompt),
                # The human message is the question to be answered based on the title and abstract
                ("human", "\nQuestion: {input}"),
            ]
        )

        # Combine the prompt template with the LLM to create a processing chain
        self.chain = self.prompt | self.llm

    # Method to ask questions based on the provided title and abstract
    def ask_questions(self, title, abstract, questions):
        # Re-initialize responses to empty list to store responses
        self.responses = []
        
        # Iterate over each question provided in the list
        for question in questions:
            # Invoke the processing chain with the title, abstract, and current question
            response = self.chain.invoke({"title": title, "abstract": abstract, "input": question})
            # Append the model's response to the responses list
            self.responses.append(response.content)
        
        # Return the list of responses
        return self.responses



review_expert = MedicalLiteratureReview()  # Create an instance of MedicalLiteratureReview
review_expert.setup_llm()                  # Set up the language model
review_expert.setup_chain()                # Set up the prompt template and processing chain

# You can now use the `ask_questions` method to evaluate medical literature based on titles, abstracts, and questions.
#review_expert.ask_questions(title,abstract,list_of_questions)  


## Run Models on the decided input questions 

## LLama3

In [6]:
from tqdm import tqdm

review_expert = MedicalLiteratureReview()
review_expert.setup_llm()
review_expert.setup_chain()


titles = list(data.titles)
abstracts = list(data.abstracts)
papers = [(a,b) for a,b in zip(titles,abstracts)]


study_design = ['Is it is a review article?', "Is this paper a meta analysis?", "Is this paper an editorial?", "Is this paper a conference proceedings?", "Is this paper a letter to editors?", "Is this paper abstract only?"]


population = ["Does this paper mention human patients or participants above the age of 18?",
             "Does this paper mention the use of any vestubular testing methods? (A few examples are caloric test, rotatory chair test, (video) head impulse test)", "Based on the abstract, can you determine if the population of this research is related to either of these - unilateral vestibular hypofunction or Unilateral vestibular loss or Unilateral vestibular failure or Unilateral vestibular dysfunction or Unilateral vestibulopathy or Unilateral vestibular deafferentation or Unilateral vestibular disease or Unilateral vestibular disorder or Unilateral vestibular syndrome?"]


outcome = [
        "Does this paper describe any chronic or Persistent or Enduring or Permanent or Uncompensated or Continuous or Recurrent symptomps, signs or complaints?",
        "Does this paper describe any patient-reported questionnaires (Dizziness Handicap Inventory, or Vertigo Symptom Scales or Visual Analogue Scale)?",
        "Does this paper mention duration of symptoms to be more than 3 months?"]

study_outputs = []
pop_outputs = []
out_outputs = []

for title,abstract in tqdm(papers): 
    study_outputs.append(review_expert.ask_questions( title, abstract, study_design))
    pop_outputs.append(review_expert.ask_questions( title, abstract, population))
    out_outputs.append(review_expert.ask_questions( title, abstract, outcome))
    

100%|██████████| 300/300 [2:55:47<00:00, 35.16s/it]  


In [4]:
data1 = {
    'StudyDesign':study_outputs,
    'Population': pop_outputs,
    'Outcome': out_outputs
}
df = pd.DataFrame(data1)

df.to_csv('first_screening_outputs/llama3.csv', index=False)
print("DataFrame saved")

DataFrame saved


## LLama3.1

In [9]:
from tqdm import tqdm

review_expert = MedicalLiteratureReview()
review_expert.setup_llm("llama3.1")
review_expert.setup_chain()


titles = list(data.titles)
abstracts = list(data.abstracts)
papers = [(a,b) for a,b in zip(titles,abstracts)]


study_design = ['Is it is a review article?', "Is this paper a meta analysis?", "Is this paper an editorial?", "Is this paper a conference proceedings?", "Is this paper a letter to editors?", "Is this paper abstract only?","Is this paper written other than English, French and German?" ]


population = ["Does this paper mention human patients or participants above the age of 18?",
             "Does this paper mention the use of any vestubular testing methods? (A few examples are caloric test, rotatory chair test, (video) head impulse test)", "Based on the abstract, can you determine if the population of this research is related to either of these - unilateral vestibular hypofunction or Unilateral vestibular loss or Unilateral vestibular failure or Unilateral vestibular dysfunction or Unilateral vestibulopathy or Unilateral vestibular deafferentation or Unilateral vestibular disease or Unilateral vestibular disorder or Unilateral vestibular syndrome?"]


outcome = [
        "Does this paper describe any chronic or Persistent or Enduring or Permanent or Uncompensated or Continuous or Recurrent symptomps, signs or complaints?",
        "Does this paper describe any patient-reported questionnaires (Dizziness Handicap Inventory, or Vertigo Symptom Scales or Visual Analogue Scale)?",
        "Does this paper mention duration of symptoms to be more than 3 months?"]

study_outputs = []
pop_outputs = []
out_outputs = []

for title,abstract in tqdm(papers): 
    study_outputs.append(review_expert.ask_questions( title, abstract, study_design))
    pop_outputs.append(review_expert.ask_questions( title, abstract, population))
    out_outputs.append(review_expert.ask_questions( title, abstract, outcome))
    

100%|██████████| 300/300 [3:05:29<00:00, 37.10s/it]  


In [5]:
data1 = {
    'StudyDesign':study_outputs,
    'Population': pop_outputs,
    'Outcome': out_outputs
}
df = pd.DataFrame(data1)

df.to_csv('first_screening_outputs/llama3.1.csv', index=False)
print("DataFrame saved")

DataFrame saved


## Gemma2

In [11]:
from tqdm import tqdm

review_expert = MedicalLiteratureReview()
review_expert.setup_llm("gemma2")
review_expert.setup_chain()


titles = list(data.titles)
abstracts = list(data.abstracts)
papers = [(a,b) for a,b in zip(titles,abstracts)]


study_design = ['Is it is a review article?', "Is this paper a meta analysis?", "Is this paper an editorial?", "Is this paper a conference proceedings?", "Is this paper a letter to editors?", "Is this paper abstract only?","Is this paper written other than English, French and German?" ]


population = ["Does this paper mention human patients or participants above the age of 18?",
             "Does this paper mention the use of any vestubular testing methods? (A few examples are caloric test, rotatory chair test, (video) head impulse test)", "Based on the abstract, can you determine if the population of this research is related to either of these - unilateral vestibular hypofunction or Unilateral vestibular loss or Unilateral vestibular failure or Unilateral vestibular dysfunction or Unilateral vestibulopathy or Unilateral vestibular deafferentation or Unilateral vestibular disease or Unilateral vestibular disorder or Unilateral vestibular syndrome?"]


outcome = [
        "Does this paper describe any chronic or Persistent or Enduring or Permanent or Uncompensated or Continuous or Recurrent symptomps, signs or complaints?",
        "Does this paper describe any patient-reported questionnaires (Dizziness Handicap Inventory, or Vertigo Symptom Scales or Visual Analogue Scale)?",
        "Does this paper mention duration of symptoms to be more than 3 months?"]

study_outputs = []
pop_outputs = []
out_outputs = []

for title,abstract in tqdm(papers): 
    study_outputs.append(review_expert.ask_questions( title, abstract, study_design))
    pop_outputs.append(review_expert.ask_questions( title, abstract, population))
    out_outputs.append(review_expert.ask_questions( title, abstract, outcome))
    

100%|██████████| 300/300 [3:17:58<00:00, 39.59s/it]  


In [12]:
data1 = {
    'StudyDesign':study_outputs,
    'Population': pop_outputs,
    'Outcome': out_outputs
}
df = pd.DataFrame(data1)

df.to_csv('first_screening_outputs/gemma.csv', index=False)
print("DataFrame saved")

DataFrame saved to output.csv
