In [1]:
import numpy as np
import pandas as pd

# Loading the labeled PubMedQA dataset

In [2]:
# Hub location of the parquet file for the pqa_labeled subset of PubMedQA.
PUBMEDQA_LABELED_PATH = (
    "hf://datasets/qiaojin/PubMedQA/pqa_labeled/train-00000-of-00001.parquet"
)

# Read the whole file into a DataFrame.
df = pd.read_parquet(PUBMEDQA_LABELED_PATH)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Preview the first rows to sanity-check what was loaded.
df.head()

Unnamed: 0,pubid,question,context,long_answer,final_decision
0,21645374,Do mitochondria play a role in remodelling lac...,{'contexts': ['Programmed cell death (PCD) is ...,Results depicted mitochondrial dynamics in viv...,yes
1,16418930,Landolt C and snellen e acuity: differences in...,{'contexts': ['Assessment of visual acuity dep...,"Using the charts described, there was only a s...",no
2,9488747,"Syncope during bathing in infants, a pediatric...",{'contexts': ['Apparent life-threatening event...,"""Aquagenic maladies"" could be a pediatric form...",yes
3,17208539,Are the long-term results of the transanal pul...,{'contexts': ['The transanal endorectal pull-t...,Our long-term study showed significantly bette...,no
4,10808977,Can tailored interventions increase mammograph...,{'contexts': ['Telephone counseling and tailor...,The effects of the intervention were most pron...,yes


In [4]:
# List the column names available in the loaded DataFrame.
df.columns

Index(['pubid', 'question', 'context', 'long_answer', 'final_decision'], dtype='object')

# Creating a Document for each row

In [5]:
from langchain.schema import Document

In [6]:
def _row_to_document(row):
    """Turn one DataFrame row into a LangChain Document.

    The page content concatenates the row's contexts, labels, meshes,
    long answer and final decision under plain-text headings; the pubid and
    the question are carried along as metadata.
    """
    context = row["context"]
    pieces = [
        "Contexts:\n", ", ".join(context["contexts"]),
        "\n\nLabels:\n", ", ".join(context["labels"]),
        "\n\nMeshes:\n", ", ".join(context["meshes"]),
        "\n\nAnswer:\n", row["long_answer"],
        "\n\nFinal Decision:\n", row["final_decision"],
    ]
    return Document(
        page_content="".join(pieces),
        metadata={"pubid": row["pubid"], "question": row["question"]},
    )


# One Document per row of the dataset, in row order.
docs = [_row_to_document(row) for _, row in df.iterrows()]

# Loading the embedding model through LangChain

In [7]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Name of the embedding model handed to the LangChain wrapper.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Embedding model instance, built once for the notebook.
model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


# Creating FAISS Vectors of the Documents

In [8]:
from langchain_community.vectorstores import FAISS

# Embed every Document with `model` and index the results in a FAISS store.
vectors = FAISS.from_documents(documents=docs, embedding=model)

# Creating Retriever Interface from the vector store

In [9]:
# Number of most-similar documents to fetch from the vector store per query.
TOP_K = 3

retriever = vectors.as_retriever(
    search_type="similarity",
    search_kwargs={"k": TOP_K},
)

# Using the Groq API to access the Llama 3.3 70B LLM

In [13]:
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Prefer this notebook's API_KEY variable; fall back to GROQ_API_KEY, the name
# the Groq client looks for by default, so either setup keeps working.
API_KEY = os.environ.get("API_KEY") or os.environ.get("GROQ_API_KEY")
if not API_KEY:
    # Fail here with an actionable message instead of handing None to the client
    # and getting an opaque error from deeper in the stack.
    raise RuntimeError(
        "No Groq API key found: set API_KEY (or GROQ_API_KEY) in the "
        "environment or in a .env file before running this cell."
    )

# Chat model served through Groq; temperature=0 asks for the least random output.
llm = ChatGroq(
    api_key=API_KEY,
    model="llama-3.3-70b-versatile",
    temperature=0,
)

# Setting Up Retrieval Augmented QA Chain

In [14]:
# Combine-documents strategy passed to the QA chain.
QA_CHAIN_TYPE = "map_reduce"

# Retrieval-augmented QA chain: `retriever` supplies documents, `llm` answers,
# and the retrieved documents are returned alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=QA_CHAIN_TYPE,
    retriever=retriever,
    return_source_documents=True,
)

# Example of a Query

In [15]:
# Example question with an explicit yes/no instruction appended.
query = "Is the Hawkins sign able to predict necrosis in fractures of the neck of the astragalus?, yes or no"

# Use .invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated in LangChain, and .invoke() returns the same result dict.
result = chain.invoke({"query": query})

print("Answer:", result["result"])

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Answer: Yes.
