In [1]:
import getpass
import os

import numpy as np
import pandas as pd

# Loading the labeled PubMedQA dataset

In [2]:
# Read the labeled PubMedQA split straight from the Hugging Face Hub.
df = pd.read_parquet(
    "hf://datasets/qiaojin/PubMedQA/pqa_labeled/train-00000-of-00001.parquet"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
# Preview the first five rows of the dataset.
df.head(n=5)

Unnamed: 0,pubid,question,context,long_answer,final_decision
0,21645374,Do mitochondria play a role in remodelling lac...,{'contexts': ['Programmed cell death (PCD) is ...,Results depicted mitochondrial dynamics in viv...,yes
1,16418930,Landolt C and snellen e acuity: differences in...,{'contexts': ['Assessment of visual acuity dep...,"Using the charts described, there was only a s...",no
2,9488747,"Syncope during bathing in infants, a pediatric...",{'contexts': ['Apparent life-threatening event...,"""Aquagenic maladies"" could be a pediatric form...",yes
3,17208539,Are the long-term results of the transanal pul...,{'contexts': ['The transanal endorectal pull-t...,Our long-term study showed significantly bette...,no
4,10808977,Can tailored interventions increase mammograph...,{'contexts': ['Telephone counseling and tailor...,The effects of the intervention were most pron...,yes


In [4]:
# List the column labels of the loaded DataFrame.
df.columns

Index(['pubid', 'question', 'context', 'long_answer', 'final_decision'], dtype='object')

# Creating a Document for each row

In [5]:
from langchain.schema import Document

In [None]:
# One Document per dataset row; `docs` is consumed later when building the vector store.
docs = []

for _, record in df.iterrows():
  # The `context` cell is indexed by 'contexts', 'labels' and 'meshes';
  # each of those sequences is flattened into a comma-separated string.
  ctx = record["context"]

  # Ordered pieces of the page text: section header followed by its value.
  pieces = (
      "Contexts:\n", ", ".join(ctx["contexts"]),
      "\n\nLabels:\n", ", ".join(ctx["labels"]),
      "\n\nMeshes:\n", ", ".join(ctx["meshes"]),
      "\n\nAnswer:\n", record["long_answer"],
      "\n\nFinal Decision:\n", record["final_decision"],
  )
  page_text = "".join(pieces)

  # The question and publication id travel as metadata rather than page text.
  doc_metadata = {"pubid": record["pubid"], "question": record["question"]}

  docs.append(Document(page_content=page_text, metadata=doc_metadata))

# Loading the embedding model using LangChain

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model handed to the vector store when the documents are indexed.
model = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

# Creating a FAISS vector store from the Documents

In [None]:
from langchain_community.vectorstores import FAISS

# Embed every Document with `model` and index the results in FAISS.
vectors = FAISS.from_documents(documents=docs, embedding=model)

# Creating Retriever Interface from the vector store

In [None]:
# Similarity search that checks the top 3 related sources from the vector store.
retriever_search_kwargs = {"k": 3}
retriever = vectors.as_retriever(
    search_type="similarity",
    search_kwargs=retriever_search_kwargs,
)

# Using the Groq API to access the Llama 3 LLM

In [56]:
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

# Never hardcode the key in the notebook: read it from the GROQ_API_KEY
# environment variable, and fall back to an interactive (non-echoing) prompt
# when the variable is unset or empty.
groq_api_key = os.environ.get("GROQ_API_KEY") or getpass.getpass("Enter your Groq API key: ")

# NOTE(review): hosted model IDs get retired over time — confirm that
# "llama3-8b-8192" is still served by Groq before relying on this cell.
llm = ChatGroq(
    api_key=groq_api_key,
    model="llama3-8b-8192",
    temperature=0,  # temperature 0 to keep answers as repeatable as possible
)

# Setting Up Retrieval Augmented QA Chain

In [None]:
# Retrieval-augmented QA chain wiring the retriever to the Groq-hosted LLM.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_reduce",
    retriever=retriever,
    # Keep the retrieved source documents in the result alongside the answer.
    return_source_documents=True,
)

# Example of a Query

In [57]:
query = "Is the Hawkins sign able to predict necrosis in fractures of the neck of the astragalus?, yes or no"

# Calling the chain object directly (`chain({...})`) goes through the deprecated
# `Chain.__call__`; `invoke` is the supported entry point and returns the same
# output dict, whose "result" key holds the generated answer.
result = chain.invoke({"query": query})

print("Answer:", result["result"])

Answer: Based on the provided context, the answer is:

No

According to the text, a positive Hawkins sign rules out the development of avascular necrosis, but its absence does not confirm it.
