# RAG Medical Research
### with Groq


In [1]:
# Import necessary libraries
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint
import warnings
from langchain_groq import ChatGroq
from langchain.prompts.prompt import PromptTemplate

# Load environment variables
load_dotenv()



True

In [2]:
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Chat-model settings kept together in one mapping and unpacked into the client.
groq_settings = {
    "model": "llama-3.1-8b-instant",
    "temperature": 0,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
}
llm = ChatGroq(**groq_settings)

---
## Data Ingestion

In [9]:
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path

def load_multiple_pdfs(folder_path):
    """
    Load the contents of every ``*.pdf`` file located directly inside a folder.

    Args:
        folder_path: Directory to scan (string or path-like). Sub-folders are
            not searched.

    Returns:
        One flat list holding whatever each ``PyPDFLoader(...).load()`` call
        returned, concatenated in file-name order. Empty when the folder
        contains no PDF files.

    Raises:
        FileNotFoundError: If ``folder_path`` is not an existing directory.
    """
    pdf_folder = Path(folder_path)
    # Without this check a mistyped path would silently produce an empty list.
    if not pdf_folder.is_dir():
        raise FileNotFoundError(f"PDF folder not found: {pdf_folder}")

    documents = []
    # Sort the matches so repeated runs see the files in the same order.
    for pdf_file in sorted(pdf_folder.glob("*.pdf")):
        print(f"Loading {pdf_file.name}...")
        loader = PyPDFLoader(file_path=str(pdf_file))
        documents.extend(loader.load())

    return documents

# Load all PDFs from folder
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell only
# runs unchanged on the author's machine; consider a path relative to the repository.
medi_ppt_docs = load_multiple_pdfs(folder_path=r"C:\Users\peter\Desktop\ds_ai\repo_folder\nutrition-ai-assistant\data\raw\DataBasePPT")
# Report how many items were loaded, then show the text of the first one.
# Indexing [0] raises IndexError when nothing was loaded.
print(f"Loaded {len(medi_ppt_docs)} pages total")
print(medi_ppt_docs[0].page_content)

Loading Nutritional-Considerations-ALS.pdf...
Loaded 36 pages total
WELCOME!
March 20, 2023
________________________________________________________________________________________
Guest Speaker:   
Michelle McDonagh, RD, CD
Outpatient Dietitian-ALS, CF, and Diabetes Care Clinics 
The Medical College of Wisconsin & Froedtert Hospital
ALS Association
National Office-Care Services
Ph: 800-782-4747  Cynthia.Knoche@als.org
Nutritional Considerations in 
ALS


---

## Document Chunking


In [10]:
# Import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Example chunking function
def split_documents(documents, chunk_size=200, chunk_overlap=50):
    """
    Break documents into chunks and tag every chunk with a positional id.

    The splitting itself is delegated to ``RecursiveCharacterTextSplitter``,
    configured with ``chunk_size`` and ``chunk_overlap``. Each resulting chunk
    gets ``metadata["id"] = "chunk_<n>"``, where ``<n>`` is its index in the
    returned list, so a chunk can be mapped back later.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    pieces = splitter.split_documents(documents=documents)

    # Stamp an identifier onto each chunk so it can be traced later.
    for position, piece in enumerate(pieces):
        piece.metadata["id"] = f"chunk_{position}"

    return pieces

In [11]:
# Split the loaded PDF content using the default chunk settings of split_documents.
# Nothing is displayed by this cell; the chunks are only stored for the next steps.
medi_ppt_chunks = split_documents(medi_ppt_docs)


---

## Embedding and Storage


In [12]:
# Import libraries
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.faiss import DistanceStrategy
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
import numpy as np

# Example function for embeddings and storage

def create_embedding_vector_db(
    chunks,
    db_name,
    model_name='sentence-transformers/all-mpnet-base-v2',
    base_dir="../vector_databases",
):
    """
    Embed document chunks, store them in a FAISS vector store and save it locally.

    Args:
        chunks: Document chunks to embed. Must contain at least one item.
        db_name: Suffix of the target folder; the store is written to
            ``<base_dir>/vector_db_<db_name>``.
        model_name: Name of the HuggingFace embedding model to use.
        base_dir: Folder under which the vector store folder is saved.

    Returns:
        The FAISS vector store that was created and saved.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    chunks = list(chunks)
    # Fail fast with a clear message instead of handing an empty list on to
    # the embedding and FAISS calls.
    if not chunks:
        raise ValueError(
            "chunks is empty: there is nothing to embed or store in the vector database"
        )

    # instantiate embedding model
    embedding = HuggingFaceEmbeddings(
        model_name=model_name,
        encode_kwargs={"normalize_embeddings": True}
    )
    # create the vector store
    vectorstore = FAISS.from_documents(
        documents=chunks,
        embedding=embedding,
        distance_strategy=DistanceStrategy.COSINE  # or DistanceStrategy.DOT or DistanceStrategy.L2
    )
    # save VectorStore locally
    vectorstore.save_local(f"{base_dir}/vector_db_{db_name}")
    return vectorstore

In [13]:
# Generate embeddings and save them locally
# NOTE: despite its name, all_embedding holds the vector store object returned by
# create_embedding_vector_db, not the raw embedding vectors.
all_embedding=create_embedding_vector_db(chunks=medi_ppt_chunks, db_name="medi_ppt")

---

## Retrieval from FAISS

In [15]:
# Implement retrieval logic from your FAISS database
def retrieve_from_vector_db(vector_db_path):
    """
    Load a locally saved FAISS vector store and derive a retriever from it.

    Args:
        vector_db_path: Folder the vector store was saved to.

    Returns:
        A ``(retriever, vectorstore)`` tuple.
    """
    # Embedding model handed to FAISS together with the stored index.
    query_embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2',
        encode_kwargs={"normalize_embeddings": True},
    )
    # SECURITY: allow_dangerous_deserialization=True opts in to loading the
    # serialized store from disk; only use it on folders you created yourself.
    load_options = {
        "folder_path": vector_db_path,
        "embeddings": query_embedder,
        "allow_dangerous_deserialization": True,
        "distance_strategy": DistanceStrategy.COSINE,
    }
    store = FAISS.load_local(**load_options)
    return store.as_retriever(), store

# Load the retriever and the vector store from the folder the store was saved to
react_retriever,react_vectorstore = retrieve_from_vector_db("../vector_databases/vector_db_medi_ppt")
# Display the types of both returned objects as a quick sanity check.
type(react_retriever),type(react_vectorstore)

(langchain_core.vectorstores.base.VectorStoreRetriever,
 langchain_community.vectorstores.faiss.FAISS)

In [16]:
# Test your retrieval system with queries
# Sample question used for the retrieval test; kept on one line so no stray
# newlines are sent along with the query text.
query = "what is important for diets of people with neurodegenerative diseases?"

In [17]:
# `get_relevant_documents` is deprecated in favour of `invoke`. The number of
# hits is configured on the retriever through `search_kwargs`, so build a
# 3-hit retriever from the loaded store instead of passing `k` at call time.
react_vectorstore.as_retriever(search_kwargs={"k": 3}).invoke(query)

[Document(id='37abffd7-952a-46a1-ac27-ae78269ba436', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\data\\raw\\DataBasePPT\\Nutritional-Considerations-ALS.pdf', 'page': 1, 'id': 'chunk_3'}, page_content='Nutritional Considerations \nin ALS\nMichelle McDonagh, RD, CD\nFroedtert & MCW, Milwaukee, Wisconsin'),
 Document(id='8703154d-a848-45d4-b110-5b6417c630af', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\data\\raw\\DataBasePPT\\Nutritional-Considerations-ALS.pdf', 'page': 5, 'id': 'chunk_9'}, page_content='socialization\nWhy is Nutrition Important in \nALS?'),
 Document(id='e554d3b1-99db-422d-8ae8-6dcfc7c3434b', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\data\\raw\\DataBasePPT\\Nutritional-Considerations-ALS.pdf', 'page': 0, 'id': 'chunk_2'}, page_content='ALS Association\nNational Office-Care Services\nPh: 800-782-4747  Cynthia.Knoche@als.org\nNut

---

## Connecting Retrieval with LLM

In [18]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# Write a function to create retrieval and document processing chains
def connect_chains(retriever):
    """
    Wire a retriever and the notebook's ``llm`` into a single retrieval chain.

    The "langchain-ai/retrieval-qa-chat" prompt is pulled from the hub and
    combined with ``llm`` into a stuff-documents chain, which is then joined
    with ``retriever``.

    Args:
        retriever: Retriever that supplies the documents for each question.

    Returns:
        The chain produced by ``create_retrieval_chain``.
    """
    qa_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    combine_docs = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)
    return create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=combine_docs,
    )


# Build the question-answering chain on top of the retriever loaded from the saved store.
react_retrieval_chain = connect_chains(react_retriever)

In [19]:
# Invoke your chain with a sample question
output = react_retrieval_chain.invoke(
    {"input": "what are neurodegenerative diseases?"}
)
# Show the type of the result and the keys it carries.
type(output) , output.keys()

(dict, dict_keys(['input', 'context', 'answer']))

---

## Interactive Chat System


In [20]:
# retrieve_from_vector_db returns a (retriever, vectorstore) tuple, so
# medi_retriever_ppt holds the whole tuple rather than just the retriever.
medi_retriever_ppt = retrieve_from_vector_db("../vector_databases/vector_db_medi_ppt")

# Index 0 selects the retriever element of that tuple.
medi_retrieval_chain_ppt = connect_chains(medi_retriever_ppt[0])

In [22]:
# Define your interactive chat querying function
def print_output(
    inquiry,
    retrieval_chain_ppt=None
):
    """
    Send a question through a retrieval chain and print the answer.

    Args:
        inquiry: Question text, passed to the chain as ``{"input": inquiry}``.
        retrieval_chain_ppt: Chain to query. When omitted, the notebook-level
            ``react_retrieval_chain`` is used.

    The ``"answer"`` entry of the chain result is printed with leading and
    trailing newline characters removed. Nothing is returned.
    """
    # Resolve the default at call time, so a chain rebuilt in an earlier cell
    # is picked up and defining this function does not require the chain yet.
    if retrieval_chain_ppt is None:
        retrieval_chain_ppt = react_retrieval_chain
    result = retrieval_chain_ppt.invoke({"input": inquiry})
    print(result['answer'].strip("\n"))

In [23]:
# Run and test your interactive chat system
# (German sample question; the umlaut was mis-encoded in the original literal)
print_output("wie sollten sich menschen mit ALS ernähren?")

Leider gibt es in dem gegebenen Kontext keine spezifischen Ernährungsempfehlungen für Menschen mit ALS. Der Text scheint eher auf die allgemeinen Vorteile bestimmter Nahrungsmittel wie Kokosöl (Coconut Oil) hinzuweisen, die angeblich antimikrobielle, entzündungshemmende und möglicherweise auch antikanzerogene Eigenschaften haben.

Es ist jedoch wichtig zu beachten, dass Menschen mit ALS eine individuelle Ernährungsberatung benötigen, um ihre spezifischen Bedürfnisse und Anforderungen zu berücksichtigen. Eine Ernährung, die reich an Nährstoffen ist, kann helfen, die Symptome von ALS zu lindern und die Lebensqualität zu verbessern.

Wenn Sie oder jemand, den Sie kennen, mit ALS diagnostiziert wurde, sollten Sie sich an einen Ernährungsexperten oder einen Arzt wenden, um eine individuelle Ernährungsberatung zu erhalten.


In [24]:
# Define your interactive chat querying function
def chat_with_rag(chain):
    """
    Run an interactive question/answer loop against a retrieval chain.

    Questions are read with ``input()`` until the user types ``exit`` or
    ``quit`` (case-insensitive, surrounding whitespace ignored) or the input
    ends (EOF or Ctrl-C). Blank lines are skipped. Each question is sent as
    ``chain.invoke({"input": question})`` and the ``"answer"`` entry of the
    result is printed. An error raised while answering is printed and the
    loop continues with the next question.

    Args:
        chain: Object with an ``invoke(dict)`` method whose result has an
            ``"answer"`` key.
    """
    print("Welcome to the RAG Chat! Type 'exit' to quit.\n")
    while True:
        try:
            user_input = input("🧑 You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or an interrupt ends the chat cleanly
            # instead of surfacing a traceback.
            print("\n👋 Exiting the chat. Goodbye!")
            break
        if user_input.lower() in ("exit", "quit"):
            print("👋 Exiting the chat. Goodbye!")
            break
        if not user_input:
            # Nothing was typed; do not spend a chain call on an empty question.
            continue
        try:
            result = chain.invoke({"input": user_input})
            print(f"🤖 RAG Answer: {result['answer']}\n")
        except Exception as e:
            # Deliberately broad: one failed question should not end the session.
            print(f" Error: {e}\n")


In [25]:
# Run your interactive chat
# (keeps prompting via input() until 'exit' or 'quit' is typed)
chat_with_rag(react_retrieval_chain)

Welcome to the RAG Chat! Type 'exit' to quit.

🤖 RAG Answer: Unfortunately, the provided context does not contain specific recommendations for a diet for people with ALS. However, it does mention that Michelle McDonagh, a registered dietitian, is a guest speaker on the topic of "Nutritional Considerations in ALS" at the ALS Association.

Based on general knowledge, people with ALS may benefit from a diet that is high in calories, protein, and fiber to help maintain weight and muscle mass. They may also need to avoid certain foods that can be difficult to swallow or digest.

Some general dietary recommendations for people with ALS may include:

1. Eating small, frequent meals throughout the day to help manage weight and muscle mass.
2. Choosing high-calorie foods and drinks, such as smoothies, soups, and puddings.
3. Incorporating protein-rich foods, such as lean meats, fish, eggs, and dairy products.
4. Eating fiber-rich foods, such as fruits, vegetables, and whole grains.
5. Avoidi