# RAG Medical Research
### with Groq


In [None]:
# Import necessary libraries
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint
import warnings
from langchain_groq import ChatGroq
from langchain.prompts.prompt import PromptTemplate

# Load environment variables
load_dotenv()

True

In [2]:
# Silence every Python warning from here on.
# NOTE(review): this blanket filter also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# Chat model shared by the retrieval chains built further down (referenced as the global `llm`).
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,  # presumably chosen for repeatable answers — confirm
    max_tokens=None,
    timeout=None,
    max_retries=2
)

---
## Data Ingestion

In [3]:
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path

def load_multiple_pdfs(folder_path):
    """
    Load the documents of every PDF file found directly inside a folder.

    Files are processed in sorted path order so that the returned list (and
    anything numbered from it afterwards) is the same on every run.

    Args:
        folder_path: Path (str or Path) of the folder that holds the PDFs.
            Sub-folders are not searched.

    Returns:
        A list with the documents produced by ``PyPDFLoader.load()`` for each
        file, concatenated in file order. Empty if the folder holds no PDFs.

    Raises:
        FileNotFoundError: If ``folder_path`` is not an existing directory.
    """
    pdf_folder = Path(folder_path)
    if not pdf_folder.is_dir():
        # A mistyped path would otherwise just look like "no PDFs found".
        raise FileNotFoundError(
            f"PDF folder does not exist or is not a directory: {pdf_folder}"
        )

    documents = []
    # glob() order depends on the file system; sort for a stable load order.
    for pdf_file in sorted(pdf_folder.glob("*.pdf")):
        print(f"Loading {pdf_file.name}...")
        loader = PyPDFLoader(file_path=str(pdf_file))
        documents.extend(loader.load())

    return documents

# Load all PDFs from folder
# NOTE(review): absolute, machine-specific Windows path — adjust before running on another machine.
medi_docs = load_multiple_pdfs(folder_path=r"C:\Users\peter\Desktop\ds_ai\repo_folder\nutrition-ai-assistant\data\raw\DataBase")
print(f"Loaded {len(medi_docs)} pages total")
# Preview the text of the first loaded document (raises IndexError if nothing was loaded).
print(medi_docs[0].page_content)

Loading Adiponectin Role in Neurodegenerative Diseases.pdf...
Loading Correlation_of_Neurodegenerative_Diseases_with_Oxi.pdf...
Loading Diet, Nutrition and Chronic Degenerative Diseases.pdf...
Loading Educommunication in Nutrition and Neurodegenerative.pdf...
Loading Frailty, Cognitive Decline, Neurodegenerative.pdf...
Loading Ketogenic Diet An Effective Treatment Approach for Neurodegenerative.pdf...
Loading Mediterranean Diet and Neurodegenerative Diseases.pdf...
Loading Modulation of Gut Microbiota Through Dietary Intervention.pdf...
Loading Molecular Crossfires between Inflammasome Signalling and Dietary Small.pdf...
Loading Nutritional-Considerations-ALS.pdf...
Loading Nutritional_Approaches_in_Neurodegenerative_Disord.pdf...
Loading The Ketogenic Diet and Alzheimer’s Disease.pdf...
Loading The Role of Diet and Dietary Patterns in Parkinson’s Disease.pdf...
Loading The Role of Diet in Parkinson’s Disease.pdf...
Loading The Role of Ketogenic Diet in the Treatment of Neurologi

---

## Document Chunking


In [4]:
# Import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Example chunking function
def split_documents(documents, chunk_size=200, chunk_overlap=50):
    """
    Cut the given documents into overlapping chunks and label each chunk.

    Every chunk receives an ``"id"`` metadata entry of the form ``chunk_<n>``,
    numbered from 0 in output order, so it can be mapped back later.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    pieces = splitter.split_documents(documents=documents)

    # Give each chunk a sequential id so it can be mapped later
    for position, piece in enumerate(pieces):
        piece.metadata["id"] = f"chunk_{position}"

    return pieces

In [5]:
# Split the loaded documents using split_documents' default chunk_size and chunk_overlap
# (nothing is displayed here; the chunks are only stored in medi_chunks)
medi_chunks = split_documents(medi_docs)


---

## Embedding and Storage


In [6]:
# Import libraries
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.faiss import DistanceStrategy
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
import numpy as np

# Example function for embeddings and storage

def create_embedding_vector_db(chunks, db_name):
    """
    Embed the chunks with the open-source HuggingFaceEmbeddings model, store
    the vectors in a FAISS VectorStore and save that store locally.

    Args:
        chunks: Non-empty list of document chunks to embed.
        db_name: Suffix of the target folder; the store is saved to
            ``../vector_databases/vector_db_<db_name>``.

    Returns:
        The FAISS VectorStore that was created and saved.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    if not chunks:
        # Fail fast with a clear message before the embedding model is
        # instantiated; there is nothing to index.
        raise ValueError("chunks must contain at least one document")

    # instantiate embedding model
    embedding = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2',
        # normalized embeddings, to go with the cosine strategy below
        encode_kwargs={"normalize_embeddings": True}
    )
    # create the vector store
    vectorstore = FAISS.from_documents(
        documents=chunks,
        embedding=embedding,
        distance_strategy=DistanceStrategy.COSINE
    )
    # save VectorStore locally
    vectorstore.save_local(f"../vector_databases/vector_db_{db_name}")
    return vectorstore

In [7]:
# Generate embeddings and save them locally
# (the returned value is the FAISS vector store itself, saved under ../vector_databases/vector_db_medi)
all_embedding=create_embedding_vector_db(chunks=medi_chunks, db_name="medi")

---

## Retrieval from FAISS

In [8]:
# Implement retrieval logic from your FAISS database
def retrieve_from_vector_db(vector_db_path):
    """
    Load a locally saved FAISS VectorStore and hand back a retriever for it.

    Returns a ``(retriever, vectorstore)`` tuple.
    """
    # Embedding model handed to FAISS when the store is loaded
    embedding_model = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2',
        encode_kwargs={"normalize_embeddings": True},
    )
    # NOTE(review): allow_dangerous_deserialization=True opts in to unsafe
    # deserialization — only load stores that you created yourself.
    store = FAISS.load_local(
        folder_path=vector_db_path,
        embeddings=embedding_model,
        distance_strategy=DistanceStrategy.COSINE,
        allow_dangerous_deserialization=True,
    )
    return store.as_retriever(), store

# Load the retriever and the vector store it was created from
react_retriever,react_vectorstore = retrieve_from_vector_db("../vector_databases/vector_db_medi")
# Show the types of both returned objects
type(react_retriever),type(react_vectorstore)

(langchain_core.vectorstores.base.VectorStoreRetriever,
 langchain_community.vectorstores.faiss.FAISS)

In [9]:
# Sample query for testing the retrieval system
# NOTE(review): "import" is probably meant to be "important" — left as is because this text is what gets sent to the retriever.
query="""
what is import for diets of people with neurodegenerative diseases?
"""

In [10]:
# Fetch the 3 chunks most similar to the query.
# The limit is set through search_kwargs and the retriever is called via
# invoke(): get_relevant_documents() is deprecated in LangChain, and a `k`
# passed to it is not guaranteed to reach the vector store search.
react_vectorstore.as_retriever(search_kwargs={"k": 3}).invoke(query)

[Document(id='e80c4f9e-0680-4ffd-9eff-d4b2ad36e38b', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\data\\raw\\DataBase\\The Ketogenic Diet and Alzheimer’s Disease.pdf', 'page': 0, 'id': 'chunk_4805'}, page_content='specific diet that has been studied vis a vis neurodegenerative diseases. \nSimilar benefits to those of a KD can also be achieved through'),
 Document(id='b41eeb87-0a7d-4af5-b98d-ee9094bfdb72', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\data\\raw\\DataBase\\Nutritional_Approaches_in_Neurodegenerative_Disord.pdf', 'page': 2, 'id': 'chunk_4525'}, page_content='disorders. By exploring current knowledge of nutritional needs, the impact of diet on\nneurodegenerative disease, individual responses to dietary interventions, and available'),
 Document(id='333c2b94-95e0-4764-98f5-e7dabf8eb46f', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\dat

---

## Connecting Retrieval with LLM

In [11]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# Write a function to create retrieval and document processing chains
def connect_chains(retriever):
    """
    Build a retrieval chain: the given retriever is combined with a
    stuff-documents chain that uses the module-level ``llm`` and the
    "langchain-ai/retrieval-qa-chat" prompt pulled from the hub.
    """
    docs_chain = create_stuff_documents_chain(
        llm=llm,
        prompt=hub.pull("langchain-ai/retrieval-qa-chat"),
    )
    return create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=docs_chain,
    )


# Build the retrieval chain on top of the retriever loaded from the saved FAISS store
react_retrieval_chain = connect_chains(react_retriever)

In [12]:
# Invoke your chain with a sample question
output = react_retrieval_chain.invoke(
    {"input": "what are neurodegenerative diseases?"}
)
# Inspect the result: its type and the keys it carries
type(output) , output.keys()

(dict, dict_keys(['input', 'context', 'answer']))

---

## Interactive Chat System


In [13]:
# NOTE(review): retrieve_from_vector_db returns a (retriever, vectorstore) tuple,
# so despite its name `medi_retriever` holds the whole tuple.
medi_retriever = retrieve_from_vector_db("../vector_databases/vector_db_medi")

# Index 0 of that tuple is the retriever.
medi_retrieval_chain = connect_chains(medi_retriever[0])

In [14]:
# Define your interactive chat querying function
def print_output(inquiry, retrieval_chain=react_retrieval_chain):
    """
    Send ``inquiry`` through ``retrieval_chain`` and print the answer with
    leading and trailing newlines removed.
    """
    response = retrieval_chain.invoke({"input": inquiry})
    answer_text = response['answer']
    print(answer_text.strip("\n"))

In [17]:
# Run and test your interactive chat system
# (German question: "How should people with ALS eat?"; the umlaut was mis-encoded before)
print_output("wie sollten sich menschen mit ALS ernähren?")

Ich muss darauf hinweisen, dass der bereitgestellte Kontext keine spezifischen Informationen über die Ernährung bei ALS (Amyotrophe Lateralsklerose) enthält. Der Kontext bezieht sich auf verschiedene wissenschaftliche Artikel und Bücher, aber keine davon behandelt direkt die Ernährung bei ALS.

Trotzdem kann ich Ihnen allgemeine Informationen über die Ernährung bei ALS anbieten. Bei ALS ist es wichtig, eine ausgewogene und leicht verdauliche Ernährung zu wählen, um die Symptome zu lindern und die Gesundheit zu erhalten. Hier sind einige allgemeine Tipps:

1. **Leicht verdauliche Nahrung**: Wählen Sie leicht verdauliche Nahrungsmittel, wie z.B. Bananen, Reis, Kartoffeln, Hühnchen und Fisch. Vermeiden Sie fettige, saure oder scharfe Lebensmittel.
2. **Hydratation**: Trinken Sie viel Wasser, um die Flüssigkeitsverluste auszugleichen, die durch die Krankheit entstehen.
3. **Energiequelle**: Verwenden Sie Energiequellen wie Smoothies, Suppen oder Pürees, um die Nahrungsaufnahme

In [16]:
# Define your interactive chat querying function
def chat_with_rag(chain):
    """
    Interactive function to chat with the RAG system.

    Reads questions from standard input until the user types ``exit`` or
    ``quit`` (case-insensitive, surrounding whitespace ignored) or the input
    ends. Each non-empty question is passed to ``chain.invoke`` as
    ``{"input": question}`` and the ``"answer"`` entry of the result is
    printed.

    Args:
        chain: Object with an ``invoke(dict)`` method whose result can be
            indexed with ``"answer"``.
    """
    print("Welcome to the RAG Chat! Type 'exit' to quit.\n")
    while True:
        try:
            user_input = input("🧑 You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or Ctrl+C: leave the loop like an "exit".
            print("\n👋 Exiting the chat. Goodbye!")
            break
        if user_input.lower() in ("exit", "quit"):
            print("👋 Exiting the chat. Goodbye!")
            break
        if not user_input:
            # Nothing typed; don't spend a chain call on an empty question.
            continue
        try:
            result = chain.invoke({"input": user_input})
            print(f"🤖 RAG Answer: {result['answer']}\n")
        except Exception as e:
            # Outer boundary of the chat loop: report the error and keep chatting.
            print(f" Error: {e}\n")


In [32]:
# Run your interactive chat
# (keeps prompting via input() until the user types 'exit' or 'quit')
chat_with_rag(react_retrieval_chain)

Welcome to the RAG Chat! Type 'exit' to quit.

🤖 RAG Answer: Based on the provided context, I can offer some general nutritional advice for people with ALS. However, please note that this is not a substitute for personalized medical advice. It's essential to consult a healthcare professional or a registered dietitian for specific guidance.

From the context, we know that a diet rich in fruits and vegetables can confer protection against the development of ALS. Here are some general nutritional tips:

1. **Eat a balanced diet**: Focus on whole, unprocessed foods like fruits, vegetables, whole grains, lean proteins, and healthy fats.
2. **Increase antioxidant intake**: Antioxidants, such as vitamins C and E, can help protect against oxidative stress, which may contribute to ALS progression. Include antioxidant-rich foods like berries, leafy greens, and nuts in your diet.
3. **Stay hydrated**: Adequate hydration is essential for overall health, and it may help alleviate symptoms like m