# RAG Medical Research
### with Ollama


In [1]:
# Import necessary libraries
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint
import warnings
from langchain_ollama import OllamaLLM
from langchain.prompts.prompt import PromptTemplate

# Load environment variables
load_dotenv()



True

In [2]:
warnings.filterwarnings("ignore")

#Initialize Ollama LLM
#Make sure Ollama is running: ollama serve
llm = OllamaLLM(
    model="llama3.2",  # Change to your preferred model
    temperature=0.7,    # Creativity level (0-1)
    # base_url="http://localhost:11434/"  # Default Ollama URL
)

In [3]:
#Test the LLM connection
test_response = llm.invoke("Hello! Say 'Connection successful!' if you can read this.")
print(test_response)

ConnectError: [WinError 10061] Es konnte keine Verbindung hergestellt werden, da der Zielcomputer die Verbindung verweigerte

---
## Data Ingestion

In [9]:
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path

def load_multiple_pdfs(folder_path):
    """
    Load text data from all PDF files in a folder.
    """
    documents = []
    pdf_folder = Path(folder_path)
    
    # Find all PDF files in the folder
    for pdf_file in pdf_folder.glob("*.pdf"):
        print(f"Loading {pdf_file.name}...")
        loader = PyPDFLoader(file_path=str(pdf_file))
        documents.extend(loader.load())
    
    return documents

# Load all PDFs from folder
medi_docs = load_multiple_pdfs(folder_path=r"C:\Users\peter\Desktop\ds_ai\repo_folder\nutrition-ai-assistant\data\raw\DataBase")
print(f"Loaded {len(medi_docs)} pages total")
print(medi_docs[0].page_content)

Loading Adiponectin Role in Neurodegenerative Diseases.pdf...
Loading Correlation_of_Neurodegenerative_Diseases_with_Oxi.pdf...
Loading Diet, Nutrition and Chronic Degenerative Diseases.pdf...
Loading Educommunication in Nutrition and Neurodegenerative.pdf...
Loading Frailty, Cognitive Decline, Neurodegenerative.pdf...
Loading Ketogenic Diet An Effective Treatment Approach for Neurodegenerative.pdf...
Loading Mediterranean Diet and Neurodegenerative Diseases.pdf...
Loading Modulation of Gut Microbiota Through Dietary Intervention.pdf...
Loading Molecular Crossfires between Inflammasome Signalling and Dietary Small.pdf...
Loading Nutritional-Considerations-ALS.pdf...
Loading Nutritional_Approaches_in_Neurodegenerative_Disord.pdf...
Loading The Ketogenic Diet and Alzheimer‚Äôs Disease.pdf...
Loading The Role of Diet and Dietary Patterns in Parkinson‚Äôs Disease.pdf...
Loading The Role of Diet in Parkinson‚Äôs Disease.pdf...
Loading The Role of Ketogenic Diet in the Treatment of Neurologi

---

## Document Chunking


In [10]:
# Import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Example chunking function
def split_documents(documents, chunk_size=200, chunk_overlap=50):
    """
    Splits documents into chunks of given size and overlap
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_documents(documents=documents)
    
    # Just to add id for etch chunks to map it later 
    for i, chunk in enumerate(chunks):
         chunk.metadata.update({
        "id": f"chunk_{i}",
    })
    
    return chunks

In [12]:
# Execute your chunking function and display results here
medi_chunks = split_documents(medi_docs)


---

## Embedding and Storage


In [11]:
# Import libraries
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.faiss import DistanceStrategy
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
import numpy as np

# Example function for embeddings and storage

def create_embedding_vector_db(chunks, db_name):
    """
    This function uses the open-source embedding model HuggingFaceEmbeddings 
    to create embeddings and store those in a VectorStore called FAISS, 
    which allows for efficient similarity search
    """
    # instantiate embedding model
    embedding = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2',
        encode_kwargs={"normalize_embeddings": True}
    )
    # create the vector store 
    vectorstore = FAISS.from_documents(
        documents=chunks,
        embedding=embedding,
        distance_strategy=DistanceStrategy.COSINE  # or DistanceStrategy.DOT or DistanceStrategy.L2 
        
    )
    # save VectorStore locally
    vectorstore.save_local(f"../vector_databases/vector_db_{db_name}")
    return vectorstore

In [None]:
# Generate embeddings and save them locally
all_embedding=create_embedding_vector_db(chunks=medi_chunks, db_name="medi")

---

## Retrieval from FAISS

In [13]:
# Implement retrieval logic from your FAISS database
def retrieve_from_vector_db(vector_db_path):
    """
    this function splits out a retriever object from a local VectorStore
    """
    # instantiate embedding model
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2',
        encode_kwargs={"normalize_embeddings": True}
    )
    react_vectorstore = FAISS.load_local(
        folder_path=vector_db_path,
        embeddings=embeddings,
        allow_dangerous_deserialization=True,
        distance_strategy=DistanceStrategy.COSINE
    )
    retriever = react_vectorstore.as_retriever()
    return retriever ,react_vectorstore

# Load the retriever and index
react_retriever,react_vectorstore = retrieve_from_vector_db("../vector_databases/vector_db_medi")
type(react_retriever),type(react_vectorstore)

(langchain_core.vectorstores.base.VectorStoreRetriever,
 langchain_community.vectorstores.faiss.FAISS)

In [14]:
# Test your retrieval system with queries
query="""
what is import for diets of people with neurodegenerative diseases?
"""

In [15]:
react_retriever.get_relevant_documents(query,k=3)

[Document(id='3f8933b9-f8cb-46c6-aca9-dcd4a52e3833', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\data\\raw\\DataBase\\The Ketogenic Diet and Alzheimer‚Äôs Disease.pdf', 'page': 0, 'id': 'chunk_4805'}, page_content='specific diet that has been studied vis a vis neurodegenerative diseases. \nSimilar benefits to those of a KD can also be achieved through'),
 Document(id='55046b1f-3bc8-4a93-98f1-75568a20c6da', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\data\\raw\\DataBase\\Nutritional_Approaches_in_Neurodegenerative_Disord.pdf', 'page': 2, 'id': 'chunk_4525'}, page_content='disorders. By exploring current knowledge of nutritional needs, the impact of diet on\nneurodegenerative disease, individual responses to dietary interventions, and available'),
 Document(id='14f7c417-df5c-4d0f-bc31-f114c64cbc51', metadata={'source': 'C:\\Users\\peter\\Desktop\\ds_ai\\repo_folder\\nutrition-ai-assistant\\dat

---

## Connecting Retrieval with LLM

In [16]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# Write a function to create retrieval and document processing chains
def connect_chains(retriever):
    """
    this function connects stuff_documents_chain with retrieval_chain
    """
    stuff_documents_chain = create_stuff_documents_chain(
        llm=llm,
        prompt=hub.pull("langchain-ai/retrieval-qa-chat")
    )
    retrieval_chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=stuff_documents_chain
    )
    return retrieval_chain


react_retrieval_chain = connect_chains(react_retriever)

In [17]:
# Invoke your chain with a sample question
output = react_retrieval_chain.invoke(
    {"input": "what are neurodegenerative diseases?"}
)
type(output) , output.keys()

(dict, dict_keys(['input', 'context', 'answer']))

---

## Interactive Chat System


In [18]:
medi_retriever = retrieve_from_vector_db("../vector_databases/vector_db_medi")

medi_retrieval_chain = connect_chains(medi_retriever[0])

In [19]:
# Define your interactive chat querying function
def print_output(
    inquiry,
    retrieval_chain=react_retrieval_chain
):
    result = retrieval_chain.invoke({"input": inquiry})
    print(result['answer'].strip("\n"))

In [20]:
# Run and test your interactive chat system
print_output("wie sollten sich menschen mit alzheimer ern√§hren?")

Leider kann ich keine direkten Ern√§hrungsempfehlungen f√ºr Menschen mit Alzheimer geben, da die oben genannten Quellen nicht explizit zu diesem Thema sprechen. 

Allerdings gibt es einige allgemeine Ern√§hrungsprinzipien, die als hilfreich f√ºr die Gesundheit insgesamt und m√∂glicherweise auch f√ºr Menschen mit Alzheimer angesehen werden k√∂nnen:

1. Eine ausgewogene Ern√§hrung: Die American Heart Association empfiehlt eine ausgewogene Ern√§hrung, die reich an Obst, Gem√ºse, Vollkornprodukten, mageren Proteinen und gesunden Fetten ist.
2. Antioxidantien: Einige Studien deuten darauf hin, dass Antioxidantien wie Vitamine C und E, Polyphenole und andere Phytochemikalien die Entwicklung von Alzheimer-√§hnlichen Ver√§nderungen in der Gehirndurchblutung verlangsamen k√∂nnen.
3. Omega-3-Fetts√§uren: Diese Fetts√§uren, insbesondere EPA und DHA, sind wichtig f√ºr die Gehirngesundheit und haben m√∂glicherweise eine sch√ºtzende Wirkung bei Alzheimer.
4. Kalzium und Vitamin D: Eine ausreichende 

In [21]:
# Define your interactive chat querying function
def chat_with_rag(chain):
    """
    Interactive function to chat with the RAG system.
    """
    print("Welcome to the RAG Chat! Type 'exit' to quit.\n")
    while True:
        user_input = input("üßë You: ")
        if user_input.lower() in ["exit", "quit"]:
            print("üëã Exiting the chat. Goodbye!")
            break
        try:
            result = chain.invoke({"input": user_input})
            print(f"ü§ñ RAG Answer: {result['answer']}\n")
        except Exception as e:
            print(f" Error: {e}\n")


In [22]:
# Run your interactive chat
chat_with_rag(react_retrieval_chain)

Welcome to the RAG Chat! Type 'exit' to quit.

ü§ñ RAG Answer: Based on the context provided, I can offer general dietary recommendations for individuals with ALS. However, please note that it's essential to consult a registered dietitian or healthcare professional for personalized advice.

Michelle McDonagh, RD, CD, an expert in nutritional considerations for ALS, recommends:

1. **High-calorie, high-protein diets**: People with ALS often experience weight loss and muscle wasting. A calorie-rich diet with protein sources like lean meats, fish, eggs, dairy products, and plant-based options can help maintain or gain weight.
2. **Balanced macronutrients**: Aim for a balanced mix of carbohydrates, protein, and healthy fats to provide energy and support overall health.
3. **Easy-to-digest foods**: Individuals with ALS may experience difficulty swallowing or chewing due to muscle weakness. Opt for soft, easy-to-digest foods like soups, smoothies, yogurt, and cooked vegetables.
4. **Hydrati

In [None]:
from langchain_core.prompts import ChatPromptTemplate

Improved system prompt for nutrition Q&A
system_prompt = """You are a nutrition assistant that answers questions using a provided food database.

RULES:
Use ONLY the information in the CONTEXT below. Do not use external knowledge.
Always include the serving size when giving nutritional values.
If a serving size is not provided in the context, state that it is not available.
If asked to compare foods, present the data in a clear, structured format.
If asked for "high in X" or "best sources of X", list the top options from the context with exact values.
If the requested information is not in the context, respond with:"I don't have that information in my database."
Be concise, but always include specific numbers when available.
For health-related questions, include a reminder to consult a healthcare professional.

CONTEXT:
{context}"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

Create the RAG chain
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

Ask your own question!
your_question = "What are good sources of vitamin C?"

response = rag_chain.invoke({"input": your_question})

print(f"Question: {your_question}\n")
print(f"Answer: {response['answer']}\n")
print("=" * 60)