In [4]:
from langchain_community.document_loaders import DirectoryLoader,PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [5]:
def load_pdf_files(data):
    loader=DirectoryLoader(data,loader_cls=PyPDFLoader,glob="*.pdf")
    docs=loader.load()
    return docs

In [6]:
extracted_data=load_pdf_files("C:/Users/saich/OneDrive/Desktop/chatbot/Medical-chatbot/data")
extracted_data

[Document(metadata={'producer': 'doPDF Ver 8.9 Build 950', 'creator': 'PyPDF', 'creationdate': '2020-03-12T17:24:48+05:30', 'source': 'C:\\Users\\saich\\OneDrive\\Desktop\\chatbot\\Medical-chatbot\\data\\book 1.pdf', 'total_pages': 141, 'page': 0, 'page_label': '1'}, page_content='1 \n \nUNIT 1  FOOD AND NUTRITION \nStructure \n1.0 Objectives \n1.1 Introduction \n1.2 Definition of Food ,Nutrition, Nutrients and Diet \n1.3 Functions of Food \n1.3.1 Physiological functions \n1.3.2 Psychological functions \n1.3.3 Socio-cultural functions \n1.3.4 Food habits \n1.3.5 Food misinformation \n1.4 Nutrition \n1.4.1 Normal nutrition \n1.4.2 Malnutrition \n1.4.3 Nutritional status \n1.5 Nutrients \n1.5.1 Classification of nutrients \n1.5.2 Types of nutrients \n1.5.3 Functions of nutrients \n1.6 Let us sum up \n1.7 Glossary \n1.8 Check your progress \n \n1.0  OBJECTIVES \nWhen we complete this unit, we will be able to: \n\uf0b7 define food, nutrition, and nutrients, and diet \n\uf0b7 list the funct

In [7]:
len(extracted_data)

432

In [8]:
from langchain.schema import Document

def filter_docs(docs:list[Document])->list[Document]:
    minimal_docs=[]
    for doc in docs:
        src=doc.metadata.get("src")
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source":src}
            )
        )

    return minimal_docs

In [9]:
minimali_docs=filter_docs(extracted_data)
minimali_docs

[Document(metadata={'source': None}, page_content='1 \n \nUNIT 1  FOOD AND NUTRITION \nStructure \n1.0 Objectives \n1.1 Introduction \n1.2 Definition of Food ,Nutrition, Nutrients and Diet \n1.3 Functions of Food \n1.3.1 Physiological functions \n1.3.2 Psychological functions \n1.3.3 Socio-cultural functions \n1.3.4 Food habits \n1.3.5 Food misinformation \n1.4 Nutrition \n1.4.1 Normal nutrition \n1.4.2 Malnutrition \n1.4.3 Nutritional status \n1.5 Nutrients \n1.5.1 Classification of nutrients \n1.5.2 Types of nutrients \n1.5.3 Functions of nutrients \n1.6 Let us sum up \n1.7 Glossary \n1.8 Check your progress \n \n1.0  OBJECTIVES \nWhen we complete this unit, we will be able to: \n\uf0b7 define food, nutrition, and nutrients, and diet \n\uf0b7 list the functions of food, \n\uf0b7 understand the vital role of nutrition \n\uf0b7 Identify the types of nutrients we get from food, and their functions. \n \n1.1 INTRODUCTION \nEvery single cell in the human body is an outcome of the food we 

In [10]:
def split_docs(docs:list[Document]):
    splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
    splits=splitter.split_documents(docs)
    return splits

In [11]:
splits=split_docs(extracted_data)
len(splits)

2162

In [12]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [13]:
groq_api_key=os.getenv("GRQ_API_KEY")
PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")

os.environ["PINECONE_API_KEY"]=PINECONE_API_KEY

In [14]:
from pinecone import Pinecone,ServerlessSpec

pc=Pinecone(api_key=PINECONE_API_KEY)

In [15]:
pc

<pinecone.pinecone.Pinecone at 0x1d494043b00>

In [16]:
index_name = "nutri-gpt"

if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws",region="us-east-1")
    )

index=pc.Index(index_name)

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [18]:
from langchain_pinecone import PineconeVectorStore

doc_search=PineconeVectorStore.from_documents(
    documents=splits,
    embedding=embeddings,
    index_name=index_name
)

In [19]:
retriever=doc_search.as_retriever(search_type="similarity",search_kwargs={"k":3})

In [20]:
retrieved=retriever.invoke("i am 20 year old male how much protine should i consume?")
retrieved

[Document(id='d935da9d-b043-459a-bee0-428ae611f547', metadata={'creationdate': '2020-03-12T17:24:48+05:30', 'creator': 'PyPDF', 'page': 135.0, 'page_label': '136', 'producer': 'doPDF Ver 8.9 Build 950', 'source': 'C:\\Users\\saich\\OneDrive\\Desktop\\chatbot\\Medical-chatbot\\data\\book 1.pdf', 'total_pages': 141.0}, page_content='30 \n \n25 \n \n330 \n \n60 \n \n500 \n \n100 \n \n100 \n \n100 \n \n100 \n \n35 \n \n25 \n \n270 \n \n60 \n \n500 \n \n100 \n \n100 \n \n100 \n \n100 \n \n30 \n \n25 \n  Source:  ICMR, 1998 \nTable 5.4.2:   Recommended dietary allowances of nutrients in adolescents \nAge (yrs) 13-15 boys 13-15 girls 16-18 boys 16-18 girls \nEnergy (kcal/d) 2450 2460 2460 2060 \nProtein (g/d) 70 65 78 63 \nCalcium(mg/d) 600 600 500 500 \nIron (mg/d) 41 28 50 30 \nRetinol (mcg/d) 600 600 600 600 \nBetacarotene(mcg/d) 2400 2400 2400 2400'),
 Document(id='da705d93-b99a-447e-b857-de2549c33a8b', metadata={'creationdate': '2020-03-12T17:24:48+05:30', 'creator': 'PyPDF', 'page': 135

In [21]:
from langchain_groq import ChatGroq
model=ChatGroq(model="openai/gpt-oss-20b",groq_api_key=groq_api_key)

In [22]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder

context_system_prompt=("given a chat history and latest user question"
                        "which might refrence context in the chat history,"
                        "formulate a standalone question which can be understood"
                        "without chathistory do NOT answer the question"
                        "just reformulate it if needed and otherwise return it as it is.")

context_prompt=ChatPromptTemplate.from_messages([
    ("system",context_system_prompt),
    MessagesPlaceholder("history"),
    ("human","{input}")
])

In [23]:
from langchain.chains.history_aware_retriever import create_history_aware_retriever
history_retriever=create_history_aware_retriever(model,retriever,context_prompt)

In [24]:
system_prompt=("You are NutriBot, an expert nutrition assistant powered by a Retrieval-Augmented Generation (RAG) system. Your goal is to provide accurate, personalized, and practical nutrition suggestions based on the user's query and relevant nutritional information retrieved from a vector database. Follow these steps:"

"1. **Retrieve and Analyze Context**: Use the provided context from the vector store, which contains nutritional guidelines, food data, dietary restrictions, and healthy recipes. Ensure all suggestions align with evidence-based nutritional science."

"2. **Understand the User’s Query**: Interpret the user’s input, which may include dietary preferences (e.g., vegetarian, vegan, keto), health goals (e.g., weight loss, muscle gain, managing diabetes), allergies, cultural or religious dietary restrictions, or specific questions about foods, meals, or nutrients."

"3. **Personalize the Response**: Tailor your suggestions to the user’s specific needs, preferences, and constraints. If the user provides details like age, weight, activity level, or medical conditions, incorporate these into your recommendations. If details are missing, ask clarifying questions politely to refine your suggestions."

"4. **Provide Actionable Suggestions**: Offer clear, practical advice, such as specific meal ideas, portion sizes, nutrient breakdowns, or simple recipes. Include tips for preparation or substitutions if relevant. Ensure suggestions are realistic and accessible based on common ingredients and cooking skills."

"5. **Maintain a Professional and Friendly Tone**: Communicate in a clear, concise, and encouraging manner, as if you’re a trusted nutritionist. Avoid overly technical jargon unless the user requests it, and ensure your tone is supportive and non-judgmental."

"6. **Handle Edge Cases**: If the user’s query is vague, incomplete, or contains conflicting information, ask clarifying questions or make reasonable assumptions based on general health guidelines. If the retrieved context is insufficient, rely on general nutritional knowledge but acknowledge any limitations."

"7. **Ensure Safety and Accuracy**: Do not suggest foods or diets that could harm the user based on their stated allergies, medical conditions, or restrictions. If a query involves a medical condition requiring professional advice, recommend consulting a healthcare provider."

"Example Query: “I’m a 30-year-old vegetarian looking to gain muscle. I’m allergic to nuts. Suggest a high-protein dinner.”"
"Example Response: Based on your vegetarian diet and nut allergy, I recommend a high-protein dinner of lentil and chickpea curry with quinoa. Lentils and chickpeas provide approximately 15g of protein per cup, and quinoa adds about 8g per cup. Combine 1 cup cooked lentils, 1 cup chickpeas, spinach, and tomatoes in a curry sauce (use coconut milk for creaminess, avoiding nut-based ingredients). Serve with 1 cup cooked quinoa. This meal offers roughly 30g of protein and is rich in fiber and iron. Would you like a detailed recipe or additional meal ideas?"

"Use the retrieved context: {context}"

"If clarification is needed, ask: Could you share any dietary restrictions, allergies, health goals, or specific preferences (e.g., vegan, low-carb, cultural preferences)?")

In [25]:
prompt=ChatPromptTemplate.from_messages([
    ("system",system_prompt),
    MessagesPlaceholder("history"),
    ("human","{input}")
])

In [26]:
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
chain=create_stuff_documents_chain(prompt=prompt,llm=model)

rag_chain=create_retrieval_chain(history_retriever,chain)

In [101]:
from langchain_community.chat_message_histories import ChatMessageHistory

store = {}

def get_session_history(session_id: str):
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

In [28]:
from langchain_core.runnables.history import RunnableWithMessageHistory

rag_chain=RunnableWithMessageHistory(rag_chain,get_session_history,
                                     input_messages_key="input",
                                     history_messages_key="history",
                                     output_messages_key="answer"
    
)

In [29]:
response=rag_chain.invoke({"input":"i am 20 year old male "},
                          config={"configurable":{"session_id":"default"}})


print(["answer"])

['answer']


In [55]:
model1=ChatGroq(model="llama-3.1-8b-instant",groq_api_key=groq_api_key)

In [99]:
prompt1 = (
    "You are a professional AI prompt engineer. Your job is to take the user’s input prompt "
    "and the provided context, and return a highly optimized, precise, and effective prompt for a large language model. "
    "Follow these rules: "
    "1. **Incorporate Context:** Use the context to make the prompt fully relevant. Include any necessary background information to guide the AI. "
    "2. **Clarity & Precision:** Make the instructions unambiguous, concise, and goal-oriented. "
    "3. **Stepwise Guidance:** If the task is complex, break it into clear steps or instructions. "
    "4. **Enhance Creativity:** Make the prompt expressive and insightful while staying on-topic. "
    "5. **Neutral & Safe:** Avoid biased, leading, or unsafe instructions. "
    "**Input:** "
    "User Prompt: {user_prompt} "
    "**Output:** "
    "Return only the **enhanced, context-aware prompt** ready to be used. Do not include explanations or extra text."
)


In [100]:
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

enhance_prompt = ChatPromptTemplate.from_messages([
    ("system", prompt1),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{user_prompt}")
])

In [97]:
enhance_prompt=ChatPromptTemplate.from_messages([
    ("system",prompt1),
    MessagesPlaceholder(variable_name="history"),
    ("human","{input}")
])

In [102]:
chain1 = create_stuff_documents_chain(prompt=enhance_prompt, llm=model1)


ValueError: Prompt must accept context as an input variable. Received prompt with input variables: ['history', 'user_prompt']

In [92]:
enhance_chain1=RunnableWithMessageHistory(rag_chain1,get_session_history,
                                          input_messages_key="input",
                                          history_messages_key="context")

In [93]:
response = enhance_chain1.invoke({
    "input": "how much protein should I eat?"
},
config={"configurable":{"session_id":"default"}})
print(response["answer"])

ValueError: variable context should be a list of base messages, got protein provides 240 kcal, which is 10  percent of her total requirement of 2400 kcal.)Our need for protein is about 1

protein provides 240 kcal, which is 10  percent of her total requirement of 2400 kcal.)Our need for protein is about 1

protein provides 240 kcal, which is 10  percent of her total requirement of 2400 kcal.)Our need for protein is about 1 of type <class 'str'>