# Upload PDF

In [36]:
# PyPDFLoader (it is easy prebuild in the langchain)
from langchain_community.document_loaders import PyPDFLoader
#Load PDF and split the pages 
loader = PyPDFLoader("Research papers for BSCS.pdf")
pages = loader.load_and_split()

In [37]:
#Check the total pages
len(pages)

29

In [40]:
# Print the page 5
pages[5]

Document(metadata={'producer': 'Acrobat Distiller 8.1.0 (Windows)', 'creator': 'Elsevier', 'creationdate': '2025-09-26T16:27:47+00:00', 'crossmarkdomains[1]': 'elsevier.com', 'creationdate--text': '26th September 2025', 'robots': 'noindex', 'elsevierwebpdfspecifications': '7.0.1', 'moddate': '2025-09-26T16:37:55+00:00', 'doi': '10.1016/j.rser.2025.116345', 'title': 'Leveraging machine learning for optimized microgrid management: Advances, applications, challenges, and future directions', 'keywords': 'Microgrid management,Machine learning applications,Edge computing,Federated learning,Generative AI,Renewable energy,Fault detection,Load prediction,Sustainable energy systems', 'subject': 'Renewable and Sustainable Energy Reviews, 226 (2026) 116345. doi:10.1016/j.rser.2025.116345', 'crossmarkdomains[2]': 'sciencedirect.com', 'author': 'Gaurav Singh Negi', 'source': 'Research papers for BSCS.pdf', 'total_pages': 14, 'page': 2, 'page_label': '3'}, page_content='microgrids\nSupervised, Unsupe

In [41]:
#  Split the text into small chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size =2000 ,chunk_overlap =100)

documents =splitter.split_documents(pages)

In [42]:

from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
embeddings =OllamaEmbeddings(model="nomic-embed-text")

In [43]:
vectorDB =Chroma.from_documents(documents,embeddings)

In [44]:
query ="which is Machine learning techniques in microgrids?"

result= vectorDB.similarity_search(query=query,k=4)
result

[Document(id='a02d00ec-0e5d-4186-90d1-d29d03666b7b', metadata={'creator': 'Elsevier', 'page': 5, 'creationdate--text': '26th September 2025', 'author': 'Gaurav Singh Negi', 'producer': 'Acrobat Distiller 8.1.0 (Windows)', 'crossmarkdomains[1]': 'elsevier.com', 'moddate': '2025-09-26T16:37:55+00:00', 'keywords': 'Microgrid management,Machine learning applications,Edge computing,Federated learning,Generative AI,Renewable energy,Fault detection,Load prediction,Sustainable energy systems', 'creationdate': '2025-09-26T16:27:47+00:00', 'source': 'Research papers for BSCS.pdf', 'title': 'Leveraging machine learning for optimized microgrid management: Advances, applications, challenges, and future directions', 'doi': '10.1016/j.rser.2025.116345', 'crossmarkdomains[2]': 'sciencedirect.com', 'total_pages': 14, 'elsevierwebpdfspecifications': '7.0.1', 'page_label': '6', 'subject': 'Renewable and Sustainable Energy Reviews, 226 (2026) 116345. doi:10.1016/j.rser.2025.116345', 'robots': 'noindex'}, 

In [59]:
# Create the chains
# Design ChatPrompt Templete

from langchain_core.prompts import ChatPromptTemplate

prompt =ChatPromptTemplate.from_template(
    """
    You are an expert research assistant. Use the following context to answer the question.
    If the answer cannot be found in the context, say "The answer is not available in the provided documents."
    
    <context>
    {context} 
    </context>
    
    Question:{input}
    """
)

#Context --> means all data or documents which are available in my vector store
# Input --> Querys (Which user ask)


In [60]:
import os
from dotenv import load_dotenv
load_dotenv()

GROQ_API_KEY=os.environ.get('GROQ_API_KEY')
from langchain_groq import ChatGroq
llm=ChatGroq(model="openai/gpt-oss-20b")

In [56]:
# Non RAG Question
res=llm.invoke("what is categories of microgrids?")

In [57]:
res

AIMessage(content='## The “Family Tree” of Microgrids  \nBelow is a practical, multi‑dimensional way to think about microgrid categories.  \nThe same system can belong to several of these families at once, but the taxonomy\nhelps designers, planners, regulators, and researchers talk about the right\nfeatures, constraints, and use‑cases.\n\n| **Dimension** | **Typical Sub‑Categories** | **What It Means** | **Typical Use‑Case / Example** |\n|---------------|----------------------------|-------------------|------------------------------|\n| **Size / Capacity** | •\u202fMini‑grid (<\u202f100\u202fkW)  <br>•\u202fMicrogrid (100\u202fkW\u202f–\u202f10\u202fMW) <br>•\u202fLarge‑scale microgrid (>\u202f10\u202fMW) | Physical power handling & complexity | •\u202fSolar + battery for a single building <br>•\u202fCampus‑wide 1‑MW solar + diesel <br>•\u202fIndustrial plant 20‑MW with multiple sources |\n| **Energy Source Mix** | •\u202fRenewable‑centric (solar, wind, hydro) <br>•\u202fDispatchable‑

In [61]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.output_parsers import StrOutputParser
parser=StrOutputParser()
documents_chain =create_stuff_documents_chain(llm,prompt)
retiever=vectorDB.as_retriever()


In [62]:
#Combine the retiever and chain
from langchain.chains import create_retrieval_chain

retrieval_chain =create_retrieval_chain(retiever,documents_chain)


In [55]:
#Ask the query
response = retrieval_chain.invoke({"input":"what is microgrids?"})
response

{'input': 'what is microgrids?',
 'context': [Document(id='d199a87e-ff9d-47e5-be6c-fbc2de037a3a', metadata={'producer': 'Acrobat Distiller 8.1.0 (Windows)', 'elsevierwebpdfspecifications': '7.0.1', 'total_pages': 14, 'moddate': '2025-09-26T16:37:55+00:00', 'author': 'Gaurav Singh Negi', 'page_label': '2', 'source': 'Research papers for BSCS.pdf', 'creationdate': '2025-09-26T16:27:47+00:00', 'keywords': 'Microgrid management,Machine learning applications,Edge computing,Federated learning,Generative AI,Renewable energy,Fault detection,Load prediction,Sustainable energy systems', 'title': 'Leveraging machine learning for optimized microgrid management: Advances, applications, challenges, and future directions', 'creator': 'Elsevier', 'doi': '10.1016/j.rser.2025.116345', 'crossmarkdomains[2]': 'sciencedirect.com', 'page': 1, 'crossmarkdomains[1]': 'elsevier.com', 'creationdate--text': '26th September 2025', 'subject': 'Renewable and Sustainable Energy Reviews, 226 (2026) 116345. doi:10.101

In [58]:
#Ask the query
response = retrieval_chain.invoke({"input":"what is categories of microgrids?"})
response

{'input': 'what is categories of microgrids?',
 'context': [Document(id='4dc7bb37-4b86-44f2-98fd-a93104e6f906', metadata={'moddate': '2025-09-26T16:37:55+00:00', 'source': 'Research papers for BSCS.pdf', 'elsevierwebpdfspecifications': '7.0.1', 'keywords': 'Microgrid management,Machine learning applications,Edge computing,Federated learning,Generative AI,Renewable energy,Fault detection,Load prediction,Sustainable energy systems', 'page_label': '3', 'crossmarkdomains[1]': 'elsevier.com', 'author': 'Gaurav Singh Negi', 'subject': 'Renewable and Sustainable Energy Reviews, 226 (2026) 116345. doi:10.1016/j.rser.2025.116345', 'crossmarkdomains[2]': 'sciencedirect.com', 'doi': '10.1016/j.rser.2025.116345', 'robots': 'noindex', 'producer': 'Acrobat Distiller 8.1.0 (Windows)', 'title': 'Leveraging machine learning for optimized microgrid management: Advances, applications, challenges, and future directions', 'total_pages': 14, 'creationdate': '2025-09-26T16:27:47+00:00', 'creator': 'Elsevier'

In [63]:
#Ask the query
response = retrieval_chain.invoke({"input":"what is supervised learning? "})
response

{'input': 'what is supervised learning? ',
 'context': [Document(id='e066d958-f790-442a-9ef4-05b4790c8583', metadata={'page': 8, 'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'source': '2019BurkovTheHundred-pageMachineLearning.pdf', 'total_pages': 152, 'moddate': '2019-01-22T19:51:34+00:00', 'creationdate': '2018-12-18T05:07:46+00:00', 'creator': 'PyPDF', 'page_label': '9'}, page_content='1.3 How Supervised Learning Works\nIn this section, I brieﬂy explain how supervised learning works so that you have the picture\nof the whole process before we go into detail. I decided to use supervised learning as an\nexample because it’s the type of machine learning most frequently used in practice.\nThe supervised learning process starts with gathering the data. The data for supervised\nlearning is a collection of pairs (input, output). Input could be anything, for example, email\nmessages, pictures, or sensor measurements. Outputs are usually real numbers

In [64]:
llm.invoke("what is supervised learning ?")

AIMessage(content='### Supervised Learning – The Basics\n\n| Term | What it means |\n|------|---------------|\n| **Supervised** | The algorithm is *guided* by labeled data. |\n| **Learning** | The algorithm adjusts its internal parameters to minimize error. |\n| **Model** | A mathematical mapping from input features → output predictions. |\n\nIn short: **Supervised learning is a type of machine‑learning framework in which a model is trained on a dataset that contains both input data and the correct output (labels).** The goal is for the model to learn a function that can predict the correct label for new, unseen inputs.\n\n---\n\n## 1. Core Components\n\n| Component | Role | Example |\n|-----------|------|---------|\n| **Dataset** | Collection of instances | 10,000 images of cats & dogs |\n| **Features** | Input variables | Pixel values, color histograms |\n| **Labels** | Target outputs | “cat” or “dog” |\n| **Model** | Algorithm (e.g., linear regression, neural net) | A convolutional 