In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
HUGGINGFACEHUB_API_KEY = os.getenv("HUGGINGFACEHUB_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [2]:
import requests
url = 'https://en.wikipedia.org/wiki/Capgemini'
response = requests.get(url)
print(response)

<Response [200]>


In [3]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.text,"html.parser")
data = "\n".join([p.get_text() for p in soup.select("p")])

In [4]:
with open("capgemini.txt",'w',encoding='utf-8') as f:
    f.write(data)

In [5]:
from langchain_community.document_loaders import TextLoader
loader = TextLoader("capgemini.txt",encoding="utf-8")
document = loader.load()

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(separators="\n",chunk_size=300, chunk_overlap=30)
text = text_splitter.split_documents(document)

In [7]:
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from langchain_community.vectorstores import FAISS
vectorstore = FAISS.from_documents(text,embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k":5})

In [9]:
from langchain_community.llms import Ollama

In [10]:
llmnew = Ollama(model="gemma3:4b")

  llmnew = Ollama(model="gemma3:4b")


In [11]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [12]:
template = """
{input}Based on this input search answer in here {context} if found then give else return not found message
"""

In [13]:
prompt = ChatPromptTemplate.from_template(template)

In [14]:
output_parser = StrOutputParser()

In [15]:
rag_chain = (
    {"context":retriever, "input": RunnablePassthrough()}
    | prompt
    | llmnew
    | output_parser
)

In [16]:
rag_chain.invoke("What is Capgemini?")

'Capgemini SE is a French multinational information technology (IT) services and consulting company, headquartered in Paris, France. As of 2025, it has over 340,000 employees in approximately 50 countries.'

In [17]:
rag_chain.invoke("Is Machine Learning a subset of AI?")

'not found message'