In [295]:
!pip install langchain_community langchainhub chromadb langchain langchain-openai



In [296]:
from google.colab import userdata
import os
os.environ['OPENAI_API_KEY'] = userdata.get('RAGKey')

In [297]:
#scraping web application, so using langchain's web base loader
from langchain_community.document_loaders import WebBaseLoader

url = "https://harrypottershop.co.uk/collections/books"

loader = WebBaseLoader(web_paths=[url])

docs = loader.load()
print(docs)

[Document(metadata={'source': 'https://harrypottershop.co.uk/collections/books', 'title': 'Harry Potter Books | Book Set | Harry Potter Shop UK\n\n', 'description': 'Discover Harry Potter books at the Harry Potter Shop. Shop hardbacks, paperbacks, and boxed sets to relive the magic of the Wizarding World again and again.', 'language': 'en'}, page_content="\n\n\n\n\n\n\n\nHarry Potter Books | Book Set | Harry Potter Shop UK\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n      Skip to content\n    \n\n\n\n\n\n\n                Country/region\n              \n\n                    Albania (ALL\n                    L)\n                  \n                    Algeria (DZD\n                    د.ج)\n                  \n                    Andorra (EUR\n                    €)\n                  \n                    Angola (AOA\n                    Kz)\n                  \n                    Anguill

In [298]:
#splitting text so that only necessary information is passed to llm as query
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [299]:
print(len(splits))

119


In [300]:
#embedding splits as vectors and storing the vectors in vector database
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=splits,embedding=OpenAIEmbeddings())

In [301]:
print(vectorstore._collection.count())

687


In [302]:
print(vectorstore._collection.get())

{'ids': ['006ccd80-0c31-41f0-85dc-e6889e7e6449', '91d84b2e-4602-4b68-b06f-919a1ae361e2', 'a1c04299-9afd-4f2a-9b1d-7a97830d365b', 'fdd92f51-37e5-4b3a-86ca-f00ccd12424f', '0edf176e-deb8-459e-b314-18b5ca5f0b4d', '0d72d3ec-2c72-4589-b94c-2fe80b13862e', '8e2e8d37-8061-4e87-a310-a7457ca2fb02', '5095a114-2851-4bb6-8997-cb5da414a74c', '545178da-f523-4942-ab28-69a9dccb78ba', '79290b66-a6d3-407b-8880-1ededadbf8f1', '68025515-5f66-4b29-a570-7cee15810853', '2fefe5dd-aa76-4d1e-a660-7f0e23bdabeb', 'ecfdaf55-160e-4511-95af-e33cdbe8e3e2', 'd1abce72-349f-412f-be0c-fb36756ebac3', 'e6d2e58b-2a99-414a-a4d1-cd254f28e3ff', 'b4058e13-d6f8-4400-b026-d5629095b166', '41a0698f-1f9a-44bc-937c-6f63beef2e88', '4da0613d-4dff-4dca-99a0-adfa2419dc38', 'e3000fc2-d12e-4d26-a9ef-ea7aad89ae50', '2bfc37aa-1dac-4681-9ee2-4bcf24a4a4b9', '2826e07c-48da-4e70-ab2c-304440a510b1', '578f3222-a058-4c46-bcf4-5d66fe41338e', '30996e9c-9e84-44f1-8ec4-79ed6bae7aa3', '4363865d-11a7-4e1b-9a9f-dd91fcf014f9', '1ef33073-0dd4-4417-923c-018f41

In [303]:
print("\nCollection 1 -",vectorstore._collection.get(ids=['006ccd80-0c31-41f0-85dc-e6889e7e6449'],include=['embeddings','documents']))


Collection 1 - {'ids': ['006ccd80-0c31-41f0-85dc-e6889e7e6449'], 'embeddings': array([[-0.01130456, -0.02446928,  0.00153104, ..., -0.04207239,
         0.00724231, -0.010607  ]]), 'documents': ['Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Starts on 16th September 2025Hands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AI7 weeks · 3 classes/week · 2 hrs/class + Post-class Doubt SupportClasses on Tue, Wed, Thurs - 9PM ISTAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep G

**RAG Pipeline**

In [304]:
#Retrieval
retriever = vectorstore.as_retriever()

In [305]:
#Augmentation
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [306]:
#llm
from langchain_openai import ChatOpenAI
llm = ChatOpenAI()

In [307]:
from langchain_core.runnables import RunnablePassthrough #for passing the question as it is
from langchain_core.output_parsers import StrOutputParser

In [308]:
def format_docs(docs):
  return "\n".join(doc.page_content for doc in docs)

In [309]:
#rag chain
rag_chain = ({"context":retriever | format_docs,"question":RunnablePassthrough()}
             | prompt
             | llm
             | StrOutputParser())

In [310]:
#testing rag chain
rag_chain.invoke("What is the price of Harry Potter Film Wizardry Book?")

'The price of the Harry Potter Film Wizardry Book is $56.00 USD.'

In [311]:
from langchain_core.runnables import RunnableLambda

In [312]:
def print_prompt(prompt_text):
  print("Prompt - ",prompt_text)
  return prompt_text

In [313]:
rag_chain_with_print = ({"context":retriever | format_docs,"question":RunnablePassthrough()}
             | prompt
             | RunnableLambda(print_prompt)
             | llm
             | StrOutputParser())

In [314]:
rag_chain_with_print.invoke("What is the price of chamber of secrets?")

Prompt -  messages=[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: What is the price of chamber of secrets? \nContext: New Edition Harry Potter and the Philosopher's Stone (Paperback)\n\n\n\n\nRegular price\n\n          $13.00 USD\n        \n\n\n\nUnit price\n\n\n/\n\xa0per\xa0\n\n\n\n\n\n\n\n\n\nQuick shop\n\n\n\n\nLoading...\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAdd to wishlist\n\n\n\n\nRemove from wishlist\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nNew Edition Harry Potter and the Chamber of Secrets (Paperback)\n\n\n\n\n\n\nNew Edition Harry Potter and the Chamber of Secrets (Paperback)\n\n\n\n\nRegular price\n\n          $13.00 USD\n        \n\n\n\nUnit price\n\n\n/\n\xa0per\xa0\n\n\n\n\n\n\n\n\n\nQuick shop\n\n\n\n\nLoading...\n\n\n\n\n\n\n\n\n\n\n\n\n

'The price of "Harry Potter and the Chamber of Secrets" is $13.00 USD.'