# Web Page RAG

In [9]:
import os
from dotenv import load_dotenv
from constants import openai_key

load_dotenv()
os.environ["OPENAI_API_KEY"] = openai_key

### Web Scraping

In [10]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(web_paths=["https://www.educosys.com/course/genai"])

docs = loader.load()
print(docs)

USER_AGENT environment variable not set, consider setting it to identify your requests.


[Document(metadata={'source': 'https://www.educosys.com/course/genai', 'title': 'Hands-on Generative AI Course', 'description': 'Learn, Build, Deploy and Apply Generative AI', 'language': 'en'}, page_content="Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Join Anytime - Get LifeTime Access!Hands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AI5 weeks · 3 classes/week · 2 hrs/class + Post-class Doubt SupportAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep Generative Models D

### Spliting the docs

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
splits = text_splitter.split_documents(docs)

In [12]:
print(splits[0])
print(splits[1])
print(splits[2])

page_content='Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Join Anytime - Get LifeTime Access!Hands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AI5 weeks · 3 classes/week · 2 hrs/class + Post-class Doubt SupportAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep Generative Models Discriminative and Generative models Generative Adversarial Networks (GANs) Variational Autoencoders (VAEs) Probabilistic Data Generation Using VAEs Four Mini Projects using TensorFlow Metrics Vis

In [13]:
print(len(splits))

20


### Adding the docs to the VectorDB


In [14]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma


vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

In [15]:
print(vectorstore._collection.count())

20


In [16]:
print(vectorstore._collection.get())

{'ids': ['fa7de156-3d0c-433d-a14d-d549ed104282', 'c8eadea6-1262-4185-80de-ba0b023e4e9d', '7d02c237-fc8d-49c8-b877-d1788767b248', '2ee00223-b5c8-4159-8775-2b5976f6f07c', '3634458f-5b42-4f5d-976f-46f3595b1835', '93f51321-550d-425e-93e9-56079e8243e5', 'ac0c1798-92ef-45ac-8367-9c3a66e700f0', '98744edb-bf2c-48a1-a425-bd612e96362f', '46768b17-6f0b-4019-b11b-9dcd88925a7d', '07850596-4909-43e9-b7c5-6273b57360c4', 'db5f1ef3-0c34-4882-896c-2d9da29f5885', '0a756ca8-0574-480e-99d5-69a1c0459fb7', '02024b46-62d0-46a8-bc39-ca2c89bc819b', 'e58e1e4c-d5e3-4f89-8843-cb8dcb5dd622', '4b0f1680-6091-4646-8645-de8c92c5a43a', 'd35992a3-86a9-42cc-b65a-8e13a23da2a6', 'd419491a-2420-4ff8-94de-def693e98c52', 'c118af6d-abac-458d-b3f1-970eccb44182', 'f4ac51a4-78d1-4d2f-ad4a-942198bf7782', '0d5b239e-68af-4a3e-ae17-87da1c5db5b2'], 'embeddings': None, 'documents': ['Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Join Anytime - Get LifeTime Access!Hands-on Generative AI C

In [17]:
print("\nCollection 1 - ", vectorstore._collection.get(ids=['5e854824-f5d4-4744-9069-16431329ca06'], include=["embeddings", "documents"]))
print("\nCollection 2 - ", vectorstore._collection.get(ids=['0a74eba2-ffef-47a3-b52c-ef479ce2764d'], include=["embeddings", "documents"]))
print("\nCollection 3 - ", vectorstore._collection.get(ids=['22c92420-1275-4c76-ad89-797b4f6717af'], include=["embeddings", "documents"]))


Collection 1 -  {'ids': [], 'embeddings': array([], dtype=float64), 'documents': [], 'uris': None, 'included': ['embeddings', 'documents'], 'data': None, 'metadatas': None}

Collection 2 -  {'ids': [], 'embeddings': array([], dtype=float64), 'documents': [], 'uris': None, 'included': ['embeddings', 'documents'], 'data': None, 'metadatas': None}

Collection 3 -  {'ids': [], 'embeddings': array([], dtype=float64), 'documents': [], 'uris': None, 'included': ['embeddings', 'documents'], 'data': None, 'metadatas': None}


## RAG Pipline


In [18]:
retriever = vectorstore.as_retriever()

In [19]:
#Makin the prompt
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template("""
You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.

Question: {question}
Context: {context}

Answer:
""")






### LLM chain

In [20]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI()

In [21]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [22]:
# This is to join the relavent informations together and send as context
def format_docs(docs):
  return "\n".join(doc.page_content for doc in docs)

In [23]:
rag_chain = ({"context" : retriever | format_docs, "question" : RunnablePassthrough()}
             | prompt
             | llm
             | StrOutputParser())

In [24]:
rag_chain.invoke("Are the recordings of the course available? For how long?")

'Yes, the recordings of the course are available for lifetime access.'

In [25]:
rag_chain.invoke("Are the testimonials for the course available? Name the studenst who have shared testimonials")

'Yes, testimonials for the course are available. The students who have shared testimonials are Sahitya Raj, Ruthira Sekar, Abhijit Mone, Manika Kaushik, and Gowtamy Reddy Godhala.'

In [26]:
rag_chain.invoke("Who is the teacher in the course")

"The teacher in the Generative AI course is Keerti, who is described as passionate and genuine, making complex topics easy to understand for students. Many participants appreciated her teaching style and dedication to ensuring everyone grasped key concepts before moving on. Keerti's efforts in designing and delivering the course were praised by the participants."