In [None]:
# Install required LangChain, ChromaDB, and gemini packages for building the RAG agent

!pip install chromadb langchain langchain-core langchain-community langchain-google-genai google-generativeai

In [None]:
!pip install python-dotenv

In [3]:
from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()

# Access the API key
api_key = os.getenv("API_KEY")
os.environ['GOOGLE_API_KEY'] = api_key # Replace with your actual key

In [4]:
# Load web page content from the given Coursera URL using LangChain's WebBaseLoader

from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(web_paths=["https://www.coursera.org/learn/deep-neural-network"])

docs = loader.load()
#print(docs)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Split loaded documents into chunks of 1000 characters with 200-character overlap for better context handling

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
splits = text_splitter.split_documents(docs)

In [6]:
# checking the splits, what info in each split(docs), how many splits
print(splits[1])
print(splits[2])
print(len(splits))

page_content='For IndividualsFor BusinessesFor UniversitiesFor GovernmentsExploreOnline DegreesCareersLog InJoin for Free0Improving Deep Neural Networks: Hyperparameter Tuning, Regularization and OptimizationAboutOutcomesModulesRecommendationsTestimonialsReviewsBrowseData ScienceMachine LearningImproving Deep Neural Networks: Hyperparameter Tuning, Regularization and OptimizationThis course is part of Deep Learning SpecializationInstructors: Andrew Ng +2 moreInstructorsInstructor ratingsWe asked all learners to give feedback on our instructors based on the quality of their teaching style.4.9 (4,890 ratings)Top InstructorAndrew NgDeepLearning.AI51 Courses•8,638,592 learnersTop InstructorKian KatanforooshDeepLearning.AI22 Courses•1,615,952 learnersTop InstructorYounes Bensouda MourriDeepLearning.AI23 Courses•1,620,585 learnersOKTop InstructorEnroll for FreeStarts Jun 5602,204 already enrolled3 modulesGain insight into a topic and learn the fundamentals.4.9(63,394 reviews)Intermediate lev

In [7]:
# converting textual info into numbers, vector embeddings and storing them in vector store
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma

# Specify the model name for the embeddings
vectorstore = Chroma.from_documents(documents=splits, embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"))

In [8]:
#created 20 vectors
print(vectorstore._collection.count())

20


In [9]:
#checking how vectors(UID) lookslike
#print(vectorstore._collection.get())

Here we can see the embeddings[array of numbers] and documents that are attached to first UID. Also you can see the text that is related to first UID. We can check this for multiple vectors by changing the value of 'ids'

In [10]:
print("\n Collection-1 ", vectorstore._collection.get(ids = ['3f407c61-709f-4980-8e71-05c58ef84a1b'], include=['embeddings', 'documents']))


 Collection-1  {'ids': [], 'embeddings': array([], dtype=float64), 'documents': [], 'uris': None, 'included': ['embeddings', 'documents'], 'data': None, 'metadatas': None}


In [11]:
# RAG pipeline
retriever = vectorstore.as_retriever()

In [12]:
#Agumentation
# Create a LANGSMITH_API_KEY in Settings > API Keys
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")



In [13]:
#setting up llm
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [14]:
#the prompt given by the user is passed as it is to 'question' using 'RunnablePassthrough'
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [15]:
# we are joining all the docs form retriver relevent to context
def format_docs(docs):
  return "\n".join(doc.page_content for doc in docs)

In [16]:
#pass the context and question to 'prompt', for 'llm', we only need context as output
rag_chain = ({"context": retriever | format_docs, "question": RunnablePassthrough()}
             | prompt
             | llm
             | StrOutputParser())

In [17]:
#Getting o/p from the llm using rag pipeline
rag_chain.invoke("What is course name")

'The provided context mentions a "Deep Learning Specialization" course, which includes "Artificial Intelligence and Machine Learning (AI/ML)." It also references other courses and specializations in technical, analytical, and business skills. However, it does not explicitly state the name of a single course.'

In [18]:
rag_chain.invoke("What is rating of this course")

'The course has a rating of 4.9 based on 63,394 reviews. 88.21% of the reviews gave it 5 stars and 10.57% gave it 4 stars. A few reviews mention the structure, assignments, and tutorials were great.'

In [19]:
rag_chain.invoke("How many modules in the course")

'Based on the context, there are three modules mentioned in the course. These modules cover topics such as optimization algorithms, hyperparameter tuning, and programming frameworks. The modules include videos, readings, and assignments.'

In [20]:
rag_chain.invoke("What is the price of this course")

'To access graded assignments and earn a Certificate, you need to purchase the Certificate experience. If you only want to read and view the course content, you can audit the course for free. Financial aid or a scholarship may also be available if you cannot afford the enrollment fee.'

In [21]:
rag_chain.invoke("give any testimonial of the course")

'Jennifer J., a learner since 2020, said that learning at her own pace and rhythm on Coursera has been an amazing experience. Larry W., a learner since 2021, said that he directly applied the concepts and skills he learned from his courses to an exciting new project at work. One reviewer loved the structure of the course, the assignments, and the tutorials.'