In [27]:
# Install required LangChain, ChromaDB, and gemini packages for building the RAG agent

pip install chromadb langchain langchain-core langchain-community langchain-google-genai google-generativeai

INFO: pip is looking at multiple versions of google-generativeai to determine which version is compatible with other requirements. This could take a while.
Collecting google-generativeai
  Downloading google_generativeai-0.8.4-py3-none-any.whl.metadata (4.2 kB)
  Downloading google_generativeai-0.8.3-py3-none-any.whl.metadata (3.9 kB)
  Downloading google_generativeai-0.8.2-py3-none-any.whl.metadata (3.9 kB)
  Downloading google_generativeai-0.8.1-py3-none-any.whl.metadata (3.9 kB)
  Downloading google_generativeai-0.8.0-py3-none-any.whl.metadata (3.9 kB)
  Downloading google_generativeai-0.7.2-py3-none-any.whl.metadata (4.0 kB)
  Downloading google_generativeai-0.7.1-py3-none-any.whl.metadata (3.9 kB)
INFO: pip is still looking at multiple versions of google-generativeai to determine which version is compatible with other requirements. This could take a while.
  Downloading google_generativeai-0.7.0-py3-none-any.whl.metadata (3.9 kB)
  Downloading google_generativeai-0.6.0-py3-none-an

In [1]:
# Retrieve and set OpenAI API key securely from Colab userdata environment

from google.colab import userdata
import os
os.environ['GOOGLE_API_KEY'] = 'AIzaSyA-oVeRrKA2H-HLyd51ozHy2GGv4b8YFVc' # Replace with your actual key

In [59]:
# Load web page content from the given Coursera URL using LangChain's WebBaseLoader

from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(web_paths=["https://www.coursera.org/learn/deep-neural-network"])

docs = loader.load()
#print(docs)

In [3]:
# Split loaded documents into chunks of 1000 characters with 200-character overlap for better context handling

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
splits = text_splitter.split_documents(docs)

In [4]:
# checking the splits, what info in each split(docs), how many splits
print(splits[1])
print(splits[2])
print(len(splits))

page_content='For IndividualsFor BusinessesFor UniversitiesFor GovernmentsExploreOnline DegreesCareersLog InJoin for Free0Improving Deep Neural Networks: Hyperparameter Tuning, Regularization and OptimizationAboutOutcomesModulesRecommendationsTestimonialsReviewsBrowseData ScienceMachine LearningImproving Deep Neural Networks: Hyperparameter Tuning, Regularization and OptimizationThis course is part of Deep Learning SpecializationInstructors: Andrew Ng +2 moreInstructorsInstructor ratingsWe asked all learners to give feedback on our instructors based on the quality of their teaching style.4.9 (4,890 ratings)Top InstructorAndrew NgDeepLearning.AI51 Courses•8,633,209 learnersTop InstructorKian KatanforooshDeepLearning.AI22 Courses•1,615,602 learnersTop InstructorYounes Bensouda MourriDeepLearning.AI23 Courses•1,620,235 learnersOKTop InstructorEnroll for FreeStarts Jun 4602,102 already enrolled3 modulesGain insight into a topic and learn the fundamentals.4.9(63,394 reviews)Intermediate lev

In [5]:
# converting textual info into numbers, vector embeddings and storing them in vector store
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma

# Specify the model name for the embeddings
vectorstore = Chroma.from_documents(documents=splits, embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"))

In [6]:
#created 20 vectors
print(vectorstore._collection.count())

20


In [60]:
#checking how vectors(UID) lookslike
#print(vectorstore._collection.get())

Here we can see the embeddings[array of numbers] and documents that are attached to first UID. Also you can see the text that is related to first UID. We can check this for multiple vectors by changing the value of 'ids'

In [58]:
print("\n Collection-1 ", vectorstore._collection.get(ids = ['3f407c61-709f-4980-8e71-05c58ef84a1b'], include=['embeddings', 'documents']))


 Collection-1  {'ids': [], 'embeddings': array([], dtype=float64), 'documents': [], 'uris': None, 'included': ['embeddings', 'documents'], 'data': None, 'metadatas': None}


In [9]:
# RAG pipeline
retriever = vectorstore.as_retriever()

In [10]:
#Agumentation
# Create a LANGSMITH_API_KEY in Settings > API Keys
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")



In [49]:
#setting up llm
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [50]:
#the prompt given by the user is passed as it is to 'question' using 'RunnablePassthrough'
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [51]:
# we are joining all the docs form retriver relevent to context
def format_docs(docs):
  return "\n".join(doc.page_content for doc in docs)

In [52]:
#pass the context and question to 'prompt', for 'llm', we only need context as output
rag_chain = ({"context": retriever | format_docs, "question": RunnablePassthrough()}
             | prompt
             | llm
             | StrOutputParser())

In [53]:
#Getting o/p from the llm using rag pipeline
rag_chain.invoke("What is course name")

'The context discusses the Deep Learning Specialization, which includes multiple courses. The second course of the Deep Learning Specialization is mentioned. However, the name of the course is not specified in the provided context.'

In [43]:
rag_chain.invoke("What is rating of this course")

"The course has an average rating of 4.96 stars based on 3,394 reviews. Instructor ratings are also high, with an average of 4.9 (4,890 ratings). Many learners have provided positive feedback on the course content and the instructors' teaching styles."

In [55]:
rag_chain.invoke("How many modules in the course")

'The course has a total of three modules. These modules cover topics such as optimization algorithms, hyperparameter tuning, and TensorFlow. The course also provides insight into the fundamentals of deep learning.'

In [56]:
rag_chain.invoke("What is the price of this course")

'The price of the course is not explicitly mentioned in the context. However, the text mentions an enrollment fee and the possibility of financial aid or scholarships if you cannot afford it. You can also audit the course for free to access the content.'

In [57]:
rag_chain.invoke("give any testimonial of the course")

'Yes, there are testimonials for the course. One learner, NNC, mentioned that the course really helped them gain a detailed understanding of optimization techniques such as RMSprop and Adam, as well as the inner workings of batch normalization. Another learner, J.Learner since 2020, said that they directly applied the concepts and skills they learned from the courses to an exciting new project at work.'