# RAG PROJECT

In [63]:
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
import os
# Load variables from a .env file (if one is found) into the process
# environment, then build the Gemini chat model with the key read from there.
load_dotenv()
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
llm = ChatGoogleGenerativeAI(
    model="models/gemini-2.5-flash",
    google_api_key=GEMINI_API_KEY,
)


In [64]:
from langchain_community.document_loaders import WebBaseLoader
# Fetch the course landing page and load it as LangChain documents.
course_urls = ["https://educosys.com/course/genai"]
loader = WebBaseLoader(web_paths=course_urls)
docs = loader.load()
print(docs)
# Optional: dumps the loaded documents (metadata and page text) for inspection; no API key is printed here

[Document(metadata={'source': 'https://educosys.com/course/genai', 'title': 'Hands-on Generative AI Course', 'description': 'Hands-on Generative AI Course', 'language': 'en'}, page_content="Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Hands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AI7 weeks · 3 classes/week · 2 hrs/class + Post-class Doubt SupportAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep Generative Models Discriminative and Generative models Generative Adversa

In [65]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Break the loaded page into overlapping chunks that will be embedded later.
CHUNK_SIZE = 1000     # value passed as chunk_size
CHUNK_OVERLAP = 200   # value passed as chunk_overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(docs)

In [66]:
# Show the first two chunks, then the total number of chunks produced.
for chunk_index in (0, 1):
    print(splits[chunk_index])
print(len(splits))


page_content='Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Hands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AI7 weeks · 3 classes/week · 2 hrs/class + Post-class Doubt SupportAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep Generative Models Discriminative and Generative models Generative Adversarial Networks (GANs) Variational Autoencoders (VAEs) Probabilistic Data Generation Using VAEs Four Mini Projects using TensorFlow Metrics Visualization using TensorBoard Mini P

In [67]:
# Embed the chunks with the "all-MiniLM-L6-v2" Hugging Face model and index
# them in a Chroma vector store.
# (An OpenAI-based alternative would pass OpenAIEmbeddings() as `embedding`.)
#
# Import from langchain_community rather than `langchain.embeddings` /
# `langchain.vectorstores`: those top-level paths are deprecated re-exports.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# NOTE(review): re-running this cell in the same kernel may add the same chunks
# to the collection again — confirm, and restart the kernel if counts look inflated.
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)


In [68]:
# Print the number of records in the store's underlying collection.
# `_collection` is an underscore-prefixed (private) attribute, used here for inspection only.
print(vectorstore._collection.count())

33


In [69]:
# Print whatever the underlying collection's get() returns with default arguments
# (the result includes the record ids); the output grows with the collection.
print(vectorstore._collection.get())

{'ids': ['3d4185c4-e991-4ec9-9c17-2ed15f1c2bed', 'e4e0b964-86f2-4b11-b1df-63b339ef2150', '30c9a24d-725a-4ada-8af9-60318da40763', '9f0c7e1c-eed8-4b40-b34e-8eb9c4061a5d', '091b6076-a331-4147-912d-b8914b0d8aed', '6ec242be-f0b5-49da-b1c5-e1720eb682b9', '129266e1-acb9-487f-a7d2-b2df45666d03', '6471c5e7-a2c7-4ea1-bd74-3683fadc05aa', 'bffab2a5-f78b-4fd0-9069-9ec82aa73849', '3e0a9567-0154-4ee5-8882-cdc31f74e0d0', '3a6e95bd-3eb4-4c9d-8c90-bc1c4d10796e', '062745b7-9691-4798-8b7e-dedf44a9aaed', 'f67df641-c395-4683-bca6-bc48d6c9ca53', '75086835-fe11-4bc1-9d30-40322c01428e', '873e0784-d550-473e-b9e4-3d3373eb0451', '412d3ce8-f95d-4c2f-bbb1-a8fd5a61a5c2', 'ad058fcc-5b0f-4665-b39c-aca02c81495f', 'd43aba9b-45d7-470c-809d-8d34de3371b9', 'aa8d1f36-2d10-4f0a-9209-0f006559cb68', 'bdd73c6f-6297-4d24-b54c-821953650123', 'e22b49bd-7071-45bd-a623-dc585cff4ef5', '5cb03837-2d18-4680-80ee-6c586100d676', 'c534d363-f167-4fa0-a555-554c455ea350', 'e0da477f-2821-4193-afcd-e0e030d7a956', '7421e995-450a-4a31-83a2-88d4bf

In [70]:
# Inspect one stored record, including its embedding vector.
# The id is looked up at run time instead of being pasted from an earlier run:
# no ids are passed to Chroma.from_documents, so they are auto-generated and a
# literal id copied from a previous session will not exist after re-ingesting.
first_id = vectorstore._collection.get(limit=1)["ids"][0]
print("\nCollection 1- ", vectorstore._collection.get(ids=[first_id], include=["embeddings", "documents", "metadatas"]))


Collection 1-  {'ids': ['3d4185c4-e991-4ec9-9c17-2ed15f1c2bed'], 'embeddings': array([[-9.63062197e-02, -8.97593945e-02,  4.69975807e-02,
        -1.12861283e-02, -2.90810987e-02,  2.91140247e-02,
        -2.46875044e-02, -3.76585685e-02, -1.09929703e-01,
        -5.26918583e-02, -1.38205616e-02, -9.62873027e-02,
        -6.02138275e-03, -9.81266424e-03, -5.78716546e-02,
         1.95925999e-02,  7.46515766e-02,  5.44828102e-02,
        -2.68450361e-02, -6.09801942e-03,  1.87825412e-02,
         5.61191812e-02,  1.51571156e-02, -4.96513844e-02,
         5.62588908e-02,  2.29792576e-02,  2.53116768e-02,
        -6.27807006e-02,  7.78359845e-02, -2.71667168e-02,
         1.09117981e-02,  4.46829088e-02,  1.65329948e-02,
         4.48449291e-02, -2.69264132e-02,  7.19821751e-02,
        -8.32602307e-02,  2.81116646e-02,  5.03980704e-02,
         7.80369202e-03, -6.50567785e-02,  4.00032755e-03,
         2.20578518e-02, -4.91427891e-02,  5.53391315e-02,
         2.31062118e-02, -4.1502054

In [71]:
# Expose the vector store through the retriever interface (default settings)
# so it can be piped into a chain.
retriever = vectorstore.as_retriever()

In [72]:
from langchain import hub
# Pull the prompt published on LangChain Hub under the handle "rlm/rag-prompt".
# NOTE(review): presumably this needs network access on every fresh run — confirm.
prompt = hub.pull("rlm/rag-prompt")



In [73]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [74]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a single newline, in input
        order; an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    return "\n".join(page_texts)

In [75]:
# Assemble the RAG pipeline:
#   1. build the prompt inputs — "context" is the retrieved documents run
#      through format_docs, "question" is the caller's input passed through;
#   2. fill the prompt, 3. call the LLM, 4. parse the reply with StrOutputParser.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()


In [76]:
# Query the chain; as the last expression, the returned answer is shown as the cell output.
rag_chain.invoke("Are the recordings of courses available? For how long?")

'Yes, you can watch the class recordings. However, the provided context does not specify for how long the recordings are available.'

In [None]:
# Ask how many GenAI courses are offered.
# NOTE(review): the question text contains typos ("cource", "availabe"); it is the
# runtime query sent to the chain, so it is left untouched here — consider fixing and re-running.
rag_chain.invoke("how many gen ai cource availabe?")


'Based on the provided context, the text repeatedly refers to "the GenAI course" or "this course," indicating a single Generative AI course. The context does not specify how many GenAI courses are available.'

In [81]:
# Probe the chain with a terse, keyword-style question.
rag_chain.invoke("testimonial ?")

"Testimonials are real stories and voices of delight from students sharing their learning journey and satisfaction. They reflect the quality, dedication, and excellence of the educational experience. For example, Sahitya Raj A's feedback on the Educosys Generative AI course is presented as a testimonial."

In [80]:
# Same topic as the terse "testimonial ?" query, phrased as a full question.
rag_chain.invoke("are the testimonials for the course available?")

"Yes, testimonials for the course are available. The provided context includes a testimonial from Manika Kaushik, a Senior Software Engineer at Optum-United HealthGroup. She highly recommends the course and praises the instructor's clear explanations."