In [1]:
!pip install langchain_community langchainhub chromadb langchain langchain-openai



In [4]:
from google.colab import userdata
import os
os.environ['HUGGING_FACE_API_KEY'] = userdata.get('huggingface')

In [22]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(web_paths=["https://www.educosys.com/course/genai"])

docs = loader.load()
print(docs)

[Document(metadata={'source': 'https://www.educosys.com/course/genai', 'title': 'Hands-on Generative AI Course', 'description': 'Hands-on Generative AI Course', 'language': 'en'}, page_content="Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Ongoing LIVE CourseHands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AIJoin Anytime!Get LifeTime AccessAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep Generative Models Discriminative and Generative models Generative Adversarial Netwo

**Chunkings**

In [23]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
splits = text_splitter.split_documents(docs)

In [24]:
print(splits[0])
print(splits[1])
print(splits[2])

page_content='Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Ongoing LIVE CourseHands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AIJoin Anytime!Get LifeTime AccessAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep Generative Models Discriminative and Generative models Generative Adversarial Networks (GANs) Variational Autoencoders (VAEs) Probabilistic Data Generation Using VAEs Four Mini Projects using TensorFlow Metrics Visualization using TensorBoard Mini Project - Imple

**Embeddings**

In [25]:

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from google.colab import userdata
import os

In [26]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get("huggingface")

In [27]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


**Stored in Chroma Database**

In [28]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.schema import Document
from transformers import pipeline


In [29]:
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="./chroma_db_web"  # optional: persist locally
)

In [30]:
from langchain.vectorstores import Chroma

# --- Load existing vectorstore (if persisted) ---
vectorstore = Chroma(persist_directory="./chroma_db_web", embedding_function=embeddings)

# --- 1️⃣ List all stored document IDs ---
all_ids = vectorstore._collection.get()["ids"]
print(f"✅ Stored document IDs: {all_ids}")

# --- 2️⃣ Get all documents and embeddings ---
all_docs = vectorstore._collection.get(include=["metadatas", "documents", "embeddings"])
for i, doc in enumerate(all_docs["documents"], start=1):
    print(f"\n🔹 Document {i}:")
    print("Text:", doc[:300], "...")  # first 300 chars
    print("Metadata:", all_docs["metadatas"][i-1])
    print("Embedding length:", len(all_docs["embeddings"][i-1]))

✅ Stored document IDs: ['cc975923-a082-490a-95b8-5961fa112554', '04a8e8a7-0d35-4f47-bdb7-f1c03d0e9f6f', '13a6b123-0908-45c4-8b82-10a1bfbb8c8a', '5570e017-336e-4bb7-98a8-f597668f32b5']

🔹 Document 1:
Text: Groq builds hardware accelerators for AI inference. ...
Metadata: None
Embedding length: 384

🔹 Document 2:
Text: LangChain connects LLMs, embeddings, and vector databases. ...
Metadata: None
Embedding length: 384

🔹 Document 3:
Text: Hugging Face provides open-source NLP models and datasets. ...
Metadata: None
Embedding length: 384

🔹 Document 4:
Text: Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Ongoing LIVE CourseHands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AIJoin Anytime!Get LifeTime AccessAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode ...
Metadata: {'title': 'Hands-on Generative AI Course', 'source': 'https://www.educosys.com/course/genai', 'language': 'en', 'description

In [37]:
query = "Wgat is Generative AI?"
results = vectorstore.similarity_search(query, k=2)
print("\n🔎 Top 2 similar documents:")
for i, doc in enumerate(results, start=1):
    print(f"{i}. {doc.page_content[:300]} ...")


🔎 Top 2 similar documents:
1. Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Ongoing LIVE CourseHands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AIJoin Anytime!Get LifeTime AccessAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode ...
2. Groq builds hardware accelerators for AI inference. ...


**Generation Part**

In [38]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

# The fix is to use the "text2text-generation" pipeline for T5 models
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_new_tokens=256,
    model_kwargs={"temperature": 0.7, "top_k": 50} # Add some parameters for better generation
)

llm = HuggingFacePipeline(pipeline=pipe)

print("✅ LLM Pipeline successfully created!")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Device set to use cuda:0


✅ LLM Pipeline successfully created!


**Query**

In [39]:
# Assuming 'llm' is now the corrected HuggingFacePipeline object from the step above
# and 'vectorstore' is your Chroma vector store.

from langchain.chains import RetrievalQA
import pprint # Using pprint for cleaner dictionary output

# 1. Create the RetrievalQA chain (as you already did)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}), # Retrieve top 2 documents
    return_source_documents=True # This is useful to see where the answer came from
)

# 2. Define a query and run the chain
query = "What is the structure of the Generative AI course?"
result = qa_chain.invoke({"query": query})

# 3. Print the results
print("❓ Query:", query)
print("\n✅ Answer:")
print(result["result"])
print("\n📚 Source Documents:")
pprint.pprint(result["source_documents"])

Token indices sequence length is longer than the specified maximum sequence length for this model (2156 > 512). Running this sequence through the model will result in indexing errors


❓ Query: What is the structure of the Generative AI course?

✅ Answer:
Learning, Build, Deploy and Apply Generative AIJoin Anytime!Get LifeTime AccessAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Project - Train an Autoencoder on the MNIST Dataset2Week 2Deep Generative Models Discriminative and Generative models Generative Adversarial Networks (GANs) Variational Autoencoders (VAEs) Probabilistic Data Generation Using VAEs Four Mini Projects using TensorFlow Metrics Visualization using TensorBoard Mini Project - Implement a GAN to Generate Handwritten Digits Mini Project - Train a VAE to Gene

📚 Sour

In [40]:
query = "Are the testimonials for the course available? Name the studenst who have shared testimonials?"
result = qa_chain.invoke({"query": query})

# 3. Print the results
print("❓ Query:", query)
print("\n✅ Answer:")
print(result["result"])
print("\n📚 Source Documents:")
pprint.pprint(result["source_documents"])

❓ Query: Are the testimonials for the course available? Name the studenst who have shared testimonials?

✅ Answer:
Yes

📚 Source Documents:
[Document(metadata={'language': 'en', 'title': 'Hands-on Generative AI Course', 'description': 'Hands-on Generative AI Course', 'source': 'https://www.educosys.com/course/genai'}, page_content="Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Ongoing LIVE CourseHands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AIJoin Anytime!Get LifeTime AccessAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mini Projec

In [41]:
query = "What all projects are covered in the course?"
result = qa_chain.invoke({"query": query})

# 3. Print the results
print("❓ Query:", query)
print("\n✅ Answer:")
print(result["result"])
print("\n📚 Source Documents:")
pprint.pprint(result["source_documents"])

❓ Query: What all projects are covered in the course?

✅ Answer:
Basic neural networks to advanced frameworks and architectures

📚 Source Documents:
[Document(metadata={'source': 'https://www.educosys.com/course/genai', 'title': 'Hands-on Generative AI Course', 'description': 'Hands-on Generative AI Course', 'language': 'en'}, page_content="Hands-on Generative AI CourseCoursesBundle CoursesMentorFree ContentTestimonialsFAQLogin Signup Ongoing LIVE CourseHands-on Generative AI CourseLearn, Build, Deploy and Apply Generative AIJoin Anytime!Get LifeTime AccessAccess all Live BatchesLifetime access of RecordingsAccess Discord CommunityCode availableBuild ProjectsLearn Future-Ready TechEnroll 1Week 1Foundations of Generative AI Introduction to AI Mathematical Foundations for AI Probability, Statistics, and Linear Algebra Basics of Neural Networks Gradient Descent and Optimization Basics Architectures: Feedforward, RNN, and CNN Mini Project - Build a Simple Neural Network Using TensorFlow Mi