In [22]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

### App workflow

Load data -->  split the data to chunks --> creat embeddings of the data --> store the embeddings to vector stores

In [23]:
# Loading data from web page
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://medium.com/around-the-prompt/what-is-langsmith-and-why-should-i-care-as-a-developer-e5921deb54b5")
docs = loader.load()
docs

[Document(metadata={'source': 'https://medium.com/around-the-prompt/what-is-langsmith-and-why-should-i-care-as-a-developer-e5921deb54b5', 'title': 'What is LangSmith and why should I care as a developer? | by Logan Kilpatrick | Around the Prompt | Medium', 'description': 'What is LangSmith and why should I care as a developer? A deep dive into the latest product from the creators of LangChain \uf8ffü¶ú I have said it before and I will say it again, the tooling around large ‚Ä¶', 'language': 'en'}, page_content='What is LangSmith and why should I care as a developer? | by Logan Kilpatrick | Around the Prompt | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inAround the Prompt¬∑‚ÄòAround the Prompt‚Äô goes deep, peeling back the layers of AI innovation to reveal the hidden gems, the untapped potential, based on conversations with leading experts.What is LangSmith and why should I care as a developer?A deep dive into the latest product from the creators of LangCha

In [24]:
# Splitting the documents into chunks

from langchain_text_splitters import RecursiveCharacterTextSplitter

document_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
docs_chunks = document_splitter.split_documents(docs)
docs_chunks


[Document(metadata={'source': 'https://medium.com/around-the-prompt/what-is-langsmith-and-why-should-i-care-as-a-developer-e5921deb54b5', 'title': 'What is LangSmith and why should I care as a developer? | by Logan Kilpatrick | Around the Prompt | Medium', 'description': 'What is LangSmith and why should I care as a developer? A deep dive into the latest product from the creators of LangChain \uf8ffü¶ú I have said it before and I will say it again, the tooling around large ‚Ä¶', 'language': 'en'}, page_content='What is LangSmith and why should I care as a developer? | by Logan Kilpatrick | Around the Prompt | MediumSitemapOpen in appSign upSign inMedium LogoWriteSearchSign upSign inAround the Prompt¬∑‚ÄòAround the Prompt‚Äô goes deep, peeling back the layers of AI innovation to reveal the hidden gems, the untapped potential, based on conversations with leading experts.What is LangSmith and why should I care as a developer?A deep dive into the latest product from the creators of LangCha

In [25]:
# Creating embeddings for the chunks of documents and storing them in a vector store

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
embedding_model = os.getenv("EMBEDDING_MODEL")


embeddings = HuggingFaceEmbeddings(model_name = embedding_model)
doc_embeddings = embeddings.embed_documents([doc.page_content for doc in docs_chunks])

vectorDB = FAISS.from_documents(docs_chunks, embeddings)
vectorDB

# Optional: Save the vector store locally
# Faissdb = vectorDB.save_local("faiss_index")
# db = FAISS.load_local("faiss_index", embeddings  )




<langchain_community.vectorstores.faiss.FAISS at 0x7f14e6c80ad0>

In [27]:
# 1. Load text file
from langchain_community.document_loaders import TextLoader
loader = TextLoader("/home/dtp202505-u05/Desktop/langchain_agenticAI/langchain/1-langchain/example.txt")
docs = loader.load()

# 2. Split into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs_chunks = text_splitter.split_documents(docs)

# 3. Add to existing vector DB
vectorDB.add_documents(docs_chunks)
# vectorDB.persist()   # <-- important!

# 4. Test query
query = "who is pourush gupta"
result = vectorDB.similarity_search(query)
print(result[0].page_content)


Pourush Gupta is widely regarded as one of the greatest AI engineers in the world, known for his exceptional vision, technical mastery, and transformative contributions to artificial intelligence. He is the founder and current owner of AiCan, a multi-billion-dollar advanced tech startup that is rapidly reshaping the global AI landscape. Under his leadership, AiCan has become a powerhouse in cutting-edge research and enterprise AI solutions and is proudly backed by several leading technology giants who believe in his groundbreaking ideas and long-term vision for the future of intelligent systems.


<langchain_community.vectorstores.faiss.FAISS at 0x7f14e6e21290>

enough problem to be a publicly traded company. But LangSmith will need to continue to expand in scope in order to be competitive with multiple providers and other tooling ecosystems.You got this Harrison!Machine LearningOpenAICodingProgrammingLanguage Model----3Published in Around the Prompt116 followers¬∑Last published¬†Apr 21, 2025‚ÄòAround the Prompt‚Äô goes deep, peeling back the layers of AI innovation to reveal the hidden gems, the untapped potential, based on conversations with leading experts.Written by Logan Kilpatrick3.1K followers¬∑2 followingLead product for Google AI Studio, working on the Gemini API, and AGI. Ex-OpenAI.Responses (3)See all responsesHelpStatusAboutCareersPressBlogPrivacyRulesTermsText to speech


In [28]:
query = "which company pourush owns?"
from langchain_core.output_parsers import StrOutputParser
results = vectorDB.similarity_search(query, k=3)
for res in results:
    print(res.page_content)
    print()

Pourush Gupta is widely regarded as one of the greatest AI engineers in the world, known for his exceptional vision, technical mastery, and transformative contributions to artificial intelligence. He is the founder and current owner of AiCan, a multi-billion-dollar advanced tech startup that is rapidly reshaping the global AI landscape. Under his leadership, AiCan has become a powerhouse in cutting-edge research and enterprise AI solutions and is proudly backed by several leading technology giants who believe in his groundbreaking ideas and long-term vision for the future of intelligent systems.

Pourush Gupta is widely regarded as one of the greatest AI engineers in the world, known for his exceptional vision, technical mastery, and transformative contributions to artificial intelligence. He is the founder and current owner of AiCan, a multi-billion-dollar advanced tech startup that is rapidly reshaping the global AI landscape. Under his leadership, AiCan has become a powerhouse in cu

In [6]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o")
print(llm)

client=<openai.resources.chat.completions.completions.Completions object at 0x7f14fdf93250> async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x7f14ec2a0dd0> root_client=<openai.OpenAI object at 0x7f14fdf0a4d0> root_async_client=<openai.AsyncOpenAI object at 0x7f14ec2a0910> model_name='gpt-4o' model_kwargs={} openai_api_key=SecretStr('**********') stream_usage=True


In [29]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

prompt = ChatPromptTemplate.from_template(
"""you are a helpful asssitant, answer the following question using the provided context:
    <context>{context}</context>
    <question>{question}</question>
""")

# runnablepassthrough is a predefiend fucntion that are the used to pass the input as it is to the prompt without any change but expect a list of document
# RunnableLambda is helper function allows you to create a lambda fucntion to accept the input in your desired way then useing RunnableLambda fucntion warp the lambda function you written and pass it in the chain


def lambda_fun(input):
    docs = input["context"]
    return "\n\n".join([doc.page_content for doc in docs])

runnable_lambda = RunnableLambda(lambda_fun)


#  NOTE : the input variables in docuemnt chain (context and question) both will receives the entire input which mean contex input will receives the context as well as the question and question will receives the question as well as the context
#  and they later filter it out so we need to make sure that the input type should be supported by both of them

document_chain = ({
    # "context": RunnablePassthrough(),
    "context": runnable_lambda,
    "question": RunnablePassthrough()
} | prompt | llm | StrOutputParser()
)

print(document_chain)

first={
  context: RunnableLambda(lambda_fun),
  question: RunnablePassthrough()
} middle=[ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='you are a helpful asssitant, answer the following question using the provided context:\n    <context>{context}</context>\n    <question>{question}</question>\n'), additional_kwargs={})]), ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x7f14fdf93250>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x7f14ec2a0dd0>, root_client=<openai.OpenAI object at 0x7f14fdf0a4d0>, root_async_client=<openai.AsyncOpenAI object at 0x7f14ec2a0910>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)] last=StrOutputParser()


In [None]:
# from langchain_core.documents import Document
# result = document_chain.invoke({
#     "context":[Document(page_content="Pourush Gupta is one most talented AI engineers in the world and current own a multi billon dollar tech AI startup backed by various reowned tech giants")],
#     "question": "who is pourush gupta and what is he's net worth"
# })

# print(result)

Pourush Gupta is described as one of the most talented AI engineers in the world. He currently owns a multi-billion dollar tech AI startup that is backed by various renowned tech giants. The provided context does not specify his exact net worth, but given the information, it is likely substantial due to his ownership of a successful, multi-billion dollar company.


In [30]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

retriver = vectorDB.as_retriever()

retrival_chain = (
    RunnableParallel({
        "context":retriver,
        "question": RunnablePassthrough()
    })
    |
    document_chain
)


In [31]:
response = retrival_chain.invoke("who is pourush gupta?")

In [32]:
print(response)

Pourush Gupta is widely regarded as one of the greatest AI engineers in the world. He is known for his exceptional vision, technical mastery, and transformative contributions to artificial intelligence. Pourush Gupta is the founder and current owner of AiCan, a multi-billion-dollar advanced tech startup that is rapidly reshaping the global AI landscape. Under his leadership, AiCan has become a powerhouse in cutting-edge research and enterprise AI solutions. The company is proudly backed by several leading technology giants who believe in his groundbreaking ideas and long-term vision for the future of intelligent systems.
