# Simple Gen AI App Using LangChain

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

# Re-export the settings loaded from .env into the process environment.
# os.environ only accepts strings, so assigning the None that os.getenv
# returns for an unset variable would raise TypeError; unset keys are
# therefore skipped instead of crashing the cell.
for _name in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT"):
    _value = os.getenv(_name)
    if _value is not None:
        os.environ[_name] = _value

## Langsmith Tracking
os.environ["LANGCHAIN_TRACING_V2"] = "true"

## Data Ingestion
- From the website we need to scrape the data

In [None]:

from langchain_community.document_loaders import WebBaseLoader

In [None]:
# Build a loader for the Wikipedia article that is split and embedded below
loader = WebBaseLoader(
    "https://en.wikipedia.org/wiki/World_Wide_Web"
)
loader

In [None]:
# Run the loader; the result is indexed and split into chunks in later cells
docs = loader.load()
docs

In [None]:
# Inspect the text content of the first loaded document
docs[0].page_content

### Load Data --> Docs --> Divide our documents into chunked documents --> Text --> Vectors --> Vector Embeddings --> Vector Store DB

In [None]:

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded docs into overlapping chunks (chunk_size=1000, chunk_overlap=200)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(docs)

In [None]:
# Inspect the chunks produced by the text splitter
documents

In [None]:
from langchain_openai import OpenAIEmbeddings
# Embedding model handed to the vector store when it is built
embeddings = OpenAIEmbeddings()

In [None]:
from langchain_community.vectorstores import FAISS

# Build the FAISS vector store from the chunks and the embedding model
vectorstoredb = FAISS.from_documents(documents, embeddings)

In [None]:
# Inspect the FAISS vector store object
vectorstoredb

In [None]:
## Query the vector DB

# similarity_search gives back the stored chunks that are similar to the query
query = "who invented World wide web ?"
result = vectorstoredb.similarity_search(query)

# Text of the first match
result[0].page_content

In [None]:
from langchain_openai import ChatOpenAI
# Chat model used by the document chain below
llm = ChatOpenAI(model="gpt-4o")

In [None]:
## Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# {context} carries the document information given to the LLM;
# {input} carries the question being asked.
prompt = ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>

Question: {input}
Answer:
"""
)

# document_chain is responsible for supplying the prompt template above
# with its context information.
document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

In [None]:
from langchain_core.documents import Document

# Supply the context by hand instead of retrieving it.
# The passage is split across adjacent literals purely for readability;
# Python joins them into the same single string.
document_chain.invoke(
    {
        "input": "who invented World wide web ?",
        "context": [
            Document(
                page_content=(
                    "The Web was invented by English computer scientist Tim Berners-Lee "
                    "while at CERN in 1989 and opened to the public in 1993. "
                    "It was conceived as a universal linked information system.[4][5][6] "
                    "Documents and other media content are made available to the network "
                    "through web servers and can be accessed by programs such as web browsers. "
                    "Servers and resources on the World Wide Web are identified and located "
                    "through character strings called uniform resource locators (URLs)."
                )
            )
        ],
    }
)

### However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [None]:
### Input ---> Retriever ---> vectorstoredb
# A retriever is an interface that gets the data from the DB,
# so we don't even have to run the similarity search ourselves.

vectorstoredb

In [None]:
from langchain.chains import create_retrieval_chain

# Expose the vector store through the retriever interface
retriever = vectorstoredb.as_retriever()

# Pair the retriever with document_chain, which is responsible for
# supplying the context information to the prompt
retrieval_chain = create_retrieval_chain(retriever, document_chain)


In [None]:
# Inspect the assembled retrieval chain
retrieval_chain

In [None]:
## Get the response from the LLM

response = retrieval_chain.invoke(
    {"input": "who invented World wide web and in which year ?"}
)

# Only the generated answer
response["answer"]

In [None]:

# Inspect the full response returned by the retrieval chain
response

In [None]:
# Show the entire context stored in the response under the 'context' key
response['context']