In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
open_api_key=os.getenv("OPENAI_API_KEY")
## Langsmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [3]:
## Data ingestion--From the website we need to scrap the data
from langchain_community.document_loaders import WebBaseLoader

In [6]:
loader=WebBaseLoader("https://smartcookie.in/core/teacher.php")

In [8]:
docs=loader.load()

In [10]:
#divide our text into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=50,chunk_overlap=5)
documents=text_splitter.split_documents(docs)

In [11]:
from langchain_openai import OpenAIEmbeddings
embedding=OpenAIEmbeddings(api_key=open_api_key)

In [12]:
from langchain_community.vectorstores import FAISS

In [14]:
vectorstore_db=FAISS.from_documents(documents,embedding)

In [15]:
vectorstore_db

<langchain_community.vectorstores.faiss.FAISS at 0x27dc1467160>

In [21]:
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(api_key=open_api_key,model="gpt-4o")

In [20]:
#lets perform similarity search
query="what is the role of parents in smartcookie"
result=vectorstore_db.similarity_search(query)
result[0].page_content

'in Smart Cookie system.'

In [22]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based on the provided context:
<context>
{context}
</context>

"""
)
document_chain=create_stuff_documents_chain(llm,prompt)

In [23]:
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template='\nAnswer the following question based on the provided context:\n<context>\n{context}\n</context>\n\n'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x0000027DFD71B010>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x0000027DFD7C1270>, model_name='gpt-4o', openai_api_key=SecretStr('**********'), openai_proxy='')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [25]:
## Retriever
retriever=vectorstore_db.as_retriever()
from langchain.chains import create_retrieval_chain

In [26]:
retriever_chain=create_retrieval_chain(retriever,document_chain)

In [27]:
#get the response
response=retriever_chain.invoke({"input":"role of parents"})
response['answer']

'Based on the provided context, the teacher plays a crucial role in selecting the reason and method for the tasks. Additionally, the teacher has the authority to appoint a student coordinator.'

In [28]:
response

{'input': 'role of parents',
 'context': [Document(metadata={'source': 'https://smartcookie.in/core/teacher.php', 'title': '\n:: Smart Cookie -  Student/Teacher Rewards Program ::\n', 'language': 'en'}, page_content='has two tasks.'),
  Document(metadata={'source': 'https://smartcookie.in/core/teacher.php', 'title': '\n:: Smart Cookie -  Student/Teacher Rewards Program ::\n', 'language': 'en'}, page_content='Teacher is also one of the important entities in'),
  Document(metadata={'source': 'https://smartcookie.in/core/teacher.php', 'title': '\n:: Smart Cookie -  Student/Teacher Rewards Program ::\n', 'language': 'en'}, page_content='selecting the reason and method. Once the teacher'),
  Document(metadata={'source': 'https://smartcookie.in/core/teacher.php', 'title': '\n:: Smart Cookie -  Student/Teacher Rewards Program ::\n', 'language': 'en'}, page_content='Teacher can appoint student coordinator who is')],
 'answer': 'Based on the provided context, the teacher plays a crucial role in