In [5]:
## Data ingestion: read the plain-text speech file through LangChain's TextLoader
from langchain_community.document_loaders import TextLoader

loader = TextLoader(file_path="speech.txt")
text_documents = loader.load()

In [6]:
# Print the list returned by loader.load() for a quick visual check of its contents.
print(text_documents)

[Document(page_content='Sisters and Brothers of America,\nIt fills my heart with joy unspeakable to rise in response to the warm and cordial welcome which you have given us. I thank you in the name of the most ancient order of monks in the world, I thank you in the name of the mother of religions, and I thank you in the name of millions and millions of Hindu people of all classes and sects.\nMy thanks, also, to some of the speakers on this platform who, referring to the delegates from the Orient, have told you that these men from far-off nations may well claim the honor of bearing to different lands the idea of toleration. I am proud to belong to a religion which has taught the world both tolerance and universal acceptance. We believe not only in universal toleration, but we accept all religions as true. I am proud to belong to a nation which has sheltered the persecuted and the refugees of all religions and all nations of the earth. I am proud to tell you that we have gathered in our 

In [8]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

openai_key = os.getenv("OPENAI_API_KEY")

# os.getenv returns None when the variable is missing, and assigning None into
# os.environ raises an opaque "str expected, not NoneType" TypeError. Fail early
# with an actionable message instead.
if not openai_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

os.environ["OPENAI_API_KEY"] = openai_key

In [20]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Load a Wikipedia article and keep only the article body.
# Wikipedia renders the article text inside the element with id
# "mw-content-text"; restricting parsing to it drops navigation, sidebar and
# footer markup. (Class names such as "post-title"/"post-content" belong to
# blog templates and match nothing on Wikipedia, which yields empty documents.)
loader = WebBaseLoader(
    web_paths=["https://en.wikipedia.org/wiki/Regression_analysis"],
    bs_kwargs=dict(parse_only=bs4.SoupStrainer(id="mw-content-text")),
)
# NOTE: this rebinds `loader` and `text_documents` from the TextLoader cell.
text_documents = loader.load()

# Guard against a silent empty scrape (e.g. if the page markup changes).
assert text_documents and text_documents[0].page_content.strip(), (
    "WebBaseLoader returned no text; check the SoupStrainer filter"
)


In [55]:
from langchain_community.document_loaders import PyPDFLoader

# Read the PDF from the working directory; `docs` is what the splitter consumes later.
loader = PyPDFLoader(file_path='mov1.pdf')
docs = loader.load()

Ignoring wrong pointing object 6 0 (offset 0)


In [65]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded PDF documents into chunks of at most 1000 characters,
# with 20 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
documents = text_splitter.split_documents(docs)

In [66]:
## Vector embedding and vector store
# OpenAIEmbeddings is imported from langchain_openai (the package this notebook
# already uses for ChatOpenAI); the langchain_community.embeddings copy of the
# class is deprecated in favour of it.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Embed every chunk and index the vectors in a Chroma store.
db = Chroma.from_documents(documents, OpenAIEmbeddings())

In [67]:
# Look up the chunks in the Chroma store that are closest to the question.
query = "who wrote merchant of venice"
result = db.similarity_search(query=query)
print(result)

[Document(page_content='1\xa0\n\xa0\nSummary\xa0of\xa0“The\xa0Merchant\xa0of\xa0Venice”\xa0\nby\xa0William\xa0Shakespeare\xa0\nIntroduction:\xa0\nThe\xa0play\xa0takes\xa0place\xa0in\xa0Venice,\xa0Italy\xa0and\xa0Belmont,\xa0Portia’s\xa0home,\xa0during\xa0the\xa0fourteenth\xa0century.\xa0\xa0\nThe\xa0main\xa0characters\xa0are\xa0Antonio,\xa0Bassiano,\xa0and\xa0Shylock.\xa0\xa0Antonio\xa0is\xa0a\xa0merchant\xa0and\xa0fair\xa0money‐\nlender,\xa0but\xa0he\xa0currently\xa0is\xa0overinvested.\xa0\xa0Bassiano\xa0is\xa0in\xa0debt.\xa0\xa0Both\xa0Antonio\xa0and\xa0Bassino\xa0are\xa0\nChristians.\xa0\xa0Shylock\xa0is\xa0a\xa0Jewish\xa0money‐lender.\xa0\xa0All\xa0of\xa0these\xa0characters\xa0are\xa0worried\xa0about\xa0money.\xa0\xa0\xa0\xa0\nAct\xa0I\xa0–\xa0The\xa0Agreement\xa0\nBassiano\xa0confesses\xa0his\xa0love\xa0for\xa0Portia\xa0to\xa0Antonio.\xa0\xa0He\xa0explains\xa0Antonio\xa0that\xa0he\xa0has\xa0been\xa0unwise\xa0\nwith\xa0money\xa0throughout\xa0his\xa0life,\xa0and\xa0that\xa0he\xa0is\

In [68]:
# Display the text of the first (index 0) document returned by the similarity search.
result[0].page_content

'1\xa0\n\xa0\nSummary\xa0of\xa0“The\xa0Merchant\xa0of\xa0Venice”\xa0\nby\xa0William\xa0Shakespeare\xa0\nIntroduction:\xa0\nThe\xa0play\xa0takes\xa0place\xa0in\xa0Venice,\xa0Italy\xa0and\xa0Belmont,\xa0Portia’s\xa0home,\xa0during\xa0the\xa0fourteenth\xa0century.\xa0\xa0\nThe\xa0main\xa0characters\xa0are\xa0Antonio,\xa0Bassiano,\xa0and\xa0Shylock.\xa0\xa0Antonio\xa0is\xa0a\xa0merchant\xa0and\xa0fair\xa0money‐\nlender,\xa0but\xa0he\xa0currently\xa0is\xa0overinvested.\xa0\xa0Bassiano\xa0is\xa0in\xa0debt.\xa0\xa0Both\xa0Antonio\xa0and\xa0Bassino\xa0are\xa0\nChristians.\xa0\xa0Shylock\xa0is\xa0a\xa0Jewish\xa0money‐lender.\xa0\xa0All\xa0of\xa0these\xa0characters\xa0are\xa0worried\xa0about\xa0money.\xa0\xa0\xa0\xa0\nAct\xa0I\xa0–\xa0The\xa0Agreement\xa0\nBassiano\xa0confesses\xa0his\xa0love\xa0for\xa0Portia\xa0to\xa0Antonio.\xa0\xa0He\xa0explains\xa0Antonio\xa0that\xa0he\xa0has\xa0been\xa0unwise\xa0\nwith\xa0money\xa0throughout\xa0his\xa0life,\xa0and\xa0that\xa0he\xa0is\xa0in\xa0need\xa0of\xa0

In [69]:
from langchain_community.vectorstores import FAISS

# Build a second index over the same chunks, this time backed by FAISS.
db_faiss = FAISS.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
)


In [70]:
# Run a similarity search against the FAISS index and show the first hit's text.
query = "Name the characters of mentioned in the pdf"
result = db_faiss.similarity_search(query=query)
result[0].page_content

'1\xa0\n\xa0\nSummary\xa0of\xa0“The\xa0Merchant\xa0of\xa0Venice”\xa0\nby\xa0William\xa0Shakespeare\xa0\nIntroduction:\xa0\nThe\xa0play\xa0takes\xa0place\xa0in\xa0Venice,\xa0Italy\xa0and\xa0Belmont,\xa0Portia’s\xa0home,\xa0during\xa0the\xa0fourteenth\xa0century.\xa0\xa0\nThe\xa0main\xa0characters\xa0are\xa0Antonio,\xa0Bassiano,\xa0and\xa0Shylock.\xa0\xa0Antonio\xa0is\xa0a\xa0merchant\xa0and\xa0fair\xa0money‐\nlender,\xa0but\xa0he\xa0currently\xa0is\xa0overinvested.\xa0\xa0Bassiano\xa0is\xa0in\xa0debt.\xa0\xa0Both\xa0Antonio\xa0and\xa0Bassino\xa0are\xa0\nChristians.\xa0\xa0Shylock\xa0is\xa0a\xa0Jewish\xa0money‐lender.\xa0\xa0All\xa0of\xa0these\xa0characters\xa0are\xa0worried\xa0about\xa0money.\xa0\xa0\xa0\xa0\nAct\xa0I\xa0–\xa0The\xa0Agreement\xa0\nBassiano\xa0confesses\xa0his\xa0love\xa0for\xa0Portia\xa0to\xa0Antonio.\xa0\xa0He\xa0explains\xa0Antonio\xa0that\xa0he\xa0has\xa0been\xa0unwise\xa0\nwith\xa0money\xa0throughout\xa0his\xa0life,\xa0and\xa0that\xa0he\xa0is\xa0in\xa0need\xa0of\xa0

In [83]:
from langchain_openai import ChatOpenAI
# Chat model handed to create_stuff_documents_chain further down the notebook.
llm = ChatOpenAI(model="gpt-4o-mini")
# Bare expression: shows the configured model's repr as the cell output.
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x11930e3a0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x119311d30>, model_name='gpt-4o-mini', openai_api_key=SecretStr('**********'), openai_proxy='')

In [85]:
from langchain_core.prompts import ChatPromptTemplate
# Prompt template for the question-answering chain. It has two placeholders:
#   {context} - filled with the retrieved document text (wrapped in <context> tags)
#   {input}   - the user's question
# The instructions tell the model to answer only from the supplied context.
prompt = ChatPromptTemplate.from_template(
    """ 
Answer the following question based only on the provided context.
Think step by step before providing the detailed answer.
<context>
{context}
</context>
Question:{input}
"""
)

In [86]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Chain that feeds documents into the prompt and passes the result to the chat model.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [87]:
# Expose the Chroma vector store `db` through the retriever interface used by the retrieval chain.
retriever = db.as_retriever()

In [88]:
from langchain.chains import create_retrieval_chain

# Combine retrieval and answering: the retriever supplies documents and
# document_chain turns them plus the question into an answer.
retriver_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [90]:
# Ask the retrieval chain a question; the chain's result is a mapping whose
# "answer" entry holds the generated text.
# The query spelling is corrected ("descrive" -> "describe") because this string
# is sent verbatim to both the retriever and the LLM.
response = retriver_chain.invoke({'input': "describe Act I  The Agreement"})
response["answer"]

'In Act I of "The Merchant of Venice," titled "The Agreement," the stage is set in Venice, where the main characters introduce their conflicts and desires. The act begins with Bassiano expressing his love for Portia, a wealthy heiress. He confides in his friend Antonio, explaining that he needs 3,000 gold coins to pursue her, as he feels he has been financially irresponsible in the past.\n\nAntonio, who is a merchant, is sympathetic to Bassiano’s plight but reveals that he is currently overinvested; his wealth is tied up in three trading ships that are out at sea. Despite his own financial challenges, Antonio agrees to help Bassiano. However, since he cannot provide the money himself, he resolves to borrow it from Shylock, a Jewish money-lender whom he despises. This sets the stage for the central conflict of the play, as the agreement between Antonio and Shylock will lead to significant consequences later in the story.\n\nOverall, this act establishes the themes of love, friendship, a