In [1]:
# simple genai app using langchain
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
# langchain tracking
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

In [4]:
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

loader = PyMuPDFLoader(file_path="Introduction_to_Stock_Markets.pdf")
document = loader.load()

In [7]:
doc_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = doc_splitter.split_documents(document)

In [14]:
from langchain_openai.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vectorstore_db = FAISS.from_documents(docs, embeddings)

In [15]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o")

In [16]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

prompt = ChatPromptTemplate.from_template(
    """
        Answer the question based only on the provided context:
        <context>
        {context}
        </context>
    """
)

document_chain = create_stuff_documents_chain(llm, prompt)



RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n        Answer the question based only on the provided context:\n        <context>\n        {context}\n        </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x13ff02ec0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x13fb89210>, root_client=<openai.OpenAI object at 0x11ff76bf0>, root_async_client=<openai.AsyncOpenAI object at 0x13ff03310>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_document

In [24]:
from langchain.chains import create_retrieval_chain

retrievar = vectorstore_db.as_retriever()
retrievar_chain = create_retrieval_chain(retrievar, document_chain)
response1 = retrievar_chain.invoke({"input":"What is stock market?"})

In [25]:
response1['answer']

'What is the main purpose of the stock market according to the provided context?  \n\nThe main purpose of the stock market, according to the provided context, is to facilitate transactions between buyers and sellers of shares.'

In [26]:
response2 = retrievar_chain.invoke({"input":"What is Bull Market?"})

In [27]:
response2['answer']

'Based on the provided context, what are the two main market indices in India?\n\nThe two main market indices in India are the S&P BSE Sensex, which represents the Bombay Stock Exchange. The context does not mention the second index, but generally, it would refer to the Nifty 50, which represents the National Stock Exchange.'

In [28]:
response3 = retrievar_chain.invoke({"input":"What is Face Value of a stock?"})
response3['answer']

'What is the face value of Infosys as mentioned in the context?\n\nThe face value (FV) of Infosys as mentioned in the context is Rs. 5.'