### Loading Libraries


In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.llms import Ollama




In [6]:
# Read the PDFs found under the local "papers" directory into `documents`.
loader = PyPDFDirectoryLoader(path="papers")
documents = loader.load()

In [7]:
# Sanity check: number of documents returned by `loader.load()`.
len(documents)

1219

In [8]:
# Split the loaded documents into overlapping pieces for embedding:
# chunk_size=1000 with chunk_overlap=200 shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

In [9]:
# Sanity check: number of chunks produced by `text_splitter.split_documents`.
len(chunks)

3841

In [2]:
import torch

# Embedding model used both to build and to query the vector store.
# Keep using the first GPU when one is present, but fall back to the CPU so the
# notebook still runs on machines without CUDA instead of failing at load time.
embedding_device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = SentenceTransformerEmbeddings(
    model_name="mukaj/fin-mpnet-base",
    model_kwargs={"device": embedding_device},
)

In [3]:
# Open the persisted Chroma store; build it from `chunks` only when it is empty.
# Previously the build step was commented out, so a fresh checkout (no
# ./chroma_db yet) silently produced an empty store and retrieval returned
# no context. Guarding the build keeps re-runs cheap while making a clean
# Restart & Run All work.
CHROMA_DIR = "./chroma_db"
db = Chroma(persist_directory=CHROMA_DIR, embedding_function=model)
if not db.get(limit=1)["ids"]:
    # Nothing indexed yet: embed and store every chunk (requires the
    # load/split cells above to have been run).
    db = Chroma.from_documents(
        documents=chunks,
        embedding=model,
        persist_directory=CHROMA_DIR,
    )

In [48]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# LLM served through Ollama; the stdout callback handler is attached so that
# generated tokens are written to the cell output.
llm = Ollama(
    model="mistral-7b-instruct-v0.2",
    callbacks=[StreamingStdOutCallbackHandler()],
)

## Retrieval + Generation

In [5]:
# Wrap the vector store as a retriever, asking for k=5 results per query.
retriever = db.as_retriever(
    search_kwargs={"k": 5},
)

In [12]:
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import langchain
#langchain.debug = False

# Instruction-style prompt with two placeholders: {context} (retrieved text)
# and {input} (the user's question).
# Fix: the original had a stray double quote after "[/INST]" that was sent to
# the model as part of the prompt.
template = """<s>[INST] Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {input}
Answer: [/INST]
"""
# `llm` is a plain text-completion model, so build a string prompt. A chat
# prompt template is flattened to text with a "Human: " role prefix when handed
# to a completion LLM, which would corrupt the leading "<s>[INST]" marker.
prompt = PromptTemplate.from_template(template)

In [13]:
def _join_page_contents(docs):
    """Concatenate the text of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieval-augmented chain: the incoming question is (a) sent to `retriever`,
# whose hits are joined into the prompt's {context}, and (b) passed through
# unchanged as {input}. The filled prompt goes to `llm`, and the result is
# parsed to a plain string.
# Fix: "context" used to be the literal string "context", so the retriever was
# never consulted (and a bare string is not a valid step in the mapping).
rag_chain = (
    {"context": retriever | _join_page_contents, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [26]:
# Earlier non-streaming experiment, left disabled.
# NOTE(review): str.find returns -1 when '[/INST]' does not occur in the
# response, so the slice below would start at index 6 and silently drop the
# first six characters -- presumably why the saved output underneath begins
# with "ocument". Guard the marker lookup before re-enabling.
#response = rag_chain.invoke("Give me the Cash From Investing Activities in 2023")
#print(response[response.find('[/INST]')+7:].replace(r'\n', '+ "\n" +'))

ocument does not provide the exact amount for Cash Flows from Investing Activities specifically for the year 2023. However, it shows that the net cash flow used in investing activities was 1,850,399 in 2023. Therefore, the total cash flows from investing activities would be the negative value of this number, which is -1,850,399.


In [39]:
query = "What is the Enterprise value to EBITDA ratio for 2023?"
# Stream the answer and print each piece as it arrives.
# Fix: the loop variable used to be named `chunks`, which overwrote the list of
# split documents created earlier in the notebook; any later cell that needs
# those chunks (e.g. rebuilding the vector store) would then receive a string.
# NOTE(review): `llm` is created with a stdout streaming callback; if that
# handler is active for this chain, tokens may be echoed twice -- confirm and
# keep only one of the two printing mechanisms.
for piece in rag_chain.stream(query):
    print(piece, end='', flush=True)


 To calculate the Enterprise Value (EV) to Earnings Before Interest, Taxes, Depreciation, and Amortization (EBITDA) ratio, we need additional information such as total debt and cash or cash equivalents. The provided context only gives financial statements with revenue, profit before taxation, profit after taxation, and earnings per share. Therefore, we cannot calculate the EV to EBITDA ratio directly from the given context.