In [13]:
from langchain_community.document_loaders import PyPDFLoader
import tqdm
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain_chroma import Chroma
from langchain_experimental.text_splitter import SemanticChunker

load_dotenv()

True

In [10]:
!pip install langchain-experimental

Collecting langchain-experimental
  Downloading langchain_experimental-0.0.64-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-community<0.3.0,>=0.2.10 (from langchain-experimental)
  Downloading langchain_community-0.2.11-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-core<0.3.0,>=0.2.27 (from langchain-experimental)
  Downloading langchain_core-0.2.28-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain<0.3.0,>=0.2.12 (from langchain-community<0.3.0,>=0.2.10->langchain-experimental)
  Downloading langchain-0.2.12-py3-none-any.whl.metadata (7.1 kB)
Downloading langchain_experimental-0.0.64-py3-none-any.whl (204 kB)
Downloading langchain_community-0.2.11-py3-none-any.whl (2.3 MB)
   ---------------------------------------- 0.0/2.3 MB ? eta -:--:--
   ---------------------------------------- 2.3/2.3 MB 21.7 MB/s eta 0:00:00
Downloading langchain_core-0.2.28-py3-none-any.whl (379 kB)
Downloading langchain-0.2.12-py3-none-any.whl (990 kB)
   ----------------------------

In [37]:
loader = PyPDFLoader('sample1.pdf')
documents = loader.load()

OPENAI

In [6]:
# os.environ['OPENAI-KEY'] = os.getenv('OPENAI_KEY')
# openai = ChatOpenAI(model='gpt-3.5-turbo', api_key=os.environ['OPENAI-KEY'])

HuggingFace 

In [7]:
##

GROQ LLAMA-3

In [29]:
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_KEY')
groq_model = ChatGroq(name='llama3-8b-8192', api_key=os.environ['GROQ_API_KEY'], temperature=0.75)

Splitting the document

In [38]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 4000, chunk_overlap = 250)
# text_splitter = SemanticChunker()
docs = text_splitter.split_documents(documents)

In [39]:
docs

[Document(metadata={'source': 'sample1.pdf', 'page': 0}, page_content='To,\t \t\t\t\t\t\t\t\t Date:\tMay\t24,\t2024\t\nDeputy\tGeneral \tManager \t\nDept.\tof\tCorporate \tServices, \t\nBSE\tLimited \t\nP\tJ\tTowers,\t Dalal\tStreet,\t\nMumbai ‐\t400021. \t\n Ref:\tScrip\tCode\tNo.:\t511463 \t(BSE)\t\n \nSub:\tOutcome \tof\tthe\tBoard\tMeeting\theld\ton\tMay\t24,\t2024\t–\tApproval \tof\tAudited\t\nAnnual\tFinancial \tStatements\t &\tAudited\tFi\n nancial\tResults\tof\tthe\tCompany \tfor\tthe\t\nFinancial \tYear\tended\tMarch\t31,\t2024(2023 ‐24)\t\n \nListing\tRegulation: \tDisclosure \tunder\tReg\t30read\twith\tPara\tA(4)\t of\tPart\tA\tof\t\nSchedule \tIII,\tReg.\t33\t&\tall\tthe\tapplicable \tRegulations, \tif\tany,\tof\tthe\tSEBI\t(LODR)\t\nRegulations, \t2015(Listing \tRegulations) \tas\tamended\tfrom \ttime\tto\tti me. \t\n_________________________________________________________________________________________________ \t\n\tDear Sir/Madam,   With reference to the above-mentione

In [40]:
print(docs[6].page_content)

(b) Diluted                 (0.0084)                            (0.0003)                  0.01             (0.011)                     (0.02)
 NOTES :  
1
2
3
4
5
6
For Alexander Stamps & Coin Limited
Anirudh Sethi
Managing Director
DIN:06864789
Place: Vadodara Place: Vadodara
Date: 24/05/2024 Date: 24/05/2024STATEMENT OF STANDALONE AUDITED FINANCIAL RESULTS FOR QUARTER AND YEAR ENDED MARCH 31 2024.
The above result has been audited by Statutory auditor, recommended by audit committee and approved by the Board of Director of the CompanyAlexander Stamps & Coin Limited
CIN: L74110GJ1992PLC083816
Regd.Office : SF-7, Silver Rock  Complex, Nr. Dairy Teen Rasta, Manjalpur, Vadodara-390010, Gujarat, India.
Quarter Ended Year Ended
Due to non-payment of income tax demand and filling of appeal against the demand for the Assessment Year 2017-2018, CBDT Freeze the bank
account of company. Investments as stated in Non-Current Investments amounting to INR 113.67/- Lakhs, the requisite documents wit

In [15]:
# Embeddings model 
embedding = HuggingFaceEmbeddings(model_name = 'BAAI/bge-base-en-v1.5')

  from tqdm.autonotebook import tqdm, trange


CHROMA VECTOR DB

In [41]:
vectorstore = Chroma.from_documents(documents = docs, embedding=embedding)

In [17]:
from langchain_core.prompts import ChatPromptTemplate
prompt_template = ChatPromptTemplate.from_template(
    """
    You are a financial assistant that generates responses to queries based on provided financial reports. Answer the following question strictly based on the given context. Do not add any additional information or context.

    Format your output as follows:
    <format>{format}</format>
    If information is not retrievable, simply state 'Not Retrievable.'

    <context>{context}</context>
    Question: {input}
    """
)

In [21]:
## Context is autofilled with the help of chains 
# chains are sequence of calls to llm, tool or preprocessing step 

In [42]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

retriever = vectorstore.as_retriever()
document_chain = create_stuff_documents_chain(llm = groq_model, prompt = prompt_template)
bot = create_retrieval_chain(retriever, document_chain)

In [43]:
financial_result_output = """
    Company : 
    Total Income/ Revenue : 
    Total Profit before Tax :
    Total Profit after Tax : 
    """

prompt = 'What is the Total Income/Revenue, Profit before and after tax of the company in the latest quarter and the previous quarter? Give consolidated results.'
response = bot.invoke({'format' : financial_result_output, 'input' : prompt})

In [44]:
print(response['answer'])

<format>
Company : Brahmayya&co• 
Total Income/ Revenue : Not Retrievable for the latest quarter and Rs. 6,563 Lakhs for the previous quarter
Total Profit before Tax : Not Retrievable for the latest quarter and Rs. 1,175 Lakhs for the previous quarter
Total Profit after Tax : Not Retrievable for the latest quarter and Rs. 1,175 Lakhs for the previous quarter
</format>


In [76]:
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=groq_model, prompt=result_prompt)
# this chain will put the context retrieved from the model into the prompt and the model will generate a response from it

In [77]:
from langchain.chains import create_retrieval_chain
# This retriver is an interface to the vectorstore. It retrieves the documents from the document vector store using a similarity function and apends it into the prompt
retriever = vectorstore.as_retriever() # kwargs = {'k' : top_k} 
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001C6636A7AF0>)

In [70]:
agent = create_retrieval_chain(retriever, document_chain)

In [78]:
response = agent.invoke({
    'input' : 'What is the Total Income/Revenue that the company made in the latest quarter in comparison with the last quarter. What is its Net Proft before or after tax. '
})

In [79]:
print(response['answer'])

The company made a total income of Rs. 685.00 lakhs in the latest quarter (Half Year Ended 31-Mar-24) compared to Rs. 836.70 lakhs in the last quarter (Half Year Ended 30-Sep-23).

The net profit before tax for the latest quarter is Rs. 37.63 lakhs and the net profit after tax is Rs. 26.81 lakhs.


**Actor and Critic Agent Architecture** 

In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ['groq_key'] = os.getenv('GROQ_KEY')

In [22]:
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from typing import Annotated, Sequence, TypedDict, Literal
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_huggingface import HuggingFaceEmbeddings

In [7]:
embedding = HuggingFaceEmbeddings(model_name = 'BAAI/bge-base-en-v1.5')

  from tqdm.autonotebook import tqdm, trange


In [8]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader('sample.pdf')
document = loader.load()

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 2000, chunk_overlap = 200)
docs = text_splitter.split_documents(document)

In [12]:
from langchain_chroma import Chroma
vectorstore = Chroma.from_documents(documents = docs, embedding=embedding)

In [18]:
from langchain.tools.retriever import create_retriever_tool

## To use a RAG model in Actor-Critic model, we need to build a retriever tool
retriever = vectorstore.as_retriever()
retriever_tool = create_retriever_tool(
    retriever, 
    name="Financial_Info_Retriver", 
    description = "Retrieve information from the provided financial documents like quarterly financial results and order receipt."
)
tools = [retriever_tool]

In [21]:
from langchain_core.messages import BaseMessage

class AgentState(TypedDict):
    messages : Annotated[Sequence[BaseMessage], add_messages]

In [None]:
from langchain_core.prompts import PromptTemplate

def grader(state : AgentState) -> Literal["generate", "rewrite"]:
    llm = ChatGroq(model='llama3-8b-8192', api_key=os.environ['groq_key'], streaming=True)
    llm_with_tool = llm.with_stru

    



In [1]:
!marker_single sample1.pdf results/ --batch_multiplier 2 --max_pages 10 --langs English

^C


Loaded detection model vikp/surya_det3 on device cpu with dtype torch.float32
Loaded detection model vikp/surya_layout3 on device cpu with dtype torch.float32
Loaded reading order model vikp/surya_order on device cpu with dtype torch.float32
Loaded recognition model vikp/surya_rec on device cpu with dtype torch.float32
Loaded texify model to cpu with torch.float32 dtype
Saved markdown to the results/sample1 folder



Detecting bboxes:   0%|          | 0/2 [00:00<?, ?it/s]
Detecting bboxes:  50%|█████     | 1/2 [00:27<00:27, 27.55s/it]
Detecting bboxes: 100%|██████████| 2/2 [00:33<00:00, 14.66s/it]
Detecting bboxes: 100%|██████████| 2/2 [00:33<00:00, 16.59s/it]

Detecting bboxes:   0%|          | 0/1 [00:00<?, ?it/s]
Detecting bboxes: 100%|██████████| 1/1 [00:18<00:00, 18.90s/it]
Detecting bboxes: 100%|██████████| 1/1 [00:18<00:00, 18.90s/it]

Finding reading order:   0%|          | 0/1 [00:00<?, ?it/s]
Finding reading order: 100%|██████████| 1/1 [00:38<00:00, 38.40s/it]
Finding reading order: 100%|██████████| 1/1 [00:38<00:00, 38.40s/it]
