In [1]:
import os

# Move up to the project root, but only when we are not already there.
# An unconditional chdir climbs one more level on every re-run of this cell.
# The "data" folder (read by the loading cell further down) is used as the
# marker for the project root -- assumes the notebook's own folder has no
# "data" directory of its own; TODO confirm.
if not os.path.isdir("data"):
    os.chdir("../")

In [13]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.ollama import Ollama
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

In [4]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and parsed with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for that directory.
    """
    pdf_loader = DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf")
    return pdf_loader.load()

In [5]:
# Read the PDFs under data/ (relative to the current working directory).
extracted_data = load_pdf_file(data="data/")

In [7]:
# Preview only the first few documents; displaying the whole list floods the
# notebook output and bloats the saved file.
extracted_data[:3]

[Document(metadata={'source': 'data\\1InvestmentandPortfolioManagement.pdf', 'page': 0}, page_content='PONDICHERRY UNIVERSITY\n(A Central University)\nDIRECTORATE OF DISTANCE EDUCATION\nInvestment and Portfolio Management\nMBA - FINANCE        Paper Code : MBFM 4001\nMBA - GENERAL        Paper Code : MBGN 4001\n  \nIV - Semester'),
 Document(metadata={'source': 'data\\1InvestmentandPortfolioManagement.pdf', 'page': 1}, page_content='Author\nDR. R. Kasilingam\nReader,\nDepartment of Management Studies,\nPondicherry University\nPuducherry\nAll Rights Reserved\nFor Private Circulation Only\nISBN 978-93-81932-13-1'),
 Document(metadata={'source': 'data\\1InvestmentandPortfolioManagement.pdf', 'page': 2}, page_content='             TABLE OF CONTENTS\nUNIT LESSON TITLE PAGE NO.\nI\n1.1  Investment 3\n1.2  Investment Alternatives 12\n1.3 Securities Market 27\n1.4 Stock Exchange 41\nII\n2.1 Fundamental Security Analysis - Economic Analysis 71\n2.2 Industry Analysis 81\n2.3 Company Analysis 91\

In [8]:
def text_split(extracted_data, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split, passed straight to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Chunk size given to the splitter. Defaults to 1000.
        chunk_overlap: Overlap between consecutive chunks given to the
            splitter. Defaults to 200.

    Returns:
        The chunks produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [9]:
# Chunk the loaded documents and report how many pieces were produced.
text_chunks = text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 6249


In [10]:
# Preview only the first few chunks; the full list holds thousands of
# entries and would flood the notebook output.
text_chunks[:3]

[Document(metadata={'source': 'data\\1InvestmentandPortfolioManagement.pdf', 'page': 0}, page_content='PONDICHERRY UNIVERSITY\n(A Central University)\nDIRECTORATE OF DISTANCE EDUCATION\nInvestment and Portfolio Management\nMBA - FINANCE        Paper Code : MBFM 4001\nMBA - GENERAL        Paper Code : MBGN 4001\n  \nIV - Semester'),
 Document(metadata={'source': 'data\\1InvestmentandPortfolioManagement.pdf', 'page': 1}, page_content='Author\nDR. R. Kasilingam\nReader,\nDepartment of Management Studies,\nPondicherry University\nPuducherry\nAll Rights Reserved\nFor Private Circulation Only\nISBN 978-93-81932-13-1'),
 Document(metadata={'source': 'data\\1InvestmentandPortfolioManagement.pdf', 'page': 2}, page_content='TABLE OF CONTENTS\nUNIT LESSON TITLE PAGE NO.\nI\n1.1  Investment 3\n1.2  Investment Alternatives 12\n1.3 Securities Market 27\n1.4 Stock Exchange 41\nII\n2.1 Fundamental Security Analysis - Economic Analysis 71\n2.2 Industry Analysis 81\n2.3 Company Analysis 91\nIII\n3.1 Tec

In [14]:
# Embedding model used for the document chunks and for query strings.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

  embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


NOTE: the `all-MiniLM-L6-v2` model produces 384-dimensional embeddings (verified by the check in the next cell).

In [15]:
# Sanity check: embed a short string and print the length of the resulting vector.
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [16]:
# Build the Chroma index only when ./db is missing or empty.
# Chroma.from_documents adds the chunks to whatever collection already lives
# in persist_directory, so re-running this cell against an existing ./db
# would store every chunk again. Reuse the persisted index instead; delete
# ./db to force a rebuild after the source PDFs change.
if os.path.isdir("./db") and os.listdir("./db"):
    vectordb = Chroma(persist_directory="./db", embedding_function=embeddings)
else:
    vectordb = Chroma.from_documents(
        text_chunks,
        embedding=embeddings,
        persist_directory="./db",
    )

In [17]:
# Explicitly flush the index to the persist directory.
# NOTE(review): this call emits a warning in the recorded output -- it may be
# deprecated in the installed version; confirm whether it is still required.
vectordb.persist()

  vectordb.persist()


In [18]:
# Re-open the persisted index from ./db and expose it as a retriever.
# - embedding_function is passed explicitly so queries are embedded with the
#   same model that was used to build the index.
# - The number of results goes in search_kwargs; a bare `k=3` keyword on
#   as_retriever() is not applied to the search.
# NOTE: despite its name, `vectordb` refers to a retriever from here on.
vectordb = Chroma(
    persist_directory="./db",
    embedding_function=embeddings,
).as_retriever(search_type="similarity", search_kwargs={"k": 3})

  vectordb = Chroma(


In [30]:
# LLM client created through the Ollama wrapper; the model identifier is passed verbatim.
llm = Ollama(model="hf.co/sathvik123/llama3-finnace-chat-gguf")

In [24]:
# Raw prompt text. `{query}` is the single placeholder; it is filled in with
# the user's question when the template is formatted.
my_prompt = """
### Instruction:
You are a financial advisor chatbot that provides clear, accurate, and helpful responses to financial questions.Explain financial concepts in simple terms, offer relevant examples when appropriate, and ensure your answers are concise, objective, and tailored to the user's query.

### Query:
{query}
"""

In [26]:
# Turn the raw prompt text into a reusable template object.
prompt_template = ChatPromptTemplate.from_template(my_prompt)

In [27]:
# Sample question used to try out the prompt and the LLM.
query_text = "What is bank"

In [28]:
# Fill the `{query}` placeholder with the sample question.
# NOTE(review): ChatPromptTemplate.format() flattens chat messages into one
# string and may prepend a role label (e.g. "Human: ") -- confirm that is
# acceptable for this model.
prompt = prompt_template.format(query=query_text)

In [31]:
# Send the formatted prompt to the LLM and keep its reply.
response_text = llm.invoke(prompt)

In [32]:
# Display the reply as the cell's value (shown as a string repr, so line
# breaks appear as "\n").
response_text

"Step 1: Understand what a bank is\nA bank is an institution that provides financial services to individuals and businesses. Its primary function is to manage money by accepting deposits, making loans, and facilitating payments.\n\nStep 2: Know the different types of banks\nThere are several types of banks, including:\n\n- Commercial banks: These banks provide basic banking services such as accepting deposits, making loans, and processing transactions.\n- Investment banks: These banks specialize in providing financial advice and executing financial transactions related to mergers and acquisitions, IPOs, and other corporate finance activities.\n- Central banks: These banks serve as the country's monetary authority and are responsible for managing the money supply, regulating the banking system, and implementing monetary policies.\n- Online banks or digital banks: These banks operate entirely online, offering electronic banking services and eliminating traditional branch networks.\n\nSte