In [None]:
# Install the OpenAI client library.
# NOTE(review): nothing in the visible notebook imports `openai` — confirm this install is still needed.
!pip install openai

In [None]:
!pip install langchain rank_bm25 pypdf unstructured chromadb
!pip install unstructured['pdf'] unstructured


In [None]:
# Install (or upgrade, because of -U) the langchain-community package.
# NOTE(review): no version is pinned, so results may differ between runs of this notebook.
!pip install -U langchain-community

### Load the required packages

In [None]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain.llms import HuggingFaceHub


from langchain.retrievers import BM25Retriever, EnsembleRetriever

import os

In [None]:
### Load the PDF file

In [None]:
!apt-get install poppler-utils

In [None]:
# Tesseract OCR engine (system package) plus the pytesseract Python package.
# NOTE(review): presumably required by UnstructuredPDFLoader for OCR of scanned pages — confirm.
!apt-get install -y tesseract-ocr
!pip install pytesseract

In [None]:
# Load the source PDF with UnstructuredPDFLoader.
# NOTE: the path is an absolute Colab location; the file must exist at this
# path before the cell is run.
file_path = "/content/BarodaDigitalCarLoan.pdf"
data_file = UnstructuredPDFLoader(file_path)
# `docs` is the list of loaded documents consumed by the splitter cell below.
docs = data_file.load()

In [None]:
# Sanity check: show the text extracted from the first loaded document.
print(docs[0].page_content)

[{

"BarodaDigitalCarLoan": {

"Benefits": {

"PaperlessProcess": true,

"Financing": "Up to 93%",

"DirectDisbursement": true,

"PrepaymentCharges": {

"FloatingRate": {

"Charge": "None",

"Details": "No prepayment charges for Floating Rate of Interest"

}

},

"ForeclosureCharges": "None",

"InterestRates": "Attractive",

"ProcessingTime": "Quick",

"ProcessingCharges": "Concessional till 30.09.2024."

},

"Features": {

"EligibleApplicants": [

"Salaried employees",

"Self-employed professionals (Doctor, Engineer, etc.)",

"Self-employed business persons",

"Insurance agents"

],

"AgeCriteria": {

"Minimum": 21,

"Maximum": {

"Salaried": 58,

"SelfEmployed": 65

}

},

"InterestRateCalculation": "Daily reducing balance; based on credit score (minimum 701)",

"RepaymentTenure": "Flexible, 12 to 84 months",

"Collateral": "Hypothecation of financed vehicle"

},

"EligibilityCriteria": {

"Salaried": true,

"SelfEmployedProfessionals": true,

"SelfEmployedBusinessPersons": true

},


### Split Documents and Chunking

In [None]:
# Split the loaded documents into overlapping chunks for retrieval:
# chunk_size=800 with chunk_overlap=100 between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100,
)
chunks = splitter.split_documents(docs)

In [None]:
# Sanity check: display the raw text of the first chunk.
chunks[0].page_content

'[{\n\n"BarodaDigitalCarLoan": {\n\n"Benefits": {\n\n"PaperlessProcess": true,\n\n"Financing": "Up to 93%",\n\n"DirectDisbursement": true,\n\n"PrepaymentCharges": {\n\n"FloatingRate": {\n\n"Charge": "None",\n\n"Details": "No prepayment charges for Floating Rate of Interest"\n\n}\n\n},\n\n"ForeclosureCharges": "None",\n\n"InterestRates": "Attractive",\n\n"ProcessingTime": "Quick",\n\n"ProcessingCharges": "Concessional till 30.09.2024."\n\n},\n\n"Features": {\n\n"EligibleApplicants": [\n\n"Salaried employees",\n\n"Self-employed professionals (Doctor, Engineer, etc.)",\n\n"Self-employed business persons",\n\n"Insurance agents"\n\n],\n\n"AgeCriteria": {\n\n"Minimum": 21,\n\n"Maximum": {\n\n"Salaried": 58,\n\n"SelfEmployed": 65\n\n}\n\n},\n\n"InterestRateCalculation": "Daily reducing balance; based on credit score (minimum 701)",'

### Extra: Embedding Model

In [None]:
# Get Embedding Model from HF via API

# Read the Hugging Face token from the Colab `userdata` store so it is not
# hardcoded in the notebook. HF_TOKEN is reused later when the LLM is created.
from google.colab import userdata
HF_TOKEN = userdata.get('HUGGINGFACEHUB_API_TOKEN')

# Embedding client for the BAAI/bge-base-en-v1.5 model, authenticated with HF_TOKEN.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)

### VectorStore

In [None]:
# Vector store with the selected embedding model
# Chroma.from_documents is given every chunk together with the embedding client.
# NOTE(review): no persist directory is passed, so the index presumably lives
# only in this session — confirm.
vectorstore = Chroma.from_documents(chunks, embeddings)

In [None]:
# Embedding-similarity retriever over the Chroma store; k=3 results per query.
# NOTE: the variable name is misspelled ("retreiver"), and the ensemble
# retriever cell references it with this exact spelling — rename both together.
vectorstore_retreiver = vectorstore.as_retriever(search_kwargs={"k": 3})

In [None]:
# Keyword (BM25) retriever built over the same chunks as the vector store.
keyword_retriever = BM25Retriever.from_documents(chunks)
# Return 3 results, matching the k used for the vector retriever.
keyword_retriever.k =  3

### Ensemble Retriever

In [None]:
# Hybrid retrieval: combine the embedding-based and BM25 retrievers,
# giving each an equal weight of 0.5.
ensemble_retriever = EnsembleRetriever(
    retrievers=[vectorstore_retreiver, keyword_retriever],
    weights=[0.5, 0.5],
)

### Extra: LLM


In [None]:
# Text-generation LLM: HuggingFaceH4/zephyr-7b-beta accessed through
# HuggingFaceHub, authenticated with the same HF_TOKEN as the embeddings.
# model_kwargs sets temperature 0.3 and max_new_tokens 1024.
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.3,"max_new_tokens":1024},
    huggingfacehub_api_token=HF_TOKEN,
)

### Prompt Template:

In [None]:
# Prompt text with two placeholders: {context} (filled from the retriever) and
# {query} (the user's question).
# The literal "ANSWER:" marker matters: extract_answer() splits the model
# output on it, so changing the marker here requires changing it there too.
template = """
CONTEXT: {context}
</s>

QUERY: {query}
</s>

INSTRUCTIONS:
- Use only the information provided in the CONTEXT section to answer the QUERY.
- Do not provide information or answers outside of the given CONTEXT.

ANSWER:
The answer to the query is:
"""

In [None]:
# Build the prompt object from the template string above.
prompt = ChatPromptTemplate.from_template(template)
# Parser placed at the end of the chain, after the LLM.
output_parser = StrOutputParser()

In [None]:
# RAG pipeline. The input question is fed to the ensemble retriever to fill
# {context} and passed through unchanged (RunnablePassthrough) as {query};
# the filled prompt is sent to the LLM and its output goes through output_parser.
chain = (
    {"context": ensemble_retriever, "query": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

In [None]:
def extract_answer(response):
    """Return the answer portion of a raw chain response.

    The response is expected to contain the prompt's ``ANSWER:`` marker
    followed by the generated answer.

    Args:
        response: Raw string produced by the chain.

    Returns:
        Everything after the FIRST ``ANSWER:`` marker, stripped of surrounding
        whitespace. If the marker is absent, the whole response stripped.
    """
    # partition() splits on the first marker only, so an answer that itself
    # contains "ANSWER:" is kept whole instead of being cut at the second marker.
    _, marker, answer = response.partition('ANSWER:')
    if marker:
        return answer.strip()
    return response.strip()

In [None]:
# NOTE(review): StrOutputParser is already imported in the imports cell and is
# not used in this cell; this re-import is redundant.
from langchain_core.output_parsers import StrOutputParser
# Run the full chain, then keep only the text after the "ANSWER:" marker.
raw_response = chain.invoke("what is car loan ")
answer = extract_answer(raw_response)
print(answer)

The answer to the query is:

A car loan is a type of loan provided by financial institutions to individuals or non-individuals for the purpose of purchasing a car. The maximum loan amount for individuals is Rs. 200 lakh, and for non-individuals, it is Rs. 500 lakh, as mentioned in the CONTEXT section. The interest rate for a car loan varies based on factors such as income, debt, and credit score, and can be checked on the Interest Rate page. The loan can be applied for through various methods, such as branch visits, online applications, missed calls, or toll-free numbers, as stated in the CONTEXT section. Additionally, the CONTEXT section mentions that the minimum CIBIL score required for a car loan is 701, but CIBIL Score (-1) will also be considered.


In [None]:
# Second sample query: ask for a loan suggestion for a 20 lakh rupee purchase.
# Same invoke -> extract_answer -> print sequence as the previous query cell.
raw_response = chain.invoke("i want to purchases a car on loan which cost me 20 lakh rupees suggest a loan")
answer = extract_answer(raw_response)
print(answer)

The answer to the query is:

Based on the information provided in the CONTEXT section, Bank of Baroda offers car loans up to a maximum amount of Rs. 200 lakh for individuals. This loan amount should meet your requirement of purchasing a car for Rs. 20 lakh. You can apply for the loan through branch visit, online application, missed call, or toll-free numbers. The car loan interest rates may vary based on factors such as income, debt, and credit score. It is recommended to check the Interest Rate page for applicable rates. The loan processing charges are minimal, and there are no pre-closure charges after 1 year. The loan tenure can be flexible within 5 years. Additionally, Bank of Baroda offers attractive interest rates, minimal processing charges, and no pre-closure charges for its two-wheeler loans, which may also be of interest to you. For more information, you can refer to the FAQs section provided in the CONTEXT.


In [None]:
# Third sample query, phrased as a follow-up ("it").
# NOTE(review): the chain receives only this string — no earlier question or
# answer is passed in — so "it" has no explicit referent for the retriever.
raw_response = chain.invoke("what are the required documents for the it")
answer = extract_answer(raw_response)
print(answer)

The answer to the query is:

The required documents for the car loan, as mentioned in the CONTEXT section, are:

1. Photo ID with age proof (such as PAN card, passport, driving license)
2. Signed application form with 3 passport sized photographs
3. Residence proof (such as a valid passport, voter ID card)
4. Bank statement for the last six months
5. Additional documents based on employment type (such as salary slips, ITR, business proof)

Note: The required documents may vary based on the specific loan application process and eligibility criteria. It's always recommended to check with the lender for the most up-to-date and accurate list of required documents.


In [None]:
# Flask serves the web interface defined in the app cell below.
!pip install Flask

In [None]:
# NOTE(review): the app cell imports SQLAlchemy from flask_sqlalchemy but never
# uses it in the visible code — confirm this dependency is needed.
!pip install flask-sqlalchemy

# Interface Using Flask

In [None]:
# app.py
from flask import Flask, render_template, request, jsonify
from flask_sqlalchemy import SQLAlchemy
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.retrievers import BM25Retriever, EnsembleRetriever
import os
from threading import Thread

# Flask application object. Templates (index.html for the home route) are
# looked up in an absolute Colab path, which must exist before the app is served.
app = Flask(__name__,template_folder="/content/sample_data/templates")

# Initialize your bot components here (outside of any route)
# This is a simplified version, you'll need to adapt it to your specific setup
def _get_hf_token():
    """Return the Hugging Face API token used by the embeddings and the LLM.

    Inside Google Colab the token is read from the notebook secret
    ``HUGGINGFACEHUB_API_TOKEN``. When ``google.colab`` cannot be imported,
    the environment variable of the same name is used instead.

    Raises:
        RuntimeError: If no non-empty token is found.
    """
    # Import here rather than relying on a `userdata` name left in the kernel
    # by some earlier cell: this cell's own imports do not provide it.
    try:
        from google.colab import userdata
    except ImportError:
        token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
    else:
        token = userdata.get('HUGGINGFACEHUB_API_TOKEN')
    if not token:
        raise RuntimeError(
            "HUGGINGFACEHUB_API_TOKEN is not set; add it as a Colab secret "
            "or export it as an environment variable."
        )
    return token


def initialize_bot(file_path="FILE"):
    """Build the retrieval-augmented question-answering chain.

    Steps: load the PDF, split it into chunks, index the chunks in a Chroma
    vector store and a BM25 retriever, combine both in an ensemble retriever,
    and wire retriever -> prompt -> LLM -> string parser into one chain.

    Args:
        file_path: Path of the PDF document to index. The default ``"FILE"``
            is the original placeholder; pass the real document path.

    Returns:
        A runnable chain. ``chain.invoke(question)`` returns the raw model
        output as a string, containing the ``ANSWER:`` marker from the prompt.

    Raises:
        RuntimeError: If the Hugging Face API token cannot be found.
    """
    # Load and process the PDF
    data_file = UnstructuredPDFLoader(file_path)
    docs = data_file.load()

    # Split documents and create chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    chunks = splitter.split_documents(docs)

    # Initialize embeddings
    HF_TOKEN = _get_hf_token()
    embeddings = HuggingFaceInferenceAPIEmbeddings(
        api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
    )

    # Create vector store
    vectorstore = Chroma.from_documents(chunks, embeddings)
    vectorstore_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Create keyword retriever
    keyword_retriever = BM25Retriever.from_documents(chunks)
    keyword_retriever.k = 3

    # Create ensemble retriever (equal weight for both retrievers)
    ensemble_retriever = EnsembleRetriever(
        retrievers=[vectorstore_retriever, keyword_retriever],
        weights=[0.5, 0.5]
    )

    # Initialize LLM
    llm = HuggingFaceHub(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        model_kwargs={"temperature": 0.3, "max_new_tokens": 1024},
        huggingfacehub_api_token=HF_TOKEN,
    )

    # Create prompt template. The "ANSWER:" marker is what extract_answer()
    # splits on, so it must stay in sync with that function.
    template = """
    CONTEXT: {context} </s>

    QUERY: {query} </s>

    INSTRUCTIONS: - Use only the information provided in the CONTEXT section to answer the QUERY. - Do not provide information or answers outside of the given CONTEXT.

    ANSWER: The answer to the query is:
    """
    prompt = ChatPromptTemplate.from_template(template)
    output_parser = StrOutputParser()

    # Create the chain: the question goes to the retriever for {context} and
    # is passed through unchanged as {query}.
    chain = (
        {"context": ensemble_retriever, "query": RunnablePassthrough()}
        | prompt
        | llm
        | output_parser
    )

    return chain

# Initialize the bot
# Built once when this cell runs; the /query route reuses this single chain
# object for every request.
bot_chain = initialize_bot()

@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page

@app.route('/query', methods=['POST'])
def query_bot():
    """Answer a user question posted as JSON.

    Expects a JSON object body of the form ``{"query": "<question>"}``.

    Returns:
        200 with ``{"response": <answer>}`` on success, or 400 with
        ``{"error": <message>}`` when the body is not a JSON object or has no
        non-empty string ``query`` field.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad-input case is reported through the same 400 below.
    payload = request.get_json(silent=True)
    user_query = payload.get('query') if isinstance(payload, dict) else None
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify({
            'error': "Request body must be a JSON object with a non-empty string 'query' field."
        }), 400

    raw_response = bot_chain.invoke(user_query)
    answer = extract_answer(raw_response)
    return jsonify({'response': answer})

def extract_answer(response):
    """Return the answer portion of a raw chain response.

    Args:
        response: Raw string produced by the chain; expected to contain the
            prompt's ``ANSWER:`` marker followed by the generated answer.

    Returns:
        Everything after the FIRST ``ANSWER:`` marker, stripped of surrounding
        whitespace. If the marker is absent, the whole response stripped.
    """
    # partition() splits on the first marker only, so an answer that itself
    # contains "ANSWER:" is kept whole instead of being cut at the second marker.
    _, marker, answer = response.partition('ANSWER:')
    if marker:
        return answer.strip()
    return response.strip()

def run_flask(port, debug=False):
    """Run the Flask development server on ``port`` (blocks until it stops).

    Args:
        port: TCP port to listen on.
        debug: Enable Flask debug mode. Off by default because the server is
            reached through a proxied URL and debug mode enables the
            interactive debugger, which can execute arbitrary code. Pass
            ``debug=True`` only for local troubleshooting.
    """
    # The reloader stays disabled, as in the original call.
    # NOTE(review): presumably because this runs in a background thread — confirm.
    app.run(port=port, debug=debug, use_reloader=False)

# Start the Flask app in a separate thread so the notebook cell returns.
# Guard against re-running this cell: starting a second server on the same
# port fails with "Address already in use", so a new thread is started only
# when no previous server thread is still alive.
# NOTE: a still-running thread keeps serving the app object it was started
# with; restart the kernel to serve changed code.
_previous_thread = globals().get("flask_thread")
if _previous_thread is None or not _previous_thread.is_alive():
    flask_thread = Thread(target=run_flask, args=(8000,))
    flask_thread.start()

# Print the URL of Colab's built-in port proxy for port 8000 (not ngrok).
from google.colab.output import eval_js
print(eval_js("google.colab.kernel.proxyPort(8000)"))



 * Serving Flask app '__main__'
 * Debug mode: on


Address already in use
Port 8000 is in use by another program. Either identify and stop that program, or start the server with a different port.


https://xq9s0p00fz-496ff2e9c6d22116-8000-colab.googleusercontent.com/


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): nothing in the visible notebook reads from the mounted drive —
# confirm this cell is needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
