In [0]:
# !pip install lxml
# !pip install html5lib
# !pip install chromadb==0.3.21
# !pip install ctransformers

Collecting chromadb==0.3.21
  Using cached chromadb-0.3.21-py3-none-any.whl (46 kB)
Collecting fastapi>=0.85.1
  Downloading fastapi-0.100.0-py3-none-any.whl (65 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/65.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting clickhouse-connect>=0.5.7
  Using cached clickhouse_connect-0.6.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (966 kB)
Collecting numpy>=1.21.6
  Using cached numpy-1.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
Collecting duckdb>=0.7.1
  Using cached duckdb-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
Collecting uvicorn[standard]>=0.18.3
  Using cached uvicorn-0.23.1-py3-none-any.whl (59 kB)
Collecting posthog>=2.4.0
  Using cached posthog-3.0.1-py2.py3-none-any.whl (37 kB)
Collecting hns

In [0]:
dbutils.library.restartPython()

# Chunking

In [0]:
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain import HuggingFacePipeline
from langchain.llms import HuggingFaceHub

# Manual Model building
from transformers import pipeline

In [0]:
# with open(File_Path,'r') as f:
#   file_text = f.read()
# file_text

In [0]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

# File_Path = '/dbfs/nab_demo/output_text_files/parsed_capital-gains-tax.txt'
# File_Path = '/dbfs/nab_demo/output_text_files/parsed_nab-low-rate-card.txt'
File_Path = '/dbfs/nab_demo/output_text_files/parsed_first-debit-card.txt'

# load the document and split it into chunks
loader = TextLoader(File_Path)
file_content = loader.load()

#Define Character Splitter
text_splitter = CharacterTextSplitter(        
    separator = ".",
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
)
#Load the text splitter and split documents based on chunk size 
docs = text_splitter.split_documents(file_content)
doc_list = [x.page_content for x in docs]
docs

[Document(page_content='How to use a debit card | Banking for teens -  .  Notification: NAB Mobile Banking app  How to use your first debit card How to use your first debit card Tips and tricks to help you manage your money with a debit card. What is a debit card? A debit card is a card that’s linked to your everyday transaction account. It lets you easily access your own money from the account at an ATM or when shopping. NAB Debit Card A is available to customers who are 13 years or younger. This card allows you to withdraw money from your account at an ATM. You can make purchases at shops using EFTPOS, but you can’t shop online or add the card to your digital wallet. You can apply for a NAB Debit Card by visiting a branch. NAB Visa Debit card A is available to customers aged 14 years and older. With this card you can: withdraw from or deposit into your account at an ATM make purchases at shops shop online add the card to your digital wallet. If you’re 14 or older you can for a NAB Vi

### Vector DB

In [0]:
#section to rebuild the vector store 
dbutils.fs.rm('dbfs:/nab_demo/vectorstore_persistence/db', True)

True

In [0]:
#Create text embeddings from Hugging Face model
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2',
                                   model_kwargs={'device': 'cpu'})

In [0]:
#Defining the persistant storage of the chroma DB
vector_store_directory = '/dbfs/nab_demo/vectorstore_persistence/db'

#Load the embedding data into Chroma 
docsearch = Chroma.from_documents(docs, 
                                  embeddings ,
                                  collection_name="nab_testing",
                                  persist_directory=vector_store_directory )
                                  
print('The collection : {} index includes: {} documents'.format(docsearch._collection,docsearch._collection.count()))

Using embedded DuckDB with persistence: data will be stored in: /dbfs/nab_demo/vectorstore_persistence/db


The collection : name='nab_testing' metadata=None index includes: 6 documents


## Similarity search result from Chroma DB

#### File = 'parsed_capital-gains-tax.txt'

In [0]:
#Validation of vector indexing 
query = "what is capital gain tax?"

query_result = docsearch.similarity_search(query)
print(query_result[0].page_content)
# [i.page_content for i in query_result ]

Calculating and paying capital gains tax -  .  Notification: NAB Mobile Banking app  ​Calculating and paying capital gains tax ​Calculating and paying capital gains tax Don’t be put off – here’s a quick guide to help you understand capital gains tax. Understanding capital gains and tax A capital gain or loss is the difference between what you paid for an asset and what you sold it for. This takes into account any incidental costs on the purchase and sale. So, if you sell an asset for more than you paid for it, that’s a capital gain. And if you sell it for less, that is considered a capital loss. Capital gains tax applies to capital gains made when you dispose of any asset, except for specific exemptions (the most common exemption being the family home). Being organised is key when trying to quickly calculate and pay capital gains tax


In [0]:
query = "how to be organized for capital gain?"
query_result = docsearch.similarity_search(query)
print(query_result[0].page_content)
# [i.page_content for i in query_result ]

Being organised is key when trying to quickly calculate and pay capital gains tax. And a good way to be organised is to keep up to date records by holding on to things like: initial sale contracts and other receipts for other expenses interest paid on related borrowings receipts for ongoing expenses expense records valuations. Deciding how to calculate capital gains tax There are different ways to calculate your capital gains tax. Capital gains tax discount If you sell or dispose of your capital gains tax assets in less than 12 months you’ll pay the full capital gain. But, you (as an individual) could get a 50% discount on your capital gain (after applying capital losses) for any capital gains tax asset held for over 12 months before you sell it. Indexation You can choose indexation if you acquired your assets before 21 September 1999, and have held it for at least 12 months. This is an alternative option to the discount method


In [0]:
query = "what is indexation?"
query_result = docsearch.similarity_search(query)
print(query_result[0].page_content)

Indexation You can choose indexation if you acquired your assets before 21 September 1999, and have held it for at least 12 months. This is an alternative option to the discount method. The indexation method applies a multiplier to account for inflation on the cost base of your asset (up to September 1999). You can choose the indexation method if you’ve carried forward any capital losses for assets held before 1999. Capital loss If you’ve made a capital loss, you can deduct this from your capital gains (that you’ve made from other sources) to reduce the amount of tax. If you don’t have other capital gains (during that income year) you can carry over any capital losses to other income years—something handy for another time. Paying capital gains tax When to pay Although it sounds like it, capital gains tax isn’t a separate tax. Your net capital gains form part of your assessable income in whatever year your capital gains tax happened


#### File = 'parsed_nab-low-rate-card.txt'

In [0]:
#Quick validation of vector indexing
query = "what is a NAB low rate credit card?"

query_result = docsearch.similarity_search(query)
print(query_result[0].page_content)

Low rate credit card | Balance transfer or cash back offer -  Notification: NAB Mobile Banking app .   NAB Low Rate Credit Card NAB Low Rate Card A simple low interest rate credit card with your choice of offers: get a promotional balance transfer or select our cash back offer. Apply online in 15 minutes and get a response in 60 seconds. Rates and fees Learn more about our standard interest rates and minimum credit limits. Variable purchase rate 12.49% p.a. Interest free days on purchases Up to 55 Annual card fee $59 p.a. Minimum credit limit $1,000 Variable cash advance rate This is the interest rate charged on amounts you withdraw as cash, gambling transactions (including lottery ticket purchases), or transfer from your credit card to another account. 21.74% p.a. Choose one of our available credit card offers Your choice of offers to suit your credit needs, select either our promotional balance transfer offer or our cash back offer. Offer Promotional 0% p.a


In [0]:
query = "Is there any promotion running on balance transfer?"

query_result = docsearch.similarity_search(query)
print(query_result[0].page_content)

a. Choose one of our available credit card offers Your choice of offers to suit your credit needs, select either our promotional balance transfer offer or our cash back offer. Offer Promotional 0% p.a. balance transfer (BT) for 32 months with no BT fee Take advantage of our balance transfer offer to help pay off your existing credit card balance/s sooner. Available on new NAB Low Rate credit card. This could help you to consolidate your debt and reduce the amount of interest you pay on it. Try using our to see how much you could save. Enjoy no annual fee for the first year (usually $59). No balance transfer fee applies. Unpaid BT reverts to the variable cash advance rate after 32 months. Minimum monthly repayments required. NAB may vary or end this offer at any time. See important information. Offer Up to $300 cash back Looking for a credit card that not only offers a low interest rate but provides up to $300 cash back? Available on a new NAB Low Rate credit card


#### File = 'parsed_first-debit-card.txt'

In [0]:
#Second query with a right question and evaluate if the right paragraph is returned 
query = "what is the difference between a credit card and a debit card ? "

query_result = docsearch.similarity_search(query)
print(query_result[0].page_content)

The difference between debit cards and credit cards With a debit card, you’re spending your own money. You can only spend what’s in your transaction account. A credit card is a way to borrow money or get ‘credit’ from a bank, which gives you funds up to an agreed limit. You can spend up to that agreed limit, but you have to pay the money back by the due date. If you don’t, you’ll be charged interest and other fees may apply. You must be at least 18 years old and meet other eligibility criteria to apply for a . Security tips When you’re using your debit card in-person or online, follow these tips. Sign the back of your new card as soon as you get it. When setting up your PIN, don’t use numbers that are easy to guess, like your birthday . Never write down or share your PIN with anyone. This also applies to your passwords for internet banking and your devices used to make payments, like smartphones and wearables


In [0]:
#Second query with a right question and evaluate if the right paragraph is returned 
query = "Give me some suggestions on how i protect my password or PIN"

query_result = docsearch.similarity_search(query)
print(query_result[0].page_content)

Never write down or share your PIN with anyone. This also applies to your passwords for internet banking and your devices used to make payments, like smartphones and wearables. Don’t let other people use your card and treat your device like your wallet – always keep it close. Always cover the keypad on an ATM or EFTPOS machine when entering your PIN Keep your receipts and regularly check your transactions in the NAB app to make sure you’re being charged correctly. Using your NAB Visa Debit card safely online You can shop online with your NAB Visa Debit card by entering the card number, expiry date and CVV. Always be careful when entering your card details online. Read the fine print and make sure you’re not accidentally signing up for subscriptions or future payments. If you’re shopping on overseas websites, be aware you may be charged an . Learn more about how to


## Merge with LLM (Retrieval QA)

#### Model = open-llama-7B-v2-open-instruct-GGML

In [0]:
from ctransformers.langchain import CTransformers
llm_model = CTransformers(model='TheBloke/open-llama-7B-v2-open-instruct-GGML',
                          model_type='llama')

Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [0]:
# We need to add a search key here
# k affects the number of documents retrieved.
### NOTE a document is not document in the human sense but a chunk from the `CharacterTextSplitter`
qa = RetrievalQA.from_chain_type(llm=llm_model, chain_type="stuff", 
                                 retriever=docsearch.as_retriever(search_kwargs={"k": 2}))

In [0]:
# Query 1: Based on first-debit-card file
query = "Give me some suggestions on how i protect my password or PIN ? "
qa.run(query)

In [0]:
# Query 2 : Based on  first-debit-card file
query = """Provide me some suggestions on how i can apply for a NAB Debit card ?
           I am a first time account holder of any bank."""
qa.run(query)

"How do not needed - debit cards and Tips How to Use the difference between debit card tips The pieces you need more advice Not all pieces You should I have you’s When using internet banking apps: I amznNABOften it is a debit card or Link:How can help: NABOften people don's If your question:I am I don’s\nThe first questionWhat do not needed for the difference between debit cards and Tips and tips to my question: How to use the NABSure tips on Using debit cards are ready when it is a debit card?"

In [0]:
# Query 3: Based on the parsed_capital-gains-tax file 
query = "Briefly mention what is indexation ? "
qa.run(query)

In [0]:
# Query 3: Based on NAB low rate credit card 
query = "what is a NAB low rate credit card ? "
qa.run(query)

'Low rate 1. Please type of Low rate (NABNABNABNABNABNABNABNABNABNABNABNABNABNABNABNABNABNABNABNABNABOral NABNABNABNABNABNAB Low Rate\nNABNABNABNABNABNABNABNABOry'