In [2]:
from langchain.llms import Ollama
from langchain.embeddings import OllamaEmbeddings
from langchain.document_loaders import CSVLoader, PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain.chains import LLMChain, RetrievalQA

In [3]:
# Ollama LLM wrapper configured for the "llama2" model; this is the generation
# step piped after the prompt inside invoke_chain further down.
llm = Ollama(model="llama2")

In [4]:
# Embedding function backed by the same "llama2" model; it is handed to both
# Chroma stores below as their embedding_function.
embeddings = OllamaEmbeddings(model="llama2")

In [5]:
# Input files, given relative to the notebook's working directory:
# the PDF describing each column, and the CSV of transaction rows.
dictionary = "../data/Data Dictionary.pdf"
data = "../data/data.csv"

In [9]:
# Build one loader per source file; nothing is read until .load() is called
# in the following cells.
dict_loader = PyPDFLoader(dictionary)
data_loader = CSVLoader(data)

In [10]:
# Load the CSV into documents. As the printed sample below shows, each document
# holds one row rendered as "column: value" lines, with the source path and row
# index in its metadata.
data_docs = data_loader.load()

In [11]:
# Load the data-dictionary PDF; the printed check below shows it yields a single
# document (page 0) containing the column names and their descriptions.
dict_doc = dict_loader.load()

In [24]:
print(len(dict_doc), dict_doc[0])

1 page_content='Data\nDictionary\nColumn\nDescription\nclnt_id\nClient\nID\nbank_id\nBank\nID\nacc_id\nAccount\nID\ntxn_id\nTransaction\nID\ntxn_date\nTransaction\ndate\ndesc\nDescription\namt\nAmount\ncat\nCategory\nof\nthe\ntransaction\nmerchant\nMerchant\nof\nthe\ntransaction' metadata={'source': '../data/Data Dictionary.pdf', 'page': 0}


In [13]:
print(data_docs[1])

page_content='clnt_id: 6\nbank_id: 1\nacc_id: 1\ntxn_id: 27\ntxn_date: 31/07/2023 0:00\ndesc: CLOC Advance\namt: 6.286\ncat: Shops\nmerchant: NA' metadata={'source': '../data/data.csv', 'row': 1}


In [None]:
# dict_chroma_db = Chroma.from_documents(dict_doc, embeddings, persist_directory="../data/dict_chroma_db")

In [16]:
# Reopen the data-dictionary vector store from its on-disk directory rather than
# re-embedding the PDF. The Chroma.from_documents call that would build it is
# commented out in the previous cell, so this presumably relies on the directory
# having been created by an earlier run -- confirm it exists on a fresh checkout.
dict_chroma_db = Chroma(persist_directory="../data/dict_chroma_db", embedding_function=embeddings)

In [None]:
# data_chroma_db = Chroma.from_documents(data_docs[:100], embeddings, persist_directory="../data/100_chunk_chroma_db")

In [17]:
# Reopen the transaction vector store from disk. The commented-out build call in
# the previous cell indexes only data_docs[:100], and the length check below
# reports 100 entries, so retrieval covers just that subset of the CSV.
# NOTE(review): as with the dictionary store, this presumably needs the persisted
# directory to already exist -- confirm on a fresh checkout.
data_chroma_db = Chroma(persist_directory="../data/100_chunk_chroma_db", embedding_function=embeddings)

In [57]:
len(data_chroma_db)

100

In [97]:
data_chroma_db.get()['documents']

['clnt_id: 67\nbank_id: 1\nacc_id: 1\ntxn_id: 27\ntxn_date: 19/08/2023 0:00\ndesc: Apple Cash\namt: 10.836\ncat: Shops\nmerchant: Apple',
 'clnt_id: 51\nbank_id: 1\nacc_id: 1\ntxn_id: 8\ntxn_date: 19/07/2023 0:00\ndesc: 07-18-23 VISA DIRECT CA 2990 APPLE CASH INST XFER VISA MONEY TRANSFER CREDIT\namt: 34.476\ncat: Shops\nmerchant: NA',
 'clnt_id: 110\nbank_id: 1\nacc_id: 1\ntxn_id: 4\ntxn_date: 12/06/2023 0:00\ndesc: APPLE CASH VISA DIRECT CA 25\namt: 9.456\ncat: Shops\nmerchant: Apple',
 'clnt_id: 87\nbank_id: 1\nacc_id: 1\ntxn_id: 126\ntxn_date: 08/09/2023 0:00\ndesc: APPLE CASH INST XFER VISA DIRECT CA\namt: 3.546\ncat: Shops\nmerchant: Apple',
 'clnt_id: 87\nbank_id: 1\nacc_id: 1\ntxn_id: 57\ntxn_date: 31/08/2023 0:00\ndesc: APPLE CASH INST XFER 240 CA\namt: 45.31\ncat: Shops\nmerchant: Apple',
 'clnt_id: 104\nbank_id: 1\nacc_id: 1\ntxn_id: 105\ntxn_date: 02/06/2023 0:00\ndesc: BEST BUY 1200 FLINT MI\namt: 4.666\ncat: Shops\nmerchant: Best Buy',
 'clnt_id: 73\nbank_id: 1\nacc_id: 1

In [95]:
dict_chroma_db.get()

{'ids': ['d5859313-fef4-4da4-be12-59537cb1a52f'],
 'embeddings': None,
 'metadatas': [{'page': 0, 'source': '../data/Data Dictionary.pdf'}],
 'documents': ['Data\nDictionary\nColumn\nDescription\nclnt_id\nClient\nID\nbank_id\nBank\nID\nacc_id\nAccount\nID\ntxn_id\nTransaction\nID\ntxn_date\nTransaction\ndate\ndesc\nDescription\namt\nAmount\ncat\nCategory\nof\nthe\ntransaction\nmerchant\nMerchant\nof\nthe\ntransaction'],
 'uris': None,
 'data': None}

In [12]:
data_chroma_db.persist()

In [13]:
dict_chroma_db.persist()

In [21]:
# Retriever over the data-dictionary store, created with default settings.
# NOTE(review): no later cell visible in this notebook references dict_retriever;
# the column descriptions are written directly into the prompt template instead.
dict_retriever = dict_chroma_db.as_retriever()

In [22]:
# Retriever over the transaction store, created with default settings; it
# supplies the "context" input of the chain built in invoke_chain.
data_retriever = data_chroma_db.as_retriever()

In [117]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [118]:
# First version of the question-answering prompt. It expects three inputs:
# {context} (retrieved transactions), {client_id} and {question}, and embeds the
# column glossary so the model can interpret the "column: value" rows.
# NOTE: the next prompt cell rebinds both `template` and `prompt`, so this
# version is replaced once that cell has run.
template="""
        Given these transactions: {context}
        
        for a client with clnt_id: {client_id}
        ,please answer the question: {question}
        
        Make sure you answer in a human readable sentence.
        Answer directly, don't refer to the how you answered the question.
        
        This is what each column/keyword means:
        Column ->> Description
        clnt_id ->> Client ID
        bank_id ->>  Bank ID
        acc_id ->>  Account ID
        txn_id ->> Transaction ID
        txn_date ->> Transaction date
        desc ->> Description
        amt ->> Amount
        cat ->> Category of the transaction
        merchant ->> Merchant of the transaction
        """
        
prompt = ChatPromptTemplate.from_template(template)

In [194]:
# Customer-care version of the prompt. It takes the same three inputs as the
# earlier version -- {context}, {question} and {client_id} -- and rebinds the
# module-level `template` and `prompt` names. invoke_chain reads `prompt` at call
# time, so this is the prompt it uses once this cell has run.
# The instructions ask for a single-sentence answer that does not mention the
# context documents, followed by the same column glossary.
template="""
        You are a helpful assistant that serves as customer care service for our company.
        You asnwer their questions.
        If you don't have their answer, reply that you cannot help in the moment.
                
        You have this data about the customers, provided by the company: {context}
        
        please answer this customer's question directly to them: {question}
        Their client clnt_id: {client_id}
        
        
        Make sure you answer in one human readable sentence.
        Answer directly, like you are in charge.
        Don't add extra informations or extra questions, don't refer to your sources and don't let the customers know about the context documents in your output.
        
        This is what each column/keyword means:
        Column ->> Description
        clnt_id ->> Client ID
        bank_id ->>  Bank ID
        acc_id ->>  Account ID
        txn_id ->> Transaction ID
        txn_date ->> Transaction date
        desc ->> Description
        amt ->> Amount
        cat ->> Category of the transaction
        merchant ->> Merchant of the transaction
        """
        
prompt = ChatPromptTemplate.from_template(template)

In [None]:
"""
Example:
        input: How much did i spend on August 1st, 2023?
        good output: "Dear valued customer,
        According to our records, your transaction on August 1st, 2023, had a total spend of $2.252.
        Please let me know if you have any further questions or concerns."
        Bad Output: "Good afternoon, thank you for reaching out to us. Based on the information provided in your documents, 
        it appears that you spent $2.252 on August 1st, 2023. Please let me know if there's anything else I can help you with."
"""

In [195]:
from operator import itemgetter

def _format_docs(docs) -> str:
    """Join the text of retrieved documents into one plain-text block.

    Only ``page_content`` is kept, so the prompt receives the readable
    "column: value" rows rather than the repr of a list of Document objects
    (which would include metadata and escaped newlines).
    """
    return "\n\n".join(doc.page_content for doc in docs)


def invoke_chain(question: str, client_id=6) -> str:
    """Answer ``question`` for one client from the retrieved transactions.

    The chain retrieves transactions similar to the question from
    ``data_retriever``, fills the module-level ``prompt`` with that context,
    the question and the client id, sends it to ``llm`` and returns the
    model's reply as text.

    Args:
        question: The customer's question in natural language.
        client_id: Value substituted for ``{client_id}`` in the prompt.

    Returns:
        The model's answer as a string.

    NOTE(review): retrieval is driven by the question text only; ``client_id``
    is interpolated into the prompt but does not restrict which rows are
    retrieved, so the context may contain other clients' transactions.
    Consider filtering the retriever by client before relying on the answers.
    """
    chain = (
        {
            # Retrieve on the question, then flatten the hits to plain text
            # before they are substituted into the prompt's {context} slot.
            "context": itemgetter("question") | data_retriever | _format_docs,
            "question": itemgetter("question"),
            "client_id": itemgetter("client_id"),
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    return chain.invoke({
        "client_id": client_id,
        "question": question
    })

In [196]:
reply = invoke_chain(question="How much did I spend on August 1st, 2023?", client_id=86)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "client_id": 86,
  "question": "How much did I spend on August 1st, 2023?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question,client_id>] Entering Chain run with input:
[0m{
  "client_id": 86,
  "question": "How much did I spend on August 1st, 2023?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question,client_id> > 3:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "client_id": 86,
  "question": "How much did I spend on August 1st, 2023?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question,client_id> > 3:chain:RunnableSequence > 4:chain:RunnableLambda] Entering Chain run with input:
[0m{
  "client_id": 86,
  "question": "How much did I spend on August 1st, 2023?"
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:Ru

[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question,client_id> > 3:chain:RunnableSequence] [23.32s] Exiting Chain run with output:
[0m[outputs]
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question,client_id>] [23.34s] Exiting Chain run with output:
[0m[outputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 6:prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 6:prompt:ChatPromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:RunnableSequence > 7:llm:Ollama] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: \n        You are a helpful assistant that serves as customer care service for our company.\n        You asnwer their questions.\n        If you don't have their answer, reply that you cannot help in the moment.\n      

In [179]:
print(reply)

Dear valued customer,

According to our records, your transaction on August 1st, 2023, had a total spend of $2.252.

Please let me know if you have any further questions or concerns.
