python==3.10.13  
langchain==0.1.6  
langchain-community==0.0.19  
openai==1.12.0  
tiktoken==0.6.0  
gradio==3.48.0  
chromadb==0.4.22  

References:
1. https://www.youtube.com/watch?v=eqRMeCrcelM
2. https://www.youtube.com/watch?v=iGZ0cV-SRLI&t=1424s

In [29]:
from dotenv import load_dotenv
import os
import numpy as np

In [86]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import DirectoryLoader
from langchain.indexes.vectorstore import VectorstoreIndexCreator, VectorStoreIndexWrapper
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.chat_models import ChatOpenAI

In [2]:
# Load settings from a local .env file into the environment
# (presumably the OpenAI API key needed by the clients below — confirm).
load_dotenv()

True

### 1. Load the raw file and parse it into proper format

In [3]:
# Read the whole raw knowledge-base file into memory as a single string.
file_path = 'raw_doc/knowledge.txt'
with open(file_path) as raw_file:
    content = raw_file.read()

In [18]:
# Sections of the raw file are delimited by '#', and a question section
# begins with the literal marker "Question". Match on the section prefix
# (not anywhere in the text) so that an answer which merely mentions the
# word "Question" is not picked up as a question.
keyword = "Question"
questions = [
    section[len(keyword):].strip("\n")
    for section in content.split('#')
    if section.startswith(keyword)
]
questions

['How do I change my address in the Kora app?',
 'Can international students use Kora?',
 'How Do I Get in Touch with Kora?',
 'App not working? Try closing it and downloading the most recent version.',
 'Is Kora available for Android?',
 'What is credit and how does it work?']

In [19]:
# Sections of the raw file are delimited by '#', and an answer section
# begins with the literal marker "Answer". Match on the section prefix
# (not anywhere in the text) so that a question which merely mentions the
# word "Answer" is not picked up as an answer.
keyword = "Answer"
answers = [
    section[len(keyword):].strip("\n")
    for section in content.split('#')
    if section.startswith(keyword)
]
answers

['If you entered your address incorrectly or need to update it to a new address, we can help.\nIn your Kora app, go to “Settings”, click on “Profile”, you should be able to change the address by tapping on “Edit”',
 "International students are welcome to use the Kora app and apply for both KoraCash and KoraDrive.\nAt Kora, we believe all college students should have access to our financial products, with no exceptions. Kora provides loan services for qualified college and university students or recent graduates (graduated within a 3 year period).\n\nWhat if I don't have a credit score in the U.S.?\nIf you're an international student looking to build credit in the U.S. you are eligible to apply for both KoraCash and KoraDrive without prior credit history. Making payments on both can help establish a positive credit history.\n\nEven if you do not have a Social Security Number (“SSN”), major credit bureaus will hold onto your payment records reported by Kora. Once you get your SSN, the cr

In [25]:
def write_file(question, answer, file_path):
    """Write one question/answer pair to ``file_path`` as plain text.

    The file holds ``Q: <question>`` on the first line followed by
    ``A: <answer>``, with leading/trailing whitespace stripped from the
    whole text. An existing file at ``file_path`` is overwritten.

    Args:
        question: Question text.
        answer: Answer text; may span several lines.
        file_path: Destination path. A missing parent directory is created.
    """
    text = f"Q: {question}\nA: {answer}".strip()

    # Create the target folder on first use instead of failing with
    # FileNotFoundError.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Write UTF-8 explicitly: the platform default encoding cannot
    # necessarily represent every character found in the answers.
    with open(file_path, 'w', encoding='utf-8') as text_file:
        text_file.write(text)

In [33]:
# Every question needs a matching answer; fail before writing anything
# rather than exporting a partial or misaligned set of files.
if len(questions) != len(answers):
    raise ValueError(
        f"Parsed {len(questions)} questions but {len(answers)} answers"
    )

# Export each Q&A pair to its own file: parsed_doc/question_<i>.txt
for i, (question_text, answer_text) in enumerate(zip(questions, answers)):
    export_file = f'parsed_doc/question_{i}.txt'
    write_file(
        question=question_text,
        answer=answer_text,
        file_path=export_file)

### 2. Create vectorDB from the raw Q&A

In [35]:
# Embedding client used when building the vector store.
embeddings = OpenAIEmbeddings()
# Match on the ".txt" extension (with the dot); a bare "*txt" pattern would
# also pick up any file whose name merely ends in "txt".
loader = DirectoryLoader("./parsed_doc/", glob="**/*.txt")
documents = loader.load()
len(documents)

6

In [39]:
# Split the loaded documents into chunks (chunk_size=1000, no overlap
# between neighbouring chunks), then display the first chunk.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
texts[0]

Document(page_content='Q: How do I change my address in the Kora app?\n\nA: If you entered your address incorrectly or need to update it to a new address, we can help.\n\nIn your Kora app, go to “Settings”, click on “Profile”, you should be able to change the address by tapping on “Edit”', metadata={'source': 'parsed_doc\\question_0.txt'})

In [43]:
# Embed the chunks and build a Chroma vector store from them
# (no persist_directory is passed here).
db = Chroma.from_documents(documents=texts, embedding=embeddings)

In [45]:
# Smoke test: retrieve the stored chunks most similar to the query.
db.similarity_search('international student')

[Document(page_content="Q: Can international students use Kora? A: International students are welcome to use the Kora app and apply for both KoraCash and KoraDrive. At Kora, we believe all college students should have access to our financial products, with no exceptions. Kora provides loan services for qualified college and university students or recent graduates (graduated within a 3 year period).\n\nWhat if I don't have a credit score in the U.S.? If you're an international student looking to build credit in the U.S. you are eligible to apply for both KoraCash and KoraDrive without prior credit history. Making payments on both can help establish a positive credit history.\n\nEven if you do not have a Social Security Number (“SSN”), major credit bureaus will hold onto your payment records reported by Kora. Once you get your SSN, the credit records you previously created (with your Kora loan for example) will be linked to your SSN. Note that you do need an SSN to apply for KoraCash.", 

### 3. Export and reload the vector DB
To save time and API cost, we don't want to re-create the vector DB on every run.
Instead, we can export (persist) it once and load it again later.

In [41]:
# Root folder under which each persisted index gets its own sub-directory.
local_persist_path = "./vector_store"


def get_index_path(index_name):
    """Return the path for ``index_name`` below ``local_persist_path``."""
    index_dir = os.path.join(local_persist_path, index_name)
    return index_dir

def load_and_save_to_index(loader, index_name):
    """Build a vector index from ``loader`` and persist it as ``index_name``.

    Args:
        loader: Document loader whose documents are indexed.
        index_name: Name of the sub-directory (see ``get_index_path``) that
            the vector store is persisted to.
    """
    persist_dir = get_index_path(index_name)

    # By default, VectorstoreIndexCreator uses the OpenAI API.
    creator = VectorstoreIndexCreator(
        vectorstore_kwargs={'persist_directory': persist_dir}
    )
    index_wrapper = creator.from_loaders([loader])

    # Save the vector DB so the vectors need not be regenerated next time.
    index_wrapper.vectorstore.persist()

# load_and_save_to_index(loader=loader, index_name='test_db')

In [79]:
def load_index(index_name):
    """Open the exported vector DB stored under ``index_name``.

    The embedding API must be the same one that was used when the index was
    created with ``VectorstoreIndexCreator``.

    Args:
        index_name: Name of the persisted index (resolved with
            ``get_index_path``).

    Returns:
        A ``Chroma`` vector store bound to the persisted directory.
    """
    return Chroma(
        persist_directory=get_index_path(index_name),
        embedding_function=OpenAIEmbeddings(),
    )

# Reopen the index stored as 'test_db' (presumably written by an earlier
# run of load_and_save_to_index with index_name='test_db' — confirm).
test_db = load_index('test_db')

In [80]:
# Same query as against the in-memory store, now against the reloaded one.
test_db.similarity_search('international student')

[Document(page_content="Q: Can international students use Kora? A: International students are welcome to use the Kora app and apply for both KoraCash and KoraDrive. At Kora, we believe all college students should have access to our financial products, with no exceptions. Kora provides loan services for qualified college and university students or recent graduates (graduated within a 3 year period).\n\nWhat if I don't have a credit score in the U.S.? If you're an international student looking to build credit in the U.S. you are eligible to apply for both KoraCash and KoraDrive without prior credit history. Making payments on both can help establish a positive credit history.\n\nEven if you do not have a Social Security Number (“SSN”), major credit bureaus will hold onto your payment records reported by Kora. Once you get your SSN, the credit records you previously created (with your Kora loan for example) will be linked to your SSN. Note that you do need an SSN to apply for KoraCash.", 

### 4. Conversational Chain

In [50]:
# Chat model used by the question-answering chain below.
llm = ChatOpenAI(model_name='gpt-3.5-turbo')

In [83]:
# Prompt text for the support bot. Placeholders:
#   {context}      - the documents passed to the chain
#   {chat_history} - the conversation memory
#   {question}     - the current customer message
#   {link}         - support-form link, supplied as a partial variable
template = """
### Instruction: You are a customer support agent at Kora that is talking to a customer. Use only the chat history and the following information
{context}
to answer in a helpful and precise manner to the question. 
If you do not know the answer - say that you do not know and ask the customer to fill out the form at the {link}.
If the customer thinks your answer not helpful - say sorry and ask the customer to fill out the form at the {link}.
If the customer says bye - say good bye and ask the customer to give you a rating
Keep your replies short, compassionate and informative.
{chat_history}
### Input: {question}
### Response:
""".strip()

In [84]:
# Wrap the template: context/question/chat_history are filled in per call,
# while {link} is fixed up front through partial_variables.
prompt = PromptTemplate(
    input_variables=['context', 'question', 'chat_history'],
    partial_variables={'link': 'link hidden for privacy. replace with your own link'},
    template=template
)

In [87]:
# Windowed memory: only the most recent k=20 interactions are kept.
# The prefixes mirror the "### Input" / "### Response" markers of the prompt.
memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    human_prefix='### Input',
    ai_prefix="### Response",
    input_key="question",
    output_key="output_text",
    return_messages=False,
    k=20
)

# Alternative: keep the entire chat history in memory instead of a window.
# memory = ConversationBufferMemory(
#     memory_key='chat_history',
#     human_prefix='### Input',
#     ai_prefix="### Response",
#     input_key="question",
#     output_key="output_text",
#     return_messages=False,
# )

# QA chain of type 'stuff' using the custom prompt and the memory above;
# verbose=True prints the formatted prompt on every call.
chain = load_qa_chain(
    llm=llm,
    chain_type='stuff',
    prompt=prompt,
    memory=memory,
    verbose=True
)

In [88]:
# Manual test: retrieve the documents closest to the question, then let the
# chain answer from those documents.
question = "how to use Kora website"
docs = db.similarity_search(question)
answer = chain.run({
    "input_documents": docs, 
    "question": question
})



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m### Instruction: You are a customer support agent at Kora that is talking to a customer. Use only the chat history and the following information
Q: Can international students use Kora? A: International students are welcome to use the Kora app and apply for both KoraCash and KoraDrive. At Kora, we believe all college students should have access to our financial products, with no exceptions. Kora provides loan services for qualified college and university students or recent graduates (graduated within a 3 year period).

What if I don't have a credit score in the U.S.? If you're an international student looking to build credit in the U.S. you are eligible to apply for both KoraCash and KoraDrive without prior credit history. Making payments on both can help establish a positive credit history.

Even if you do not have a Social Security Number (“SSN”), majo

In [89]:
# Display the reply returned by the chain.
answer

'Hello! To use the Kora website, simply visit our homepage at www.kora.com. From there, you can explore our products and services, learn more about what we offer, and even apply for KoraCash or KoraDrive. If you have any specific questions or need assistance navigating the website, feel free to ask!'

### 5. Demo

In [None]:
import gradio as gr

In [None]:
# Re-create the memory and the chain for the demo (presumably so the UI
# starts without the history of the test question asked earlier).
memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    human_prefix='### Input',
    ai_prefix="### Response",
    input_key="question",
    output_key="output_text",
    return_messages=False,
    k=20
)

chain = load_qa_chain(
    llm=llm,
    chain_type='stuff',
    prompt=prompt,
    memory=memory,
    verbose=True
)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.components.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer one user message and append the exchange to the chat.

        Args:
            message: Text submitted in the textbox.
            chat_history: List of (user, bot) pairs shown in the chatbot.

        Returns:
            A tuple of an empty string (clears the textbox) and the updated
            chat history.
        """
        # Ignore blank submissions instead of spending an embedding and an
        # LLM call on an empty question.
        if not message or not message.strip():
            return "", chat_history

        docs = db.similarity_search(message)
        bot_message = chain.run({
            "input_documents": docs,
            "question": message
        })
        print(bot_message)
        chat_history.append((message, bot_message))
        return "", chat_history

    def reset_memory():
        """Drop the conversation stored in the chain's memory."""
        # NOTE(review): `memory` is a single module-level object, so it is
        # shared by every browser session of this demo.
        memory.clear()

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # The ClearButton only empties the UI components; reset the chain's
    # memory as well so a cleared chat really starts a new conversation.
    clear.click(reset_memory, inputs=None, outputs=None)

# Launch the demo UI with debug mode enabled.
demo.launch(debug=True)