# Question-Answer with Directives in a bank

In this notebook we evaluate how well an LLM answers questions about directives in a bank.

### Process
1. Embedding-based similarity search
2. Use Vector Database FAISS to store results and find k best documents
3. Generate Answer with local LLM
4. Evaluate Performance




### Install 

In [None]:
!pip install langchain
!pip install transformers
!pip install unstructured
!pip install sentence-transformers
!pip install faiss-gpu #pip install faiss-cpu
!pip install gpt4all
!pip install openai
!pip install -U langchain-community
!pip install --upgrade typing_extensions
%pip install faiss-cpu

In [1]:
import glob
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import textwrap
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
import os
import textwrap
import pandas as pd
import time

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import OpenAI

In [3]:
# This function is used to present the output of approach 1 in a more human readable manner
def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    The text is split on newline characters, each resulting line is wrapped
    on its own with ``textwrap.fill``, and the wrapped pieces are joined back
    together with newline characters.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill`` (default 110).

    Returns:
        The wrapped text as a single string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

In [None]:
# Build a loader for the .txt files matched by the glob pattern below the
# 'Downloads_TXT' directory, then load them into `documents`.
txt_loader = DirectoryLoader(
    'Downloads_TXT',
    glob="**/*.txt",
    recursive=True,
)
documents = txt_loader.load()

In [None]:
# Text Splitter
# Split the loaded documents into chunks, configured with chunk_size=1000 and
# chunk_overlap=100, so that each chunk can be embedded separately.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = text_splitter.split_documents(documents)
# Cell output: (number of chunks, number of source documents).
len(chunks),len(documents)

## Step 1: Embedding 


In [None]:
import torch

# Run the embedding model on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Sentence-transformers model used to embed both the chunks and the queries;
# embeddings are requested without normalization.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={'device': device},
    encode_kwargs={'normalize_embeddings': False},
)

In [None]:
# Sanity check: print one chunk (index 3) to inspect the result of the splitting step.
print(chunks[3])

### 2. Store into Vector DB

In [None]:
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
# Build the FAISS vector store from the chunks, using `embeddings` to embed them.
db = FAISS.from_documents(chunks, embeddings)


In [8]:
# Persist the vector store to the local folder "faiss_embeddings" so it can be reused later.
db.save_local("faiss_embeddings")

In [None]:
# Reload the vector store persisted with db.save_local("faiss_embeddings").
# The counterpart of `save_local` is `load_local` (there is no `FAISS.load`); it needs
# the same embedding model that was used to build the index.
# The docstore is stored as a pickle, so recent langchain-community versions require an
# explicit opt-in; only enable it for index folders you created yourself.
db = FAISS.load_local(
    "faiss_embeddings",
    embeddings,
    allow_dangerous_deserialization=True,
)



### Generate Answer with local LLM


In [24]:
# Path to the local GGUF model file -- replace with your desired local file path.
# Other candidate model files (activate exactly one assignment):
#local_path = r"C:\Users\Michael\models\orca-mini-3b-gguf2-q4_0.gguf"
#local_path = r"C:\Users\Michael\models\mistral-7b-openorca.Q4_0.gguf"
#local_path = r"C:\Users\Michael\models\Apollo2-7B-Q6_K.gguf"
#local_path = r"C:\Users\Michael\models\Llama-2-7b-chat-hf-finedtuned-to-GGUF.gguf"
local_path = r"C:\Users\Michael\models\gemma-7b.gguf"

# Callback handler passed to the LLM so that generated output is streamed to stdout.
callbacks = [StreamingStdOutCallbackHandler()]
llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)
# NOTE(review): confirm that the GPT4All wrapper exposes a `size` attribute in the
# installed version; if it does not, this line raises AttributeError.
print(llm.size)
# Question-answering chain of type "stuff" built on top of the local LLM.
llm_chain = load_qa_chain(llm, chain_type="stuff")

### Check the documents retrieved for an example query, to verify that the LLM receives relevant information

In [None]:
query = "Ab welchem Volumen einer Einzelanlage eines Kunden sind in the Anlageberatung Massnahmen betreffend Klumpenrisiken zu treffen".replace(" in the ", " in der ")
# Retrieve up to 3 chunks for the query from the vector store.
docs = db.similarity_search(query, k=3)
# Print the content and metadata of every retrieved chunk. Iterating instead of
# indexing docs[0..2] avoids an IndexError when fewer than 3 chunks are returned.
for doc in docs:
    print(wrap_text_preserve_newlines(doc.page_content))
    print(doc.metadata)
# Last expression: the chain's answer is shown as the cell output.
llm_chain.run(input_documents=docs, question=query) # very slow

### 4. Evaluate Performance


In [None]:
# Evaluation set: maps each test query to the sample (expected) response.
query_dict = {
    "wann müssen Gruppenmitglieder MWST-Abstimmung liefern": "Ende März",
    "inner welcher Frist sind Auskunftsbegehren gemäss Datemschutz durch die Bank zu beantworten": "30 Kalendertage",
    "Welche Spesen sind bei Übernachtungen bei Freunden zulässig": "Effektive Kosten bis 80 CHF, pauschal 60 CHF",
    "wann darf das Originaldokument nach dem Scan vernichtet werden": "Nach Abschluss des vorgegebenen Prozesses inklusive Backup",
    "Ab welchem Volumen einer Einzelanlage eines Kunden sind in der Anlageberatung Massnahmen betreffend Klumpenrisiken zu treffen": "ab 20% des Portfolios",
}

# Visual separator printed before the header and before every query.
separator = "\n\n\n-------------------------------------------------------------------------------------------------------"

print(separator)
print("Try llm with the following queries:")
print("used llm: " + str(llm))
for query, answer in query_dict.items():
    print(separator)
    print(f"Query: {query}")
    print(f"Sample Response: {answer}")
    # perf_counter is a monotonic high-resolution clock, so the measured interval is
    # not distorted by system clock adjustments (unlike time.time()).
    start_time = time.perf_counter()
    # The measured time covers both the retrieval and the answer generation.
    docs = db.similarity_search(query, k=3)
    llm_chain.run(input_documents=docs, question=query)
    time_taken = round(time.perf_counter() - start_time, 3)
    print("\n\nTime taken: ")
    print(time_taken)
    
