In [71]:
import os
import json
from groq import Groq
from elasticsearch import Elasticsearch
from tqdm import tqdm  # tqdm used for progress visualization


In [72]:
# Directory containing all JSON files
json_folder_path = 'D:\\NBEDirectivesAssist\\ConvertedToJson\\'

# List to hold all documents from all JSON files
all_documents = []

# Loop through each JSON file in the directory
for json_file in os.listdir(json_folder_path):
    if json_file.endswith(".json"):
        json_file_path = os.path.join(json_folder_path, json_file)
        with open(json_file_path, 'rt', encoding='utf-8') as f_in:
            docs_raw = json.load(f_in)
        
        # Extract documents from the current JSON file
        documents = []
        for directive_id_dict in docs_raw:
            for doc in directive_id_dict['sections']:
                # Add additional fields to the doc
                doc['document_id'] = directive_id_dict['document_id']
                doc['title'] = directive_id_dict['title']
                documents.append(doc)

        # Append documents from this file to the global list
        all_documents.extend(documents)

In [73]:
def build_prompt(query,search_results):
    # Create the prompt for the LLM
    prompt_template = """
You're National Bank of Ethiopia Directives assistant. Answer the QUESTION based on the CONTEXT from the directive document.
Use  facts from the CONTEXT when answering the QUESTION.
If the CONTEXT does not contain the answer, Answer from your general knowledge.

QUESTION: {question}
CONTEXT: {context}
     """.strip()

# Generate the context from the search results
    context = ""

    for doc in search_results:
        context += f"section_id: {doc['section_id']}\nsection_title: {doc['section_title']}\npage_number: {doc['page_number']}\ncontent: {doc['content']}\ndocument_id: {doc['document_id']}\ntitle: {doc['title']}\n\n"

# Format the prompt
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [74]:
def llm(prompt):
    # Initialize Groq client
    client = Groq(api_key="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")

# Get the answer from the LLM model
    chat_completion = client.chat.completions.create(
        messages=[
        {
            "role": "user",
            "content": prompt,
        }
    ],
    model="llama3-8b-8192",
    )
    llm_answer=chat_completion.choices[0].message.content
# return the reslut
    return llm_answer

In [53]:
# Initialize Elasticsearch client
es_client = Elasticsearch('http://localhost:9200')

# Define index settings and mappings
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "section_title": {"type": "text"},
            "content": {"type": "text"},
            "document_id": {"type": "keyword"},
            "section_id": {"type": "integer"}  # This section is the only integer field
        }
    }
}

# Create index
index_name = "directivesanalyi"
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'directivesanalyi'})

In [54]:
# Index documents (`all_documents` is a list of dictionaries)

for doc in tqdm(all_documents):
    es_client.index(index=index_name, document=doc)

100%|██████████| 295/295 [00:16<00:00, 17.68it/s]


In [76]:
# Search query
def elastic_search(query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["content^10", "title", "section_title", "document_id"],  # Only text fields
                        "type": "best_fields"
                    }
                }
            }
        }
    }

# Perform search
    response = es_client.search(index=index_name, body=search_query)
    result_docs=[]
    for hit in response['hits']['hits']:
        result_docs.append(hit['_source'])
    return result_docs

In [78]:
def rag(query):
    search_results=elastic_search(query)
    prompt=build_prompt(query,search_results)
    answer=llm(prompt)
    return(answer)

In [83]:
rag('what are the principles that govern foreign exchange rate determination?')

'Based on the provided context from Section 5 of the Foreign Exchange Directive, the principles that govern foreign exchange rate determination are:\n\n1. Banks and authorized foreign exchange dealers are allowed to buy and sell foreign currencies from/to their clients and among themselves at freely negotiated rates.\n2. Banks shall report to the NBE the exchange rate used in their daily foreign exchange transactions by the end of the day, in a format specified in the "Foreign Exchange Market Operations Guideline".\n3. The NBE shall compile the exchange rates used by all banks and authorized foreign exchange dealers in their daily transactions and publicly communicate an Indicative Daily Exchange Rate, calculated on the basis of these submissions, to the market by the start of the following business day.\n4. The Indicative Daily Exchange Rate shall serve as a reference price, but not a mandatory transaction price, for banks and authorized foreign exchange dealers in their foreign excha