### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

# 0. Requirements

In [None]:
!pip install -q langchain_community
!pip install -q unstructured # for directory_reader
!pip install -q unstructured[xlsx] # for reading xlsx files
!pip install -q gradio
!pip install -q qdrant-client
!pip install -q oci==2.112.1

In [None]:
!pip install "unstructured[xlsx]"

In [None]:
!pip install unstructured

In [None]:
!pip list

In [None]:
!pip show langchain_community

In [2]:
import os
import datetime
import csv
import oci
from langchain_community.embeddings import OCIGenAIEmbeddings
from my_directory_loader import MyDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter #for directory_reader
from langchain.vectorstores import Qdrant
import gradio as gr

In [8]:
compartment_id = os.environ["NB_SESSION_COMPARTMENT_OCID"]
service_endpoint = "https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com"
oci_signer = oci.auth.signers.get_resource_principals_signer() # this has changed from its original version to resource principal, do not forget to use this
#oci_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() #preferred way but policies needs to be in place to enable instance principal or resource principal https://docs.oracle.com/en-us/iaas/data-flow/using/resource-principal-policies.htm
DIRECTORY = 'data'
NEW_DIRECTORY = 'new_data'

In [6]:
compartment_id

'ocid1.compartment.oc1..aaaaaaaa2mybnl25h6pdqqicnuog4gzfrcdyjjwmpq3246nklfsv5s3obdkq'

In [4]:
embeddings = OCIGenAIEmbeddings(
    model_id="cohere.embed-english-v3.0",
    service_endpoint="https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com",
    compartment_id= compartment_id,
    auth_type="RESOURCE_PRINCIPAL", # this is used for serverless architecture like this env
)

In [5]:
embeddings_test = OCIGenAIEmbeddings(
    model_id="cohere.embed-multilingual-v3.0",
    service_endpoint=service_endpoint,
    compartment_id=compartment_id,
    # auth_type="INSTANCE_PRINCIPAL",
    auth_type="RESOURCE_PRINCIPAL",
)

In [9]:
def load_docs(directory):
  loader = MyDirectoryLoader(directory)
  documents = loader.load()
  return documents
documents = load_docs(NEW_DIRECTORY)
def split_docs(documents,chunk_size=1000,chunk_overlap=100):
#def split_docs(documents,chunk_size=256,chunk_overlap=20): # second try
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)      
  docs = text_splitter.split_documents(documents)
  return docs
docs = split_docs(documents)

In [None]:
docs

In [13]:
def get_similiar_docs(query,k=4,score=False):
  if score:
    similar_docs = db.similarity_search_with_score(query,k=k)
  else:
    similar_docs = db.similarity_search(query,k=k)
  return similar_docs

In [11]:
db = Qdrant.from_documents(docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")

In [41]:
prompt_context = """
    Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable.
    ###
    Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
"""

# 1. Implementation

## 1.1. Implementation with New Dataset

In [47]:
IRELAND_DIRECTORY = "new_data/Ireland"
US_DIRECTORY = "new_data/USA"

PROMPT_CONTEXT = """
    Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable.
    ###
    Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
"""

In [50]:
def load_docs(directory):
  loader = MyDirectoryLoader(directory)
  documents = loader.load()
  return documents

ireland_documents = load_docs(IRELAND_DIRECTORY)
us_documents = load_docs(US_DIRECTORY)

def split_docs(documents,chunk_size=1000,chunk_overlap=100):
#def split_docs(documents,chunk_size=256,chunk_overlap=20): # second try
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)      
  docs = text_splitter.split_documents(documents)
  return docs

ireland_docs = split_docs(ireland_documents)
us_docs = split_docs(us_documents)

In [51]:
ireland_db = Qdrant.from_documents(ireland_docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")
us_db = Qdrant.from_documents(us_docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")

In [None]:
# I have customized this in the function below
def get_similiar_docs(query,k=2,score=False):
  if score:
    similar_docs = db.similarity_search_with_score(query,k=k)
  else:
    similar_docs = db.similarity_search(query,k=k)
  return similar_docs

In [106]:
#Constants
MODEL_ID_16K = 'cohere.command-r-16k'
MODEL_ID_PLUS = 'cohere.command-r-plus'
FLAGGED_DATA_PATH = "flagged_data_folder/log.csv"
default_values = {
    "user_input": "",
    "model_dropdown": "Cohere 16k",
    "temperature_slider": 0,
    "top_p_slider": 0.7,
    "max_tokens_slider": 1000,
    "response_output": "",
    "source_output": "",
    "warning_message":"",
}
PROMPT_CONTEXT = """
    Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable.
    ###
    Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
"""

# Utility function to get current timestamp
def get_current_timestamp():
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Step 1: Set up the LLM Model
def oci_llm_model(user_input, country="Irelnad", model="Cohere 16k", temperature=0, top_p=0.7, max_tokens=1000):    
    if len(user_input) > 10:
        prompts, source_name= setup_chat_context(user_input, country)
        chat_response_text = process_chat_request(prompts, model, max_tokens, temperature, top_p)
        return chat_response_text, source_name, "Your feedback is much appreciated."
    else:
        return "", "", "⚠️ Warning: Your input is too short."

def get_similiar_docs(query, country, k=2):#,score=False):
    similar_docs = ireland_db.similarity_search(query,k=k) if country == 'Ireland' else us_db.similarity_search(query,k=k)
    return similar_docs     
    
def setup_chat_context(user_input, country):
    similar_docs = get_similiar_docs(user_input, country)
    concatenated_content, source_name, sourceLinks = "", "", ""
    for document in similar_docs:
        concatenated_content += document.page_content 
        source_name = document.metadata['source'].replace("new_data/", "") # since the folder data/ was always present
        #source_link = document.metadata["source"]
        #sourcePageNumber = int(document.metadata["page"])
        #sourcePageNumber = sourcePageNumber + 1
#         if source_link not in unique_source_links:
#           unique_source_links.add(source_link)  
#           sourceLinks += "<a href='" + objectStorageLink + source_link + "#page=" + "' target='_blank'>" + source_link[source_link.rfind("/")+1:] + " (page "+str(sourcePageNumber) + ")</a>\n"
        #question
        prompts = [f"""Go through this text: {concatenated_content}\nQuestion: {user_input}\nInstruction: {PROMPT_CONTEXT}"""]
        prompts = "".join(prompts)
        return prompts, source_name       
        
def process_chat_request(prompts, model, max_tokens, temperature, top_p):
        #prompt template
        generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config={},signer=oci_signer, service_endpoint=service_endpoint, retry_strategy=oci.retry.NoneRetryStrategy(), timeout=(10,240))
        chat_request = setup_chat_request(prompts, max_tokens, temperature, top_p)
        chat_detail = setup_chat_detail(model, chat_request)
        chat_response = generative_ai_inference_client.chat(chat_detail)
        data = chat_response.data
        chat_response_data = data.chat_response
        chat_response_text = chat_response_data.text
        return chat_response_text        

def setup_chat_request(prompts, max_tokens, temperature, top_p):
        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
        chat_request.message = prompts
        chat_request.max_tokens = max_tokens 
        chat_request.is_stream = False # True results to an error
        chat_request.temperature = temperature 
        chat_request.top_p = top_p 
        #chat_request.top_k = 10 #top_k # Only support topK within [0, 500]
        #chat_request.frequency_penalty = 0 #frequency_penalty

        chat_request.documents = [
        {
            "title": "Tell more about MCOAssistant",
            "snippet": "The primary objective of this integration is to advance the solution’s capabilities in delivering personalised user interactions and efficient data retrieval directly tailored to user needs. Specifically, not only is the ambition to make it easier to understand how to use the product but to also improve policy-related data retrieval for users of MCO. This AI is designed to assist financial services firms in effectively managing and mitigating compliance risk by providing real-time insights into relevant business rules and regulations. It should also be able to answer financial questions within the context of these regulations.",
            "website": "https://mco.mycomplianceoffice.com/"
        },
        # {
        #     "title": "More rules and regularation information in the United Stated",
        #     "snippet": "FINRA's rules and guidance in the US strive to protect investors and ensure the integrity of today's rapidly evolving market.",
        #     "website": "https://www.finra.org/rules-guidance/rulebooks/finra-rules"
        # },
        # {
        #     "title": "More rules and regularation information in the United Kingdom",
        #     "snippet": "This Handbook contains the complete record of Financial Conduct Authority (FCA) Legal Instruments.",
        #     "website": "http://www.handbook.fca.org.uk/"
        # }
        ]
        return chat_request
        
def setup_chat_detail(model, chat_request):
        chat_detail = oci.generative_ai_inference.models.ChatDetails()
        model_id = MODEL_ID_16K if model == "Cohere 16k" else MODEL_ID_PLUS
        chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id) 
        chat_detail.compartment_id = compartment_id
        chat_detail.chat_request = chat_request
        return chat_detail
    
# Specific UI Functions:   
    
def clear_input():
    # Return an empty string to clear the user_input Textbox
    #return "", "Cohere", 0, 0.7, 1000, ""
    return default_values["user_input"], default_values["model_dropdown"], default_values["temperature_slider"], default_values["top_p_slider"], default_values["max_tokens_slider"], default_values["response_output"], default_values['source_output'], default_values["warning_message"]

# def handle_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback_type):
#     feedback = 'liked' if feedback_type == 'positive' else 'disliked'
#     timestamp = get_current_timestamp()
#     with open(FLAGGED_DATA_CSV, "a", newline='', encoding='utf-8') as file:
#         writer = csv.writer(file)
#         writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
#     return "Thanks for your feedback."

def handle_positive_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='liked'
    timestamp = get_current_timestamp()
    with open(FLAGGED_DATA_PATH, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def handle_negative_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='disliked'
    timestamp = get_current_timestamp()
    with open(FLAGGED_DATA_PATH, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def flag_response(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    timestamp = get_current_timestamp()
    with open(FLAGGED_DATA_PATH, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Write the timestamp, user input, and model response to the CSV file
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Content flagged for review."
    return warning_message


# Step 2: Set up Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app: #theme=gr.themes.Soft()
    gr.Markdown("# <span style='color:#29339B'> MCO Assistant </span>")
    gr.Markdown("""    
                    This interface allows you to interact with different language models. Choose your settings and input your question below. You do not need to change any specifications.""")
    with gr.Row():
        with gr.Column(scale=3): #'scale' value should be an integer. Using 2.5 will cause issues.
            user_input = gr.Textbox(label="Your Question", placeholder="Ask the model what it thinks...", lines=3 ,show_copy_button=False)
                
            with gr.Row():
                submit_button = gr.Button("✔️ Submit")
                clear_button = gr.Button("🗑️ Clear")
                flag_button = gr.Button("🚩 Flag")
            response_output = gr.Textbox(label="Model Response", lines=8, show_copy_button=True, placeholder="The model's response will be displayed here...")
            source_output = gr.Textbox(label="Source", lines=1, show_copy_button=True, placeholder="The reponse's source will be displayed here...")
            with gr.Row():
                like_button = gr.Button("👍 Like")
                dislike_button = gr.Button("👎 Dislike")
        with gr.Column(scale=1):
            #with gr.Row():
                #country_checkbox = gr.CheckboxGroup(["USA", "Ireland"], label="Country", info="Which countries should the regulations be applied for?"),
                country_dropdown = gr.Dropdown(choices=["USA", "Ireland"], label="Country", value="Ireland", info="Which countries should the regulations be applied for?"),
                model_dropdown = gr.Dropdown(choices=["Cohere 16k", "Cohere Plus"], label="Model", value='Cohere 16k', info="Cohere 16k is a smaller-scale language model than Cohere Plus. While Cohere 16k offers high-quality responses, it might not possess the same level of sophistication and depth as Cohere Plus. ")
                temperature_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0, label="Temperature", info="The level of randomness used to generate the output text. A lower temperature means less random generations.")
                top_p_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.7, label="Top P", info="This ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.")
                max_tokens_slider = gr.Slider(minimum=100, maximum=4000, step=500, value=1000, label="Max Tokens", info="The maximum number of output tokens that the model will generate for the response.")
                #country_checkbox = gr.CheckboxGroup(["USA", "UK"], label="Countries", info="Which countries should the regulations be applied for?"),
            #with gr.Row():
                warning_message = gr.Textbox(label="System Message", visible=True, lines=1)
                #file_upload = gr.File(label="Upload your documents here.")                
    
    #Define examples
    # examples = [
    #     #{"user_input": "Can a member give anything of value more than a hundred dollar?", "model": "Cohere", "temperature": 0.5, "top_p": 0.7, "max_tokens":1000}
    #     #{"user_input": "Explain the concept of deep learning.", "model": "Cohere", "temperature": 0.5, "top_p": 0.7}
    #     ["Tell me more about MCOAssitant.",  "Cohere 16k",  0.2,  1, 800],
    #     ["Provide contact information for the Bank of England.",  "Cohere 16k",  0,  1, 500],
    #     ["Can you summarize Private Securities Transactions of an Associated Person rules?", "Cohere 16k",  0,  1, 1500],
    #     ["Can a member give anything of value more than a hundred dollar?",  "Cohere 16k",  0,  0.9, 1000],
    #     ["Which obligations do not apply when successive personal transactions are carried out on behalf of a person? Make a list.",  "Cohere 16k",  0,  1, 1000],
    #     ["What are examples of giving advice, or providing services, to an employer in connection with a group personal pension scheme or group stakeholderpension scheme? Make a list.", "Cohere 16k", 0,  1, 1500]
    # ]

    # Add examples to the interface
    # gr.Examples(examples=examples, inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider])#, outputs=response_output)
        
    
    submit_button.click(fn=oci_llm_model, 
                        inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_tokens_slider], 
                        outputs=[response_output, source_output, warning_message])
    clear_button.click(fn=clear_input, 
                       inputs=[],  # No input is needed for clearing
                       outputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_tokens_slider, response_output, source_output, warning_message])
    flag_button.click(fn=flag_response, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_tokens_slider, response_output],  
                      outputs=[warning_message])
    like_button.click(fn= handle_positive_feedback,
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_tokens_slider, response_output],  
                      outputs=[warning_message])
    dislike_button.click(fn=handle_negative_feedback,
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_tokens_slider, response_output],  
                      outputs=[warning_message])
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:13px; color:#3c3c3c"> 
    Version 1.0.0.
    </span> </div>""") 
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:11px; color:#3c3c3c"> 
    Note: While the model strives to be accurate and helpful, it is not a substitute for legal advice.  It's always recommended to consult with a qualified attorney for specific legal issues. 
    </span> </div>""")

# Step 3: Launch the interface
app.launch(share=True)#, server_port=44510)

#### To do list
# 1. Add country dropdown
# 2. Make the checkbox work
# 3. Add user drop down
# 4. Add similary search for different countries
# 5. Add country for like, dislike, clear button
# 6. Add examples from their examples
# 7. Check if country check box is compatible to have one value only

AttributeError: 'tuple' object has no attribute '_id'

## 1.2. Previous Implementation

In [44]:
prompt_context = "You are an AI Assistant trained to give answers based only the information provided. Given only the above text provided and not prior knowledge, answer the query. If someone asks you a question and you don't know the answer, don't try to make up a response, simply say: I don't know."

In [45]:
model_id_16k = 'cohere.command-r-16k'
model_id_plus = 'cohere.command-r-plus'
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
feedback = "None"

# Define the path to flagged CSV file
flagged_data_csv = "flagged_data_folder/log.csv"
data_folder_path = "data/"

default_values = {
    "user_input": "",
    "model_dropdown": "Cohere 16k",
    "temperature_slider": 0,
    "top_p_slider": 0.7,
    "max_tokens_slider": 1000,
    "response_output": "",
    "source_output": "",
    "warning_message":"",
}

# Step 1: Set up the LLM Model
def oci_llm_model(user_input, model=default_values['model_dropdown'], temperature=default_values['temperature_slider'] , top_p=default_values['top_p_slider'], max_tokens=default_values['max_tokens_slider']):

    ############################################################################# OCI Function Starts Here
    
    if len(user_input) > 10:
        warning_message = "Your feedback is much appreciated."
    
        message = user_input
        similar_docs = get_similiar_docs(message)
        #similar_docs = get_similiar_docs_faiss(message)

    # The output of sources is not totally desirable atm
    #     context
        concatenated_content = ""
        sourceLinks = ""
        #sourcePageNumber = ""
        #unique_source_links = set() 
        for document in similar_docs:
            concatenated_content += document.page_content 
            source_link = document.metadata["source"]
            #sourcePageNumber = int(document.metadata["page"])
            #sourcePageNumber = sourcePageNumber + 1
    #         if source_link not in unique_source_links:
    #           unique_source_links.add(source_link)  
    #           sourceLinks += "<a href='" + objectStorageLink + source_link + "#page=" + "' target='_blank'>" + source_link[source_link.rfind("/")+1:] + " (page "+str(sourcePageNumber) + ")</a>\n"

        #context
        source_name = ""
        for document in similar_docs:
            source_name = document.metadata['source'].replace("data/", "") # since the folder data/ was always present

        #question
        prompt_token_input = prompt_context

        prompts = [
        f"""Please go through this text: {concatenated_content}
        Question: {message}
        Instruction: {prompt_token_input}"""]
        prompts = "".join(prompts)

        #prompt template
        generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config={},signer=oci_signer, service_endpoint=service_endpoint, retry_strategy=oci.retry.NoneRetryStrategy(), timeout=(10,240))

        chat_detail = oci.generative_ai_inference.models.ChatDetails()
        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
        #chat_request = oci.generative_ai_inference.models.GenericChatRequest() # using this result into an error, it might need a different authentication method
        chat_request.message = prompts
        chat_request.max_tokens = max_tokens #1000 #max_tokens
        chat_request.is_stream = False # True results to an error
        chat_request.temperature = temperature #temperature
        chat_request.top_p = top_p # top_p
        #chat_request.top_k = 10 #top_k # Only support topK within [0, 500]
        chat_request.frequency_penalty = 0 #frequency_penalty
        #chat_request.chat_history = oci.generative_ai_inference.models.CohereMessage(role="USER", message="")

        # This part is redundant since the model keeps the history
        # previous_chat_message = oci.generative_ai_inference.models.CohereMessage(role="USER", message=previous_chat_message)
        # previous_chat_reply = oci.generative_ai_inference.models.CohereMessage(role="CHATBOT", message=previous_chat_reply)
        # chat_request.chat_history = [previous_chat_message, previous_chat_reply]

        chat_request.documents = [
        {
            "title": "Tell more about MCOAssistant",
            "snippet": "The primary objective of this integration is to advance the solution’s capabilities in delivering personalised user interactions and efficient data retrieval directly tailored to user needs. Specifically, not only is the ambition to make it easier to understand how to use the product but to also improve policy-related data retrieval for users of MCO. This AI is designed to assist financial services firms in effectively managing and mitigating compliance risk by providing real-time insights into relevant business rules and regulations. It should also be able to answer financial questions within the context of these regulations.",
            "website": "https://mco.mycomplianceoffice.com/"
        },
        # {
        #     "title": "More rules and regularation information in the United Stated",
        #     "snippet": "FINRA's rules and guidance in the US strive to protect investors and ensure the integrity of today's rapidly evolving market.",
        #     "website": "https://www.finra.org/rules-guidance/rulebooks/finra-rules"
        # },
        # {
        #     "title": "More rules and regularation information in the United Kingdom",
        #     "snippet": "This Handbook contains the complete record of Financial Conduct Authority (FCA) Legal Instruments.",
        #     "website": "http://www.handbook.fca.org.uk/"
        # }
        ]

        # if "<compartment_ocid>" in compartment_id:
        #     print("ERROR:Please update your compartment id in target python file")
        #     quit()

        # Selectiong the model
        if model == "Cohere 16k":
            chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id_16k)
        elif model == "Cohere Plus":
            chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id_plus)

        #chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id)

        chat_detail.compartment_id = compartment_id
        chat_detail.chat_request = chat_request

        chat_response = generative_ai_inference_client.chat(chat_detail)

        # result
        data = chat_response.data
        chat_response_data = data.chat_response
        chat_response_text = chat_response_data.text
        #print("Extracted text:", chat_response_text)
        return chat_response_text, source_name, warning_message
        #oci.util.to_dict(chat_response_text+' For more info check the below links: \n'+sourceLinks)
        
    else: # if the user prompt's length is less than 10, just return the warning message and do not go into LLM model
        chat_response_text = ""
        source_name = ""
        warning_message = "⚠️ Warning: Your input is too short."
        return chat_response_text, source_name, warning_message

    ############################################################################# OCI Function Ends Here   
    
def clear_input():
    # Return an empty string to clear the user_input Textbox
    #return "", "Cohere", 0, 0.7, 1000, ""
    return default_values["user_input"], default_values["model_dropdown"], default_values["temperature_slider"], default_values["top_p_slider"], default_values["max_tokens_slider"], default_values["response_output"], default_values['source_output'], default_values["warning_message"]

def handle_positive_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='liked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def handle_negative_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='disliked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def flag_response(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    # Generate a timestamp for when the content was flagged
    #timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Open the CSV file in append mode
    
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Write the timestamp, user input, and model response to the CSV file
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Content flagged for review."
    return warning_message



# Step 2: Set up Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app: #theme=gr.themes.Soft()
    gr.Markdown("# <span style='color:#29339B'> MCO Assistant </span>")
    gr.Markdown("""    
                    This interface allows you to interact with different language models. Choose your settings and input your question below. You do not need to change any specifications.""")
    with gr.Row():
        with gr.Column(scale=3): #'scale' value should be an integer. Using 2.5 will cause issues.
            user_input = gr.Textbox(label="Your Question", placeholder="Ask the model what it thinks...", lines=3 ,show_copy_button=False)
                
            with gr.Row():
                submit_button = gr.Button("✔️ Submit")
                clear_button = gr.Button("🗑️ Clear")
                flag_button = gr.Button("🚩 Flag")
            response_output = gr.Textbox(label="Model Response", lines=8, show_copy_button=True, placeholder="The model's response will be displayed here...")
            source_output = gr.Textbox(label="Source", lines=1, show_copy_button=True, placeholder="The reponse's source will be displayed here...")
            with gr.Row():
                like_button = gr.Button("👍 Like")
                dislike_button = gr.Button("👎 Dislike")
        with gr.Column(scale=1):
            #with gr.Row():
                model_dropdown = gr.Dropdown(choices=["Cohere 16k", "Cohere Plus"], label="Model", value='Cohere 16k', info="Cohere 16k is a smaller-scale language model than Cohere Plus. While Cohere 16k offers high-quality responses, it might not possess the same level of sophistication and depth as Cohere Plus. ")
                temperature_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0, label="Temperature", info="The level of randomness used to generate the output text. A lower temperature means less random generations.")
                top_p_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.7, label="Top P", info="This ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.")
                max_token_slider = gr.Slider(minimum=100, maximum=4000, step=500, value=1000, label="Max Tokens", info="The maximum number of output tokens that the model will generate for the response.")
                #country_checkbox = gr.CheckboxGroup(["USA", "UK"], label="Countries", info="Which countries should the regulations be applied for?"),
            #with gr.Row():
                warning_message = gr.Textbox(label="System Message", visible=True, lines=1)
                #file_upload = gr.File(label="Upload your documents here.")                
    
    #Define examples
    examples = [
        #{"user_input": "Can a member give anything of value more than a hundred dollar?", "model": "Cohere", "temperature": 0.5, "top_p": 0.7, "max_tokens":1000}
        #{"user_input": "Explain the concept of deep learning.", "model": "Cohere", "temperature": 0.5, "top_p": 0.7}
        ["Tell me more about MCOAssitant.",  "Cohere 16k",  0.2,  1, 800],
        ["Provide contact information for the Bank of England.",  "Cohere 16k",  0,  1, 500],
        ["Can you summarize Private Securities Transactions of an Associated Person rules?", "Cohere 16k",  0,  1, 1500],
        ["Can a member give anything of value more than a hundred dollar?",  "Cohere 16k",  0,  0.9, 1000],
        ["Which obligations do not apply when successive personal transactions are carried out on behalf of a person? Make a list.",  "Cohere 16k",  0,  1, 1000],
        ["What are examples of giving advice, or providing services, to an employer in connection with a group personal pension scheme or group stakeholderpension scheme? Make a list.", "Cohere 16k", 0,  1, 1500]
    ]

    # Add examples to the interface
    gr.Examples(examples=examples, inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider])#, outputs=response_output)

        
    with gr.Row():
        gr.Markdown("**The features that need to be added to the model in the later versions:**")
    with gr.Row():
        with gr.Column():
            gr.Markdown(""" 
            - The dropdown button of model selection actually works ✔️ v. 0.4.6.
            - Add info for the parameters ✔️ v. 0.4.3. & 0.4.6. 
            - Add frequency penalty and max tokens ✔️ v. 0.4.4.
            - Add examples ✔️ v. 0.4.5.
            - Add a clear button ✔️ v. 0.4.2.
            - Compare which models give more accurate answers (16k or Plus) ✔️ v. 0.4.6.
            - The reference of the reponse is provided ✔️ v. 0.5.1.
            - More efficient code ✔️ v. 0.4.7.
            - Delete Source box with clear button ✔️v. 0.5.2.
            - More prompt engineering ✔️v. 0.8.2.
            - The user has the ability to add their own documents ✔️v. 0.8.3.
            - Add flag button ✔️v. 0.6.4.

            """)
        with gr.Column():
            gr.Markdown(""" 
            - Check for prompt injection
            - Handle errors if any ✔️ v. 0.6.3. query length
            - More descriptions for the app
            - Add MCO logo 
            - Add more examples ✔️v. 0.7.2.
            - Add like and dislike button ✔️v. 0.9.1.
            - Show custom messages like flag saved ✔️v. 0.7.1.
            - Maybe tweak the source names ✔️v. 0.7.3.
            - Check why Cohere plus cannot respond well
            - Add multiple sources if available
            - Check if streaming works ❌ not feasible
            - Add this to hugging face space ❌ not feasible
            """)

    # When the submit button is clicked, call the oci_llm_model function
    # Include temperature_slider in the inputs
    
    #user_input.change(fn=check_input, inputs=[user_input], outputs=[warning_message])
    submit_button.click(fn=oci_llm_model, 
                        inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider], 
                        outputs=[response_output, source_output, warning_message])
    clear_button.click(fn=clear_input, 
                       inputs=[],  # No input is needed for clearing
                       outputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, source_output, warning_message])
    flag_button.click(fn=flag_response, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    like_button.click(fn=handle_positive_feedback, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    dislike_button.click(fn=handle_negative_feedback, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    #file_upload.change(fn=handle_file_upload, inputs=file_upload, outputs=warning_message)
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:13px; color:#3c3c3c"> 
    Version 0.9.1.
    </span> </div>""") 
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:11px; color:#3c3c3c"> 
    Note: While the model strives to be accurate and helpful, it is not a substitute for legal advice.  It's always recommended to consult with a qualified attorney for specific legal issues. 
    </span> </div>""")

# Step 3: Launch the interface
app.launch(share=True)#, server_port=44510)

## test: Can a member give anything of value more than a hundred dollar?

Running on local URL:  http://127.0.0.1:7879
Running on public URL: https://f5354e95d0c9ecaa10.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/opt/conda/lib/python3.8/site-packages/gradio/route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
  File "/opt/conda/lib/python3.8/site-packages/gradio/blocks.py", line 1923, in process_api
    result = await self.call_function(
  File "/opt/conda/lib/python3.8/site-packages/gradio/blocks.py", line 1508, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/opt/conda/lib/python3.8/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/opt/conda/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
    return await future
  File "/opt/conda/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 85

## 1.2.1. Previous Implemention with New Dataset ⭐

In [114]:
IRELAND_DIRECTORY = "new_data/Ireland"
US_DIRECTORY = "new_data/USA"

# PROMPT_CONTEXT = """
#     Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable.
#     ###
#     Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
# """

PROMPT_CONTEXT = """
    Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable. Consider the user's country whenever applicable.
    ###
    Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
"""

In [126]:
def load_docs(directory):
  loader = MyDirectoryLoader(directory)
  documents = loader.load()
  return documents

ireland_documents = load_docs(IRELAND_DIRECTORY)
us_documents = load_docs(US_DIRECTORY)

def split_docs(documents,chunk_size=1000,chunk_overlap=100):
#def split_docs(documents,chunk_size=256,chunk_overlap=20): # second try
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)      
  docs = text_splitter.split_documents(documents)
  return docs

ireland_docs = split_docs(ireland_documents)
us_docs = split_docs(us_documents)

In [127]:
ireland_db = Qdrant.from_documents(ireland_docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")
us_db = Qdrant.from_documents(us_docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")

In [81]:
#message = 'Can I receive a gift of $50?'
message = 'The contact details?'
similar_docs = ireland_db.similarity_search(message,k=2)

In [121]:
for document in similar_docs:
    print(document.page_content)
    print(document.metadata['source'])

4  
  5. The Bank’s Support Structure  
The Bank has put in place a number of supports to assist staff in complying with their ethical 
obligations, including those contained in this Policy. The Compliance Function, located in 
ORD, is available to assist with any queries. The contact details are set out below:  
  
compliance@centralbank.ie    
  
Staff may also wish to discuss these matters with the external Ethics Officer.  
  
6. Registers  
  
Divisional management will maintain the Register of Gifts and Business Hospitality for staff 
within their divi sions. GSD will maintain the Register of Gifts and Business Hospitality for 
members of the Senior Leadership Committees. The content of these registers will be reported 
periodically to the Compliance Function.  
  
The Compliance Function will also keep a re gister of gifts which have been surrendered to the 
Bank in accordance with the requirements of this Policy. The Compliance Function will manage
new_data/Ireland/Central Bank

In [128]:
#### VERSION 2.0 ####
import re

model_id_16k = 'cohere.command-r-16k'
model_id_plus = 'cohere.command-r-plus'
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
feedback = "None"


# Define the path to flagged CSV file
flagged_data_csv = "flagged_data_folder/log.csv"
data_folder_path = "data/"

default_values = {
    "user_input": "",
    "model_dropdown": "Cohere 16k",
    "temperature_slider": 0,
    "top_p_slider": 0.7,
    "max_tokens_slider": 1000,
    "response_output": "",
    "source_output": "",
    "warning_message":"",
}

# Step 1: Set up the LLM Model
def oci_llm_model(user_input, country='Ireland', model=default_values['model_dropdown'], temperature=default_values['temperature_slider'] , top_p=default_values['top_p_slider'], max_tokens=default_values['max_tokens_slider']):

    ############################################################################# OCI Function Starts Here
    
    if len(user_input) > 10:
        warning_message = "Your feedback is much appreciated."
    
        message = user_input
        # change k placement later
        similar_docs = ireland_db.similarity_search(message,k=2) if country == 'Ireland' else us_db.similarity_search(message,k=2)
        #similar_docs = get_similiar_docs(message)
        #similar_docs = get_similiar_docs_faiss(message)

    # The output of sources is not totally desirable atm
    #     context
        concatenated_content = ""        
        sourcePageNumber = ""
        source = ""
        source_name = ""
        unique_source_set = set() 
        # sourceLinks = ""
        # unique_source_links = set() 
        for document in similar_docs:
            concatenated_content += document.page_content 
            #source_link = document.metadata["source"]
            file_pattern = r"^new_data/(?:USA|Ireland)/?"
            source = re.sub(file_pattern, "",  document.metadata["source"])
            #source = document.metadata["source"].replace("new_data/", "").replace("Ireland/","")
            if source.endswith(".csv"):
                sourcePageNumber = 0
            elif source.endswith(".pdf"):
                sourcePageNumber = int(document.metadata["page"])+1
            # sourcePageNumber = int(document.metadata["page"]) if document.metadata["page"] else 0
            # sourcePageNumber = sourcePageNumber + 1
            if source_name not in unique_source_set:
                source_name = "File Name:" + source + '\n' + "Page Number:" + str(sourcePageNumber)
                unique_source_set.add(source_name) 
                
            # if source_link not in unique_source_links:
                #unique_source_links.add(source_link)  
                #sourceLinks += "<a href='" + objectStorageLink + source_link + "#page=" + "' target='_blank'>" + source_link[source_link.rfind("/")+1:] + " (page "+str(sourcePageNumber) + ")</a>\n"

        #source
        # source_name = ""
        # for document in similar_docs:
        #     source_name = document.metadata['source'].replace("new_data/", "") # since the folder data/ was always present

        #question
        prompt_token_input = prompt_context

        # prompts = [
        # f"""Please go through this text: {concatenated_content}
        # Question: {message}
        # Instruction: {prompt_token_input}"""]
        # prompts = "".join(prompts)
        
        prompts = [
        f"""Please go through this text: {concatenated_content}, the user's country is {country_dropdown}
        Question: {message}
        Instruction: {prompt_token_input}"""]
        prompts = "".join(prompts)

        #prompt template
        generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config={},signer=oci_signer, service_endpoint=service_endpoint, retry_strategy=oci.retry.NoneRetryStrategy(), timeout=(10,240))

        chat_detail = oci.generative_ai_inference.models.ChatDetails()
        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
        #chat_request = oci.generative_ai_inference.models.GenericChatRequest() # using this result into an error, it might need a different authentication method
        chat_request.message = prompts
        chat_request.max_tokens = max_tokens #1000 #max_tokens
        chat_request.is_stream = False # True results to an error
        chat_request.temperature = temperature #temperature
        chat_request.top_p = top_p # top_p
        #chat_request.top_k = 10 #top_k # Only support topK within [0, 500]
        chat_request.frequency_penalty = 0 #frequency_penalty
        #chat_request.chat_history = oci.generative_ai_inference.models.CohereMessage(role="USER", message="")

        # This part is redundant since the model keeps the history
        # previous_chat_message = oci.generative_ai_inference.models.CohereMessage(role="USER", message=previous_chat_message)
        # previous_chat_reply = oci.generative_ai_inference.models.CohereMessage(role="CHATBOT", message=previous_chat_reply)
        # chat_request.chat_history = [previous_chat_message, previous_chat_reply]

        chat_request.documents = [
        {
            "title": "Tell more about MCOAssistant",
            "snippet": "The primary objective of this integration is to advance the solution’s capabilities in delivering personalised user interactions and efficient data retrieval directly tailored to user needs. Specifically, not only is the ambition to make it easier to understand how to use the product but to also improve policy-related data retrieval for users of MCO. This AI is designed to assist financial services firms in effectively managing and mitigating compliance risk by providing real-time insights into relevant business rules and regulations. It should also be able to answer financial questions within the context of these regulations.",
            "website": "https://mco.mycomplianceoffice.com/"
        },
        # {
        #     "title": "More rules and regularation information in the United Stated",
        #     "snippet": "FINRA's rules and guidance in the US strive to protect investors and ensure the integrity of today's rapidly evolving market.",
        #     "website": "https://www.finra.org/rules-guidance/rulebooks/finra-rules"
        # },
        # {
        #     "title": "More rules and regularation information in the United Kingdom",
        #     "snippet": "This Handbook contains the complete record of Financial Conduct Authority (FCA) Legal Instruments.",
        #     "website": "http://www.handbook.fca.org.uk/"
        # }
        ]

        # if "<compartment_ocid>" in compartment_id:
        #     print("ERROR:Please update your compartment id in target python file")
        #     quit()

        # Selectiong the model
        if model == "Cohere 16k":
            chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id_16k)
        elif model == "Cohere Plus":
            chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id_plus)

        #chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id)

        chat_detail.compartment_id = compartment_id
        chat_detail.chat_request = chat_request

        chat_response = generative_ai_inference_client.chat(chat_detail)

        # result
        data = chat_response.data
        chat_response_data = data.chat_response
        chat_response_text = chat_response_data.text
        #print("Extracted text:", chat_response_text)
        return chat_response_text, source_name, warning_message
        #oci.util.to_dict(chat_response_text+' For more info check the below links: \n'+sourceLinks)
        
    else: # if the user prompt's length is less than 10, just return the warning message and do not go into LLM model
        chat_response_text = ""
        source_name = ""
        warning_message = "⚠️ Warning: Your input is too short."
        return chat_response_text, source_name, warning_message

    ############################################################################# OCI Function Ends Here   
    
def clear_input():
    # Return an empty string to clear the user_input Textbox
    #return "", "Cohere", 0, 0.7, 1000, ""
    return default_values["user_input"], default_values["model_dropdown"], default_values["temperature_slider"], default_values["top_p_slider"], default_values["max_tokens_slider"], default_values["response_output"], default_values['source_output'], default_values["warning_message"]

def handle_positive_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='liked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def handle_negative_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='disliked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def flag_response(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    # Generate a timestamp for when the content was flagged
    #timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Open the CSV file in append mode
    
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Write the timestamp, user input, and model response to the CSV file
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Content flagged for review."
    return warning_message



# Step 2: Set up Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app: #theme=gr.themes.Soft()
    gr.Markdown("# <span style='font-size:34px; color:#29339B'> MCO Assistant </span>")
    gr.Markdown("""    
                    This interface allows you to interact with different language models. Choose your settings and input your question below. You do not need to change any specifications.""")
    with gr.Row():
        with gr.Column(scale=3): #'scale' value should be an integer. Using 2.5 will cause issues.
            user_input = gr.Textbox(label="Your Question", placeholder="Ask the model what it thinks...", lines=3 ,show_copy_button=False)
                
            with gr.Row():
                submit_button = gr.Button("✔️ Submit")
                clear_button = gr.Button("🗑️ Clear")
                flag_button = gr.Button("🚩 Flag")
            response_output = gr.Textbox(label="Model Response", lines=8, show_copy_button=True, placeholder="The model's response will be displayed here...")
            source_output = gr.Textbox(label="Source", lines=1, show_copy_button=True, placeholder="The reponse's source will be displayed here...")
            with gr.Row():
                like_button = gr.Button("👍 Like")
                dislike_button = gr.Button("👎 Dislike")
        with gr.Column(scale=1):
            #with gr.Row():
                country_dropdown = gr.Dropdown(choices=["Ireland", "USA"], label="Country", value='Ireland', info="Which country should the regulation be applied to?")
                model_dropdown = gr.Dropdown(choices=["Cohere 16k", "Cohere Plus"], label="Model", value='Cohere 16k', info="Cohere 16k is a smaller-scale language model than Cohere Plus. While Cohere 16k offers high-quality responses, it might not possess the same level of sophistication and depth as Cohere Plus. ")
                temperature_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0, label="Temperature", info="The level of randomness used to generate the output text. A lower temperature means less random generations.")
                top_p_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.7, label="Top P", info="This ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.")
                max_token_slider = gr.Slider(minimum=100, maximum=4000, step=500, value=1000, label="Max Tokens", info="The maximum number of output tokens that the model will generate for the response.")
                #country_checkbox = gr.CheckboxGroup(["USA", "UK"], label="Countries", info="Which countries should the regulations be applied for?"),
            #with gr.Row():
                warning_message = gr.Textbox(label="System Message", visible=True, lines=1)
                #file_upload = gr.File(label="Upload your documents here.")                
    
    #Define examples
    # examples = [
    #     #{"user_input": "Can a member give anything of value more than a hundred dollar?", "model": "Cohere", "temperature": 0.5, "top_p": 0.7, "max_tokens":1000}
    #     #{"user_input": "Explain the concept of deep learning.", "model": "Cohere", "temperature": 0.5, "top_p": 0.7}
    #     ["Tell me more about MCOAssitant.",  "Cohere 16k",  0.2,  1, 800],
    #     ["Provide contact information for the Bank of England.",  "Cohere 16k",  0,  1, 500],
    #     ["Can you summarize Private Securities Transactions of an Associated Person rules?", "Cohere 16k",  0,  1, 1500],
    #     ["Can a member give anything of value more than a hundred dollar?",  "Cohere 16k",  0,  0.9, 1000],
    #     ["Which obligations do not apply when successive personal transactions are carried out on behalf of a person? Make a list.",  "Cohere 16k",  0,  1, 1000],
    #     ["What are examples of giving advice, or providing services, to an employer in connection with a group personal pension scheme or group stakeholderpension scheme? Make a list.", "Cohere 16k", 0,  1, 1500]
    # ]

    # Add examples to the interface
    # gr.Examples(examples=examples, inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider])#, outputs=response_output)

        
    # When the submit button is clicked, call the oci_llm_model function
    # Include temperature_slider in the inputs
    
    #user_input.change(fn=check_input, inputs=[user_input], outputs=[warning_message])
    submit_button.click(fn=oci_llm_model, 
                        inputs=[user_input, country_dropdown, model_dropdown, temperature_slider, top_p_slider, max_token_slider], 
                        outputs=[response_output, source_output, warning_message])
    clear_button.click(fn=clear_input, 
                       inputs=[],  # No input is needed for clearing
                       outputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, source_output, warning_message])
    flag_button.click(fn=flag_response, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    like_button.click(fn=handle_positive_feedback, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    dislike_button.click(fn=handle_negative_feedback, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    #file_upload.change(fn=handle_file_upload, inputs=file_upload, outputs=warning_message)
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:13px; color:#3c3c3c"> 
    Version 0.9.1.
    </span> </div>""") 
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:11px; color:#3c3c3c"> 
    Note: While the model strives to be accurate and helpful, it is not a substitute for legal advice.  It's always recommended to consult with a qualified attorney for specific legal issues. 
    </span> </div>""")

# Step 3: Launch the interface
app.launch(share=True)#, server_port=44510)

## test: Can a member give anything of value more than a hundred dollar?

Running on local URL:  http://127.0.0.1:7914
Running on public URL: https://d2bb3349bf7b3013a7.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




### Concerns, Current Issues
- When added the similarity search inside the loop, the model repeatition problem seems to be resolved. This could be due to lower number of k as well which I decreased from 4 to 2.
- The model is able to respond based from the Excel file, however it is not able to comprehend the mixed logical questions, like adding two or more column values in a row alongside their column labels. It might give back false responses. Furthermore, the Excel source is never mentioned.
- The model is somehow well able to read the table inside a document (pdf file) and  the column headers are properly rendered. (check further). However the table inside a table (csv, excel) is not properly read. Maybe converting the excel file to pdf would help?
- Acronyms should be avoided both in questions and the document sources, unless they are well defined in the context.
- It is better to state which MasterCard groups are allowed instead of resitricted since the model have tendendy to bring back the names of restricted secuirties as the answers, instead of taking this into account that they are forbidden.
- We have to decide the file types (pdf, document, html, excel, even images) and check which ones are the common ones and the proper data cleaning each one needs. At the moment we are going to one-size-fits-all text parsing which which is not quiet tailored to each files nature. Based on my experience, it is better to have all the information/context in the same file formats as much as possible.
- We also need to decide on the common user levels as well.
- The more we need human intervention in cleaning and formatting the data, before feeding it to the model, the more error prone the knowledge bank would be. So we need to be able to automate this process as much as we can.
- The source is almost never the restricted file, either because this is too small or either the first document is the first source.
- As the number of clients and their respective client groups increases, the level of complexities arises, which can lead to slower database reading and eventually higher latency.


### To do list
1. Add country dropdown ✔️
2. Make the country work ✔️
3. Add similary search for different countries ✔️
4. Check about more options for metadata ✔️
5. Add default country ✔️
6. Make a csv file of their excel file and upload it ✔️
7. Hide database folder from the source ✔️
8. Page metadata is not working for csv file, add that restriction to the loop ✔️
9. Add client, company and user group ✔️
10. Better define k for similiar search ✔️
11. Omit model, top p and max tokens if they seem redundant ✔️
12. check model output or the regulations that doesn't apply in certain clients and countries ✔️
13. Chat request documents could be changed based on the client ✔️
14. Layer the database based on country and user level ✔️
15. Put resitriction for countries in a sepatarate file and upload them separately ✔️
16. Check if clear button properly works17. Check if clear button properly works ✔️
17. Check if like button properly works ✔️
18. Check if dislike button properly works ✔️
19. Check if flag button properly works ✔️
20. Add client and user group for like, dislike, clear button- Add client and user group for like, dislike, clear button ✔️
21. Add examples from their examples ✔️
22. Add error messages instead of system message ✔️


- Add color to the warning message - this didn't work
- Check if country check box is compatible to have one value only  
- Check out how frequency penalty works
- Check if there are restriction in multipe boxes in Gradio


## Model for Presentation ⭐⭐

In [175]:
CENTRAL_BANK_OF_IRELAND_DIRECTORY = "new_data/central_bank_of_Ireland"
MASTERCARD_US_EMPLOYEE_DIRECTORY = "new_data/mastercard/us_employee"
MASTERCARD_IRISH_EMPLOYEE_DIRECTORY = "new_data/mastercard/irish_employee"

# PROMPT_CONTEXT = """
#     Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable.
#     ###
#     Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
# """

PROMPT_CONTEXT = """
    Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable. Consider the user's country whenever applicable.
    ###
    Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
"""

In [179]:
PROMPT_CONTEXT = """
    Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable. Consider the user's country whenever applicable.
    If the response includes an 'X', it indicates that the corresponding column is applicable. Please interpret these 'X' marks as indicators of applicability.
    ###
    Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
"""

In [180]:
def load_docs(directory):
  loader = MyDirectoryLoader(directory)
  documents = loader.load()
  return documents

central_bank_of_ireland_documents = load_docs(CENTRAL_BANK_OF_IRELAND_DIRECTORY)
mastercard_us_documents = load_docs(MASTERCARD_US_EMPLOYEE_DIRECTORY)
mastercard_irish_documents = load_docs(MASTERCARD_IRISH_EMPLOYEE_DIRECTORY)

def split_docs(documents,chunk_size=1000,chunk_overlap=100):
#def split_docs(documents,chunk_size=256,chunk_overlap=20): # second try
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)      
  docs = text_splitter.split_documents(documents)
  return docs

central_bank_of_ireland_split_docs = split_docs(central_bank_of_ireland_documents)
mastercard_us_split_docs = split_docs(mastercard_us_documents)
mastercard_irish_split_docs = split_docs(mastercard_irish_documents)

central_bank_of_ireland_db = Qdrant.from_documents(central_bank_of_ireland_split_docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")
mastercard_us_db = Qdrant.from_documents(mastercard_us_split_docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")
mastercard_irish_db = Qdrant.from_documents(mastercard_irish_split_docs, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")

In [214]:
#### VERSION 2.0 ####
import re

model_id_16k = 'cohere.command-r-16k'
model_id_plus = 'cohere.command-r-plus'
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
feedback = "None"
k = 2


# Define the path to flagged CSV file
flagged_data_csv = "flagged_data_folder/log.csv"
data_folder_path = "data/"

default_values = {
    "user_input": "",
    "model_dropdown": "Cohere 16k",
    "temperature_slider": 0,
    "top_p_slider": 0.7,
    "max_tokens_slider": 1000,
    "response_output": "",
    "source_output": "",
    "warning_message":"",
    "client_dropdown":"Mastercard",
    "user_group_dropdown":"US Employee"
}

# Step 1: Set up the LLM Model
def oci_llm_model(user_input, client, user_group):

    ############################################################################# OCI Function Starts Here
    
    if len(user_input) > 10:
        warning_message = "Your feedback helps me learn and grow. Please let me know if my answer was helpful."
    
        message = user_input
        # change k placement later
        if client == 'Central Bank of Ireland' and user_group == 'Irish Employee':
            similar_docs = central_bank_of_ireland_db.similarity_search(message,k=k)
                
        elif client == 'Mastercard' and user_group == 'US Employee':
            similar_docs = mastercard_us_db.similarity_search(message,k=k) 
            
        elif client == 'Mastercard' and user_group == 'Irish Employee':
            similar_docs = mastercard_irish_db.similarity_search(message,k=k) 
        
        else: #how about the model output?
            warning_message = f"{client} regulations don't apply for {user_group}s."
            chat_response_text = ""
            source_name = ""
            raise gr.Error(f"{client} regulations don't apply for {user_group}s.")
            return chat_response_text, source_name, warning_message
        
        concatenated_content = ""        
        sourcePageNumber = ""
        source = ""
        source_name = ""
        unique_source_set = set() 
        # sourceLinks = ""
        # unique_source_links = set() 
        for document in similar_docs:
            concatenated_content += document.page_content 
            file_pattern = r"^new_data/"
            source = re.sub(file_pattern, "",  document.metadata["source"])
            if source.endswith(".csv"):
                sourcePageNumber = 0
            elif source.endswith(".pdf"):
                sourcePageNumber = int(document.metadata["page"]) + 1
            if source_name not in unique_source_set:
                source_name = "🌎 File Name: " + source + '\n' + "🌏 Page Number: " + str(sourcePageNumber)
                unique_source_set.add(source_name) 
                
        #question
        prompt_token_input = prompt_context

        prompts = [
        f"""Please go through this text: {concatenated_content}
        Question: {message}
        Instruction: {prompt_token_input}"""]
        prompts = "".join(prompts)
        
        #prompt template
        generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config={},signer=oci_signer, service_endpoint=service_endpoint, retry_strategy=oci.retry.NoneRetryStrategy(), timeout=(10,240))
        chat_detail = oci.generative_ai_inference.models.ChatDetails()
        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
        chat_request.message = prompts
        chat_request.max_tokens = 2000 #1000 #max_tokens
        chat_request.is_stream = False # True results to an error
        chat_request.temperature = 0 #temperature #temperature
        chat_request.top_p = 1 # top_p
        #chat_request.top_k = 10 #top_k # Only support topK within [0, 500]
        chat_request.frequency_penalty = 0 #frequency_penalty


        chat_request.documents = [
        {
            "title": "Tell more about MCOAssistant",
            "snippet": "The primary objective of this integration is to advance the solution’s capabilities in delivering personalised user interactions and efficient data retrieval directly tailored to user needs. Specifically, not only is the ambition to make it easier to understand how to use the product but to also improve policy-related data retrieval for users of MCO. This AI is designed to assist financial services firms in effectively managing and mitigating compliance risk by providing real-time insights into relevant business rules and regulations. It should also be able to answer financial questions within the context of these regulations.",
            "website": "https://mco.mycomplianceoffice.com/"
        },
        # {
        #     "title": "More rules and regularation information in the United Stated",
        #     "snippet": "FINRA's rules and guidance in the US strive to protect investors and ensure the integrity of today's rapidly evolving market.",
        #     "website": "https://www.finra.org/rules-guidance/rulebooks/finra-rules"
        # },
        # {
        #     "title": "More rules and regularation information in the United Kingdom",
        #     "snippet": "This Handbook contains the complete record of Financial Conduct Authority (FCA) Legal Instruments.",
        #     "website": "http://www.handbook.fca.org.uk/"
        # }
        ]


        chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id_16k)

        chat_detail.compartment_id = compartment_id
        chat_detail.chat_request = chat_request

        chat_response = generative_ai_inference_client.chat(chat_detail)

        # result
        data = chat_response.data
        chat_response_data = data.chat_response
        chat_response_text = chat_response_data.text
        gr.Info("Please let me know if my answer was helpful by providing feedback.")
        return chat_response_text, source_name, warning_message

        
    else: # if the user prompt's length is less than 10, just return the warning message and do not go into LLM model
        chat_response_text = ""
        source_name = ""
        warning_message = "⚠️ Warning: Your question is too short."
        raise gr.Error("⚠️Your question is too short!")
        return chat_response_text, source_name, warning_message

    ############################################################################# OCI Function Ends Here   
    
def clear_input():
    # Return an empty string to clear the user_input Textbox
    return default_values["user_input"], default_values["client_dropdown"], default_values["user_group_dropdown"],  default_values["response_output"], default_values['source_output'], default_values["warning_message"]

def handle_positive_feedback(user_input, client_dropdown, user_group_dropdown, response_output):
    feedback ='liked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, client_dropdown, user_group_dropdown, response_output, feedback])
    warning_message = "Your feedback is valuable. I'll use it to improve my responses."
    return warning_message

def handle_negative_feedback(user_input, client_dropdown, user_group_dropdown, response_output):
    feedback ='disliked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, client_dropdown, user_group_dropdown, response_output, feedback])
    warning_message = "Your feedback is valuable. I'll use it to improve my responses."
    return warning_message

def flag_response(user_input, client_dropdown, user_group_dropdown, response_output):
    # Open the CSV file in append mode
    feedback = 'flagged'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Write the timestamp, user input, and model response to the CSV file
        writer.writerow([timestamp, user_input, client_dropdown, user_group_dropdown, response_output, feedback])
    warning_message = "Content flagged for review."
    return warning_message


# Step 2: Set up Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app: #theme=gr.themes.Soft()
    gr.Markdown("# <span style='font-size:34px; color:#29339B'> MCO Assistant </span>")
    gr.Markdown("""<span style='color:#3D52D5'> MCOAssistant </span> is a large language model specializing in compliance best practices. It offers expert guidance on a wide range of regulations, policies, and industry standards. By providing clear and concise answers, ComplianceGPT empowers businesses to navigate complex compliance landscapes with confidence. 
    Whether you need to interpret regulations, assess risks, or develop compliance strategies, <span style='color:#3D52D5'> MCOAssistant </span> is your trusted AI companion.""")
    with gr.Row():
        with gr.Column(scale=3):
            user_input = gr.Textbox(label="Your Question", placeholder="What is in your mind...", lines=3 ,show_copy_button=False)
                
            with gr.Row():
                submit_button = gr.Button("✔️ Submit")
                clear_button = gr.Button("🗑️ Clear")
                flag_button = gr.Button("🚩 Flag")
            response_output = gr.Textbox(label="Model Response", lines=4, show_copy_button=True, placeholder="The model's response will be displayed here...")
            source_output = gr.Textbox(label="Source", lines=1, show_copy_button=True, placeholder="The reponse's source will be displayed here...")
            with gr.Row():
                like_button = gr.Button("👍 Like")
                dislike_button = gr.Button("👎 Dislike")
        with gr.Column(scale=1):
            client_dropdown = gr.Dropdown(choices=["Central Bank of Ireland", "Mastercard"], label="Client Name", value='Mastercard', info="This is for presentation purpose only. The client's name is not allowed to change by the client.")
            user_group_dropdown = gr.Dropdown(choices=["Irish Employee", "US Employee"], label="User Group", value='US Employee', info="Which user is asking the question. For presentation purpose only, it is not visible in production level.")
            warning_message = gr.Textbox(label="System Message", lines=3, visible=True, placeholder= "📋 For more detailed and informative answers, try asking longer questions. 📋 If you don't know what to ask try one the examples below.")   

    
    #Define examples
    examples = [
        ["Can I receive a gift of $50?",  "Central Bank of Ireland",  "Irish Employee"],
        ["What kind of a gift can I not receive?",  "Central Bank of Ireland", "Irish Employee"],
        ["Who do I need to inform about my gift?", "Central Bank of Ireland", "Irish Employee"],
        ["What are the approval levels to give a gift to A Government Official?",  "Mastercard", "US Employee"],
        ["Can I give a gift to a government official of $49?",  "Mastercard",  "US Employee"],
        ["Do I need approval to give a gift of $50 to a government official?", "Mastercard", "US Employee"],
        ["What are the approval levels for a gift to a Non Government business partner?", "Mastercard", "US Employee"],
        ["What are the pre-approval levels for gifts for a Commercial Business Partner?", "Mastercard", "US Employee"],
        ["Can I trade VICO?", "Mastercard", "US Employee"],
        ["Can I trade VICO?", "Mastercard", "Irish Employee"],
    ]
    
    #Add examples to the interface
    gr.Examples(examples=examples, inputs=[user_input, client_dropdown, user_group_dropdown])#, outputs=response_output)

    
    submit_button.click(fn=oci_llm_model, 
                        inputs=[user_input, client_dropdown, user_group_dropdown], 
                        outputs=[response_output, source_output, warning_message])
    clear_button.click(fn=clear_input, 
                       inputs=[],  # No input is needed for clearing
                       outputs=[user_input, client_dropdown, user_group_dropdown, response_output, source_output, warning_message])
    flag_button.click(fn=flag_response, 
                      inputs=[user_input, client_dropdown, user_group_dropdown, response_output],  
                      outputs=[warning_message])
    like_button.click(fn=handle_positive_feedback, 
                      inputs=[user_input, client_dropdown, user_group_dropdown, response_output],  
                      outputs=[warning_message])
    dislike_button.click(fn=handle_negative_feedback, 
                      inputs=[user_input, client_dropdown, user_group_dropdown, response_output],  
                      outputs=[warning_message])    
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:11px; color:#3c3c3c"> 
    Note: While the model strives to be accurate and helpful, it is not a substitute for legal advice.  It's always recommended to consult with a qualified attorney for specific legal issues. 
    </span> </div>""")

# Step 3: Launch the interface
app.launch(share=True, server_port=44510)

Running on local URL:  http://127.0.0.1:44510
Running on public URL: https://dfccd96d35eb98eb22.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## Model with Error Handling

In [211]:
#### VERSION 2.0 ####
import re

model_id_16k = 'cohere.command-r-16k'
model_id_plus = 'cohere.command-r-plus'
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
feedback = "None"
k = 2


# Define the path to flagged CSV file
flagged_data_csv = "flagged_data_folder/log.csv"
data_folder_path = "data/"

default_values = {
    "user_input": "",
    "model_dropdown": "Cohere 16k",
    "temperature_slider": 0,
    "top_p_slider": 0.7,
    "max_tokens_slider": 1000,
    "response_output": "",
    "source_output": "",
    "warning_message":"",
    "client_dropdown":"Mastercard",
    "user_group_dropdown":"US Employee"
}

# Step 1: Set up the LLM Model
def oci_llm_model(user_input, client, user_group):

    ############################################################################# OCI Function Starts Here
    
    if len(user_input) > 10:
        warning_message = "Your feedback helps me learn and grow. Please let me know if my answer was helpful."
    
        message = user_input
        # change k placement later
        if client == 'Central Bank of Ireland' and user_group == 'Irish Employee':
            similar_docs = central_bank_of_ireland_db.similarity_search(message,k=k)
                
        elif client == 'Mastercard' and user_group == 'US Employee':
            similar_docs = mastercard_us_db.similarity_search(message,k=k) 
            
        elif client == 'Mastercard' and user_group == 'Irish Employee':
            similar_docs = mastercard_irish_db.similarity_search(message,k=k) 
        
        else: #how about the model output?
            warning_message = f"{client} regulations don't apply for {user_group}s."
            chat_response_text = ""
            source_name = ""
            raise gr.Error(f"{client} regulations don't apply for {user_group}s.")
            return chat_response_text, source_name, warning_message
        
        concatenated_content = ""        
        sourcePageNumber = ""
        source = ""
        source_name = ""
        unique_source_set = set() 
        # sourceLinks = ""
        # unique_source_links = set() 
        source_link = "<a href='https:/google.com/' >Visit google.com!</a>"
        for document in similar_docs:
            concatenated_content += document.page_content 
            file_pattern = r"^new_data/"
            source = re.sub(file_pattern, "",  document.metadata["source"])
            if source.endswith(".csv"):
                sourcePageNumber = 0
            elif source.endswith(".pdf"):
                sourcePageNumber = int(document.metadata["page"]) + 1
            if source_name not in unique_source_set:
                source_name = "🌎 File Name: " + source + '\n' + "🌏 Page Number: " + str(sourcePageNumber) 
                unique_source_set.add(source_name)                 
                
        #question
        prompt_token_input = prompt_context

        prompts = [
        f"""Please go through this text: {concatenated_content}
        Question: {message}
        Instruction: {prompt_token_input}"""]
        prompts = "".join(prompts)
        
        #prompt template
        generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config={},signer=oci_signer, service_endpoint=service_endpoint, retry_strategy=oci.retry.NoneRetryStrategy(), timeout=(10,240))
        chat_detail = oci.generative_ai_inference.models.ChatDetails()
        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
        chat_request.message = prompts
        chat_request.max_tokens = 2000 #1000 #max_tokens
        chat_request.is_stream = False # True results to an error
        chat_request.temperature = 0 #temperature #temperature
        chat_request.top_p = 1 # top_p
        #chat_request.top_k = 10 #top_k # Only support topK within [0, 500]
        chat_request.frequency_penalty = 0 #frequency_penalty


        chat_request.documents = [
        {
            "title": "Tell more about MCOAssistant",
            "snippet": "The primary objective of this integration is to advance the solution’s capabilities in delivering personalised user interactions and efficient data retrieval directly tailored to user needs. Specifically, not only is the ambition to make it easier to understand how to use the product but to also improve policy-related data retrieval for users of MCO. This AI is designed to assist financial services firms in effectively managing and mitigating compliance risk by providing real-time insights into relevant business rules and regulations. It should also be able to answer financial questions within the context of these regulations.",
            "website": "https://mco.mycomplianceoffice.com/"
        },
        # {
        #     "title": "More rules and regularation information in the United Stated",
        #     "snippet": "FINRA's rules and guidance in the US strive to protect investors and ensure the integrity of today's rapidly evolving market.",
        #     "website": "https://www.finra.org/rules-guidance/rulebooks/finra-rules"
        # },
        # {
        #     "title": "More rules and regularation information in the United Kingdom",
        #     "snippet": "This Handbook contains the complete record of Financial Conduct Authority (FCA) Legal Instruments.",
        #     "website": "http://www.handbook.fca.org.uk/"
        # }
        ]


        chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id_16k)

        chat_detail.compartment_id = compartment_id
        chat_detail.chat_request = chat_request

        chat_response = generative_ai_inference_client.chat(chat_detail)

        # result
        data = chat_response.data
        chat_response_data = data.chat_response
        chat_response_text = chat_response_data.text
        gr.Info("Please let me know if my answer was helpful by providing feedback.")
        return chat_response_text, source_name, warning_message, source_link

        
    else: # if the user prompt's length is less than 10, just return the warning message and do not go into LLM model
        chat_response_text = ""
        source_name = ""
        warning_message = "⚠️ Warning: Your question is too short."
        source_link = ""
        raise gr.Error("⚠️Your question is too short!")
        return chat_response_text, source_name, warning_message, source_link

    ############################################################################# OCI Function Ends Here   
    
def clear_input():
    # Return an empty string to clear the user_input Textbox
    return default_values["user_input"], default_values["client_dropdown"], default_values["user_group_dropdown"],  default_values["response_output"], default_values['source_output'], default_values["warning_message"]

def handle_positive_feedback(user_input, client_dropdown, user_group_dropdown, response_output):
    feedback ='liked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, client_dropdown, user_group_dropdown, response_output, feedback])
    warning_message = "Your feedback is valuable. I'll use it to improve my responses."
    return warning_message

def handle_negative_feedback(user_input, client_dropdown, user_group_dropdown, response_output):
    feedback ='disliked'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, client_dropdown, user_group_dropdown, response_output, feedback])
    warning_message = "Your feedback is valuable. I'll use it to improve my responses."
    return warning_message

def flag_response(user_input, client_dropdown, user_group_dropdown, response_output):
    # Open the CSV file in append mode
    feedback = 'flagged'
    with open(flagged_data_csv, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Write the timestamp, user input, and model response to the CSV file
        writer.writerow([timestamp, user_input, client_dropdown, user_group_dropdown, response_output, feedback])
    warning_message = "Content flagged for review."
    return warning_message


# Step 2: Set up Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app: #theme=gr.themes.Soft()
    gr.Markdown("# <span style='font-size:34px; color:#29339B'> MCO Assistant </span>")
    gr.Markdown("""<span style='color:#3D52D5'> MCOAssistant </span> is a large language model specializing in compliance best practices. It offers expert guidance on a wide range of regulations, policies, and industry standards. By providing clear and concise answers, ComplianceGPT empowers businesses to navigate complex compliance landscapes with confidence. 
    Whether you need to interpret regulations, assess risks, or develop compliance strategies, <span style='color:#3D52D5'> MCOAssistant </span> is your trusted AI companion.""")
    with gr.Row():
        with gr.Column(scale=3):
            user_input = gr.Textbox(label="Your Question", placeholder="What is in your mind...", lines=3 ,show_copy_button=False)
                
            with gr.Row():
                submit_button = gr.Button("✔️ Submit")
                clear_button = gr.Button("🗑️ Clear")
                flag_button = gr.Button("🚩 Flag")
            response_output = gr.Textbox(label="Model Response", lines=4, show_copy_button=True, placeholder="The model's response will be displayed here...")
            source_output = gr.Textbox(label="Source", lines=1, show_copy_button=True, placeholder="The reponse's source will be displayed here...")
            gr.HTML(source_link)
            with gr.Row():
                like_button = gr.Button("👍 Like")
                dislike_button = gr.Button("👎 Dislike")
        with gr.Column(scale=1):
            client_dropdown = gr.Dropdown(choices=["Central Bank of Ireland", "Mastercard"], label="Client Name", value='Mastercard', info="This is for presentation purpose only. The client's name is not allowed to change by the client.")
            user_group_dropdown = gr.Dropdown(choices=["Irish Employee", "US Employee"], label="User Group", value='US Employee', info="Which user is asking the question. For presentation purpose only, it is not visible in production level.")
            warning_message = gr.Textbox(label="System Message", lines=3, visible=True, placeholder= "📋 For more detailed and informative answers, try asking longer questions. 📋 If you don't know what to ask try one the examples below.")   

    
    #Define examples
    examples = [
        ["Can I receive a gift of $50?",  "Central Bank of Ireland",  "Irish Employee"],
        ["What kind of a gift can I not receive?",  "Central Bank of Ireland", "Irish Employee"],
        ["Who do I need to inform about my gift?", "Central Bank of Ireland", "Irish Employee"],
        ["What are the approval levels to give a gift to A Government Official?",  "Mastercard", "US Employee"],
        ["Can I give a gift to a government official of $49?",  "Mastercard",  "US Employee"],
        ["Do I need approval to give a gift of $50 to a government official?", "Mastercard", "US Employee"],
        ["What are the approval levels for a gift to a Non Government business partner?", "Mastercard", "US Employee"],
        ["What are the pre-approval levels for gifts for a Commercial Business Partner?", "Mastercard", "US Employee"],
        ["Can I trade VICO?", "Mastercard", "US Employee"],
        ["Can I trade VICO?", "Mastercard", "Irish Employee"],
    ]
    
    #Add examples to the interface
    gr.Examples(examples=examples, inputs=[user_input, client_dropdown, user_group_dropdown])#, outputs=response_output)

    
    submit_button.click(fn=oci_llm_model, 
                        inputs=[user_input, client_dropdown, user_group_dropdown], 
                        outputs=[response_output, source_output, warning_message])
    clear_button.click(fn=clear_input, 
                       inputs=[],  # No input is needed for clearing
                       outputs=[user_input, client_dropdown, user_group_dropdown, response_output, source_output, warning_message])
    flag_button.click(fn=flag_response, 
                      inputs=[user_input, client_dropdown, user_group_dropdown, response_output],  
                      outputs=[warning_message])
    like_button.click(fn=handle_positive_feedback, 
                      inputs=[user_input, client_dropdown, user_group_dropdown, response_output],  
                      outputs=[warning_message])
    dislike_button.click(fn=handle_negative_feedback, 
                      inputs=[user_input, client_dropdown, user_group_dropdown, response_output],  
                      outputs=[warning_message])    
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:11px; color:#3c3c3c"> 
    Note: While the model strives to be accurate and helpful, it is not a substitute for legal advice.  It's always recommended to consult with a qualified attorney for specific legal issues. 
    </span> </div>""")

# Step 3: Launch the interface
app.launch(share=True, server_port=44451)

Running on local URL:  http://127.0.0.1:44451
Running on public URL: https://224226215a9a877d18.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## 1.3. Refined Version

In [80]:
#Constants
MODEL_ID_16k = 'cohere.command-r-16k'
MODEL_ID_PLUS = 'cohere.command-r-plus'
FLAGGED_DATA_PATH = "flagged_data_folder/log.csv"
default_values = {
    "user_input": "",
    "model_dropdown": "Cohere 16k",
    "temperature_slider": 0,
    "top_p_slider": 0.7,
    "max_tokens_slider": 1000,
    "response_output": "",
    "source_output": "",
    "warning_message":"",
}
PROMPT_CONTEXT = """
    Answer the questions based on the context below. You will use your knowledge base for reliable sources to provide context and legal citations where applicable.
    ###
    Context: Your an agent with a vast knowledge base and expertise in business conduct. You can answer your questions on a wide range of topics related to ethical practices, legal regulations, and best practices in the business world.
"""

# Utility function to get current timestamp
def get_current_timestamp():
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Step 1: Set up the LLM Model
def oci_llm_model(user_input, model=default_values['model_dropdown'], temperature=default_values['temperature_slider'] , top_p=default_values['top_p_slider'], max_tokens=default_values['max_tokens_slider']):
    
    if len(user_input) > 10:
        prompts, source_name= setup_chat_context(user_input)
        chat_response_text = process_chat_request(prompts, model, max_tokens, temperature, top_p)
        return chat_response_text, source_name, "Your feedback is much appreciated."
    else:
        return "", "", "⚠️ Warning: Your input is too short."
    
def setup_chat_context(user_input):
    similar_docs = get_similiar_docs(user_input)
    concatenated_content, source_name, sourceLinks = "", "", ""
    for document in similar_docs:
        concatenated_content += document.page_content 
        source_name = document.metadata['source'].replace("data/", "") # since the folder data/ was always present
        #source_link = document.metadata["source"]
        #sourcePageNumber = int(document.metadata["page"])
        #sourcePageNumber = sourcePageNumber + 1
#         if source_link not in unique_source_links:
#           unique_source_links.add(source_link)  
#           sourceLinks += "<a href='" + objectStorageLink + source_link + "#page=" + "' target='_blank'>" + source_link[source_link.rfind("/")+1:] + " (page "+str(sourcePageNumber) + ")</a>\n"
        #question
        prompts = [f"""Please go through this text: {concatenated_content}\nQuestion: {user_input}\nInstruction: {PROMPT_CONTEXT}"""]
        prompts = "".join(prompt s)
        return prompts, source_name       
        
def process_chat_request(prompts, model, max_tokens, temperature, top_p):
        #prompt template
        generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config={},signer=oci_signer, service_endpoint=service_endpoint, retry_strategy=oci.retry.NoneRetryStrategy(), timeout=(10,240))
        chat_request = setup_chat_request(prompts, max_tokens, temperature, top_p)
        chat_detail = setup_chat_detail(model, chat_request)
        chat_response = generative_ai_inference_client.chat(chat_detail)
        data = chat_response.data
        chat_response_data = data.chat_response
        chat_response_text = chat_response_data.text
        return chat_response_text        

def setup_chat_request(prompts, max_tokens, temperature, top_p):
        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
        chat_request.message = prompts
        chat_request.max_tokens = max_tokens 
        chat_request.is_stream = False # True results to an error
        chat_request.temperature = temperature 
        chat_request.top_p = top_p 
        #chat_request.top_k = 10 #top_k # Only support topK within [0, 500]
        #chat_request.frequency_penalty = 0 #frequency_penalty

        chat_request.documents = [
        {
            "title": "Tell more about MCOAssistant",
            "snippet": "The primary objective of this integration is to advance the solution’s capabilities in delivering personalised user interactions and efficient data retrieval directly tailored to user needs. Specifically, not only is the ambition to make it easier to understand how to use the product but to also improve policy-related data retrieval for users of MCO. This AI is designed to assist financial services firms in effectively managing and mitigating compliance risk by providing real-time insights into relevant business rules and regulations. It should also be able to answer financial questions within the context of these regulations.",
            "website": "https://mco.mycomplianceoffice.com/"
        },
        # {
        #     "title": "More rules and regularation information in the United Stated",
        #     "snippet": "FINRA's rules and guidance in the US strive to protect investors and ensure the integrity of today's rapidly evolving market.",
        #     "website": "https://www.finra.org/rules-guidance/rulebooks/finra-rules"
        # },
        # {
        #     "title": "More rules and regularation information in the United Kingdom",
        #     "snippet": "This Handbook contains the complete record of Financial Conduct Authority (FCA) Legal Instruments.",
        #     "website": "http://www.handbook.fca.org.uk/"
        # }
        ]
        return chat_request
        
def setup_chat_detail(model, chat_request):
        chat_detail = oci.generative_ai_inference.models.ChatDetails()
        model_id = MODEL_ID_16K if model == "Cohere 16k" else MODEL_ID_PLUS
        chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id) #✔️
        chat_detail.compartment_id = compartment_id
        chat_detail.chat_request = chat_request
        return chat_detail
    
# Specific UI Functions:   
    
def clear_input():
    # Return an empty string to clear the user_input Textbox
    #return "", "Cohere", 0, 0.7, 1000, ""
    return default_values["user_input"], default_values["model_dropdown"], default_values["temperature_slider"], default_values["top_p_slider"], default_values["max_tokens_slider"], default_values["response_output"], default_values['source_output'], default_values["warning_message"]

# def handle_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback_type):
#     feedback = 'liked' if feedback_type == 'positive' else 'disliked'
#     timestamp = get_current_timestamp()
#     with open(FLAGGED_DATA_CSV, "a", newline='', encoding='utf-8') as file:
#         writer = csv.writer(file)
#         writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
#     return "Thanks for your feedback."

def handle_positive_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='liked'
    timestamp = get_current_timestamp()
    with open(FLAGGED_DATA_PATH, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def handle_negative_feedback(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    feedback ='disliked'
    timestamp = get_current_timestamp()
    with open(FLAGGED_DATA_PATH, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Thanks for your feedback."
    return warning_message

def flag_response(user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output):
    timestamp = get_current_timestamp()
    with open(FLAGGED_DATA_PATH, "a", newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Write the timestamp, user input, and model response to the CSV file
        writer.writerow([timestamp, user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, feedback])
    warning_message = "Content flagged for review."
    return warning_message


# Step 2: Set up Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as app: #theme=gr.themes.Soft()
    gr.Markdown("# <span style='color:#29339B'> MCO Assistant </span>")
    gr.Markdown("""    
                    This interface allows you to interact with different language models. Choose your settings and input your question below. You do not need to change any specifications.""")
    with gr.Row():
        with gr.Column(scale=3): #'scale' value should be an integer. Using 2.5 will cause issues.
            user_input = gr.Textbox(label="Your Question", placeholder="Ask the model what it thinks...", lines=3 ,show_copy_button=False)
                
            with gr.Row():
                submit_button = gr.Button("✔️ Submit")
                clear_button = gr.Button("🗑️ Clear")
                flag_button = gr.Button("🚩 Flag")
            response_output = gr.Textbox(label="Model Response", lines=8, show_copy_button=True, placeholder="The model's response will be displayed here...")
            source_output = gr.Textbox(label="Source", lines=1, show_copy_button=True, placeholder="The reponse's source will be displayed here...")
            with gr.Row():
                like_button = gr.Button("👍 Like")
                dislike_button = gr.Button("👎 Dislike")
        with gr.Column(scale=1):
            #with gr.Row():
                model_dropdown = gr.Dropdown(choices=["Cohere 16k", "Cohere Plus"], label="Model", value='Cohere 16k', info="Cohere 16k is a smaller-scale language model than Cohere Plus. While Cohere 16k offers high-quality responses, it might not possess the same level of sophistication and depth as Cohere Plus. ")
                temperature_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0, label="Temperature", info="The level of randomness used to generate the output text. A lower temperature means less random generations.")
                top_p_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.7, label="Top P", info="This ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.")
                max_token_slider = gr.Slider(minimum=100, maximum=4000, step=500, value=1000, label="Max Tokens", info="The maximum number of output tokens that the model will generate for the response.")
                #country_checkbox = gr.CheckboxGroup(["USA", "UK"], label="Countries", info="Which countries should the regulations be applied for?"),
            #with gr.Row():
                warning_message = gr.Textbox(label="System Message", visible=True, lines=1)
                #file_upload = gr.File(label="Upload your documents here.")                
    
    #Define examples
    examples = [
        #{"user_input": "Can a member give anything of value more than a hundred dollar?", "model": "Cohere", "temperature": 0.5, "top_p": 0.7, "max_tokens":1000}
        #{"user_input": "Explain the concept of deep learning.", "model": "Cohere", "temperature": 0.5, "top_p": 0.7}
        ["Tell me more about MCOAssitant.",  "Cohere 16k",  0.2,  1, 800],
        ["Provide contact information for the Bank of England.",  "Cohere 16k",  0,  1, 500],
        ["Can you summarize Private Securities Transactions of an Associated Person rules?", "Cohere 16k",  0,  1, 1500],
        ["Can a member give anything of value more than a hundred dollar?",  "Cohere 16k",  0,  0.9, 1000],
        ["Which obligations do not apply when successive personal transactions are carried out on behalf of a person? Make a list.",  "Cohere 16k",  0,  1, 1000],
        ["What are examples of giving advice, or providing services, to an employer in connection with a group personal pension scheme or group stakeholderpension scheme? Make a list.", "Cohere 16k", 0,  1, 1500]
    ]

    # Add examples to the interface
    gr.Examples(examples=examples, inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider])#, outputs=response_output)

        
    
    submit_button.click(fn=oci_llm_model, 
                        inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider], 
                        outputs=[response_output, source_output, warning_message])
    clear_button.click(fn=clear_input, 
                       inputs=[],  # No input is needed for clearing
                       outputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output, source_output, warning_message])
    flag_button.click(fn=flag_response, 
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    like_button.click(fn= handle_positive_feedback,
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    dislike_button.click(fn=handle_negative_feedback,
                      inputs=[user_input, model_dropdown, temperature_slider, top_p_slider, max_token_slider, response_output],  
                      outputs=[warning_message])
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:13px; color:#3c3c3c"> 
    Version 1.0.0.
    </span> </div>""") 
    
    gr.Markdown("""
    <div style="text-align:center;">
    <span style="font-size:11px; color:#3c3c3c"> 
    Note: While the model strives to be accurate and helpful, it is not a substitute for legal advice.  It's always recommended to consult with a qualified attorney for specific legal issues. 
    </span> </div>""")

# Step 3: Launch the interface
app.launch(share=True)#, server_port=44510)

Running on local URL:  http://127.0.0.1:7883

Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB
Running on public URL: https://33b8d99a41539cb4c8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


