In [1]:
from openai import OpenAI  # require v1.33
import shelve
from dotenv import load_dotenv
import os
import time
import pandas as pd
import json
import gradio as gr

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [14]:
# Load variables from a local .env file, then build the OpenAI client that the
# rest of the notebook shares.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPEN_API_KEY_Bsc")
client = OpenAI(api_key=OPENAI_API_KEY)

In [13]:
# Only report whether the key is set: displaying the value itself would store
# the secret in the notebook's saved cell output.
os.getenv("OPEN_API_KEY_Bsc") is not None

In [5]:
# this code might need optimization - very simple solution but works for now
#
# Prepares the in-memory conversation log. It starts with empty columns and,
# when an earlier threads.json is present, replays its records into them.
# When neither the file nor the GPT_Threads folder exists, the folder is created.

# one list per column; the key order here is the column order of the saved log
thread_dict = dict(
    prompt=[],
    answer=[],
    user_id=[],
    thread_id=[],
    msg_id=[],
    assistant_id=[],
)

if os.path.exists("GPT_Threads\\threads.json"):
    # an earlier log exists: read it completely, then copy it column by column
    with open('GPT_Threads\\threads.json') as json_file:
        data = json.load(json_file)  # this is a list of dictionaries
    for record in data:
        for column in ("thread_id", "msg_id", "assistant_id", "prompt", "answer", "user_id"):
            thread_dict[column].append(record[column])
elif not os.path.exists("GPT_Threads"):
    # nothing stored yet: make sure the target folder is there for later saves
    os.makedirs("GPT_Threads")


In [7]:
def upload_code_interpreter_files(path):
    """
    Upload local files to the OpenAI server with the "assistants" purpose.

    Args:
        path: An iterable of file paths. A single path (``str`` or
            ``os.PathLike``) is also accepted and treated as a one-element list.

    Returns:
        list: The ids reported by the server for the uploaded files, in the
        order the paths were given. Empty when no paths are given.
    """
    # A bare string would otherwise be iterated character by character and each
    # character opened as if it were a file name.
    if isinstance(path, (str, os.PathLike)):
        path = [path]

    file_ids = []
    # Upload files from Dashboard Files with an "assistants" purpose
    for file_name in path:
        with open(file_name, "rb") as file_data:
            file_response = client.files.create(file=file_data, purpose='assistants')
        file_ids.append(file_response.id)
    return file_ids

def upload_file_search_files(file_paths, vector_store):
    """
    Upload local files into a vector store and wait for the batch to finish.

    The batch status and file counts reported by the server are printed.

    Args:
        file_paths: Iterable of paths of the files to upload.
        vector_store: Object whose ``id`` attribute identifies the target
            vector store.

    Returns:
        None
    """
    from contextlib import ExitStack  # local import keeps this cell self-contained

    # ExitStack closes every opened file once the upload has finished, including
    # when opening a later file or the upload itself raises.
    with ExitStack() as stack:
        file_streams = [stack.enter_context(open(path, "rb")) for path in file_paths]

        # Use the upload and poll SDK helper to upload the files, add them to the vector store,
        # and poll the status of the file batch for completion.
        file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
            vector_store_id=vector_store.id, files=file_streams)

    print(file_batch.status)
    print(file_batch.file_counts)
    return

def create_assistant(file_ids, vector_ids):
    """
    Create the onboarding assistant with its instructions and attached knowledge.

    The assistant is created with the file_search and code_interpreter tools, so
    the chosen model has to support both (see the OpenAI assistants documentation).

    Args:
        file_ids: Id, or iterable of ids, of uploaded files made available to
            the code interpreter.
        vector_ids: Id, or iterable of ids, of the vector store(s) made
            available to file search.

    Returns:
        The assistant object returned by ``client.beta.assistants.create``.
    """
    # Accept one id or several for both arguments. Wrapping an existing list in
    # another list would send a nested list to the API.
    vector_store_ids = [vector_ids] if isinstance(vector_ids, str) else list(vector_ids)
    code_file_ids = [file_ids] if isinstance(file_ids, str) else list(file_ids)

    assistant = client.beta.assistants.create(
        name="Dashboard Onboarding ChatBot BetaV2",
        temperature=1,
        instructions="You are an assistant during a dashboard onboarding process. You should guide users of different expertise level step by step through a comprehensive journey of getting to know a PowerBI dashboard. The elements and layout of the dashboard are explained in the provided JSON file. I also provided you with previous user interactions in the form of csv files, which you should use to guide users to insightful states of the dashboard. Start the onboarding by asking the experience level of the new user and then give exercises based on this information. Beginner users should at least solve 5 exercises, showing them how to leverage PowerBI for data analysis, but also introduce them to the given dashboard. Only prompt one step at a time to the user, do not overwhelm him with too much information at once. Only give exercise you have knowledge about, hence you should be able to identify if the users answer is incorrect. Make sure to leverage the uploaded files",
        model="gpt-4o-2024-05-13",
        tools=[{"type": "file_search"}, {"type": "code_interpreter"}],
        tool_resources={
            "file_search": {"vector_store_ids": vector_store_ids},
            "code_interpreter": {"file_ids": code_file_ids},
        },
    )
    return assistant

In [16]:
# Thread management

def thread_management(user_id, name):
    """
    Look up the conversation thread stored for a user, creating one if needed.

    Args:
        user_id: Key under which the thread id is stored in the shelve database.
        name: Display name, used only in the returned status message.

    Returns:
        tuple: ``(thread_id, thread, msg)``. ``thread_id`` and ``thread`` are
        ``None`` when the input is rejected; ``msg`` is a status line in every case.
    """
    # Reject missing, non-string and whitespace-only values. Shelve keys must be
    # strings, and a blank id or name would otherwise open a thread for nobody.
    for value in (user_id, name):
        if not isinstance(value, str) or not value.strip():
            return None, None, "Invalid User ID or name"

    thread_id = check_if_thread_exists(user_id)

    # If a thread doesn't exist, create one and store it
    if thread_id is None:
        msg = f"Created new thread for user_id {user_id} - Welcome {name}!"
        thread = client.beta.threads.create()
        store_thread(user_id, thread.id)
        thread_id = thread.id

    # Otherwise, retrieve the existing thread
    else:
        msg = f"Found existing thread for user ID {user_id} - Welcome Back {name}!"
        thread = client.beta.threads.retrieve(thread_id)
    return thread_id, thread, msg

def check_if_thread_exists(user_id):
    """Return the thread id stored for ``user_id`` in the shelve database, or None."""
    shelf = shelve.open("threads_db")
    try:
        stored_thread_id = shelf.get(user_id)  # .get falls back to None when absent
    finally:
        shelf.close()
    return stored_thread_id

def store_thread(user_id, thread_id):
    """Save ``thread_id`` under ``user_id`` in the shelve database, replacing any earlier entry."""
    shelf = shelve.open("threads_db", writeback=True)
    try:
        shelf[user_id] = thread_id
    finally:
        # closing flushes the entry to disk
        shelf.close()

def save_thread(user_id, data_user, data_assistant):
    """
    Append one prompt/answer exchange to the module-level ``thread_dict`` and
    rewrite the JSON log, which makes conversations easier to inspect.

    Args:
        user_id: Id of the user the exchange belongs to.
        data_user: Message object holding the user's prompt; the text of its
            first content part is stored.
        data_assistant: Message object holding the assistant's answer; its
            thread id, message id, assistant id and first text part are stored.
    """
    # Values are read lazily and in this order, one column at a time.
    column_readers = (
        ("thread_id", lambda: data_assistant.thread_id),
        ("msg_id", lambda: data_assistant.id),
        ("assistant_id", lambda: data_assistant.assistant_id),
        ("prompt", lambda: data_user.content[0].text.value),
        ("answer", lambda: data_assistant.content[0].text.value),
        ("user_id", lambda: user_id),
    )
    for column, read_value in column_readers:
        thread_dict[column].append(read_value())

    # the whole log is rewritten on every call, one JSON record per exchange
    conversations = pd.DataFrame(thread_dict)
    conversations.to_json("GPT_Threads\\threads.json", orient="records", indent=2)

# Generate response
def generate_response(user_id, name, message_body, temp=1):
    """
    Send one user message to the user's thread and return the assistant's reply.

    Args:
        user_id: Id used to look up (or create) the user's thread.
        name: Display name of the user.
        message_body: Text of the message to send.
        temp: Sampling temperature passed on to the assistant run. Defaults to
            1, the value the assistant is created with and the UI slider starts at.

    Returns:
        str: The assistant's reply, or "invalid request" when the user
        information is rejected or the message is empty.
    """
    # A missing or blank prompt gives the assistant nothing to answer, so skip
    # the API round trip.
    if message_body is None or (isinstance(message_body, str) and not message_body.strip()):
        return "invalid request"

    thread_id, thread, _ = thread_management(user_id, name)
    if thread_id is None:
        return "invalid request"

    # Add message to thread
    client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=message_body,
    )

    # Run the assistant and get the new message
    return run_assistant(user_id, thread, temp)

# Run assistant
def run_assistant(user_id, thread, temp):
    """
    Run the onboarding assistant on a thread and return its newest reply.

    File-citation markers in the reply are replaced by ``[n]`` references. The
    reply and the cited file names are printed, and the exchange is passed to
    ``save_thread``.

    Args:
        user_id: Id of the user, forwarded to ``save_thread``.
        thread: Thread object; only its ``id`` attribute is used.
        temp: Sampling temperature for this run.

    Returns:
        str: The reply text, or a short status message when the run did not
        complete or produced no message.
    """
    # Paste the id of any other available assistant here instead if needed.

    # Onboarding bot: asst_CzAZx0pbuCdy57fd18DFWZEB
    # Dashboard Onboarding Assistant: asst_irK6D1q8nwG8JTHN21ykPURB
    # Dashboard Onboarding ChatBot BetaV2: asst_FaRkIb4JWeNMBij48tjyGyS2
    assistant_id = "asst_FaRkIb4JWeNMBij48tjyGyS2"

    # Run the assistant. The temperature is set on the run itself, so the shared
    # assistant is not modified on every request.
    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id,
        assistant_id=assistant_id,
        temperature=temp,
    )

    # A run can end without completing; in that case there is no reply to read.
    if run.status != "completed":
        return f"The assistant could not complete the request (run status: {run.status})."

    # Retrieve the Messages
    messages = client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
    if not messages.data:
        return "The assistant did not return a message."
    new_message = messages.data[0].content[0].text

    annotations = new_message.annotations
    citations = []
    for index, annotation in enumerate(annotations):
        new_message.value = new_message.value.replace(annotation.text, f"[{index}]")
        if file_citation := getattr(annotation, "file_citation", None):
            cited_file = client.files.retrieve(file_citation.file_id)
            citations.append(f"[{index}] {cited_file.filename}")

    print(new_message.value)
    print("\n".join(citations))

    # One request supplies both entries that are saved: index 0 is stored as the
    # assistant's answer and index 1 as the user's prompt.
    thread_messages = client.beta.threads.messages.list(thread_id=thread.id).data
    data_assistant = thread_messages[0]
    data_user = thread_messages[1]

    save_thread(user_id, data_user, data_assistant)
    return new_message.value

In [15]:
# Create an assistant with external knowledge
# only run once - check on the website whether the assistant was created

# Upload files for code interpreter
# (backslashes are escaped so the Windows-style paths contain no invalid escape sequences)
files = upload_code_interpreter_files(["Dashboard Files\\merged_file.csv"])

# create vector store
vector_store = client.beta.vector_stores.create(name="Component Graph")
upload_file_search_files(["Dashboard Files\\Component Graph.md"], vector_store)

# Create assistant
assistant_glob = create_assistant(files, vector_store.id)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


## Gradio Interface

In [7]:
# Chat UI: the user first submits an id and name, which reveals the message box,
# the send/clear buttons, the temperature slider and the reply box.
theme = gr.themes.Monochrome(radius_size="md", spacing_size="lg")

with gr.Blocks(theme=theme) as demo:

    gr.Markdown("# Welcome to PowerBI Onboarding!")
    greet = gr.Markdown("Please enter your User Name and ID")
    user_info = gr.Markdown(visible=False)

    # login form, hidden again after a successful submit
    with gr.Column(visible=True) as user_int:
        userid = gr.Textbox(label="User ID")
        name = gr.Textbox(label="User Name")
        button_submit = gr.Button("Submit")

    thread_msg = gr.Markdown(label="Thread")
    message = gr.Textbox(label="Message", visible=False)

    # chat controls, hidden until the user has submitted an id and name
    with gr.Row(visible=False) as btn_int:
        button_send = gr.Button("Send")
        button_clear = gr.ClearButton([message])
        temp = gr.Slider(minimum=0, maximum=2, step=0.1, value=1, interactive=True, label="Temperature")  # for testing purpose only
    bot = gr.Textbox(label="Chat Bot", visible=False)

    def submit(userid, name):
        """
        Switch from the login form to the chat view.

        Returns a dict of component updates. With blank input only the greeting
        is replaced by an error text, so the form stays in place.
        """
        if not userid.strip() or not name.strip():
            # This handler has seven outputs, so a bare string is not a valid
            # return value. A dict updates just the components it lists.
            return {greet: gr.Markdown("Empty name or ID", visible=True)}
        # NOTE(review): presumably gives the thread lookup on the same click time to finish - confirm
        time.sleep(2)
        return {btn_int: gr.Row(visible=True),
                message: gr.Textbox(visible=True),
                bot: gr.Textbox(visible=True),
                user_int: gr.Column(visible=False),
                temp: gr.Slider(visible=True),

                greet: gr.Markdown(visible=False),
                user_info: gr.Markdown(f"{name} with User ID: {userid}", visible=True)
                }

    # Two handlers run on submit: one looks up or creates the thread and shows its
    # status message, the other toggles the visible components.
    button_submit.click(fn=thread_management, inputs=[userid, name], outputs=[gr.Text(visible=False), gr.Text(visible=False), thread_msg])
    button_submit.click(fn=submit, inputs=[userid, name], outputs=[btn_int, message, bot, user_int, greet, user_info, temp])
    button_send.click(fn=generate_response, inputs=[userid, name, message, temp], outputs=bot)

demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 4.19.2, however version 4.29.0 is available, please upgrade.
--------


In [None]:
# Minimal alternative UI. generate_response takes (user_id, name, message_body,
# temp), so there is one input per parameter, in that order.
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="User ID"),
        gr.Textbox(label="User Name"),
        gr.Textbox(label="Message"),
        gr.Slider(minimum=0, maximum=2, step=0.1, value=1, label="Temperature"),
    ],
    outputs=["text"],
)

demo.launch()

## Cell Interface

In [None]:
# Test assistant
# generate_response expects (user_id, name, message_body, temp), in that order
user_name = "Beginner"
user_id = "2"
new_message = generate_response(user_id, user_name, "Start Dashboard onboarding", temp=1)

In [None]:
# arguments follow the signature: (user_id, name, message_body, temp)
new_message = generate_response(user_id, user_name, "Beginner", temp=1)

In [None]:
# arguments follow the signature: (user_id, name, message_body, temp)
new_message = generate_response(user_id, user_name, "I see two summaries of new hire numbers, a comparison chart of two years. A line chart showing Full Time vs Part Time hires and 2 filter options. Did i miss something?", temp=1)

In [None]:
# arguments follow the signature: (user_id, name, message_body, temp)
new_message = generate_response(user_id, user_name, "I would expect the total number of new hires to go down, since we are narrowing the data. Also maybe new trends emerge if we set filters to specific regions", temp=1)

In [None]:
# arguments follow the signature: (user_id, name, message_body, temp)
new_message = generate_response(user_id, user_name, "Selecting a specific region bar will apply the same filter as if I filtered by this region using the slicer I suppose", temp=1)

In [None]:
# arguments follow the signature: (user_id, name, message_body, temp)
new_message = generate_response(user_id, user_name, "Which insights should I search for in this particular dashboard?", temp=1)