In [1]:
from openai import OpenAI  # require v1.33
import shelve
from dotenv import load_dotenv
import os
import time
import pandas as pd
import json
import gradio as gr

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
load_dotenv()
OPENAI_API_KEY = os.getenv("OPEN_API_KEY_Bsc")
client = OpenAI(api_key=OPENAI_API_KEY)

In [3]:
# this code might need optimization - very simple solution but works for now

"""
creates a new thread file to store conversations, or loads old conversations if they exist
"""

# initialize thread dict
thread_dict = {
    "prompt": [],
    "answer": [],
    "user_id": [],
    "thread_id": [],
    "msg_id": [],
    "assistant_id": []  
}

# check if already threads exist
if os.path.exists("GPT_Threads\\threads.json"):
    # load exisiting thread
    with open('GPT_Threads\\threads.json') as json_file:
        data = json.load(json_file)  # this is a list of dictionaries
        for elem in data: 
            thread_dict["thread_id"].append(elem["thread_id"])
            thread_dict["msg_id"].append(elem["msg_id"])
            thread_dict["assistant_id"].append(elem["assistant_id"])
            thread_dict["prompt"].append(elem["prompt"])
            thread_dict["answer"].append(elem["answer"])
            thread_dict["user_id"].append(elem["user_id"])
elif not os.path.exists("GPT_Threads"): 
    os.makedirs("GPT_Threads")


In [4]:
def upload_code_interpreter_files(path):
    """
    uploads file for the assistant to use as external knowledge to the openai server
    """
    file_ids = []
    # Upload files from Dashboard Files with an "assistants" purpose
    for file_name in path:
        with open(file_name, "rb") as file_data:
            file_response = client.files.create(file=file_data, purpose='assistants')
            file_ids.append(file_response.id) 
    return file_ids

def upload_file_search_files(file_paths, vector_store):
    """
    uploads file for the assistant to use as external knowledge to the openai server
    """
    file_streams = [open(path, "rb") for path in file_paths]
 
    # Use the upload and poll SDK helper to upload the files, add them to the vector store,
    # and poll the status of the file batch for completion.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
    vector_store_id=vector_store.id, files=file_streams)
    print(file_batch.status)
    print(file_batch.file_counts)
    return 

def create_assistant(name, instruction_file, file_ids, vector_ids):
    """
    creates the assistant with given instruction and files for external knowledge
    choose which model you want to use, note that it has to have retrieval and code_interpreter functionalities
    more information on the openai assistant website
    """
    with open('Instructions/'+ instruction_file, 'r') as file:
        instructions = file.read()
    assistant = client.beta.assistants.create(
        name=name,
        temperature=1,
        instructions=instructions,
        model="gpt-4o-2024-05-13",
        tools=[{"type": "file_search"}, {"type": "code_interpreter"}],
        tool_resources = {
            "file_search": {"vector_store_ids": [vector_ids]}
            ,"code_interpreter": {"file_ids": file_ids}
        }
        
    )
    return assistant

In [11]:
# Upload files for code interpreter
files = upload_code_interpreter_files(["Dashboard Files\merged_file.csv"])
# create vector store
vector_store = client.beta.vector_stores.create(name="Component Graph")
upload_file_search_files(["Dashboard Files\Component Graph.md"], vector_store)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


In [22]:
# Create an assistant with external knowledge
# only run once - check on website if assitant was created

# Create assistant
for instruction_file in os.listdir("Instructions"):
    name = instruction_file.split("_")[0]
    assistant_glob = create_assistant(name + "_Bot", instruction_file, files, vector_store.id)

In [28]:
for ass in client.beta.assistants.list().data:
    print(ass.name)

Novice_Bot
Interm_Bot
Expert_Bot


In [20]:
# Thread management

def thread_management(user_id, name):

    if user_id == "" or name == "":
        return None, None, "Invalid User ID or name"
    thread_id = check_if_thread_exists(user_id)

    # If a thread doesn't exist, create one and store it
    if thread_id is None:
        msg=f"Created new thread for user_id {user_id} - Welcome {name}!"
        thread = client.beta.threads.create()
        store_thread(user_id, thread.id)
        thread_id = thread.id

    # Otherwise, retrieve the existing thread
    else:
        msg=f"Found existing thread for user ID {user_id} - Welcome Back {name}!"
        thread = client.beta.threads.retrieve(thread_id)
    return thread_id, thread, msg

def check_if_thread_exists(user_id):
    with shelve.open("threads_db") as threads_shelf:
        return threads_shelf.get(user_id, None)

def store_thread(user_id, thread_id):
    with shelve.open("threads_db", writeback=True) as threads_shelf:
        threads_shelf[user_id] = thread_id

def save_thread(user_id, data_user, data_assistant):
    # saving threads in json file for better visualization of conversations
    thread_dict["thread_id"].append(data_assistant.thread_id)
    thread_dict["msg_id"].append(data_assistant.id)
    thread_dict["assistant_id"].append(data_assistant.assistant_id)
    thread_dict["prompt"].append(data_user.content[0].text.value)
    thread_dict["answer"].append(data_assistant.content[0].text.value)
    thread_dict["user_id"].append(user_id)

    df = pd.DataFrame(thread_dict)  # probably also a more neat solution without this step available
    df.to_json("GPT_Threads\\threads.json", orient="records", indent=2)

# Generate response
def generate_response(user_id, name, message_body, temp, assistant_level):
    
    thread_id, thread, _ = thread_management(user_id, name)
    if thread_id is None: 
        return "invalid request"

    # Add message to thread
    message = client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=message_body,
    )
    print(assistant_level)
    # Run the assistant and get the new message
    new_message = run_assistant(user_id, thread, temp, assistant_level)
    # print(f"To {name}:", new_message)
    return new_message

# Run assistant
def run_assistant(user_id, thread, temp, assistant_level="Novice"):
    # Retrieve the created assistant or paste the id of any other available assistant instead of "assistant_glob.id"
    assistants = client.beta.assistants.list().data
    for assistant in assistants:
        if assistant.name == assistant_level+ "_Bot":
            break
    assistant = client.beta.assistants.update(
        assistant_id=assistant.id,
        temperature=temp
)

    # Run the assistant
    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )

    # Wait for completion
    # while run.status != "completed":
    #     time.sleep(0.5)
    #     run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)

    # Retrieve the Messages
    messages = client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
    new_message = messages.data[0].content[0].text

    annotations = new_message.annotations
    citations = []
    for index, annotation in enumerate(annotations):
        new_message.value = new_message.value.replace(annotation.text, f"[{index}]")
        if file_citation := getattr(annotation, "file_citation", None):
            cited_file = client.files.retrieve(file_citation.file_id)
            citations.append(f"[{index}] {cited_file.filename}")

    print(new_message.value)
    print("\n".join(citations))

    data_assistant = client.beta.threads.messages.list(thread_id=thread.id).data[0]
    data_user = client.beta.threads.messages.list(thread_id=thread.id).data[1]

    save_thread(user_id, data_user, data_assistant)
    return new_message.value

In [18]:
assistants = client.beta.assistants.list().data
assistant_level = "Novice"
for assistant in assistants:
    if assistant.name == assistant_level+ "_Bot":
        print("found")
        break
        # assistant = client.beta.assistants.retrieve(assistant.id)

found


In [19]:
assistant

Assistant(id='asst_vQD4OvFjr22xV7Om6PJ15254', created_at=1720696326, description=None, instructions="You are an assistant during a dashboard onboarding process. You should guide users of novice level step by step through a comprehensive journey of getting to know a PowerBI dashboard. The elements and layout of the dashboard are explained in the provided markdown file.\n\nHere are your instructions:\nProvide a simple and clear introduction to the dashboard layout.\nExplain basic elements such as charts, graphs, tooltips, and navigation menus.\nGuide the user through navigating the menu on the left.\nExplain how to click on charts and graphs to see more details.\nTeach the user how to hover over data points to see tooltips with more information.\nDescribe how to use legends and labels to understand visual elements.\nCreate simple exercises, such as finding specific data points or identifying basic trends.\nProvide step-by-step guidance for each exercise.\nEncourage users to use the 'Help

## Gradio Interface

In [21]:
theme = gr.themes.Monochrome(radius_size="md", spacing_size="lg")
assistant_levels = ["Novice", "Interm", "Expert"]

with gr.Blocks(theme=theme) as demo:

    gr.Markdown("# Welcome to PowerBI Onboarding!")
    greet = gr.Markdown("Please enter your User Name and ID")
    user_info = gr.Markdown(visible=False)

    with gr.Column(visible=True) as user_int:
        userid = gr.Textbox(label="User ID")
        name = gr.Textbox(label="User Name")
        button_submit = gr.Button("Submit")

    thread_msg = gr.Markdown(label="Thread")
    message = gr.Textbox(label="Message", visible=False)

    # Define the function and its inputs and outputs
    with gr.Row(visible=False) as btn_int:
        button_send = gr.Button("Send")
        button_clear = gr.ClearButton([message])
        temp = gr.Slider(minimum=0, maximum=2, step=0.1, value=1, interactive=True, label="Temperature")  # for testing purpose only
        assistant_level = gr.Radio(choices=assistant_levels, label="Choose Your Visual Literacy")
    bot = gr.Textbox(label="Chat Bot", visible=False)
    
    def submit(userid, name):
        if len(name) == 0:    
            return "Empty name or ID"
        time.sleep(2)
        return {btn_int: gr.Row(visible=True),
                message: gr.Textbox(visible=True),
                bot: gr.Textbox(visible=True),
                user_int: gr.Column(visible=False),
                temp: gr.Slider(visible=True),
            
                greet: gr.Markdown(visible=False),
                user_info: gr.Markdown(f"{name} with User ID: {userid}", visible=True)
                }

    button_submit.click(fn=thread_management, inputs=[userid, name], outputs=[gr.Text(visible=False), gr.Text(visible=False), thread_msg])
    button_submit.click(fn=submit, inputs=[userid, name], outputs=[btn_int, message, bot, user_int, greet, user_info, temp])
    button_send.click(fn=generate_response, inputs=[userid, name, message, temp, assistant_level], outputs=bot)

demo.launch()

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 4.19.2, however version 4.29.0 is available, please upgrade.
--------
None


Traceback (most recent call last):
  File "c:\Users\pmair\miniconda3\envs\PR\lib\site-packages\gradio\queueing.py", line 495, in call_prediction
    output = await route_utils.call_process_api(
  File "c:\Users\pmair\miniconda3\envs\PR\lib\site-packages\gradio\route_utils.py", line 235, in call_process_api
    output = await app.get_blocks().process_api(
  File "c:\Users\pmair\miniconda3\envs\PR\lib\site-packages\gradio\blocks.py", line 1627, in process_api
    result = await self.call_function(
  File "c:\Users\pmair\miniconda3\envs\PR\lib\site-packages\gradio\blocks.py", line 1173, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "c:\Users\pmair\miniconda3\envs\PR\lib\site-packages\anyio\to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "c:\Users\pmair\miniconda3\envs\PR\lib\site-packages\anyio\_backends\_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "c:\Users\pmair\minico

Novice
Hello! I'm here to help you get started with your PowerBI dashboard. Let's dive into it step by step.

### Introduction to the Dashboard Layout

1. **Charts and Graphs**: These are visual representations of your data. Charts might include bar charts, line graphs, pie charts, etc.
2. **Tooltips**: When you hover over a data point on a chart or graph, a small box will appear showing more detailed information about that data point.
3. **Navigation Menus**: Typically found on the left side, this menu helps you navigate between different pages or sections of the dashboard.

### Basic Navigation

1. **Menu on the Left**: This is your primary navigation tool. You can click on different items here to switch between various views or reports in your dashboard.
2. **Clicking on Charts and Graphs**: You can click on different parts of charts and graphs to drill down into more detailed data. This is helpful for exploring specific insights.
3. **Hovering for Tooltips**: Simply hover your mous

In [None]:
demo = gr.Interface(
    fn=generate_response,
    inputs=["text", "text", "text"],
    outputs=["text"],
)

demo.launch()

## Cell Interface

In [None]:
# Test assistant
user_name = "Beginner"
user_id = "2"
new_message = generate_response("Start Dashboard onboarding", user_id, user_name)

In [None]:
new_message = generate_response("Beginner", user_id, user_name)

In [None]:
new_message = generate_response("I see two summaries of new hire numbers, a comparison chart of two years. A line chart showing Full Time vs Part Time hires and 2 filter options. Did i miss something?", user_id, user_name)

In [None]:
new_message = generate_response("I would expect the total number of new hires to go down, since we are narrowing the data. Also maybe new trends emerge if we set filters to specific regions", user_id, user_name)

In [None]:
new_message = generate_response("Selecting a specific region bar will apply the same filter as if I filtered by this region using the slicer I suppose", user_id, user_name)

In [None]:
new_message = generate_response("Which insights should I search for in this particular dashboard?", user_id, user_name)