# 1. Initialization

### Setup

In [1]:
import ray
import pandas as pd
import time 
import bokeh 
import numpy as np
import binascii
import redis
import pprint
import json
import qgrid
import os
import matplotlib.pyplot as plt
pp = pprint.PrettyPrinter() 
# from misc import *

In [2]:
# Start Ray for this notebook session with default settings.
# Alternative kept for reference: attach to an already-running cluster by
# reading its Redis address from the environment and passing it to ray.init.
#addr = os.environ["redis_address"]
#ray.init(redis_address = addr)
ray.init()

Waiting for redis server at 127.0.0.1:22697 to respond...
Waiting for redis server at 127.0.0.1:32311 to respond...
Starting local scheduler with 8 CPUs and 0 GPUs.
View the web UI at http://localhost:8888/WebUI.ipynb


{'local_scheduler_socket_names': ['/tmp/scheduler50665381'],
 'node_ip_address': '127.0.0.1',
 'object_store_addresses': [ObjectStoreAddress(name='/tmp/plasma_store59214586', manager_name='/tmp/plasma_manager6662867', manager_port=49248)],
 'redis_address': '127.0.0.1:22697'}

### Function definitions

In [3]:
@ray.remote
def example(x):
    """Trivial remote task: ignores ``x`` and always returns ``"ok"``."""
    return "ok" 

@ray.remote
def example2(x): 
    """Trivial remote task: ignores ``x`` and always returns ``"hi"``."""
    return "hi"

@ray.remote
class TestCls():
    """Minimal actor whose only method echoes its argument."""

    def __init__(self):
        # Placeholder attribute; not read by any method of this class.
        self.g = 1
        
    def to_go(self, x):
        """Return ``x`` unchanged."""
        return x

    
@ray.remote
class Outer():
    """Actor that creates a nested ``TestCls`` actor and exposes a failing method."""

    def __init__(self):
        # Placeholder attribute; not read by any method of this class.
        self.f = 1
        # Handle to a second actor created from inside this one.
        self.test = TestCls.remote()
    
    def to_go2(self, x):
        """Return ``x * 2``."""
        return x * 2
    
    def error(self):
        """Always raise ``ZeroDivisionError`` (the body evaluates ``1/0``)."""
        return 1/0

In [4]:
import binascii
import re

# Width, in raw bytes, of the worker and task identifiers that
# key_to_hex_identifiers slices out of a key.
IDENTIFIER_LENGTH = 20

# This prefix must match the value defined in ray_redis_module.cc.
# NOTE(review): no prefix constant is defined in this cell, so the comment
# above appears to be orphaned -- confirm whether a definition was dropped.

def hex_identifier(identifier):
    """Return the hexadecimal text form of a raw ``bytes`` identifier."""
    hex_bytes = binascii.b2a_hex(identifier)
    return str(hex_bytes, "utf-8")


def identifier(hex_identifier):
    """Convert a hexadecimal string back into the raw bytes it encodes."""
    raw_bytes = binascii.a2b_hex(hex_identifier)
    return raw_bytes

def key_to_hex_identifiers(key):
    """Split a ``prefix:worker_id:task_id`` key into two hex identifier strings.

    The worker and task identifiers are read as fixed-width fields of
    ``IDENTIFIER_LENGTH`` bytes each, with one separator byte between them.

    Returns:
        A ``(worker_id, task_id)`` tuple of hex strings.

    Raises:
        ValueError: if ``key`` contains no ``b":"``.
    """
    worker_start = key.index(b":") + 1
    worker_end = worker_start + IDENTIFIER_LENGTH
    # Skip the single separator byte that follows the worker identifier.
    task_start = worker_end + 1
    task_end = task_start + IDENTIFIER_LENGTH
    worker_id = hex_identifier(key[worker_start:worker_end])
    task_id = hex_identifier(key[task_start:task_end])
    return worker_id, task_id

def clean(sometext):
    """Decode UTF-8 bytes and strip ANSI escape sequences from the text.

    Everything from an ESC character up to and including the next ``m`` is
    removed.
    """
    decoded = sometext.decode('UTF-8')
    return re.sub(r'\x1b[^m]*m', '', decoded)

### Generate data in Redis

In [35]:
# Run a handful of remote tasks and actor calls so that there is state to
# inspect in the cells below.
results = ray.get(list(map(example.remote, range(4))))
results2 = ray.get(list(map(example2.remote, range(2))))

actor = TestCls.remote()
actor_results = ray.get([actor.to_go.remote(1)])

# Call the actor method that raises ZeroDivisionError.  The returned object ID
# is never fetched; it is simply displayed as this cell's output.
err_actor = Outer.remote()
err_actor.error.remote()

ObjectID(03499a262434452adbff12c34e68fed0fd4e7612)

Remote function error failed with:

Traceback (most recent call last):
  File "/Users/michellemarzoev/Desktop/ray/python/ray/worker.py", line 1771, in process_task
    worker.actors[task.actor_id().id()], *arguments)
  File "<ipython-input-3-e8a35551207e>", line 28, in error
ZeroDivisionError: division by zero


You can inspect errors by running

    ray.error_info()

If this driver is hanging, start a new one with

    ray.init(redis_address="127.0.0.1:22697")



### Connect to Redis

In [36]:
# Reuse the Redis address ("host:port") of the Ray instance started above.
addr, port = ray.worker.global_worker.redis_address.split(":")
# Responses are decoded as latin-1 so that binary identifiers come back as
# strings that later cells can turn into bytes again with .encode('latin-1').
redis_options = dict(
    host=addr,
    port=port,
    decode_responses=True,
    encoding='latin-1',
    encoding_errors='replace',
)
rc = redis.StrictRedis(**redis_options)

# 2. Jobs Data


### Remote Functions Information

In [38]:
# Flatten the function table into one record per remote function, carrying the
# table key along as an extra "function_id" field.
fn_table = ray.global_state.function_table()
fn_list = []
for fn_id, val in fn_table.items():
    val["function_id"] = fn_id
    fn_list.append(val)

qgrid.nbinstall(overwrite = True)

# NOTE: the labels below are assigned by position, so they depend on the
# column order pandas derives from the record keys.
frame = pd.DataFrame(fn_list).reset_index(drop=True)
frame.columns = ["DriverID", "Module", "Function", "FunctionID"]
qgrid.show_grid(frame)

### Task Information

In [49]:
from pandas.io.json import json_normalize

# ObjectID values are converted to hex strings in place before the nested task
# records are flattened into a table.
tt = ray.global_state.task_table()
tt_list = list(tt.values())
for d in tt_list:
    spec = d['TaskSpec']
    spec['ReturnObjectIDs'] = [oid.hex() for oid in spec['ReturnObjectIDs']]
    hex_args = []
    for arg in spec['Args']:
        # Only ObjectID arguments are converted; other values pass through.
        if isinstance(arg, ray.local_scheduler.ObjectID):
            arg = arg.hex()
        hex_args.append(arg)
    spec['Args'] = hex_args

task_df = json_normalize(tt_list)
# NOTE: labels are assigned by position and therefore depend on the column
# order produced by json_normalize.
task_df.columns = [
    "Local Scheduler ID", "State", "Actor Counter", "ActorID", "Arguments",
    "DriverID", "FunctionID", "Parent Counter", "Parent Task ID",
    "Required CPUs", "Required GPUs", "Return Object IDs", "TaskID",
]
grid_options = {
    'forceFitColumns': False,
    'defaultColumnWidth': 150,
}
qgrid.set_defaults(grid_options=grid_options)
qgrid.show_grid(task_df)

### Actor Information

In [21]:
# Build one record per "Actor*" key in Redis, with identifiers shown as hex.
actor_info = dict()
actors = rc.keys("Actor*")
# NOTE: the loop variable reuses the name `actor`, which an earlier cell bound
# to an actor handle.
for actor in actors:
    # Re-encode with latin-1 to get the identifier back as bytes.
    actor_key_bytes = actor[len('Actor:'):].encode('latin-1')
    x = rc.hgetall(actor)
    actor_info['Actor:{}'.format(hex_identifier(actor_key_bytes))] = x
    for id_field in ('class_id', 'driver_id'):
        if id_field in x:
            x[id_field] = hex_identifier(x[id_field].encode('latin-1'))

actor_df = pd.DataFrame.from_dict(actor_info)
df2 = actor_df.T
df2.index.name = "ActorID"
# NOTE: labels are assigned by position and depend on the field order of the
# Redis hashes as seen by pandas.
df2.columns = [
    "Method", "Class", "Class ID", "Class Name", "DriverID", "Module", "Num GPUS",
]
qgrid.show_grid(df2)


### Task - Worker Placement Information

In [22]:
# Map every worker to the most recent task it logged.
# Each entry of an event log is a JSON list of records that are indexed as
# [timestamp, event_name, event_kind, extras].
event_names = rc.keys("event_log*")
results = dict()
for event_name in event_names:
    for event in rc.lrange(event_name, 0, -1):
        event_dict = json.loads(event)
        task_id = ""
        worker_id = ""
        function_name = ""
        # Scan the records of *this* entry.  The scan used to sit one level
        # out, so only the last entry of each log was ever inspected.
        for element in event_dict:
            extras = element[3]
            if "task_id" in extras and "worker_id" in extras:
                task_id = extras["task_id"]
                worker_id = extras["worker_id"]
                # Tolerate records that carry ids but no function name.
                function_name = extras.get("function_name", function_name)
            if task_id != "" and worker_id != "" and function_name != "":
                results[worker_id] = {
                    "task_id": task_id,
                    "function_name": function_name,
                }

# Build the table from explicit (worker, function, task) rows so that the
# column labels cannot be swapped by dict/key ordering and an empty result
# still yields a well-formed (empty) table.
results_table = pd.DataFrame(
    [(wid, info["function_name"], info["task_id"]) for wid, info in results.items()],
    columns=["WorkerID", "Function", "TaskID"],
)
rt = results_table.set_index("WorkerID")
qgrid.show_grid(rt)

### Task Profiles

In [55]:
# Per-task timing breakdown: time spent acquiring the lock, executing, storing
# outputs, and everything else between the first and last logged record.
task_profiles, events = ray.global_state.task_profiles()

# (event name, result key) for each stage whose duration is reported.
profile_stages = [
    ("ray:acquire_lock", "acquire_lock"),
    ("ray:task:execute", "execute"),
    ("ray:task:store_outputs", "store"),
]
# (result key, display label) in the order the columns are shown.
column_labels = [
    ("acquire_lock", "Acquire lock (s)"),
    ("execute", "Execute (s)"),
    ("other", "Other (s)"),
    ("store", "Store Outputs (s)"),
    ("total", "Total time (s)"),
]

profiles_dict = dict()
for task_id, profiles in task_profiles.items():
    for profile in profiles:
        if not profile:
            # Nothing logged: there is no first/last record to measure from.
            continue
        overall_dur = profile[-1][0] - profile[0][0]

        # Timestamp of the last record seen for each (event name, kind) pair;
        # kind 1 is treated as the start of a span and kind 2 as its end.
        stamps = {}
        for log in profile:
            if log[2] == 1 or log[2] == 2:
                stamps[(log[1], log[2])] = log[0]

        durations = {}
        for event_name, key in profile_stages:
            if (event_name, 1) in stamps and (event_name, 2) in stamps:
                durations[key] = stamps[(event_name, 2)] - stamps[(event_name, 1)]
        if len(durations) != len(profile_stages):
            # Only profiles with all three stages complete are reported.
            continue

        durations["total"] = overall_dur
        durations["other"] = (
            overall_dur - durations["execute"] - durations["store"] - durations["acquire_lock"]
        )
        profiles_dict[task_id] = durations

# Select and label columns by key rather than by position, so the labels stay
# correct whatever the dict ordering and an empty result does not raise.
results_table = pd.DataFrame(
    list(profiles_dict.values()),
    index=list(profiles_dict.keys()),
    columns=[key for key, _ in column_labels],
)
results_table2 = results_table.rename(columns=dict(column_labels))
results_table2.index.name = "TaskID"
qgrid.show_grid(results_table2)



In [None]:
# total_acq = 0
# total_exec = 0
# total_store = 0
# total_other = 0
# total = 0
# for value in profiles_dict.values(): 
#     total_exec += value["execute"]
#     total_acq += value["acquire_lock"]
#     total_store += value["store"]
#     total_other += value["other"]
#     total += value["total"]

# labels = 'Acquire Lock', 'Execute', 'Store', 'Other'
# sizes = [total_acq/total, total_exec/total, total_store/total, total_other/total]
# explode = (0, 0.1, 0, 0)
# plt.pie(sizes, explode=explode, labels=labels, shadow=True, startangle=140)
# plt.axis('equal')
# print("Overall Task Breakdowns:")
# print("Acquire Lock: " + str(total_acq/total * 100) + "%")
# print("Execute: " + str(total_exec/total * 100) + "%")
# print("Store outputs: " + str(total_store/total * 100) + "%")
# print("Other: " + str(total_other/total * 100) + "%")
# plt.show()


### Reconstructed Task Information

In [24]:
# Count, per task, how many additional event-log entries mention it after the
# first one.  NOTE(review): this treats every repeat appearance of a task_id
# as a reconstruction -- confirm that matches how Ray logs re-executions.
event_names = rc.keys("event_log*")
attempted = dict()
reconstructed = dict()
for event_name in event_names:
    for event in rc.lrange(event_name, 0, -1):
        event_dict = json.loads(event)
        task_id = ""
        for element in event_dict:
            if "task_id" in element[3]:
                task_id = element[3]["task_id"]
        if task_id == "":
            continue
        if task_id in attempted:
            # Seen before: count one more repeat.  (The counter update and the
            # `attempted` bookkeeping used to be nested under the wrong
            # branches, so nothing was ever recorded.)
            reconstructed[task_id] = reconstructed.get(task_id, 0) + 1
        else:
            attempted[task_id] = True

# A dict of scalars cannot be passed to DataFrame directly, so build explicit
# (task, count) rows instead.
results_table = pd.DataFrame(
    list(reconstructed.items()),
    columns=["TaskID", "Reconstructions"],
).set_index("TaskID")
qgrid.show_grid(results_table)
# TODO: include objects

# 3. System State


### Node Information

In [56]:
# Using the global state API, we can populate a DataFrame with a list of Redis Clients currently connected
ctable = ray.global_state.client_table()

client_list = []
for node_ip in ctable:
    for client in ctable[node_ip]:
        client["node_ip_address"] = node_ip
        client_list.append(client)

client_df = pd.DataFrame(client_list)
client_df.columns = ["Aux Address", "Client Type", "DB Client ID", "Deleted", "Local Scheduler Socket", "Num CPUs", "NumGPUs", "Node IP Address"]
qgrid.show_grid(client_df)

### Object Store

In [58]:
# We can populate a DataFrame with a list of objects in the object store
object_dict = {oid.hex(): v for oid, v in ray.global_state.object_table().items()}
object_df = pd.DataFrame(object_dict).transpose()
object_df.index.name = "ObjectID"
object_df.columns = ["Data Size", "Hash", "IsPut", "ManagerIDs", "TaskID"]
qgrid.show_grid(object_df)

### Object - Worker Placement Information 

In [32]:
# Objects associated with each worker_id 
object_table = ray.global_state.object_table()
location_to_objects = dict()

for object_id, object_descriptor in object_table.items():
    if object_descriptor["ManagerIDs"] != None: 
        for location in object_descriptor["ManagerIDs"]:
            if location not in location_to_objects:
                location_to_objects[location] = []
            object_id = str(object_id)
            obj_comp = object_id.split("(")
            obj_comps = obj_comp[1].split(")") 
            object_id = obj_comps[0]
            location_to_objects[location].append(object_id)
table = pd.DataFrame.from_dict(location_to_objects)
qgrid.show_grid(table)
# object id -> worker id 
# skew in how objects are distributed 
# physical nodes -> total amt data on node, num tasks 

### Worker Information

In [59]:
# One record per "Worker*" key in Redis, labelled with the hex worker ID.
workers = rc.keys("Worker*")
worker_info = dict()
prefix_length = len('Workers:')
for worker in workers:
    # Re-encode with latin-1 to get the identifier back as bytes.
    worker_key_bytes = worker[prefix_length:].encode('latin-1')
    label = 'Workers:{}'.format(hex_identifier(worker_key_bytes))
    worker_info[label] = rc.hgetall(worker)

table = pd.DataFrame.from_dict(worker_info)
table2 = table.T
table2.index.name = "WorkerID"
# NOTE: labels are assigned by position and depend on the field order of the
# Redis hashes as seen by pandas.
table2.columns = [
    "Local Scheduler Socket", "Node IP Address", "Plasma Manager Socket",
    "Plasma Store Socket", "Stderr File", "Stdout File",
]
qgrid.show_grid(table2)
# resource info for each physical node 
# double check the IP 

### Object Transfer Information 

In [34]:
# Count how often each object ID is mentioned in the plasma manager ".out"
# logs.
log_files = ray.global_state.log_files()
transferred = dict()
# `node_addr` rather than `addr`, so the Redis host bound to `addr` in the
# connection cell is not overwritten.
for node_addr, inner_dict in log_files.items():
    for filename, contents in inner_dict.items():
        if "plasma_manager" not in filename or ".out" not in filename:
            continue
        # Everything before the first marker is discarded; each remaining
        # segment starts with an object ID that runs up to the next escaped
        # newline in the stringified contents.
        segments = str(contents).split("ObjectID: ")
        # Count every occurrence.  Previously only the first one per file was
        # counted, and a log without any marker raised IndexError.
        for segment in segments[1:]:
            object_id = segment.split("\\n")[0]
            transferred[object_id] = transferred.get(object_id, 0) + 1
table = pd.DataFrame(transferred, index = [0])
qgrid.show_grid(table.T)


# 4. Error Information

### Error Profiles


In [None]:
# Collect, per task, the worker, traceback and execution start time of every
# log entry that recorded a traceback.
# Each entry of an event log is a JSON list of records that are indexed as
# [timestamp, event_name, event_kind, extras].
event_names = rc.keys("event_log*")
error_profiles = dict()
for event_name in event_names:
    for event in rc.lrange(event_name, 0, -1):
        event_dict = json.loads(event)
        task_id = ""
        traceback = ""
        worker_id = ""
        start_time = -1
        # Scan the records of *this* entry.  The scan used to sit one level
        # out, so only the last entry of each log was ever inspected.
        for element in event_dict:
            extras = element[3]
            if element[1] == "ray:task:execute" and element[2] == 1:
                start_time = element[0]
            if "task_id" in extras and "worker_id" in extras:
                task_id = extras["task_id"]
                worker_id = extras["worker_id"]
            if "traceback" in extras:
                traceback = extras["traceback"]
            if task_id != "" and worker_id != "" and traceback != "" and start_time != -1:
                error_profiles[task_id] = {
                    "worker_id": worker_id,
                    "traceback": traceback,
                    "start_time": start_time,
                }
table = pd.DataFrame.from_dict(error_profiles)
qgrid.show_grid(table.T)

### Parallelization Score

In [None]:
# Parallelization score = total task execution time / (CPUs * job duration),
# where the job duration runs from the earliest execute-start to the latest
# execute-end found in the event logs.
event_names = rc.keys("event_log*")
total_exec = 0
earliest_start = float("inf")
latest_end = float("-inf")
for event_name in event_names:
    for event in rc.lrange(event_name, 0, -1):
        event_dict = json.loads(event)
        # None means "not seen".  The previous numeric placeholders (one of
        # them, `00125`, is not even a valid Python 3 literal) leaked into the
        # totals whenever an entry lacked an execute span.
        start_point = None
        end_point = None
        for element in event_dict:
            if element[1] != "ray:task:execute":
                continue
            if element[2] == 1:
                start_point = element[0]
            elif element[2] == 2:
                end_point = element[0]
        if start_point is None or end_point is None:
            # Entries without a complete execute span contribute nothing.
            continue
        earliest_start = min(earliest_start, start_point)
        latest_end = max(latest_end, end_point)
        total_exec += end_point - start_point
job_dur = latest_end - earliest_start

table = ray.global_state.client_table()
total_cpus = 0
for clients in table.values():
    for client in clients:
        if "NumCPUs" in client:
            total_cpus += client["NumCPUs"]

# Guard against "no spans found" (job_dur is -inf) and against zero CPUs or a
# zero-length job, any of which would make the ratio meaningless or raise.
if total_cpus > 0 and job_dur > 0:
    print("Parallelization Score: ")
    print(total_exec / (total_cpus * job_dur))
else:
    print("Parallelization Score: not available (no completed task executions or no CPUs found)")

# Task Interactive Queries 

In [61]:
# Collect per-task timing and placement details from the Redis event logs into
# `task_info` (keyed by task ID); the plotting cell's update() reads it.
import sys
# NOTE(review): `task_profiles` is assigned here but not read again in this cell.
task_profiles = ray.global_state.task_profiles() 
task_info = dict()
event_names = rc.keys("event_log*")
# Running integer used as a numeric worker index; it is incremented before
# each assignment, so the first index handed out is 2.
counter = 1
for i in range(len(event_names)):
    event_list = rc.lrange(event_names[i], 0, -1)
    for event in event_list:
        event_dict = json.loads(event)
        tid = ""
        # NOTE(review): `workers` is re-created for every log entry while
        # `counter` is not, so the `wid in workers` lookup below can only hit
        # within a single entry -- confirm whether one index per worker across
        # all entries was intended.
        workers = dict() 
        # NOTE(review): `earliest` is not read anywhere in this cell.
        earliest = sys.maxsize
        # First pass: the last record carrying a task_id names the task.
        # (Both inner loops rebind the outer loop variable `event`.)
        for event in event_dict: 
            if "task_id" in event[3]: 
                tid = event[3]["task_id"]
        # If no record carried a task_id, the entry is stored under "".
        task_info[tid] = dict()
        task_info[tid]["task_id"] = tid 
        # Second pass: copy the timestamp (event[0]) of each known span into a
        # "<span>_start" field when event[2] == 1 and "<span>_end" when
        # event[2] == 2.
        for event in event_dict: 
            if event[1] == "ray:get_task" and event[2] == 1: 
                task_info[tid]["get_task_start"] = event[0] 
            if event[1] == "ray:get_task" and event[2] == 2: 
                task_info[tid]["get_task_end"] = event[0] 
            if event[1] == "ray:import_remote_function" and event[2] == 1: 
                task_info[tid]["import_remote_func_start"] = event[0] 
            if event[1] == "ray:import_remote_function" and event[2] == 2: 
                task_info[tid]["import_remote_func_end"] = event[0] 
            if event[1] == "ray:wait_for_function" and event[2] == 1: 
                task_info[tid]["wait_for_function_start"] = event[0]  
            if event[1] == "ray:wait_for_function" and event[2] == 2: 
                task_info[tid]["wait_for_function_end"] = event[0]  
            if event[1] == "ray:acquire_lock" and event[2] == 1: 
                task_info[tid]["acquire_lock_start"] = event[0] 
            if event[1] == "ray:acquire_lock" and event[2] == 2: 
                task_info[tid]["acquire_lock_end"] = event[0] 
            if event[1] == "ray:task:get_arguments" and event[2] == 1: 
                task_info[tid]["get_arguments_start"] = event[0] 
            if event[1] == "ray:task:get_arguments" and event[2] == 2: 
                task_info[tid]["get_arguments_end"] = event[0]  
            if event[1] == "ray:task:execute" and event[2] == 1: 
                task_info[tid]["execute_start"] = event[0] 
            if event[1] == "ray:task:execute" and event[2] == 2: 
                task_info[tid]["execute_end"] = event[0] 
            if event[1] == "ray:task:store_outputs" and event[2] == 1: 
                task_info[tid]["store_outputs_start"] = event[0]
            if event[1] == "ray:task:store_outputs" and event[2] == 2: 
                task_info[tid]["store_outputs_end"] = event[0] 
            # "worker_id" holds the numeric index; "wid" keeps the raw ID.
            if "worker_id" in event[3]: 
                wid = event[3]["worker_id"]
                if wid in workers: 
                    task_info[tid]["worker_id"] = workers[wid]
                else: 
                    counter += 1 
                    workers[wid] = counter
                    task_info[tid]["worker_id"] = counter 
                task_info[tid]["wid"] = wid 
            if "function_name" in event[3]: 
                task_info[tid]["function_name"] = event[3]["function_name"]
        # Spans that were never logged get -1 placeholders for both ends.
        if "import_remote_func_start" not in task_info[tid]: 
            task_info[tid]["import_remote_func_start"] = -1
            task_info[tid]["import_remote_func_end"] = -1
        if "store_outputs_start" not in task_info[tid]: 
            task_info[tid]["store_outputs_start"] = -1
            task_info[tid]["store_outputs_end"] = -1
    # Drop the record if any of its values is -1.
    # NOTE(review): this loop sits at event-list level, so it only checks the
    # last `tid` of each list; with an empty list `tid` is stale (or undefined
    # on the first iteration) and the lookup can raise.  Moving it inside the
    # entry loop would also drop every record that received the -1
    # placeholders above -- confirm the intended filter before changing it.
    for val in task_info[tid].values():
        if val == -1: 
            del task_info[tid]
            break


In [44]:
# all_times = []
# for data in task_info.values(): 
#     all_times.append(data["acquire_lock_start"])
#     print(data["acquire_lock_start"])
#     all_times.append(data["acquire_lock_end"])
#     print(data["acquire_lock_end"])
#     all_times.append(data["get_arguments_start"])
#     print(data["get_arguments_start"])
#     all_times.append(data["get_arguments_end"])
#     print(data["get_arguments_end"])
#     all_times.append(data["execute_start"])
#     print(data["execute_start"])
#     all_times.append(data["execute_end"])
#     print(data["execute_end"])
#     all_times.append(data["store_outputs_start"])
#     print(data["store_outputs_start"])
#     all_times.append(data["store_outputs_end"])
#     print(data["store_outputs_end"])

# min_time = min(all_times)
# print("Min:" + str(min_time))
# for data in task_info.values(): 
#     for x in data.values(): 
#         data["acquire_lock_start"] = data["acquire_lock_start"] - min_time 
#         print(data["acquire_lock_start"])
#         data["acquire_lock_end"] = data["acquire_lock_end"] - min_time
#         print(data["acquire_lock_end"])
#         data["execute_start"] = data["execute_start"] - min_time  
#         data["execute_end"] = data["execute_end"] - min_time  
#         data["get_arguments_start"] = data["get_arguments_start"] - min_time  
#         data["get_arguments_end"] = data["get_arguments_end"] - min_time  
#         data["store_outputs_start"] = data["store_outputs_start"] - min_time  
#         data["store_outputs_end"] = data["store_outputs_end"] - min_time  
# print(task_info)

In [62]:
import pandas.io.sql as psql
from bokeh.plotting import figure, show
from bokeh.layouts import layout, widgetbox, row, gridplot
from bokeh.models import ColumnDataSource, HoverTool, Div, CustomJS, Range1d
from bokeh.models.widgets import Slider, Select, TextInput
from bokeh.io import curdoc, output_notebook
import sys
output_notebook()

# Data source backing the plot: one initially empty column per field.
source_columns = [
    "x",
    "y",
    "start",
    "end",
    "worker_id",
    "task_id",
    "function_name",
    "get_task_start",
    "get_task_end",
    "get_arguments_start",
    "get_arguments_end",
    "import_remote_func_start",
    "import_remote_func_end",
    "acquire_lock_start",
    "acquire_lock_end",
    "wait_for_function_start",
    "wait_for_function_end",
    "execute_start",
    "execute_end",
    "store_outputs_start",
    "store_outputs_end",
    "wid",
]
source = ColumnDataSource(data={column: [] for column in source_columns})

axis_map = dict(worker_id="worker_id", time="time")

# NOTE(review): `oid` and `vars` are not columns of `source`.
tooltips = [
    ("TaskID", "@task_id"),
    ("Function Name", "@function_name"),
    ("WorkerID", "@wid"),
    ("ObjectID", "@oid"),
    ("Variables", "@vars"),
]
hover = HoverTool(tooltips=tooltips)


def _duration_slider(title):
    """Create a 0-10 slider with step 1 and initial value 1."""
    return Slider(start=0, end=10, value=1, step=1, title=title)


lock_slider = _duration_slider("Time to acquire lock")
args_slider = _duration_slider("Time to get arguments")
exec_slider = _duration_slider("Time to execute")
outputs_slider = _duration_slider("Time to store outputs")
task_id_search = TextInput(title="TaskID")
worker_id_search = TextInput(title="WorkerID")
x_axis = Select(title="Time in seconds", options=sorted(axis_map), value="time")
y_axis = Select(title="WorkerID", options=sorted(axis_map), value="worker_id")

p = figure(
    plot_height=600,
    plot_width=700,
    title="",
    toolbar_location="below",
    tools=[hover, "pan", "wheel_zoom", "box_zoom"],
    toolbar_sticky=False,
)

# One horizontal bar per stage, spanning "<stage>_start" to "<stage>_end" on
# the row given by the "y" column.
stage_bars = [
    ("acquire_lock", "#FF8633", "Acquire lock"),
    ("get_arguments", "#8033FF", "Get arguments"),
    ("execute", "#3390FF", "Execute task"),
    ("store_outputs", "#33FF9C", "Store outputs"),
]
for stage, stage_color, stage_label in stage_bars:
    p.hbar(
        y="y",
        height=1,
        left=stage + "_start",
        right=stage + "_end",
        source=source,
        color=stage_color,
        legend=stage_label,
    )
 
# def select(): 
#     worker_id_val = worker_id_search.value.strip()
#     task_id_val = task_id_search.value.strip()
    
#     selected_tasks = dict()
#     print("hi")
#     for task_id, data in task_info.items(): 
#         print("here")
#         print(str(data["acquire_lock_end"]-data["acquire_lock_start"]))
#         if ((data["acquire_lock_end"]-data["acquire_lock_start"]) > lock_slider.value): 
#             print("here")
#             if ((data["get_arguments_end"]-data["get_arguments_start"]) > args_slider.value): 
#                 print("here2")
#                 if ((data["execute_end"]-data["execute_start"]) > exec_slider.value): 
#                     print("here3")
#                     if ((data["store_outputs_end"]-data["store_outputs_start"]) > outputs_slider.value): 
# #                         if (task_id_val != ""): 
# #                             if (data["task_id"].contains(task_id_val)==True): 
# #                                 selected_tasks[task_id] = data 
# #                         if (worker_id_val != ""): 
# #                             if (data["worker_id"].contains(worker_id_val)==True): 
# #                                 selected_tasks[task_id] = data 
# #                         else: 
# #                             selected_tasks[task_id] = data
#                         selected_tasks[task_id]=data
#     return selected_tasks
    
def update():
    """Rebuild ``source.data`` from ``task_info`` and refresh the figure labels.

    Every record in ``task_info`` contributes one row.  The fields listed in
    ``field_names`` are copied into same-named columns; ``x``, ``y``,
    ``start`` and ``end`` are derived from them.

    Raises:
        KeyError: if a task record lacks one of the copied fields.
    """
    # Fields copied verbatim from each task record.
    field_names = (
        "worker_id",
        "task_id",
        "function_name",
        "get_task_start",
        "get_task_end",
        "get_arguments_start",
        "get_arguments_end",
        "import_remote_func_start",
        "import_remote_func_end",
        "acquire_lock_start",
        "acquire_lock_end",
        "store_outputs_start",
        "store_outputs_end",
        "execute_start",
        "execute_end",
        "wid",
    )
    tasks = list(task_info.values())
    data = {name: [task[name] for task in tasks] for name in field_names}

    # Derived columns.  `acquire_lock_end`, `store_outputs_end` and `end` were
    # previously filled from the matching *_start lists, which collapsed the
    # lock and store bars to zero width; they now carry the real end times.
    data["x"] = list(data["execute_start"])
    data["y"] = list(data["worker_id"])
    data["start"] = list(data["get_task_start"])
    data["end"] = list(data["store_outputs_end"])

    p.xaxis.axis_label = "Time in seconds"
    p.yaxis.axis_label = "WorkerID"
    p.title.text = "Task Information"
    source.data = data

    
sizing_mode = 'fixed'

controls = [x_axis, y_axis, exec_slider, args_slider, outputs_slider, lock_slider]


def _refresh_on_change(attr, old, new):
    """``on_change`` callback: ignore the change details and redraw."""
    update()


for control in controls:
    control.on_change('value', _refresh_on_change)

inputs = widgetbox(*controls, sizing_mode=sizing_mode)

# Plot on the left, sliders and search boxes on the right.
side_panel = widgetbox(
    lock_slider,
    args_slider,
    exec_slider,
    outputs_slider,
    task_id_search,
    worker_id_search,
)
layout = row(p, side_panel)

# Fill the data source once before the plot is first shown.
update()

document = curdoc()
document.add_root(layout)
document.title = "Tasks"
show(layout)