In [1]:
from ipywidgets import VBox, HBox
from pyspark.sql import SparkSession
from perspective import PerspectiveWidget

from helpers.data import (
    HOST,
    MACHINES_PORT,
    USAGE_PORT,
    STATUS_PORT,
    JOBS_PORT,
)
from helpers.spark import (
    MACHINE_SCHEMA,
    MACHINE_SCHEMA_SPARK,
    USAGE_SCHEMA,
    USAGE_SCHEMA_SPARK,
    STATUS_SCHEMA,
    STATUS_SCHEMA_SPARK,
    JOBS_SCHEMA,
    JOBS_SCHEMA_SPARK,
)

In [2]:
# Important imports
from helpers.spark import (
    get_df_from_server,
    push_to_perspective,
)
from helpers.fastapi import (
    perspective_spark_bridge,
    start_server,
)

In [None]:
from helpers.data import machines, usage

In [8]:
# Build sample machine data with the `machines` helper from helpers.data.
# NOTE(review): presumably this is a single machine record (dict) — TODO confirm.
m = machines()

In [39]:
# Derive a usage sample from the machine record `m` built in the preceding cell.
# Feeding `u` back into `usage` (as before) only works when `u` is left over from
# an earlier run; on a fresh kernel it raises NameError.
# NOTE(review): assumes `machines()` returns a single record that `usage` accepts
# — confirm against helpers.data.
u = usage(m)
u

{'machine_id': '5b361c4e5718',
 'kind': 'worker',
 'cores': 64,
 'region': 'eu',
 'zone': 'C',
 'cpu': 88.47,
 'mem': 100,
 'free': 0,
 'network': 91.78,
 'disk': 98.4}

In [40]:
# Obtain the Spark session for the demo (getOrCreate reuses an existing one).
spark = (
    SparkSession.builder
    .appName("Perspective Demo")
    .getOrCreate()
)

24/06/07 17:02:27 WARN Utils: Your hostname, mbp-m1 resolves to a loopback address: 127.0.0.1; using 10.0.1.10 instead (on interface en0)
24/06/07 17:02:27 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/06/07 17:02:27 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [41]:
# Open one Spark streaming DataFrame per feed. Each feed is read from HOST on
# its own port and parsed with its own Spark schema; the calls run in the
# order machines, usage, status, jobs.
machines_df, usage_df, status_df, jobs_df = (
    get_df_from_server(spark, spark_schema, HOST, feed_port)
    for spark_schema, feed_port in (
        (MACHINE_SCHEMA_SPARK, MACHINES_PORT),
        (USAGE_SCHEMA_SPARK, USAGE_PORT),
        (STATUS_SCHEMA_SPARK, STATUS_PORT),
        (JOBS_SCHEMA_SPARK, JOBS_PORT),
    )
)

24/06/07 17:02:48 WARN TextSocketSourceProvider: The socket source should not be used for production applications! It does not support recovery.
24/06/07 17:02:48 WARN TextSocketSourceProvider: The socket source should not be used for production applications! It does not support recovery.
24/06/07 17:02:48 WARN TextSocketSourceProvider: The socket source should not be used for production applications! It does not support recovery.
24/06/07 17:02:48 WARN TextSocketSourceProvider: The socket source should not be used for production applications! It does not support recovery.


In [42]:
# Build four independent Perspective widgets, one per feed; each widget owns
# its own internal table. The settings panel is hidden on all of them.
machines_widget = PerspectiveWidget(
    MACHINE_SCHEMA,
    index="machine_id",
    settings=False,
)
usage_widget = PerspectiveWidget(
    USAGE_SCHEMA,
    index="machine_id",
    settings=False,
)
# Status rows are keyed by machine and shown most-recently-updated first.
status_widget = PerspectiveWidget(
    STATUS_SCHEMA,
    index="machine_id",
    sort=[["last_update", "desc"]],
    settings=False,
)
# Jobs have no index; newest start time first.
jobs_widget = PerspectiveWidget(
    JOBS_SCHEMA,
    sort=[["start_time", "desc"]],
    settings=False,
)

In [43]:
# Arrange the widgets in a 2x2 grid with ipywidgets: two rows stacked vertically.
top_row = HBox(children=[machines_widget, usage_widget])
bottom_row = HBox(children=[status_widget, jobs_widget])
VBox(children=[top_row, bottom_row])

VBox(children=(HBox(children=(PerspectiveWidget(columns=['machine_id', 'kind', 'cores', 'region', 'zone'], set…

24/06/07 17:10:01 WARN TextSocketMicroBatchStream: Stream closed by localhost:8082
24/06/07 17:10:01 WARN TextSocketMicroBatchStream: Stream closed by localhost:8083
24/06/07 17:10:01 WARN TextSocketMicroBatchStream: Stream closed by localhost:8084


In [44]:
# Map each table name to the widget that should receive its rows, build the
# bridge app from that mapping, then start the server and keep the port.
widgets_by_table = {
    "machines": machines_widget,
    "usage": usage_widget,
    "status": status_widget,
    "jobs": jobs_widget,
}
app = perspective_spark_bridge(widgets_by_table)
port = start_server(app)

CRITICAL:root:Listening on http://localhost:50487
INFO:     Started server process [34584]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:50487 (Press CTRL+C to quit)
INFO:     ('127.0.0.1', 50647) - "WebSocket /tables/machines" [accepted]
INFO:     ('127.0.0.1', 50648) - "WebSocket /tables/status" [accepted]
INFO:     ('127.0.0.1', 50649) - "WebSocket /tables/usage" [accepted]
INFO:     ('127.0.0.1', 50650) - "WebSocket /tables/jobs" [accepted]
INFO:     connection open
INFO:     connection open
INFO:     connection open
INFO:     connection open
INFO:     ('127.0.0.1', 50666) - "WebSocket /tables/usage" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 50668) - "WebSocket /tables/status" [accepted]
INFO:     ('127.0.0.1', 50667) - "WebSocket /tables/machines" [accepted]
INFO:     connection open
INFO:     connection open
INFO:     connection closed
INFO:     connection closed
INFO:     connecti

INFO:     connection open
INFO:     connection open
INFO:     connection closed
INFO:     connection closed
INFO:     connection closed
INFO:     ('127.0.0.1', 51280) - "WebSocket /tables/usage" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 51304) - "WebSocket /tables/jobs" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 51306) - "WebSocket /tables/usage" [accepted]
INFO:     connection closed
INFO:     connection open
INFO:     ('127.0.0.1', 51308) - "WebSocket /tables/status" [accepted]
INFO:     connection open
INFO:     connection closed
INFO:     connection closed
INFO:     ('127.0.0.1', 51337) - "WebSocket /tables/status" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 51338) - "WebSocket /tables/usage" [accepted]
INFO:     ('127.0.0.1', 51340) - "WebSocket /tables/jobs" [accepted]
INFO:     connection open
INFO:     connection open
INFO:     connection closed
INFO:     connection closed
INFO:     connection closed
INFO:     connection clo

INFO:     connection open
INFO:     connection closed
INFO:     connection closed
INFO:     connection closed
INFO:     ('127.0.0.1', 51994) - "WebSocket /tables/status" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 51996) - "WebSocket /tables/jobs" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 51998) - "WebSocket /tables/usage" [accepted]
INFO:     connection open
INFO:     connection closed
INFO:     connection closed
INFO:     connection closed
INFO:     ('127.0.0.1', 52027) - "WebSocket /tables/jobs" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 52029) - "WebSocket /tables/status" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 52031) - "WebSocket /tables/usage" [accepted]
INFO:     connection open
INFO:     connection closed
INFO:     connection closed
INFO:     connection closed
INFO:     ('127.0.0.1', 52060) - "WebSocket /tables/jobs" [accepted]
INFO:     connection open
INFO:     ('127.0.0.1', 52062) - "WebSocket /tables

In [45]:
# Forward every streaming DataFrame to its table on the local bridge server
# started above (same order as before: machines, usage, status, jobs).
for stream_df, table_name in (
    (machines_df, "machines"),
    (usage_df, "usage"),
    (status_df, "status"),
    (jobs_df, "jobs"),
):
    push_to_perspective(stream_df, table_name, "localhost", port)

24/06/07 17:05:18 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /private/var/folders/pm/0nt85wh51h39f604mgmyyfwm0000gn/T/temporary-87abbd5b-7640-4a46-8981-b94e2e115385. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
24/06/07 17:05:18 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
24/06/07 17:05:18 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /private/var/folders/pm/0nt85wh51h39f604mgmyyfwm0000gn/T/temporary-629e2584-fe4a-40fc-883a-2a20c39d634d. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folde

In [46]:
# Reconfigure the status widget as a bar chart grouped by status value.
status_widget.plugin = "X Bar"
status_widget.group_by = ["status"]
# Only machine_id is displayed in the grouped view.
status_widget.columns = ["machine_id"]
# NOTE(review): the aggregate is keyed on "status", but the only displayed
# column is "machine_id" — confirm this is the intended column.
status_widget.aggregates = {"status": "last"}

In [47]:
# Group the jobs view by machine and show per-machine job aggregates.
jobs_widget.group_by = ["machine_id"]
# `columns` must be a flat list of column names. The previous trailing comma
# after the closing bracket turned the value into a 1-tuple wrapping the list.
jobs_widget.columns = ["job_id", "name", "units", "start_time", "end_time"]
# One aggregate per displayed column: count the jobs, sum the units, and keep
# the last value of the remaining fields.
jobs_widget.aggregates = {
    "job_id": "count",
    "name": "last",
    "units": "sum",
    "start_time": "last",
    "end_time": "last",
}