In [1]:
import numpy as np
import pandas as pd

In [5]:
def load_and_describe_data(fp, n_samples=3, random_state=None):
    """Read a parquet file, print a quick summary, and return the DataFrame.

    Prints the frame's shape and per-column dtypes, then displays a
    transposed random sample of rows (columns become rows in the preview).

    Parameters
    ----------
    fp : str or path-like
        Location of the parquet file; passed straight to ``pd.read_parquet``.
    n_samples : int, default 3
        Maximum number of rows shown in the preview. It is capped at the
        number of rows in the frame so small or empty files do not raise.
    random_state : int, optional
        Seed forwarded to ``DataFrame.sample``. Set it to make the preview
        reproducible between runs; the default keeps the sample random.

    Returns
    -------
    pandas.DataFrame
        The frame read from ``fp``, unmodified.
    """
    df = pd.read_parquet(fp)
    print('Shape:', df.shape)
    print('dtypes:', df.dtypes)
    # DataFrame.sample raises ValueError when asked for more rows than the
    # frame holds (without replacement), so never request more than len(df).
    preview_rows = min(n_samples, len(df))
    # `display` is not imported here; it is expected to be the global that
    # the notebook kernel provides.
    display(df.sample(preview_rows, random_state=random_state).T)
    return df

# Artifact A2: Historic Job Trace

In [6]:
# Read the historic job trace and show its shape, dtypes and a small random sample.
a2_df = load_and_describe_data(fp='../data/historic_job_trace.parquet')

Shape: (2557884, 20)
dtypes: submit_time           datetime64[us, pytz.FixedOffset(-360)]
start_time            datetime64[us, pytz.FixedOffset(-360)]
end_time              datetime64[us, pytz.FixedOffset(-360)]
nodes_req                                              int64
processors_req                                         int64
qos                                                   object
wallclock_used_sec                                   float64
avg_power_per_node                                   float64
wallclock_req_sec                                    float64
memory_req_raw                                       float64
modules                                               object
conda_envs                                            object
user                                                  object
name                                                  object
account                                               object
partition                                             ob

Unnamed: 0,1424879,2334124,19426
submit_time,2024-08-14 16:05:21-06:00,2025-01-31 21:07:22-06:00,2023-12-19 16:58:12-06:00
start_time,2024-08-15 05:39:36-06:00,2025-02-01 00:18:52-06:00,2023-12-19 17:02:58-06:00
end_time,2024-08-15 05:51:48-06:00,2025-02-01 00:37:39-06:00,2023-12-19 17:03:05-06:00
nodes_req,1,1,1
processors_req,104,104,4
qos,normal,normal,normal
wallclock_used_sec,732.0,1127.0,7.0
avg_power_per_node,763.963115,730.075421,410.428571
wallclock_req_sec,14400.0,172800.0,300.0
memory_req_raw,246064.0,246064.0,10240.0


# A4: Baseline Model Results

## Power Results

In [7]:
# Read the baseline power results and show their shape, dtypes and a small random sample.
a4_power_df = load_and_describe_data(fp='../data/baseline_power_results.parquet')

Shape: (1035281, 2)
dtypes: avg_power_per_node    float64
predicted_power       float64
dtype: object


Unnamed: 0,330378,247939,172010
avg_power_per_node,646.515929,728.022405,413.9375
predicted_power,578.89886,745.91614,405.38577


## Runtime Results

In [8]:
a4_power_df = load_and_describe_data('../data/baseline_runtime_results.parquet')

Shape: (1035281, 2)
dtypes: wallclock_used_sec         float64
predicted_runtime_hours    float64
dtype: object


Unnamed: 0,270545,214504,943721
wallclock_used_sec,2915.0,2430.0,14403.0
predicted_runtime_hours,1.067133,0.193978,0.464323


In [10]:
# Convert wallclock_used_sec to hours for comparison
a4_power_df['wallclock_used_hours'] = a4_power_df['wallclock_used_sec'] / 3600
a4_power_df.sample(3).T

Unnamed: 0,623737,334611,537512
wallclock_used_sec,2181.0,1518.0,18323.0
predicted_runtime_hours,0.144274,3.193444,9.234471
wallclock_used_hours,0.605833,0.421667,5.089722


# A14: Ground Truth Schedule Data

In [11]:
# Read the ground-truth schedule and show its shape, dtypes and a small random sample.
a14_df = load_and_describe_data(fp='../data/ground_truth.parquet')

Shape: (161266, 7)
dtypes: submit                 datetime64[ns]
start                  datetime64[ns]
end                    datetime64[ns]
nodes                           int64
runtime               timedelta64[us]
wait_time                     float64
avg_power_per_node            float64
dtype: object


Unnamed: 0,128510,75177,157068
submit,2024-09-12 16:24:39,2024-09-06 13:25:43,2024-09-14 20:42:32
start,2024-09-12 20:49:16,2024-09-06 13:44:07,2024-09-14 22:30:24
end,2024-09-12 22:10:25,2024-09-06 13:48:27,2024-09-14 23:22:11
nodes,1,1,1
runtime,0 days 01:21:09,0 days 00:04:20,0 days 00:51:47
wait_time,15877.0,1104.0,6472.0
avg_power_per_node,745.898542,331.461538,764.808497


# A15: Validation Simulation Schedule Data

In [12]:
# Read the validation simulation schedule and show its shape, dtypes and a small random sample.
a15_df = load_and_describe_data(fp='../data/validation_sim.parquet')

Shape: (161266, 7)
dtypes: submit                 datetime64[ns]
start                  datetime64[ns]
end                    datetime64[ns]
nodes                           int64
runtime               timedelta64[us]
wait_time                     float64
avg_power_per_node            float64
dtype: object


Unnamed: 0,106333,21589,68185
submit,2024-09-10 11:56:21,2024-09-03 04:25:43,2024-09-05 18:05:23
start,2024-09-10 11:56:21,2024-09-03 16:12:56.500000,2024-09-06 13:31:21.500000
end,2024-09-10 11:58:42,2024-09-03 16:17:51.500000,2024-09-06 14:22:57.500000
nodes,1,1,1
runtime,0 days 00:02:21,0 days 00:04:55,0 days 00:51:36
wait_time,0.0,42433.5,69958.5
avg_power_per_node,369.134752,428.501695,761.34916


# A16: Baseline Simulation Schedule Data

In [13]:
# Read the baseline simulation schedule and show its shape, dtypes and a small random sample.
a16_df = load_and_describe_data(fp='../data/baseline_sim.parquet')

Shape: (161266, 7)
dtypes: submit                 datetime64[ns]
start                  datetime64[ns]
end                    datetime64[ns]
nodes                           int64
runtime               timedelta64[us]
wait_time                     float64
avg_power_per_node            float64
dtype: object


Unnamed: 0,153732,161111,98664
submit,2024-09-14 11:31:57,2024-09-15 18:07:49,2024-09-10 08:38:46
start,2024-09-14 13:22:36.500000,2024-09-15 20:22:31,2024-09-10 09:24:03.500000
end,2024-09-15 04:20:44.500000,2024-09-15 20:22:59,2024-09-10 09:24:10.500000
nodes,1,1,1
runtime,0 days 14:58:08,0 days 00:00:28,0 days 00:00:07
wait_time,6639.5,8082.0,2717.5
avg_power_per_node,738.508351,406.785714,447.714286


# A18: Energy-Aware Simulation Schedule Data

In [14]:
# Read the energy-aware simulation schedule and show its shape, dtypes and a small random sample.
a18_df = load_and_describe_data(fp='../data/ea_sim.parquet')

Shape: (161266, 7)
dtypes: submit                 datetime64[ns]
start                  datetime64[ns]
end                    datetime64[ns]
nodes                           int64
runtime               timedelta64[us]
wait_time                     float64
avg_power_per_node            float64
dtype: object


Unnamed: 0,120977,75901,140736
submit,2024-09-12 01:20:58,2024-09-06 14:41:42,2024-09-13 11:25:13
start,2024-09-12 01:29:08.500000,2024-09-06 21:47:36,2024-09-13 11:25:41.500000
end,2024-09-12 01:32:26.500000,2024-09-06 21:52:17,2024-09-13 11:53:37.500000
nodes,1,1,1
runtime,0 days 00:03:18,0 days 00:04:41,0 days 00:27:56
wait_time,490.5,25554.0,28.5
avg_power_per_node,313.535354,318.338078,260.045346


# A19: Optuna Results

In [16]:
# Store the Optuna results under their own name (A19) so they do not
# overwrite the energy-aware simulation frame held in a18_df.
a19_df = load_and_describe_data('../data/optuna_results.parquet')

Shape: (500, 7)
dtypes: alpha                object
beta                 object
gamma                object
start                object
end                  object
median_wait_time    float64
re_utilization      float64
dtype: object


Unnamed: 0,173,480,384
alpha,2.276,2.1803,0.8365
beta,2.4701,2.5238,4.6054
gamma,0.3889,0.1664,0.3357
start,6.7588,9.4426,7.7787
end,9.6131,9.6303,9.2226
median_wait_time,2.172361,2.849722,3.498472
re_utilization,0.230636,0.23027,0.232745
