## Lucid: Workload Estimator


In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn import preprocessing, metrics
from interpret.glassbox import ExplainableBoostingRegressor


# Show every column when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

# Trace selection: `idx` picks the entry at the same position in both lists.
# NOTE(review): `save` is not referenced in the visible cells.
idx = 1
save = False
cluster_list = ["Philly", "MLaas"]
experiment_list = ["Philly", "MLaas"]
experiment = experiment_list[idx]
cluster = cluster_list[idx]

# Directory that holds the selected cluster's trace files.
datapath = f"../data/{cluster}"

# Evaluation scores are written into this frame later (one column per cluster).
result = pd.DataFrame()

# Load only the job attributes used below: the feature columns plus the
# `duration` target and the `submit_time` ordering key.
df = pd.read_csv(
    f"{datapath}/filled.csv",
    usecols=[
        "job_id",
        "user",
        "vc",
        "gpu_num",
        "submit_time",
        "amp",
        "gpu_util",
        "gmem_util",
        "gmem",
        "duration",
    ],
)


In [9]:
def split_indices(n, parts=5):
    """Split ``n`` items into ``parts`` contiguous, near-equal index ranges.

    Args:
        n: Total number of items to partition.
        parts: Number of ranges to produce.

    Returns:
        A list of ``(start, end)`` tuples with ``end`` exclusive. Consecutive
        ranges share their boundary, and when ``n`` is not divisible by
        ``parts`` the leading ranges are one item longer than the rest.
    """
    base, extra = divmod(n, parts)

    bounds = []
    cursor = 0
    for part_no in range(parts):
        # The first `extra` ranges each absorb one of the leftover items.
        width = base + 1 if part_no < extra else base
        bounds.append((cursor, cursor + width))
        cursor += width
    return bounds

# Display the (start, end) row ranges produced by the default five-way split of the loaded trace.
split_indices(len(df))

[(0, 15635), (15635, 31269), (31269, 46903), (46903, 62537), (62537, 78171)]

## Partial Update of the Lucid Model

In [10]:

# Rolling evaluation: the trace is cut into five contiguous chunks. For each
# pair of neighbouring chunks, an EBM regressor is fitted on the earlier chunk
# and used to predict job durations for the later one.
trace_range_list = split_indices(len(df), parts=5)

# Scored validation chunks are collected here and concatenated once after the
# loop. Growing a DataFrame with pd.concat on every iteration re-copies all
# previously accumulated rows each time.
val_frames = []
for i, trace_range in enumerate(trace_range_list[:-1]):

    train_start, train_end = trace_range
    val_start, val_end = trace_range_list[i + 1]

    # Positional row slices; each chunk is then ordered by submission time.
    # sort_values returns a new frame, so the edits below never touch `df`.
    train_df = (
        df.iloc[train_start:train_end]
        .sort_values(by="submit_time")
        .reset_index(drop=True)
    )
    val_df = (
        df.iloc[val_start:val_end]
        .sort_values(by="submit_time")
        .reset_index(drop=True)
    )

    # `duration` is the regression target; `submit_time` is only used for
    # ordering and is not given to the model as a feature.
    train_data = train_df.drop(columns=["duration", "submit_time"])
    test_data = val_df.drop(columns=["duration", "submit_time"])
    train_label = train_df[["duration"]]
    test_label = val_df[["duration"]]

    print(f"Train Data Len: {len(train_data)}")

    # Alternative configuration kept for reference:
    # ebm = ExplainableBoostingRegressor(learning_rate=0.01, binning="uniform", interactions=20)
    ebm = ExplainableBoostingRegressor(learning_rate=0.01,  interactions=20)
    ebm.fit(train_data, train_label)
    pred = ebm.predict(test_data)

    mae_score = metrics.mean_absolute_error(test_label, pred)
    mape_score = metrics.mean_absolute_percentage_error(test_label, pred)
    r2_score = metrics.r2_score(test_label, pred)
    # NOTE: this cell is overwritten on every iteration, so after the loop it
    # holds only the R^2 of the last train/validation pair.
    result.at["ebm_r2", cluster] = r2_score
    print(f"mae_score: {mae_score:.2f}, mape_score: {mape_score:.2f}, r2_score: {r2_score:.4f}")

    # Store the predicted duration, truncated to an integer, as the job's
    # `priority` column.
    pred = pred.astype(int)
    val_df["priority"] = pred
    val_frames.append(val_df)

# pd.concat raises on an empty list, so fall back to an empty frame when there
# was no train/validation pair to evaluate.
week_df = pd.concat(val_frames) if val_frames else pd.DataFrame()


# week_df.to_csv(f"ebm/{experiment}_Sept_ebm_weekly_updated.csv", index=False)


Train Data Len: 15635
mae_score: 3357.42, mape_score: 4.74, r2_score: 0.6742
Train Data Len: 15634
mae_score: 3481.87, mape_score: 3.49, r2_score: 0.6403
Train Data Len: 15634
mae_score: 2925.79, mape_score: 4.95, r2_score: 0.5535
Train Data Len: 15634
mae_score: 3430.56, mape_score: 4.15, r2_score: 0.6365


In [11]:
# Write the accumulated validation rows, including the predicted `priority` column, to CSV without the index.
week_df.to_csv(f"ebm/{experiment}_ebm_updated.csv", index=False)


In [19]:
# Sanity check on the saved predictions: the first chunk of the trace is only
# ever used for training, so the first saved row should be the job at the start
# of the second chunk of the raw trace.
#
# Only the trace selected by `cluster` / `experiment` is loaded. The earlier
# version also read the other trace's files and then immediately overwrote
# them, which is wasted I/O and fails when that output was never generated.
trace_data = pd.read_csv(f"{datapath}/filled.csv")
emb_data = pd.read_csv(f"./ebm/{experiment}_ebm_updated.csv")

# Derive the offset from the split itself rather than hard-coding a per-trace
# row number (15635 for the MLaas trace with a five-way split).
first_val_row = split_indices(len(trace_data), parts=5)[1][0]
emb_data.iloc[0]["job_id"] == trace_data.iloc[first_val_row]["job_id"]

True