# 有压力下的 Workload 聚合数据分析

In [None]:
import sys
sys.path.append('./tools')

from aggregation import *
import analyze
import display

In [None]:
# Data directories: one run under interference and one no-stress run.
exp_root = "/home/ict/appProfile/data/redis/same_cpu/redis_cache_20231027124659"
no_stress_exp_root = "/home/ict/appProfile/data/redis/no_stress/redis_no_20231102090842"

# QoS metric columns examined throughout the notebook.
qos_columns = [
    "app_redis_qos_qps_of_redis_get",
    "app_redis_qos_qps_of_redis_total",
    "app_redis_qos_qps_of_redis_set",
    "app_redis_qos_p99_latency_set",
    "app_redis_qos_p99_latency_get",
]

# Correlation function shared by the analysis cells.
corr_f = analyze.pearson_correlation

# NOTE(review): ("stress") and ("app") are plain strings, not one-element
# tuples -- confirm the filter helpers accept a bare string prefix.
exp_data = read_from_dir(exp_root)
exp_data.set_workload_preprocess_funcs(
    [
        filter_column_startswith(col_prefix=("stress", "vm", "app")),
        filter_column_useless(excol_prefix=("stress")),
        filter_row_noise(col_prefix=("app")),
    ]
)

# Group the agg_epoch() result by its own index; the group keys are listed so
# one of them can be picked as the workload to analyse.
df_epoch = exp_data.agg_epoch()
df_epoch_group = df_epoch.groupby(df_epoch.index)
keys = list(df_epoch_group.groups)
keys

In [None]:
# Workload to analyse (one of the group keys).
df_key = keys[6]
# QoS metric to analyse.
qos_column = qos_columns[3]
# Absolute-correlation threshold used by the correlation filters.
c = 0.85

df_workload = df_epoch_group.get_group(df_key)
# The first column of the workload frame is used as the stress column.
stress = df_workload.columns.tolist()[0]
df_workload

# 一、干扰劣化分析

## (1) 劣化程度分析

In [None]:
# Load the no-stress run and register its preprocessing functions.
# NOTE(review): ("app") is a plain string, not a one-element tuple -- confirm
# filter_row_noise accepts a bare string prefix.
no_stress_exp_data = read_from_dir(no_stress_exp_root)
no_stress_exp_data.set_workload_preprocess_funcs([
    filter_column_startswith(col_prefix=("vm", "app")),
    filter_column_useless(),
    filter_row_noise(col_prefix=("app")),
])

no_stress_df_epoch = no_stress_exp_data.agg_epoch()
# The list selector keeps the baseline as a DataFrame rather than a Series.
no_stress = no_stress_df_epoch.loc[[df_key]]

# The no-stress run may have different columns, so compare only the shared
# ones. Walk df_workload's columns instead of intersecting two sets: set
# iteration order is arbitrary, which made the column order of delta_df vary
# between runs.
common_columns = [col for col in df_workload.columns if col in no_stress.columns]
no_stress = no_stress[common_columns]

# Difference between the stressed measurements and the no-stress baseline.
delta_df = df_workload[common_columns] - no_stress
delta_df

### 1. QoS劣化与压力

In [None]:
# Change relative to the no-stress baseline, expressed in percent.
percentage_df = delta_df.mul(100).div(no_stress)

stress_levels = df_workload[stress]
percentage_df[stress] = stress_levels

# Row labels: the stress column name without its first "_"-separated token,
# followed by the stress value of that row.
stress_label = stress.split('_', 1)[1]
percentage_df.index = [f"{stress_label}_{level}" for level in stress_levels]
percentage_df[qos_columns]

In [None]:
# Draw the QoS columns of percentage_df with the project's display helper.
display.plt_by_column(
    percentage_df,
    columns=qos_columns,
)

### 2. 指标劣化与压力相关性

In [None]:
# Percentages run into many zero divisors, so the raw deltas are used here.
delta_df[stress] = df_workload[stress]
corr_matrix = corr_f(delta_df)


def _stress_strong_only(x):
    """Keep entries whose absolute value exceeds c and drop all-NaN rows."""
    return x[x.abs() > c].dropna(axis=0, how='all')


stress_corr = analyze.single_corr(
    corr_matrix,
    stress,
    similarity_filter=[_stress_strong_only],
)
stress_corr

In [None]:
# Index the deltas by the stress column and draw the metrics kept in
# stress_corr, three per row.
display.plt_by_column(
    delta_df.set_index(stress),
    columns=list(stress_corr.index),
    ncols=3,
)

### 3. QoS劣化与指标相关性

In [None]:
# QoS metric whose correlations are inspected in this section.
qos_column = qos_columns[3]


def _qos_strong_only(x):
    """Keep entries whose absolute value exceeds c and drop all-NaN rows."""
    return x[x.abs() > c].dropna(axis=0, how='all')


def _qos_drop_qos_columns(x):
    """Remove the QoS columns themselves; labels that are absent are ignored."""
    return x.drop(qos_columns, errors='ignore')


qos_corr = analyze.single_corr(
    corr_matrix,
    qos_column,
    similarity_filter=[_qos_strong_only, _qos_drop_qos_columns],
)
qos_corr

In [None]:
# Draw the metrics kept in qos_corr against the selected QoS column,
# three per row.
display.plt_by_column(
    delta_df,
    x_column=qos_column,
    columns=list(qos_corr.index),
    ncols=3,
)

### 4. 箱线图绘制

In [None]:
# QoS column shown in the box plot for the selected workload.
column = qos_columns[4]

df = exp_data.one_column_on_stresses(
    column,
    df_key,
)
display.plt_box(
    df,
    stress,
    column,
)

### 5. 不同负载下的劣化程度

In [None]:
# Per-workload table for the selected QoS column against the no-stress run,
# using the "stress_cpu-load" stress column.
det_df = agg_per_workload_stress(
    exp_data,
    no_stress_df_epoch,
    qos_column,
    stress="stress_cpu-load",
)
det_df

In [None]:
# Render det_df for the selected QoS column with the heatmap helper.
display.plt_deterioration_heatmap(
    det_df,
    qos_column,
)

### 6. 不同干扰强度下的最大团

In [None]:
import functools


# First clique reported for every epoch of the selected workload, keyed by
# "<stress>_<epoch index>".
# NOTE(review): assumes cliques[0] is the largest clique -- confirm against
# analyze.cliques_from_corr.
epoch_workloads = exp_data.workloads_of(df_key)  # loop-invariant, look up once
largest_clique_dict = {}
for i in range(exp_data.exp["n_epoch"]):
    workload_info = epoch_workloads[i]
    # Use a dedicated name so the corr_matrix computed from delta_df for the
    # correlation cells is not overwritten by this loop.
    epoch_corr_matrix = corr_f(exp_data.workload_df(workload_info))
    # G ends up holding the graph of the last epoch processed.
    G, cliques = analyze.cliques_from_corr(epoch_corr_matrix, threshold=0.90)
    largest_clique_dict[f"{stress}_{i}"] = set(cliques[0])


# Compare every clique with the intersection of all cliques.
sets = list(largest_clique_dict.values())
# reduce() without an initial value raises TypeError on an empty sequence, so
# fall back to an empty set when there are no epochs.
common_metric = functools.reduce(set.intersection, sets) if sets else set()
# Number of metrics each clique has beyond the shared ones.
diff_largest_clique = {
    key: len(clique) - len(common_metric)
    for key, clique in largest_clique_dict.items()
}

print(common_metric)
diff_largest_clique

In [None]:
# Draw the shared metrics on graph G with the project's display helper.
display.plt_clique_on_graph(
    G,
    common_metric,
    scaling=1.25,
    label_pos=0.35,
)