In [None]:
import sys
sys.path.append('./tool')

from aggregation import *
import analyze
import display

In [None]:
# Directory holding the raw results of the experiment analysed in this notebook.
exp_root = "/home/ict/appProfile/data/same_cpu/redis_cpu_20231028084501"

# QoS metric columns; a later cell picks one of them by position.
qos_columns = [
    "app_redis_qos_qps_of_redis_get",
    "app_redis_qos_qps_of_redis_total",
    "app_redis_qos_qps_of_redis_set",
    "app_redis_qos_p99_latency_set",
    "app_redis_qos_p99_latency_get",
]

exp_data = read_from_dir(exp_root)

# NOTE(review): the single prefixes below are plain strings, not 1-tuples
# (a parenthesised string is still a string) -- confirm that the filter
# helpers accept a bare str as well as a tuple of prefixes.
workload_preprocess_funcs = [
    filter_column_startswith(col_prefix=("stress", "vm", "app")),
    filter_column_useless(excol_prefix="stress"),
    filter_row_noise(col_prefix="app"),
]
exp_data.set_workload_preprocess_funcs(workload_preprocess_funcs)

# Group the per-epoch aggregate by its own index labels; the labels are listed
# so that one of them can be selected as the workload in the next cell.
df_epoch = exp_data.agg_epoch()
df_epoch_group = df_epoch.groupby(df_epoch.index)
keys = list(df_epoch_group.groups)
keys

In [None]:
# Workload to analyse, chosen by its position in `keys`.
df_key = keys[6]
# QoS metric to analyse, chosen by its position in `qos_columns`.
qos_column = qos_columns[3]
# Correlation threshold used by the filters in later cells.
c = 0.85

df_workload = df_epoch_group.get_group(df_key)
# The stress column is picked purely by position (second column).
# NOTE(review): confirm the preprocessing always leaves it at that position.
stress = df_workload.columns[1]
df_workload

# 一、干扰敏感度分析
## (1) 劣化程度

In [None]:
# Load the reference experiment that the selected workload is compared against
# (presumably a run without any stressor, judging by the directory name --
# TODO confirm).
no_stress_exp_root = "/home/ict/appProfile/data/addtion_exp/standard_stress_no_20231023023901"
no_stress_exp_data = read_from_dir(no_stress_exp_root)
no_stress_exp_data.set_workload_preprocess_funcs([
    filter_column_startswith(col_prefix=("vm", "app")),
    filter_column_useless(),
    filter_row_noise(col_prefix=("app")),
])

no_stress_df_epoch = no_stress_exp_data.agg_epoch()
# Double brackets keep the result a DataFrame, so the subtraction below is
# aligned on both index and columns.
no_stress = no_stress_df_epoch.loc[[df_key]]

# no_stress may have different columns, so only the shared ones are compared.
# The intersection keeps df_workload's column order: going through a set
# would order the columns arbitrarily (string hashing is randomised per
# interpreter session), making delta_df's layout differ between runs.
common_columns = [col for col in df_workload.columns if col in no_stress.columns]
no_stress = no_stress[common_columns]

# Per-metric difference between the selected workload and the reference rows.
delta_df = df_workload[common_columns] - no_stress
delta_df

### 1. QoS劣化与压力

In [None]:
# Delta expressed as a percentage of the reference value.
percentage_df = delta_df.mul(100).div(no_stress)
percentage_df[stress] = df_workload[stress]

# Row labels: the stress column name with its first '_'-separated token
# removed, followed by '_' and the row's stress value.
stress_name = stress.split('_', 1)[1]
percentage_df.index = [f"{stress_name}_{level}" for level in df_workload[stress]]

display.plt_by_column(percentage_df, columns=qos_columns)

### 2. 其他指标劣化与压力

In [None]:
# Percentages would frequently hit a zero divisor, so the raw deltas are used
# for the similarity computation instead.
delta_df = delta_df.assign(**{stress: df_workload[stress]})
corr_matrix = analyze.cosine_similarity(delta_df)
corr_matrix

In [None]:
# Filters handed to analyze.single_corr for the stress column:
#   1. keep entries whose absolute value exceeds the threshold c, then drop
#      rows that are left entirely empty;
#   2. drop the QoS columns' labels (labels that are absent are ignored).
stress_similarity_filters = [
    lambda sim: sim[sim.abs() > c].dropna(axis=0, how='all'),
    lambda sim: sim.drop(qos_columns, errors='ignore'),
]
stress_corr = analyze.single_corr(
    corr_matrix,
    stress,
    similarity_filter=stress_similarity_filters,
)
stress_corr

In [None]:
# Plot every metric retained in stress_corr, using the stress column as the
# frame's index, three plots per row.
stress_related_metrics = list(stress_corr.index)
display.plt_by_column(
    delta_df.set_index(stress),
    columns=stress_related_metrics,
    ncols=3,
)

### 3. QoS劣化与其他指标

In [None]:
# Filters handed to analyze.single_corr for the selected QoS metric:
#   1. keep entries whose absolute value exceeds the threshold c, then drop
#      rows that are left entirely empty;
#   2. drop the QoS columns' labels (labels that are absent are ignored).
qos_similarity_filters = [
    lambda sim: sim[sim.abs() > c].dropna(axis=0, how='all'),
    lambda sim: sim.drop(qos_columns, errors='ignore'),
]
qos_corr = analyze.single_corr(
    corr_matrix,
    qos_column,
    similarity_filter=qos_similarity_filters,
)
qos_corr

In [None]:
# Plot every metric retained in qos_corr against the selected QoS metric,
# three plots per row.
qos_related_metrics = list(qos_corr.index)
display.plt_by_column(
    delta_df,
    x_column=qos_column,
    columns=qos_related_metrics,
    ncols=3,
)

# 二、相关性分析

## (1) 皮尔逊相关性

In [None]:
# corr_matrix = analyze.pearson_correlation(df_workload)
# corr_matrix

In [None]:
# display.plt_corr_heatmap(corr_matrix)

### 1. 相关性排序

In [None]:
# 提取相关性大于 c 的指标对
# c = 0.95
# fcorr_df = analyze.flatten_corr(corr_matrix, similarity_filter = [
#     lambda x: x[x.abs() > c].dropna(axis=0, how='all'),
# ])
# fcorr_df

### 2. 与压力相关的指标

In [None]:
# stress = "stress_cache"
# c = 0.90
# stress_corr = analyze.single_corr(corr_matrix, stress, similarity_filter = [
#     lambda x: x[x.abs() > c].dropna(axis=0, how='all'),
# ])
# stress_corr

In [None]:
# display.plt_by_column(df_workload.set_index(stress), columns=list(stress_corr.index))

## (2) 余弦相似性

In [None]:
# corr_matrix = analyze.cosine_similarity(df_workload)
# corr_matrix

In [None]:
# display.plt_corr_heatmap(corr_matrix)

### 1. 相关性排序

In [None]:
# 提取相关性大于 c 的指标对
# c = 0.95
# fcorr_df = analyze.flatten_corr(corr_matrix, similarity_filter = [
#     lambda x: x[x > c].dropna(axis=0, how='all'),
# ])
# fcorr_df

In [None]:
# stress = "stress_cache"
# stress_corr = analyze.single_corr(corr_matrix, stress)
# stress_corr

In [None]:
# display.plt_by_column(df_workload.set_index(stress), columns=list(stress_corr.index)[:16])