# 数据可视化


In [4]:
import os
import pandas as pd
import warnings
from tqdm.notebook import tqdm, trange
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import plotly_express as px
import plotly as py
import plotly.io as pio
from plotly.offline import download_plotlyjs, init_notebook_mode
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides pandas/plotly deprecation and chained-assignment
# warnings raised by the cells below - consider narrowing the filter.
warnings.filterwarnings("ignore")

# Initialise plotly's offline notebook mode so figures can render inline.
py.offline.init_notebook_mode(connected=True)
# Register tqdm's pandas integration.
tqdm.pandas()


## 画图函数

In [None]:
def create_shapes(starts, _min, _max, type=None, xref=None, yref=None):
    """Build plotly rectangle shapes that highlight a 10-minute window per label.

    Args:
        starts: Iterable of window start times. Each element must support
            ``+ timedelta`` (e.g. ``datetime`` or ``pandas.Timestamp``).
        _min: Lower y bound of every rectangle.
        _max: Upper y bound of every rectangle. When ``_min == _max`` the
            bounds are replaced by 0 and 1 so the rectangle has a visible height.
        type: Label level selecting the fill colour: ``'service'`` -> red,
            ``'pod'`` -> blue, ``'node'`` -> green; any other value -> red.
        xref: Optional x-axis reference (e.g. ``'x2'``); added only when given.
        yref: Optional y-axis reference (e.g. ``'y2'``); added only when given.

    Returns:
        A list with one shape dict per element of ``starts``, suitable for
        ``fig.update_layout(shapes=...)``.
    """
    color = {'service': 'red', 'pod': 'blue', 'node': 'green'}.get(type, 'red')

    # A flat series would give a zero-height rectangle; fall back to [0, 1].
    if _min == _max:
        _min = 0
        _max = 1

    window = timedelta(minutes=10)
    shapes = []

    for start in starts:
        shape = {
            'type': 'rect',
            'x0': start,
            'y0': _min,
            'x1': start + window,
            'y1': _max,
            'fillcolor': color,
            'opacity': 0.3,
            'line': {
                'width': 0,
            },
        }
        # Each axis reference is optional on its own: previously yref was
        # dropped unless xref was set, and written as None when only xref was given.
        if xref is not None:
            shape['xref'] = xref
        if yref is not None:
            shape['yref'] = yref

        shapes.append(shape)

    return shapes

## 读取数据

### 标签数据

In [5]:
# Load the fault ground-truth tables, one CSV per day (2022-03-20 and
# 2022-03-21 for k8s-1, per the file names).
label_data1 = pd.read_csv(
    '../../data/training_data_with_faults/groundtruth/groundtruth-k8s-1-2022-03-20.csv')
label_data2 = pd.read_csv(
    '../../data/training_data_with_faults/groundtruth/groundtruth-k8s-1-2022-03-21.csv')



In [18]:
# Stack both days into a single label table. The per-file row indices are
# kept here; the following cell resets the index after sorting.
label_data = pd.concat(
    [label_data1, label_data2])
label_data


[28, 26, 26]

In [None]:
# Order the ground-truth rows by level, component and time, then add a
# datetime column derived from the epoch-second timestamps.
label_data = (
    label_data
    .sort_values(by=['level', 'cmdb_id', 'timestamp'])
    .reset_index(drop=True)
)
label_data['datetime'] = pd.to_datetime(
    label_data['timestamp'], unit='s')

# Create the target directory first, like the other writer cells do;
# to_csv raises if the directory does not exist.
label_path = '../data/label/label1.csv'
os.makedirs(os.path.dirname(label_path), exist_ok=True)
label_data.to_csv(label_path, index=False)
label_data


### 业务指标

#### service级别

In [None]:
# Reload the label table written by the label-preparation cell. The CSV
# stores 'datetime' as text, so it is parsed back into datetimes here.
label_data = pd.read_csv('../data/label/label1.csv')
label_data['datetime'] = pd.to_datetime(label_data['datetime'])
label_data

In [4]:
# Load the per-service business metrics and stack them into one frame.
# Only the two cloudbed1 fault days (2022-03-20 / 2022-03-21, per the paths)
# are active; the commented-out reads are alternative inputs (normal-period
# data and other cloudbeds) kept for manual switching.
# service_metric_data1 = pd.read_csv(
#     '../data/training_data_normal/cloudbed-1/metric/service/metric_service.csv')
# service_metric_data2 = pd.read_csv(
#     '../data/training_data_normal/cloudbed-2/metric/service/metric_service.csv')
# service_metric_data3 = pd.read_csv(
#     '../data/training_data_normal/cloudbed-3/metric/service/metric_service.csv')


service_metric_data1 = pd.read_csv(
    '../../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/service/metric_service.csv')
service_metric_data2 = pd.read_csv(
    '../../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/service/metric_service.csv')
# service_metric_data3 = pd.read_csv(
#     '../data/training_data_with_faults/tar/2022-03-20-cloudbed2/metric/service/metric_service.csv')
# service_metric_data4 = pd.read_csv(
#     '../data/training_data_with_faults/tar/2022-03-21-cloudbed2/metric/service/metric_service.csv')
# service_metric_data5 = pd.read_csv(
#     '../data/training_data_with_faults/tar/2022-03-20-cloudbed3/metric/service/metric_service.csv')
# service_metric_data6 = pd.read_csv(
#     '../data/training_data_with_faults/tar/2022-03-21-cloudbed3/metric/service/metric_service.csv')
# service_metric_data7 = pd.read_csv(
#     '../data/training_data_with_faults/tar/2022-03-24-cloudbed3/metric/service/metric_service.csv')

# service_metric_data=pd.concat([service_metric_data1,service_metric_data2,service_metric_data3])
# service_metric_data = pd.concat(
#     [service_metric_data1, service_metric_data2, service_metric_data3, service_metric_data4, service_metric_data5,service_metric_data6,service_metric_data7])
# NOTE(review): the active reads use '../../data/...' while several other
# cells use '../data/...' - confirm which root is intended.
service_metric_data = pd.concat(
    [service_metric_data1, service_metric_data2])
# service_metric_data=service_metric_data1
service_metric_data


Unnamed: 0,service,timestamp,rr,sr,mrt,count
0,adservice-grpc,1647716400,100.0,100.0,2.429508,61
1,adservice-grpc,1647716460,100.0,100.0,2.429508,61
2,adservice-grpc,1647716520,100.0,100.0,2.332967,91
3,adservice-grpc,1647716580,100.0,100.0,2.647015,67
4,adservice-grpc,1647716640,100.0,100.0,2.510000,85
...,...,...,...,...,...,...
15835,frontend-http,1647820500,100.0,100.0,53.037687,134
15836,frontend-http,1647820560,100.0,100.0,247.060484,124
15837,frontend-http,1647820620,100.0,100.0,549.454167,120
15838,frontend-http,1647820680,100.0,100.0,883.313559,118


In [None]:
# Write one time-ordered CSV per service.
# The output directory is the same for every service, so resolve it once.
# processed_data_path = '../data/training_data_normal/processed_service_metric_data/'
processed_data_path = '../data/training_data_with_faults/tar/processed_service_metric_data/'
os.makedirs(processed_data_path, exist_ok=True)

processed_service_metric_data = list(service_metric_data.groupby('service'))
for service_name, service_group in tqdm(processed_service_metric_data):
    # Derive a new frame instead of mutating the groupby slice in place,
    # which is chained assignment on a view of service_metric_data.
    service_data = (
        service_group
        .sort_values(by='timestamp')
        .reset_index(drop=True)
    )
    service_data['datetime'] = pd.to_datetime(
        service_data['timestamp'], unit='s')

    service_data.to_csv(processed_data_path + service_name +
                        '_metrics.csv', index=False)


In [None]:
# One HTML figure per service: four stacked panels (rr, sr, mrt, count) with
# the service-level (red) and pod-level (blue) fault windows shaded on each.

# (metric column, trace colour, draw mode) for each panel, top to bottom.
service_panels = [
    ('rr', 'rgb(255, 127, 14, 1)', 'markers'),
    ('sr', 'rgb(0, 204, 150, 1)', 'markers'),
    ('mrt', 'rgb(31, 119, 180, 1)', 'lines'),
    ('count', 'rgb(0, 0, 0, 1)', 'lines'),
]

# The output directory does not depend on the service; create it once.
visualization_path = '../result/visualization/with_faults/service/'
os.makedirs(visualization_path, exist_ok=True)

processed_service_metric_data = list(service_metric_data.groupby('service'))
for service_name, service_group in tqdm(processed_service_metric_data):
    # Work on a derived frame rather than mutating the groupby slice in place.
    service_data = service_group.sort_values(by='timestamp').reset_index(drop=True)
    service_data['datetime'] = pd.to_datetime(
        service_data['timestamp'], unit='s')

    # Labels use the bare service name ('adservice'), metrics use
    # '<service>-<protocol>' ('adservice-grpc'), so match on the prefix.
    service_prefix = service_name.split('-')[0]
    label_data_service = label_data[(label_data['level'] == 'service') & (
        label_data['cmdb_id'] == service_prefix)]
    # Plain substring match (not a regex) so every pod of the service is included.
    label_data_pod = label_data[(label_data['level'] == 'pod') & (
        label_data['cmdb_id'].str.contains(service_prefix, regex=False))]

    fig = make_subplots(rows=len(service_panels), cols=1, shared_xaxes=True)
    shapes = []

    for row, (metric, color, mode) in enumerate(service_panels, start=1):
        # Marker panels style the marker, line panels style the line.
        if mode == 'markers':
            style = dict(marker=dict(color=color, size=1))
        else:
            style = dict(line=dict(color=color, width=1))

        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(go.Scatter(x=service_data['datetime'], y=service_data[metric], name=metric,
                                 mode=mode, legendgroup="group1",
                                 legendgrouptitle_text="Data Type", **style), row=row, col=1)

        metric_max = service_data[metric].max()
        for label_frame, label_type in ((label_data_service, 'service'), (label_data_pod, 'pod')):
            shapes += create_shapes(label_frame['datetime'], _min=0, _max=metric_max,
                                    type=label_type, xref='x' + str(row), yref='y' + str(row))

    # Single-point line traces draw nothing; they exist only to put the label
    # colours into the legend.
    for legend_name, legend_color in (('service label', 'red'), ('pod label', 'blue')):
        fig.add_trace(go.Scatter(x=[service_data['datetime'][0]], y=[service_data['rr'][0]],
                                 legendgroup='group2', legendgrouptitle_text='Label Type',
                                 name=legend_name, mode="lines", line=dict(color=legend_color)), row=1, col=1)

    fig.update_layout(title_text=service_name, shapes=shapes)
    pio.write_html(fig, file=visualization_path + service_name + '.html')
    

### 性能指标

#### node级别

In [None]:
# Reload the saved label table and list the distinct node-level component ids.
label_data = pd.read_csv('../data/label/label1.csv')
label_data['datetime'] = pd.to_datetime(label_data['datetime'])
cmdb_id = label_data[label_data['level'] =='node']['cmdb_id'].drop_duplicates().tolist()
cmdb_id

In [None]:
# Load the node-level KPI tables for both fault days (cloudbed1, per the
# paths) and stack them into one frame.
node_metric_data1=pd.read_csv('../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/node/kpi_cloudbed1_metric_0320.csv')
node_metric_data2=pd.read_csv('../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/node/kpi_cloudbed1_metric_0321.csv')
node_metric_data=pd.concat([node_metric_data1,node_metric_data2])

# node_metric_data = pd.read_csv(
#     '../data/training_data_normal/cloudbed-1/metric/node/kpi_cloudbed1_metric_0319.csv')

# 'timestamp' is interpreted as epoch seconds (unit='s').
node_metric_data['datetime'] = pd.to_datetime(
    node_metric_data['timestamp'], unit='s')

node_metric_data

In [None]:
# Write one CSV per node KPI, ordered by node and time.
# The output directory is the same for every KPI, so resolve it once.
# processed_data_path = '../data/training_data_normal/processed_node_metric_data/'
processed_data_path = '../data/training_data_with_faults/tar/processed_node_metric_data/'
os.makedirs(processed_data_path, exist_ok=True)

processed_node_metric_data = list(node_metric_data.groupby('kpi_name'))

for kpi_name, kpi_group in tqdm(processed_node_metric_data):
    # Derive a new frame instead of sorting the groupby slice in place.
    kpi_data = (
        kpi_group
        .sort_values(by=['cmdb_id', 'timestamp'])
        .reset_index(drop=True)
    )
    kpi_data.to_csv(processed_data_path + kpi_name + '_metrics.csv', index=False)


In [None]:
# One HTML figure per (KPI category, node): one panel per KPI of that node,
# with the node-level fault windows shaded on every panel.

# Each entry is one output folder; '&' joins several KPI sub-categories into it.
categories = ['cpu&load', 'mem', 'disk&fs', 'io', 'net&can_connect&tcp&udp', 'swap', 'os&user', 'process']
colors = ['blue', 'green', 'orange', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan', 'magenta']

# The sub-category is the second dot-separated token of kpi_name. It does not
# depend on the loops below, so compute it once instead of once per sub-category.
node_metric_data['kpi_cat'] = node_metric_data['kpi_name'].apply(
    lambda name: name.split('.')[1])

for cat in tqdm(categories):
    cat_list = cat.split('&')
    # Select all sub-categories in one pass instead of growing a frame by concat.
    node_data = node_metric_data[node_metric_data['kpi_cat'].isin(cat_list)]

    visualization_path = '../result/visualization/with_faults/node/' + cat + '/'
    os.makedirs(visualization_path, exist_ok=True)

    for node_id, node_frame in node_data.groupby('cmdb_id'):
        node_frame = node_frame.sort_values(by='timestamp').reset_index(drop=True)

        label_data_i = label_data[(label_data['level'] == 'node') & (
            label_data['cmdb_id'] == node_id)]

        # Group by a scalar key: grouping by the list ['kpi_name'] yields
        # 1-tuples as keys on pandas >= 2, which are not valid trace names.
        kpi_groups = list(node_frame.groupby('kpi_name'))

        # Size the figure from this node's own KPIs so that row count, titles
        # and traces always agree (groupby keys come back sorted).
        fig = make_subplots(rows=len(kpi_groups), cols=1, shared_xaxes=True,
                            subplot_titles=[kpi for kpi, _ in kpi_groups])
        shapes = []

        for row, (kpi, kpi_frame) in enumerate(kpi_groups, start=1):
            data = kpi_frame.sort_values(by='timestamp').reset_index(drop=True)

            # No label type is passed, so create_shapes uses its default colour.
            shapes += create_shapes(
                label_data_i['datetime'], _min=0, _max=data['value'].max(),
                xref='x' + str(row), yref='y' + str(row))
            # add_trace(row=, col=) replaces the deprecated append_trace.
            fig.add_trace(go.Scatter(x=data['datetime'], y=data['value'], name=kpi,
                                     line=dict(color=colors[(row - 1) % len(colors)], width=1.5),
                                     mode='lines'), row=row, col=1)

        fig.update_layout(title_text=node_id, shapes=shapes)
        pio.write_html(fig, file=visualization_path + node_id + '.html')


#### container级别

In [None]:
# Reload the saved labels and derive the component-id lists used to map
# container metric ids onto pods and services.
label_data = pd.read_csv('../data/label/label1.csv')
label_data['datetime'] = pd.to_datetime(label_data['datetime'])

is_service_label = label_data['level'] == 'service'
is_node_label = label_data['level'] == 'node'

cmdb_id_service = label_data[is_service_label]['cmdb_id'].drop_duplicates().tolist()
cmdb_id_raw = label_data[is_service_label]['cmdb_id'].drop_duplicates().tolist()

# Pod ids are '<service><suffix>-<replica>': suffix '' or '2', replicas 0..2.
pre = ['', '2']
cmdb_id_pod = [
    service + f'{suffix}-{replica}'
    for service in cmdb_id_raw
    for suffix in pre
    for replica in range(3)
]

cmdb_id_node = label_data[is_node_label]['cmdb_id'].drop_duplicates().tolist()

# Inspection showed that the container network metrics contain this extra
# kind of cmdb_id, although no such cmdb_id appears in the labels.
cmdb_id_pod.append('redis-cart')
cmdb_id_pod

In [None]:
# Container metric directories for the two fault days (cloudbed1, per the paths).
dir_path1 = f'../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/container/'
dir_path2 = f'../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/container/'
# Only the first day's directory is listed.
# NOTE(review): the plotting cell reads the same file names from dir_path2,
# so both directories are presumably expected to hold identical files - confirm.
dir_content = os.listdir(dir_path1)
# One output folder per entry; '&' joins several metric kinds into one folder.
categories = ['cpu', 'memory', 'fs', 'network',
              'spec', 'threads&processes&ulimits']
# Trace colours, cycled per container within a panel.
colors = ['blue', 'green', 'orange', 'purple', 'brown',
          'pink', 'gray', 'olive', 'cyan', 'magenta']


In [None]:
# One HTML figure per (metric category, pod): one panel per KPI, one trace per
# container cmdb_id, with service-level (red) and pod-level (blue) fault
# windows shaded on every panel. Node-level labels are not drawn here.
for cat in tqdm(categories):
    # The third '_'-separated token of the file stem selects the category.
    # NOTE(review): this is a substring test against the whole category string
    # (e.g. a token 'net' would match 'network'); `in cat.split('&')` may be
    # what was intended - confirm against the actual file names.
    file_name_set = [name for name in dir_content
                     if name.split('.')[0].split('_')[2] in cat]

    # Collect both days of every matching file, then concatenate once.
    frames = []
    for filename in file_name_set:
        frames.append(pd.read_csv(dir_path1 + filename))
        frames.append(pd.read_csv(dir_path2 + filename))
    if not frames:
        # No metric file belongs to this category; nothing to plot.
        continue
    container_data = pd.concat(frames, ignore_index=True)

    container_data['datetime'] = pd.to_datetime(container_data['timestamp'], unit='s')
    container_data['cmdb_id_pod'] = container_data['cmdb_id']
    container_data['cmdb_id_service'] = container_data['cmdb_id']
    container_data['cmdb_id_node'] = container_data['cmdb_id']

    # Collapse each raw container id onto the pod / service name it contains.
    # Applied sequentially so that a later match overrides an earlier one.
    for pod_name in cmdb_id_pod:
        container_data['cmdb_id_pod'] = container_data['cmdb_id_pod'].apply(
            lambda raw, pod_name=pod_name: pod_name if pod_name in raw else raw)

    for service_name in cmdb_id_service:
        container_data['cmdb_id_service'] = container_data['cmdb_id_service'].apply(
            lambda raw, service_name=service_name: service_name if service_name in raw else raw)

    # The output folder depends only on the category; create it once.
    visualization_path = '../result/visualization/with_faults/container/' + cat + '/'
    os.makedirs(visualization_path, exist_ok=True)

    # Group by a scalar key: grouping by the list ['cmdb_id_pod'] yields
    # 1-tuples on pandas >= 2, which breaks the label comparison and the
    # file-name concatenation below.
    for pod_id, pod_frame in container_data.groupby('cmdb_id_pod'):
        pod_frame = pod_frame.sort_values(by='timestamp').reset_index(drop=True)

        # Stable colour slot per container id across all panels of this pod.
        cmdb_ids = sorted(pod_frame['cmdb_id'].unique())

        # Service labels are drawn only when the pod maps to exactly one service.
        service_ids = pod_frame['cmdb_id_service'].drop_duplicates().tolist()
        service_id = service_ids[0] if len(service_ids) == 1 else ''

        label_data_pod = label_data[(label_data['level'] == 'pod') & (
            label_data['cmdb_id'] == pod_id)]
        label_data_service = label_data[(label_data['level'] == 'service') & (
            label_data['cmdb_id'] == service_id)]

        kpi_groups = list(pod_frame.groupby('kpi_name'))
        fig = make_subplots(rows=len(kpi_groups), cols=1, shared_xaxes=True,
                            subplot_titles=[kpi for kpi, _ in kpi_groups])

        shapes = []
        legend_anchor = None

        for row, (kpi, kpi_frame) in enumerate(kpi_groups, start=1):
            kpi_max = kpi_frame['value'].max()
            shapes += create_shapes(
                label_data_service['datetime'], _min=0, _max=kpi_max, type='service',
                xref='x' + str(row), yref='y' + str(row))
            shapes += create_shapes(
                label_data_pod['datetime'], _min=0, _max=kpi_max, type='pod',
                xref='x' + str(row), yref='y' + str(row))

            # Iterate the containers that actually report this KPI instead of
            # indexing a flat (kpi, cmdb) list, which assumed every container
            # reports every KPI.
            for cmdb, cmdb_frame in kpi_frame.groupby('cmdb_id'):
                data = cmdb_frame.sort_values(by='timestamp').reset_index(drop=True)
                if legend_anchor is None:
                    # First point of the first row-1 trace; reused for the
                    # legend-only traces below.
                    legend_anchor = (data['datetime'][0], data['value'][0])

                # add_trace(row=, col=) replaces the deprecated append_trace.
                fig.add_trace(go.Scatter(x=data['datetime'], y=data['value'], name=cmdb,
                                         line=dict(color=colors[cmdb_ids.index(cmdb) % len(colors)], width=1.5),
                                         mode='lines', legendgroup="group1",
                                         legendgrouptitle_text="Data Type"), row=row, col=1)

        # Single-point line traces draw nothing; they only add the label colours
        # to the legend. They are anchored on a row-1 point so they cannot
        # stretch row 1's axes with a value taken from another panel.
        for legend_name, legend_color in (('service label', 'red'), ('pod label', 'blue')):
            fig.add_trace(go.Scatter(x=[legend_anchor[0]], y=[legend_anchor[1]],
                                     legendgroup='group2', legendgrouptitle_text='Label Type',
                                     name=legend_name, mode="lines", line=dict(color=legend_color)), row=1, col=1)

        fig.update_layout(title_text=pod_id, shapes=shapes)
        pio.write_html(fig, file=visualization_path + pod_id + '.html')


In [1]:
import schedule
import time
def main():
    """Print a marker, then block for 120 seconds."""
    print(123)
    time.sleep(120)
    
# Run main() every minute at second 59. The job blocks for 120 s, i.e. longer
# than the one-minute period it is scheduled on.
schedule.every().minute.at(':59').do(main)
# Poll the scheduler once per second. This loop never exits on its own; the
# recorded output shows it was stopped with a KeyboardInterrupt.
while True:
    schedule.run_pending()
    time.sleep(1)

123


KeyboardInterrupt: 

In [None]:
import schedule
import time
# Experiment: clear and re-register all jobs from inside a running job.
i = 0

# Pause between scheduler polls, so the loop below does not spin a CPU core.
POLL_INTERVAL_SECONDS = 0.1


def hello():
    """Print a fixed greeting (stand-in for a periodic job)."""
    print('Hello world')


def some_task():
    """Count invocations; on the fifth one, clear every job, wait 30 s and
    register both jobs again."""
    global i
    i += 1
    print(i)
    if i == 5:
        schedule.clear()
        print('clear all jobs')
        time.sleep(30)
        schedule.every().second.do(hello)
        schedule.every().second.do(some_task)

schedule.every().second.do(hello)
schedule.every().second.do(some_task)
# Runs until interrupted. Sleeping between polls keeps the every-second jobs
# on time without busy-waiting.
while True:
    schedule.run_pending()
    time.sleep(POLL_INTERVAL_SECONDS)


In [1]:
import schedule
import time
import traceback

# Experiment: a job raises a flag; the main loop reacts by clearing all jobs,
# waiting, and registering the job again. Any exception resets everything.
FLAG = False
i=0

def hello():
    """Print the current local time and raise FLAG on every odd invocation."""
    current_time = int(time.time())
    print(time.strftime('%H:%M:%S', time.localtime(current_time)))
    global FLAG
    global i
    i += 1
    if i % 2==1:
        FLAG=True


schedule.every().minute.at(':59').do(hello)

# Runs until interrupted (KeyboardInterrupt is not caught by `except Exception`).
while True:
    try:
        if FLAG:
            FLAG=False
            schedule.clear()
            print('clear all jobs')
            time.sleep(70)
            schedule.every().minute.at(':59').do(hello)
        schedule.run_pending()
        # Poll once per second instead of busy-waiting; the job is scheduled
        # with one-second granularity, so no run is missed.
        time.sleep(1)
    except Exception as e:
        print('!!!ERROR!!!')
        schedule.clear()
        print('error clear all jobs')
        print(e)
        print(traceback.format_exc())
        FLAG = False
        i = 0
        print('wait for 10s')
        time.sleep(10)
        schedule.every().minute.at(':59').do(hello)


14:56:59
clear all jobs
14:58:59


KeyboardInterrupt: 

In [10]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
# df_60min = pd.DataFrame([[1, 2, 3], [1, 5, 6], [1, 8, 9], [1, 2, 3], [
#                         1, 5, 6], [1, 8, 9], [1, 2, 3], [1, 5, 6], [1, 8, 9], [1, 2, 3], [1, 5, 6], [1, 8, 9],[10, 11, 12]], columns=['a', 'b', 'c'])
# sigma_n = 3
# df_60min_mean = np.mean(df_60min.values,axis=0)
# df_60min_std = np.std(df_60min.values,axis=0)
# display(df_60min_mean,df_60min_std)
# threshold1 = df_60min_mean - sigma_n * df_60min_std
# threshold2 = df_60min_mean + sigma_n * df_60min_std
# display(threshold1,threshold2)
# for i in range(3):
# 	df_60min.iloc[:, i]=df_60min.iloc[:, i].apply(lambda x: df_60min_mean[i] if x < threshold1[i] or x > threshold2[i] else x)
# df_60min

# display(d)
# d=d.diff()
# display(d)
# d.iloc[-1:,:]
# kpi_list = joblib.load('../../data/kpi_list.pkl')
# kpi_list = kpi_list[:1000]
# display(len(kpi_list))
# df_kpi = pd.DataFrame(
#     kpi_list, columns=['timestamp', 'cmdb_id', 'kpi_name', 'value'])
# df_kpi

# Toy 3x4 frame; diff() turns it into row-to-row differences, which makes the
# first row all-NaN (see the displayed output).
a = pd.DataFrame([[1, 1, 1, 1], [2, 2, 2, 3], [3, 3, 3, 3]])
a=a.diff()
display(a)
# Scaler that is fitted on the cleaned frame further down in this cell.
online_std_scaler = StandardScaler()


def noise_clean(df, std):
    """Replace 3-sigma outliers with the column mean, then add Gaussian noise.

    Args:
        df: Numeric DataFrame of any shape. It is not modified.
        std: Per-column scale, indexable by column position, with at least
            ``df.shape[1]`` entries. The noise added to column ``i`` is drawn
            from ``N(0, 0.01 * std[i])``.

    Returns:
        A new float DataFrame with the same index and columns as ``df``.
        NaN cells stay NaN.
    """
    n_rows, n_cols = df.shape
    values = df.to_numpy(dtype=float)

    # Filter outliers. The NaN-aware statistics matter: a single NaN (e.g. the
    # first row after diff()) makes np.mean/np.std return NaN for the whole
    # column, which silently disables the threshold test below.
    sigma_n = 3
    df_mean = np.nanmean(values, axis=0)
    df_std = np.nanstd(values, axis=0)
    threshold1 = df_mean - sigma_n * df_std
    threshold2 = df_mean + sigma_n * df_std
    # Comparisons against NaN are False, so NaN cells are never "outliers".
    outliers = (values < threshold1) | (values > threshold2)
    cleaned = np.where(outliers, df_mean, values)

    # Draw the noise column by column, sized from the frame instead of the
    # previously hard-coded 4 columns x 3 rows.
    random_nums = np.empty((n_rows, n_cols))
    for i in range(n_cols):
        random_nums[:, i] = np.random.normal(0, 0.01 * std[i], size=n_rows)
    print(random_nums)

    return pd.DataFrame(cleaned + random_nums, index=df.index, columns=df.columns)


# Clean the toy frame with a unit scale for each of its four columns, then
# fit the scaler on the result and inspect the fitted mean and variance.
b = noise_clean(a, [1, 1, 1, 1])
display(b)
online_std_scaler.fit(b.values)
display(online_std_scaler.mean_)
online_std_scaler.var_

Unnamed: 0,0,1,2,3
0,,,,
1,1.0,1.0,1.0,2.0
2,1.0,1.0,1.0,0.0


[[ 0.00528635  0.0059857   0.00190149 -0.00515017]
 [ 0.01114795  0.00665745  0.00207141  0.00731995]
 [-0.00801557  0.01379005 -0.01771506  0.01380453]]


Unnamed: 0,0,1,2,3
0,,,,
1,1.011148,1.006657,1.002071,2.00732
2,0.991984,1.01379,0.982285,0.013805


array([1.00156619, 1.01022375, 0.99217818, 1.01056224])

array([9.18101691e-05, 1.27185162e-05, 9.78760647e-05, 9.93525931e-01])

In [17]:
import joblib
import numpy as np
import pandas as pd

# Show every column when displaying the wide frames below.
pd.set_option('display.max_columns', None)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load pickles from a trusted source.
a=joblib.load('../../model/spot/spot.pkl')
display(a)

# Saved test frames, loaded for side-by-side inspection.
b=pd.read_csv('../../data/df_57_test-2.csv')
e=pd.read_csv('../../data/df_57_test.csv')
f = pd.read_csv('../../data/df_57_test-3.csv')
g = pd.read_csv('../../data/df_57_test-4.csv')
h = pd.read_csv('../../data/df_57_test-5.csv')
i = pd.read_csv('../../data/df_57_test-8.csv')
l = pd.read_csv('../../data/df_57_test1.csv')
j = pd.read_csv('../../data/df_57_test2.csv')
k = pd.read_csv('../../data/df_57_test3.csv')

# First 1440 training rows, with every column sorted independently.
# NOTE(review): `c` is not used again in this cell.
c = pd.read_csv('../../data/df_57_train.csv')
c=c.iloc[:1440,:]
c=c.apply(lambda x:np.sort(x.values))
# display(c[1350:1400])
# b[b['timestamp']==1647851040]
# Stack the loaded object and all test frames row-wise for display.
pd.concat([a,b,e,f,g,h,i,l,j,k])


Unnamed: 0,node-1,node-2,node-3,node-4,node-5,node-6,adservice-grpc,adservice-http,cartservice-grpc,checkoutservice-grpc,currencyservice-grpc,emailservice-grpc,frontend-http,paymentservice-grpc,productcatalogservice-grpc,recommendationservice-grpc,shippingservice-grpc,adservice-0,adservice-1,adservice-2,adservice2-0,cartservice-0,cartservice-1,cartservice-2,cartservice2-0,checkoutservice-0,checkoutservice-2,checkoutservice-1,checkoutservice2-0,currencyservice-0,currencyservice-1,currencyservice-2,currencyservice2-0,emailservice-0,emailservice-1,emailservice-2,emailservice2-0,frontend-0,frontend-1,frontend-2,frontend2-0,paymentservice-0,paymentservice-1,paymentservice-2,paymentservice2-0,productcatalogservice-0,productcatalogservice-1,productcatalogservice-2,productcatalogservice2-0,recommendationservice-0,recommendationservice-1,recommendationservice-2,recommendationservice2-0,shippingservice-0,shippingservice-1,shippingservice-2,shippingservice2-0
0,3.311579,2.695983,2.297362,3.278348,1.863397,3.550096,2.583068,3.211694,2.249804,2.506688,4.422558,4.892056,4.515652,2.610314,2.600173,3.918648,3.32588,3.016773,3.162674,4.753863,3.398221,3.612861,3.475039,4.173957,4.28698,3.467843,2.19957,1.847026,2.334042,3.129835,2.820185,2.808198,3.617343,1.273137,0.803803,3.153075,1.298777,2.240575,2.10713,2.517187,3.294747,3.0815,1.082393,4.132457,4.483114,2.711729,3.437144,2.984085,2.369322,2.494014,3.016993,1.293198,3.724876,3.287005,1.213797,2.724835,1.142008


Unnamed: 0,node-1,node-2,node-3,node-4,node-5,node-6,adservice-grpc,adservice-http,cartservice-grpc,checkoutservice-grpc,currencyservice-grpc,emailservice-grpc,frontend-http,paymentservice-grpc,productcatalogservice-grpc,recommendationservice-grpc,shippingservice-grpc,adservice-0,adservice-1,adservice-2,adservice2-0,cartservice-0,cartservice-1,cartservice-2,cartservice2-0,checkoutservice-0,checkoutservice-2,checkoutservice-1,checkoutservice2-0,currencyservice-0,currencyservice-1,currencyservice-2,currencyservice2-0,emailservice-0,emailservice-1,emailservice-2,emailservice2-0,frontend-0,frontend-1,frontend-2,frontend2-0,paymentservice-0,paymentservice-1,paymentservice-2,paymentservice2-0,productcatalogservice-0,productcatalogservice-1,productcatalogservice-2,productcatalogservice2-0,recommendationservice-0,recommendationservice-1,recommendationservice-2,recommendationservice2-0,shippingservice-0,shippingservice-1,shippingservice-2,shippingservice2-0,timestamp
0,3.311579,2.695983,2.297362,3.278348,1.863397,3.550096,2.583068,3.211694,2.249804,2.506688,4.422558,4.892056,4.515652,2.610314,2.600173,3.918648,3.32588,3.016773,3.162674,4.753863,3.398221,3.612861,3.475039,4.173957,4.28698,3.467843,2.19957,1.847026,2.334042,3.129835,2.820185,2.808198,3.617343,1.273137,0.803803,3.153075,1.298777,2.240575,2.10713,2.517187,3.294747,3.0815,1.082393,4.132457,4.483114,2.711729,3.437144,2.984085,2.369322,2.494014,3.016993,1.293198,3.724876,3.287005,1.213797,2.724835,1.142008,
0,0.690254,0.592266,0.593896,0.889879,0.41073,0.230238,0.899038,0.054279,1.210797,0.613857,0.565871,2.139669,1.509226,1.466981,1.42374,2.814836,1.121387,0.293089,0.476607,0.398983,0.449259,0.847835,0.77078,1.14725,0.501033,0.913335,0.421383,0.609748,0.516597,0.622988,0.395075,2.191311,0.411552,0.378856,0.298587,0.218926,0.252718,1.192754,1.245966,0.494891,1.150211,0.58341,0.416,0.509503,1.848543,0.893852,1.520522,1.694173,0.656251,0.472852,0.311731,0.158032,0.375052,0.571637,0.351822,0.607835,0.222518,1651305000.0
0,0.397707,0.460924,0.989055,0.745922,0.386684,0.485784,1.807864,0.054279,1.556354,0.005583,1.003493,0.577462,1.297389,1.124526,1.244716,1.323449,3.414971,0.588618,0.624361,9.003374,0.527968,0.689002,0.869738,0.839579,0.371252,0.48528,2.897809,0.872904,0.864698,2.197722,0.855823,1.255147,0.53673,0.223533,0.338794,0.43225,0.290287,0.890812,0.581819,1.123314,0.979799,0.300269,0.698144,2.062348,3.241713,0.623883,1.930109,1.590086,0.913723,2.094687,0.247519,0.695059,0.406759,0.551529,0.535862,0.415418,0.79995,1651298000.0
0,0.595935,0.438502,0.544759,0.594141,0.638869,0.722173,0.554523,0.054279,3.022278,0.557544,3.495508,1.535862,1.809567,1.754926,0.473538,1.159522,0.739683,0.79668,0.360769,0.883754,0.752946,1.908496,1.238837,0.654371,0.613208,0.278813,0.463806,1.092728,0.424823,0.402716,0.595726,1.012071,1.336765,0.346961,0.365044,0.256385,0.19992,1.114477,1.046465,0.292274,1.504215,0.405267,0.531072,0.999216,0.633956,0.496511,0.921376,1.145502,0.668068,1.341723,0.762053,0.553607,0.697162,0.424611,0.658037,0.655311,0.41249,1651305000.0
0,0.439709,0.728736,0.392129,0.372456,0.167316,0.364603,1.883367,0.054279,2.676721,0.613857,1.975987,3.347282,1.950792,2.906707,0.366495,1.987179,3.331408,0.221957,0.854594,2.113716,0.493802,2.863045,0.887388,0.950458,3.049139,0.446523,0.384192,0.82334,0.338637,0.565123,3.054939,0.45378,1.078834,0.374961,0.302601,0.273323,0.275313,1.254845,0.780952,0.458844,0.572718,0.799371,0.39263,1.250861,0.48699,2.14267,0.93654,2.199481,0.863373,0.81777,0.507142,0.580673,0.521044,0.475709,0.502585,0.383494,0.218118,1651306000.0
0,0.371258,0.568352,0.487,0.754138,0.195104,0.190495,0.899038,0.054279,1.380881,0.782795,2.778294,3.649186,1.032828,2.330817,1.189632,3.849407,3.215091,0.254522,5.908052,0.267412,1.185894,6.986616,1.385912,3.637155,0.388534,1.019511,0.689251,1.245747,0.608196,1.252696,0.573528,3.645202,1.199893,0.438867,0.185172,0.158806,0.513257,1.064497,0.618441,0.793191,0.835454,0.389523,0.500637,0.485236,0.190461,0.545192,1.430288,1.105327,1.122943,0.522593,1.667368,0.422918,0.375247,0.526463,0.340764,0.353093,0.718471,1651307000.0
0,0.454396,0.280451,0.497414,0.514709,0.325936,0.354544,0.111576,0.054279,0.085042,0.613857,1.550521,0.328249,1.103441,1.466981,0.955524,0.849151,1.063229,0.387984,0.594792,9.911684,0.614611,0.8568,1.48535,0.865025,0.438918,0.537179,0.98745,0.569099,0.528615,0.837242,1.341366,0.33631,1.433958,0.307127,0.275805,0.436473,0.288033,0.624166,1.327552,0.311418,0.682357,0.85723,0.51418,1.464075,0.593829,0.865096,1.849667,0.983719,1.050202,0.783959,0.623762,0.422117,0.587269,0.486036,0.454673,0.819271,0.566372,1651308000.0
0,0.472598,0.450372,0.377776,0.847051,0.320912,0.278178,0.554523,0.054279,1.812828,0.332293,1.173679,1.535862,1.174053,0.89109,1.963936,0.228408,1.528497,0.551766,0.422181,2.094731,0.230762,1.181041,1.242937,0.6932,0.400579,0.639894,0.46326,0.693389,1.324037,1.903491,0.751381,0.833609,1.139288,0.410322,0.290805,0.358528,0.30548,0.834258,0.89297,0.224571,1.25211,0.610822,0.256986,1.014033,0.414689,1.501968,1.129812,0.484158,1.263222,0.39897,0.437538,0.652857,1.281444,0.427098,0.275371,0.76401,0.453148,1651463000.0
0,0.380868,0.466007,0.442141,0.432339,0.426308,0.340394,1.194198,0.122654,0.236379,0.483568,0.298441,0.326611,0.549127,0.308088,0.326621,0.022968,0.65644,0.444408,4.802134,0.813092,1.392133,0.537593,0.734488,0.772058,0.72447,0.624473,0.562462,0.930771,0.726328,1.602277,0.636339,0.807656,0.264703,0.460481,0.335996,0.295779,0.388369,0.57451,0.652447,0.7349,0.787599,1.104495,0.331605,0.453154,0.213619,1.144776,1.287565,1.391161,0.709098,0.716742,0.398233,0.504441,0.981964,0.392621,0.586403,0.685745,0.700253,1651464000.0
0,0.548805,0.684296,0.801621,0.806323,1.024158,0.657119,0.970593,0.031823,2.250201,0.223018,0.540346,0.025481,1.793699,0.262844,1.713176,0.942254,0.331906,0.358514,0.556651,0.486166,0.606054,0.669068,0.630396,1.020141,0.326386,0.467112,0.688274,0.883062,0.431361,1.140584,0.129679,1.290267,0.571794,0.242792,0.229839,0.291559,0.335876,1.517739,0.974012,0.955953,2.531402,0.30496,0.264234,0.477819,0.921304,1.673445,1.858757,0.896557,0.832193,0.509362,0.364701,0.264956,0.367564,0.47154,0.518994,0.622359,0.574277,1651808000.0


In [16]:
import pprint as pp
# Per-component threshold rates: every node, service and pod starts at 1 and
# the components listed below are scaled up or down. The result is pickled.
nodes = ['node-1', 'node-2', 'node-3', 'node-4', 'node-5', 'node-6']
services = ['adservice-grpc', 'adservice-http', 'cartservice-grpc', 'checkoutservice-grpc', 'currencyservice-grpc',
            'emailservice-grpc', 'frontend-http', 'paymentservice-grpc', 'productcatalogservice-grpc',
            'recommendationservice-grpc', 'shippingservice-grpc']
pods = ['adservice-0', 'adservice-1', 'adservice-2', 'adservice2-0', 'cartservice-0', 'cartservice-1', 'cartservice-2',
        'cartservice2-0', 'checkoutservice-0', 'checkoutservice-2', 'checkoutservice-1', 'checkoutservice2-0',
        'currencyservice-0', 'currencyservice-1', 'currencyservice-2', 'currencyservice2-0', 'emailservice-0',
        'emailservice-1', 'emailservice-2', 'emailservice2-0', 'frontend-0', 'frontend-1', 'frontend-2', 'frontend2-0',
        'paymentservice-0', 'paymentservice-1', 'paymentservice-2', 'paymentservice2-0', 'productcatalogservice-0',
        'productcatalogservice-1', 'productcatalogservice-2', 'productcatalogservice2-0', 'recommendationservice-0',
        'recommendationservice-1', 'recommendationservice-2', 'recommendationservice2-0', 'shippingservice-0',
        'shippingservice-1', 'shippingservice-2', 'shippingservice2-0']

# (rate, component ids) in priority order: the first group listing an id wins.
rate_groups = [
    (15, ['recommendationservice-grpc']),
    (3, ['adservice-2']),
    (2, ['productcatalogservice-grpc', 'cartservice-grpc', 'productcatalogservice-0',
         'productcatalogservice-1', 'productcatalogservice-2']),
    (1.5, ['node-3', 'node-5', 'currencyservice-grpc', 'shippingservice-grpc', 'checkoutservice-1',
           'frontend-0', 'frontend-1', 'frontend-2', 'recommendationservice-0', 'shippingservice-1',
           'currencyservice-0']),
    (1 / 1.2, ['checkoutservice-0', 'recommendationservice2-0', 'cartservice-2']),
    (1 / 1.8, ['node-1', 'node-2', 'node-6', 'paymentservice2-0', 'shippingservice-2', 'adservice2-0',
               'cartservice-1', 'currencyservice2-0', 'emailservice-2', 'paymentservice-0']),
    (1 / 2, ['shippingservice-0', 'cartservice-0']),
    (1 / 3, ['emailservice-grpc', 'cartservice2-0', 'paymentservice-2', 'adservice-0', 'adservice-1']),
]

rate_by_id = {}
for rate, component_ids in rate_groups:
    for component_id in component_ids:
        rate_by_id.setdefault(component_id, rate)

# Components without an entry keep the base rate of 1.
threshold_list = {
    component_id: 1 * rate_by_id.get(component_id, 1)
    for component_id in nodes + services + pods
}

joblib.dump(threshold_list, '../../model/spot/threshold_rate.pkl')
pp.pprint(threshold_list)


{'adservice-0': 0.3333333333333333,
 'adservice-1': 0.3333333333333333,
 'adservice-2': 3,
 'adservice-grpc': 1,
 'adservice-http': 1,
 'adservice2-0': 0.5555555555555556,
 'cartservice-0': 0.5,
 'cartservice-1': 0.5555555555555556,
 'cartservice-2': 0.8333333333333334,
 'cartservice-grpc': 2,
 'cartservice2-0': 0.3333333333333333,
 'checkoutservice-0': 0.8333333333333334,
 'checkoutservice-1': 1.5,
 'checkoutservice-2': 1,
 'checkoutservice-grpc': 1,
 'checkoutservice2-0': 1,
 'currencyservice-0': 1.5,
 'currencyservice-1': 1,
 'currencyservice-2': 1,
 'currencyservice-grpc': 1.5,
 'currencyservice2-0': 0.5555555555555556,
 'emailservice-0': 1,
 'emailservice-1': 1,
 'emailservice-2': 0.5555555555555556,
 'emailservice-grpc': 0.3333333333333333,
 'emailservice2-0': 1,
 'frontend-0': 1.5,
 'frontend-1': 1.5,
 'frontend-2': 1.5,
 'frontend-http': 1,
 'frontend2-0': 1,
 'node-1': 0.5555555555555556,
 'node-2': 0.5555555555555556,
 'node-3': 1.5,
 'node-4': 1,
 'node-5': 1.5,
 'node-6': 0

In [31]:
# Check that np.sort(axis=0) sorts each column independently (see the output).
a=np.array([[1,2,3],[3,2,4],[2,5,6]])
np.sort(a,axis=0)

array([[1, 2, 3],
       [2, 3, 4],
       [2, 5, 6]])

In [6]:
import pandas as pd
import numpy as np
# Toy frame with a gap in column 'c' to check forward-fill followed by back-fill.
# np.nan replaces the np.NAN alias, which was removed in NumPy 2.0.
a = pd.DataFrame([[2, 3, 1], [2, 3, np.nan], [2, 3, np.nan],
                 [2, 3, 1]], columns=['a', 'b', 'c'])
display(a)
# a=a.diff()
# ffill closes interior and trailing gaps; bfill then covers any leading gap.
a['c']= a['c'].ffill().bfill()
a
# a[['a','b']].apply(lambda x: x.mean(),axis=1)

Unnamed: 0,a,b,c
0,2,3,1.0
1,2,3,
2,2,3,
3,2,3,1.0


Unnamed: 0,a,b,c
0,2,3,1.0
1,2,3,1.0
2,2,3,1.0
3,2,3,1.0


In [85]:
import joblib
# Load the saved offline scaler and look at one entry of its fitted mean.
# NOTE(review): joblib.load unpickles the file - only load trusted pickles.
std_scaler = joblib.load(
    '../../model/scaler/offline_std_scaler3.pkl')
mean = std_scaler.mean_
# Index 24 is a hard-coded position; which feature it refers to is not visible here.
mean[24]

2.421316481204662