# 数据可视化


In [4]:
import os
import pandas as pd
import warnings
from tqdm.notebook import tqdm, trange
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import plotly_express as px
import plotly as py
import plotly.io as pio
from plotly.offline import download_plotlyjs, init_notebook_mode
import plotly.graph_objs as go
from plotly.subplots import make_subplots

warnings.filterwarnings("ignore")

py.offline.init_notebook_mode(connected=True)
tqdm.pandas()


## 画图函数

In [None]:
def create_shapes(starts, _min, _max, type=None, xref=None, yref=None,
                  duration=timedelta(minutes=10)):
    """Build plotly ``rect`` shape dicts that highlight labelled time windows.

    One semi-transparent rectangle is produced per entry of ``starts``; it
    spans ``[start, start + duration]`` on the x axis and ``[_min, _max]`` on
    the y axis.

    Args:
        starts: Iterable of window start times (anything that supports
            ``+ timedelta``, e.g. ``datetime`` or pandas ``Timestamp``).
        _min: Lower y bound of every rectangle.
        _max: Upper y bound of every rectangle.
        type: Label level; ``'service'`` -> red, ``'pod'`` -> blue,
            ``'node'`` -> green, anything else (including ``None``) -> red.
        xref: Optional plotly x-axis reference (e.g. ``'x2'``).
        yref: Optional plotly y-axis reference (e.g. ``'y2'``).
        duration: Width of each highlighted window. Defaults to 10 minutes,
            which was previously hard-coded.

    Returns:
        list[dict]: Shape dicts suitable for ``fig.update_layout(shapes=...)``.
    """
    color = {'service': 'red', 'pod': 'blue', 'node': 'green'}.get(type, 'red')

    # A zero-height rectangle would be invisible; fall back to the unit range.
    if _min == _max:
        _min, _max = 0, 1

    shapes = []
    for start in starts:
        shape = {
            'type': 'rect',
            'x0': start,
            'y0': _min,
            'x1': start + duration,
            'y1': _max,
            'fillcolor': color,
            'opacity': 0.3,
            'line': {
                'width': 0,
            },
        }
        if xref is not None:
            # Unchanged from before: both references are written together.
            shape['xref'] = xref
            shape['yref'] = yref
        elif yref is not None:
            # Previously a yref passed without xref was silently dropped.
            shape['yref'] = yref

        shapes.append(shape)

    return shapes

## 读取数据

### 标签数据

In [5]:
# Ground-truth fault labels: one CSV per day (2022-03-20 and 2022-03-21).
label_data1, label_data2 = (
    pd.read_csv(groundtruth_csv)
    for groundtruth_csv in (
        '../../data/training_data_with_faults/groundtruth/groundtruth-k8s-1-2022-03-20.csv',
        '../../data/training_data_with_faults/groundtruth/groundtruth-k8s-1-2022-03-21.csv',
    )
)



In [18]:
# Stack both days row-wise. Each file keeps its own row index here, so index
# values may repeat until the frame is re-indexed.
label_data = pd.concat([label_data1, label_data2], axis=0)
label_data


[28, 26, 26]

In [None]:
# Order the labels by level, component and time, then add a readable datetime
# column derived from the epoch-second `timestamp`.
label_data = (
    label_data
    .sort_values(by=['level', 'cmdb_id', 'timestamp'])
    .reset_index(drop=True)
)
label_data['datetime'] = pd.to_datetime(
    label_data['timestamp'], unit='s')

# to_csv does not create missing directories, so make sure the target exists
# (the other export cells in this notebook do the same).
os.makedirs('../data/label', exist_ok=True)
label_data.to_csv('../data/label/label1.csv', index=False)
label_data


### 业务指标

#### service级别

In [None]:
# Reload the exported labels; CSV stores `datetime` as text, so parse it back.
label_data = pd.read_csv('../data/label/label1.csv').assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)
label_data

In [4]:
# Service-level business metrics for cloudbed1 on 2022-03-20 and 2022-03-21.
#
# Other inputs that were tried here and are currently disabled:
#   * fault-free data:
#     ../data/training_data_normal/cloudbed-{1,2,3}/metric/service/metric_service.csv
#   * fault data for the other cloudbeds:
#     ../data/training_data_with_faults/tar/2022-03-{20,21}-cloudbed{2,3}/metric/service/metric_service.csv
#     ../data/training_data_with_faults/tar/2022-03-24-cloudbed3/metric/service/metric_service.csv
# To use them, read the extra files and add them to the concat list below.
service_metric_data1, service_metric_data2 = (
    pd.read_csv(metric_csv)
    for metric_csv in (
        '../../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/service/metric_service.csv',
        '../../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/service/metric_service.csv',
    )
)

service_metric_data = pd.concat(
    [service_metric_data1, service_metric_data2], axis=0)
service_metric_data


Unnamed: 0,service,timestamp,rr,sr,mrt,count
0,adservice-grpc,1647716400,100.0,100.0,2.429508,61
1,adservice-grpc,1647716460,100.0,100.0,2.429508,61
2,adservice-grpc,1647716520,100.0,100.0,2.332967,91
3,adservice-grpc,1647716580,100.0,100.0,2.647015,67
4,adservice-grpc,1647716640,100.0,100.0,2.510000,85
...,...,...,...,...,...,...
15835,frontend-http,1647820500,100.0,100.0,53.037687,134
15836,frontend-http,1647820560,100.0,100.0,247.060484,124
15837,frontend-http,1647820620,100.0,100.0,549.454167,120
15838,frontend-http,1647820680,100.0,100.0,883.313559,118


In [None]:
# Write one time-ordered CSV per service.
# The output location does not depend on the service, so it is prepared once.
# processed_data_path = '../data/training_data_normal/processed_service_metric_data/'
processed_data_path = '../data/training_data_with_faults/tar/processed_service_metric_data/'
os.makedirs(processed_data_path, exist_ok=True)

# Build sorted copies instead of mutating the groupby slices in place.
processed_service_metric_data = [
    (
        service_name,
        service_data
        .sort_values(by='timestamp')
        .reset_index(drop=True)
        .assign(datetime=lambda frame: pd.to_datetime(frame['timestamp'], unit='s')),
    )
    for service_name, service_data in service_metric_data.groupby('service')
]

for (service_name, service_data) in tqdm(processed_service_metric_data):
    service_data.to_csv(processed_data_path + service_name +
                        '_metrics.csv', index=False)


In [None]:
# Render one HTML page per service: four stacked subplots (rr, sr, mrt, count)
# with the labelled service-level (red) and pod-level (blue) windows shaded.
visualization_path = '../result/visualization/with_faults/service/'
os.makedirs(visualization_path, exist_ok=True)

# (metric column, go.Scatter styling) for each subplot row, top to bottom.
metric_panels = [
    ('rr', dict(marker=dict(color='rgb(255, 127, 14, 1)', size=1), mode='markers')),
    ('sr', dict(marker=dict(color='rgb(0, 204, 150, 1)', size=1), mode='markers')),
    ('mrt', dict(line=dict(color='rgb(31, 119, 180, 1)', width=1), mode='lines')),
    ('count', dict(line=dict(color='rgb(0, 0, 0, 1)', width=1), mode='lines')),
]

# Sorted, re-indexed copies of every per-service slice (no in-place mutation).
processed_service_metric_data = [
    (
        service_name,
        service_data
        .sort_values(by='timestamp')
        .reset_index(drop=True)
        .assign(datetime=lambda frame: pd.to_datetime(frame['timestamp'], unit='s')),
    )
    for service_name, service_data in service_metric_data.groupby('service')
]

for (service_name, service_data) in tqdm(processed_service_metric_data):
    # Labels are matched on the part of the service name before the first '-'.
    service_prefix = service_name.split('-')[0]
    label_data_service = label_data[(label_data['level'] == 'service') & (
        label_data['cmdb_id'] == service_prefix)]
    # Plain substring match: the prefix is a literal, not a regular expression.
    label_data_pod = label_data[(label_data['level'] == 'pod') & (
        label_data['cmdb_id'].str.contains(service_prefix, regex=False))]

    fig = make_subplots(rows=len(metric_panels), cols=1, shared_xaxes=True)
    shapes = []

    for row, (metric, scatter_style) in enumerate(metric_panels, start=1):
        fig.append_trace(go.Scatter(x=service_data['datetime'], y=service_data[metric], name=metric,
                                    legendgroup="group1", legendgrouptitle_text="Data Type",
                                    **scatter_style), row=row, col=1)
        metric_max = service_data[metric].max()
        # Service windows first, then pod windows, same order as the traces.
        for level, level_labels in (('service', label_data_service), ('pod', label_data_pod)):
            shapes += create_shapes(level_labels['datetime'], _min=0, _max=metric_max,
                                    type=level, xref=f'x{row}', yref=f'y{row}')

    # Single-point traces that exist only to add the label colours to the legend.
    for legend_name, legend_color in (('service label', 'red'), ('pod label', 'blue')):
        fig.append_trace(go.Scatter(x=[service_data['datetime'][0]], y=[service_data['rr'][0]],
                                    legendgroup='group2', legendgrouptitle_text='Label Type',
                                    name=legend_name, mode="lines", line=dict(color=legend_color)),
                         row=1, col=1)

    fig.update_layout(title_text=service_name, shapes=shapes)
    pio.write_html(fig, file=visualization_path+service_name+'.html')
    

### 性能指标

#### node级别

In [None]:
# Reload the labels and list the distinct node ids that carry a node-level label.
label_data = pd.read_csv('../data/label/label1.csv').assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)
is_node_label = label_data['level'] == 'node'
cmdb_id = label_data.loc[is_node_label, 'cmdb_id'].drop_duplicates().tolist()
cmdb_id

In [None]:
# Node-level KPIs for cloudbed1, one file per day.
# (Fault-free alternative: a single read of
#  '../data/training_data_normal/cloudbed-1/metric/node/kpi_cloudbed1_metric_0319.csv'.)
node_metric_data1, node_metric_data2 = (
    pd.read_csv(kpi_csv)
    for kpi_csv in (
        '../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/node/kpi_cloudbed1_metric_0320.csv',
        '../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/node/kpi_cloudbed1_metric_0321.csv',
    )
)

# Stack both days and add a datetime column parsed from the epoch-second timestamp.
node_metric_data = pd.concat([node_metric_data1, node_metric_data2]).assign(
    datetime=lambda frame: pd.to_datetime(frame['timestamp'], unit='s')
)

node_metric_data

In [None]:
# Write one CSV per node KPI, ordered by node id and then by time.
# The output location is the same for every KPI, so it is prepared once.
# processed_data_path = '../data/training_data_normal/processed_node_metric_data/'
processed_data_path = '../data/training_data_with_faults/tar/processed_node_metric_data/'
os.makedirs(processed_data_path, exist_ok=True)

# Build sorted copies instead of mutating the groupby slices in place.
processed_node_metric_data = [
    (
        kpi_name,
        kpi_data
        .sort_values(by=['cmdb_id', 'timestamp'])
        .reset_index(drop=True),
    )
    for kpi_name, kpi_data in node_metric_data.groupby('kpi_name')
]

for kpi_name, kpi_data in tqdm(processed_node_metric_data):
    kpi_data.to_csv(processed_data_path + kpi_name + '_metrics.csv', index=False)


In [None]:
# Render one HTML page per (KPI category, node): one subplot per KPI with the
# node-level label windows shaded.
# A category entry may bundle several sub-categories separated by '&'.
categories=['cpu&load','mem','disk&fs','io','net&can_connect&tcp&udp','swap','os&user','process']
colors = ['blue', 'green', 'orange', 'purple', 'brown','pink', 'gray', 'olive', 'cyan', 'magenta']

# The category is the second dot-separated token of kpi_name. It does not
# depend on the loop variables, so it is computed once.
node_metric_data['kpi_cat'] = node_metric_data['kpi_name'].apply(
    lambda x: x.split('.')[1])

for cat in tqdm(categories):
    cat_list=cat.split('&')
    # One concat over all sub-categories (same row order as appending one by one).
    node_data = pd.concat(
        [node_metric_data[node_metric_data['kpi_cat'] == c] for c in cat_list])

    # Every page of a category uses the same number of rows so layouts line up.
    kpi_num = len(node_data['kpi_name'].drop_duplicates())

    visualization_path = '../result/visualization/with_faults/node/'+cat+'/'
    os.makedirs(visualization_path, exist_ok=True)

    processed_node_data=list(node_data.groupby('cmdb_id'))

    for node_id, node_kpi_data in processed_node_data:
        fig = make_subplots(rows=kpi_num, cols=1, shared_xaxes=True,
                            subplot_titles=node_kpi_data['kpi_name'].drop_duplicates().sort_values().tolist())
        shapes = []

        label_data_i = label_data[(label_data['level'] == 'node') & (
            label_data['cmdb_id'] == node_id)]

        # Group by a scalar key: a one-element list key makes pandas >= 2 yield
        # tuple group names, which are not valid trace names.
        for i, (kpi, data) in enumerate(node_kpi_data.groupby('kpi_name')):
            data = data.sort_values(by='timestamp').reset_index(drop=True)

            shapes += create_shapes(
                label_data_i['datetime'], _min=0, _max=data['value'].max(), xref='x'+str(i+1), yref='y'+str(i+1))
            fig.append_trace(go.Scatter(x=data['datetime'], y=data['value'], name=kpi,
                                        line=dict(color=colors[i % len(colors)], width=1.5), mode='lines'), row=i+1, col=1)

        fig.update_layout(title_text=node_id, shapes=shapes)
        pio.write_html(fig, file=visualization_path+node_id+'.html')


#### container级别

In [None]:
# Reload the labels and derive the id lists used to map container ids onto
# pods and services.
label_data = pd.read_csv('../data/label/label1.csv').assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)

service_level_labels = label_data[label_data['level'] == 'service']
cmdb_id_service = service_level_labels['cmdb_id'].drop_duplicates().tolist()
cmdb_id_raw = service_level_labels['cmdb_id'].drop_duplicates().tolist()

# Candidate pod ids: '<service>-<i>' and '<service>2-<i>' for i in 0..2.
pre = ['', '2']
cmdb_id_pod = [
    service + f'{p}-{i}'
    for service in cmdb_id_raw
    for p in pre
    for i in range(3)
]

cmdb_id_node = (
    label_data[label_data['level'] == 'node']['cmdb_id']
    .drop_duplicates()
    .tolist()
)

# On inspection, the container network metrics contain this extra cmdb_id,
# but it never appears in the labels.
cmdb_id_pod.append('redis-cart')
cmdb_id_pod

In [None]:
# Container metric folders for the two days; the file list is taken from day 1,
# and the same file names are then looked up in both folders.
dir_path1 = '../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/container/'
dir_path2 = '../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/container/'
dir_content = os.listdir(dir_path1)

# KPI categories to plot; '&' bundles several categories into one output folder.
categories = [
    'cpu',
    'memory',
    'fs',
    'network',
    'spec',
    'threads&processes&ulimits',
]

# Line colours, cycled per container within a subplot.
colors = [
    'blue', 'green', 'orange', 'purple', 'brown',
    'pink', 'gray', 'olive', 'cyan', 'magenta',
]


In [None]:
# Render one HTML page per (KPI category, pod): one subplot per KPI, one line
# per container, with service-level (red) and pod-level (blue) windows shaded.
for cat in tqdm(categories):
    # The third '_'-separated token of the file stem selects the category.
    # NOTE(review): this is a substring test against `cat`
    # (e.g. 'threads&processes&ulimits'), not an exact match. Confirm that no
    # file token is accidentally a substring of another category name.
    file_name_set = [i for i in dir_content if i.split('.')[0].split('_')[2] in cat]
    if not file_name_set:
        # Nothing to plot for this category (avoids concat/KeyError on empty input).
        continue

    # Read both days of every matching file and concatenate once.
    container_frames = []
    for filename in file_name_set:
        container_frames.append(pd.read_csv(dir_path1+filename))
        container_frames.append(pd.read_csv(dir_path2+filename))
    container_data = pd.concat(container_frames)

    container_data['datetime'] = pd.to_datetime(container_data['timestamp'], unit='s')
    container_data['cmdb_id_pod'] = container_data['cmdb_id']
    container_data['cmdb_id_service'] = container_data['cmdb_id']
    # Populated but unused: node-level overlays are currently disabled.
    container_data['cmdb_id_node'] = container_data['cmdb_id']

    # Collapse each container id to the known pod / service id it contains.
    for pod_name in cmdb_id_pod:
        container_data['cmdb_id_pod'] = container_data['cmdb_id_pod'].apply(
            lambda x, pod_name=pod_name: pod_name if pod_name in x else x)

    for service_name in cmdb_id_service:
        container_data['cmdb_id_service'] = container_data['cmdb_id_service'].apply(
            lambda x, service_name=service_name: service_name if service_name in x else x)

    visualization_path = '../result/visualization/with_faults/container/'+cat+'/'
    os.makedirs(visualization_path, exist_ok=True)

    # Group by a scalar key: a one-element list key makes pandas >= 2 yield
    # tuple group names, which break the label lookup and the output file name.
    processed_container_data = list(container_data.groupby('cmdb_id_pod'))

    for pod_id, pod_data in processed_container_data:
        pod_data = pod_data.sort_values(by='timestamp').reset_index(drop=True)

        kpi_names = pod_data['kpi_name'].drop_duplicates().sort_values().tolist()

        # Service overlays are drawn only when the pod maps to exactly one service.
        service_ids = pod_data['cmdb_id_service'].drop_duplicates().tolist()
        service_id = service_ids[0] if len(service_ids) == 1 else ''

        label_data_pod = label_data[(label_data['level'] == 'pod') & (
            label_data['cmdb_id'] == pod_id)]
        label_data_service = label_data[(label_data['level'] == 'service') & (
            label_data['cmdb_id'] == service_id)]

        fig = make_subplots(rows=len(kpi_names), cols=1, shared_xaxes=True,
                            subplot_titles=kpi_names)

        shapes = []

        # Iterate KPI by KPI and, within it, container by container. This does
        # not assume that every container reports every KPI.
        for row, (kpi, kpi_frame) in enumerate(pod_data.groupby('kpi_name'), start=1):
            value_max = kpi_frame['value'].max()
            shapes += create_shapes(
                label_data_service['datetime'], _min=0, _max=value_max, type='service', xref='x'+str(row), yref='y'+str(row))
            shapes += create_shapes(
                label_data_pod['datetime'], _min=0, _max=value_max, type='pod', xref='x'+str(row), yref='y'+str(row))

            for j, (cmdb, data) in enumerate(kpi_frame.groupby('cmdb_id')):
                data = data.sort_values(by='timestamp').reset_index(drop=True)

                fig.append_trace(go.Scatter(x=data['datetime'], y=data['value'], name=cmdb,
                                            line=dict(color=colors[j % len(colors)], width=1.5), mode='lines', legendgroup="group1",
                                            legendgrouptitle_text="Data Type"), row=row, col=1)

        # Single-point traces that exist only to add the label colours to the
        # legend; they reuse the first sample of the last plotted series.
        for legend_name, legend_color in (('service label', 'red'), ('pod label', 'blue')):
            fig.append_trace(go.Scatter(x=[data['datetime'][0]], y=[data['value'][0]], legendgroup='group2', legendgrouptitle_text='Label Type',
                                        name=legend_name, mode="lines", line=dict(color=legend_color)), row=1, col=1)

        fig.update_layout(title_text=pod_id, shapes=shapes)
        pio.write_html(fig, file=visualization_path+pod_id+'.html')


In [1]:
import schedule
import time
def main():
    """Print 123, then block for 120 seconds (longer than the 1-minute schedule period)."""
    print(123)
    time.sleep(120)
    
# Scratch experiment: register `main` to run at second :59 of every minute and
# poll the scheduler once per second. The loop never exits on its own; it is
# stopped by interrupting the kernel.
schedule.every().minute.at(':59').do(main)
while True:
    schedule.run_pending()
    time.sleep(1)

123


KeyboardInterrupt: 

In [None]:
import schedule
import time
# Scratch experiment: clear and re-register jobs from inside a running job.
i = 0


def hello():
    """Print a fixed greeting."""
    print('Hello world')


def some_task():
    """Count invocations; on the 5th call clear every job, pause, and re-register.

    Increments the module-level counter ``i`` and prints it. When the counter
    reaches 5, all scheduled jobs are removed, the call blocks for 30 seconds,
    and both jobs are scheduled again.
    """
    global i
    i += 1
    print(i)
    if i == 5:
        schedule.clear()
        print('clear all jobs')
        time.sleep(30)
        schedule.every().second.do(hello)
        schedule.every().second.do(some_task)

schedule.every().second.do(hello)
schedule.every().second.do(some_task)
while True:
    schedule.run_pending()
    # Yield between polls instead of spinning a CPU core at 100%; the interval
    # is well below the 1-second job period.
    time.sleep(0.1)


In [1]:
import schedule
import time
import traceback

# Scratch experiment: a job asks the main loop (via FLAG) to clear the schedule,
# pause, and re-register the job; any exception resets the whole state.
FLAG = False
i=0

def hello():
    """Print the current local time (HH:MM:SS) and raise FLAG on every odd call."""
    current_time = int(time.time())
    print(time.strftime('%H:%M:%S', time.localtime(current_time)))
    global FLAG
    global i
    i += 1
    if i % 2==1:
        FLAG=True


schedule.every().minute.at(':59').do(hello)

while True:
    try:
        if FLAG:
            # Requested by hello(): drop all jobs, wait 70 s, then re-register.
            FLAG=False
            schedule.clear()
            print('clear all jobs')
            time.sleep(70)
            schedule.every().minute.at(':59').do(hello)
        schedule.run_pending()
        # Yield between polls instead of spinning a CPU core at 100%; half a
        # second is short enough to still fire within the :59 second.
        time.sleep(0.5)
    except Exception as e:
        # Recover from any job failure: report it, reset state, back off 10 s
        # and start over with a fresh schedule.
        print('!!!ERROR!!!')
        schedule.clear()
        print('error clear all jobs')
        print(e)
        print(traceback.format_exc())
        FLAG = False
        i = 0
        print('wait for 10s')
        time.sleep(10)
        schedule.every().minute.at(':59').do(hello)


14:56:59
clear all jobs
14:58:59


KeyboardInterrupt: 

In [10]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
# df_60min = pd.DataFrame([[1, 2, 3], [1, 5, 6], [1, 8, 9], [1, 2, 3], [
#                         1, 5, 6], [1, 8, 9], [1, 2, 3], [1, 5, 6], [1, 8, 9], [1, 2, 3], [1, 5, 6], [1, 8, 9],[10, 11, 12]], columns=['a', 'b', 'c'])
# sigma_n = 3
# df_60min_mean = np.mean(df_60min.values,axis=0)
# df_60min_std = np.std(df_60min.values,axis=0)
# display(df_60min_mean,df_60min_std)
# threshold1 = df_60min_mean - sigma_n * df_60min_std
# threshold2 = df_60min_mean + sigma_n * df_60min_std
# display(threshold1,threshold2)
# for i in range(3):
# 	df_60min.iloc[:, i]=df_60min.iloc[:, i].apply(lambda x: df_60min_mean[i] if x < threshold1[i] or x > threshold2[i] else x)
# df_60min

# display(d)
# d=d.diff()
# display(d)
# d.iloc[-1:,:]
# kpi_list = joblib.load('../../data/kpi_list.pkl')
# kpi_list = kpi_list[:1000]
# display(len(kpi_list))
# df_kpi = pd.DataFrame(
#     kpi_list, columns=['timestamp', 'cmdb_id', 'kpi_name', 'value'])
# df_kpi

# Toy input: row-to-row differences of a 3x4 frame (the first row becomes NaN).
a = pd.DataFrame([[1, 1, 1, 1], [2, 2, 2, 3], [3, 3, 3, 3]]).diff()
display(a)

online_std_scaler = StandardScaler()


def noise_clean(df, std):
    """Replace 3-sigma outliers by the column mean and add small Gaussian noise.

    Works for a frame of any shape (it was previously hard-coded to 3 rows x
    4 columns). Column statistics skip NaN, so a column containing NaN (e.g.
    the first row after ``DataFrame.diff``) is still filtered; NaN cells stay
    NaN.

    Args:
        df: Numeric DataFrame; it is not modified.
        std: Per-column standard deviations, at least one per column of
            ``df``. The noise added to column ``i`` is drawn from
            ``N(0, 0.01 * std[i])``.

    Returns:
        DataFrame with the same index and columns as ``df``.

    Raises:
        IndexError: if ``std`` has fewer entries than ``df`` has columns.
    """
    sigma_n = 3
    values = df.to_numpy(dtype=float)
    n_rows, n_cols = values.shape

    noise_scale = 0.01 * np.asarray(std, dtype=float)[:n_cols]
    if noise_scale.shape[0] < n_cols:
        raise IndexError(
            f'std has {noise_scale.shape[0]} entries but df has {n_cols} columns')

    # Filter outliers: NaN-aware population statistics per column.
    stats_frame = pd.DataFrame(values)
    df_mean = stats_frame.mean(axis=0).to_numpy()
    df_std = stats_frame.std(axis=0, ddof=0).to_numpy()
    threshold1 = df_mean - sigma_n * df_std
    threshold2 = df_mean + sigma_n * df_std
    # Comparisons against NaN are False, so NaN cells are left untouched.
    is_outlier = (values < threshold1) | (values > threshold2)
    cleaned = np.where(is_outlier, df_mean, values)

    # Drawn column by column (then transposed), matching the per-column draw
    # order of the original loop.
    random_nums = np.random.normal(
        0.0, noise_scale[:, np.newaxis], size=(n_cols, n_rows)).T
    print(random_nums)

    return pd.DataFrame(cleaned + random_nums, index=df.index, columns=df.columns)


# Clean the toy frame with unit per-column std, then fit the scaler on it and
# show the fitted per-column mean and variance.
b = noise_clean(a, std=[1, 1, 1, 1])
display(b)

online_std_scaler.fit(b.to_numpy())
display(online_std_scaler.mean_)
online_std_scaler.var_

Unnamed: 0,0,1,2,3
0,,,,
1,1.0,1.0,1.0,2.0
2,1.0,1.0,1.0,0.0


[[ 0.00528635  0.0059857   0.00190149 -0.00515017]
 [ 0.01114795  0.00665745  0.00207141  0.00731995]
 [-0.00801557  0.01379005 -0.01771506  0.01380453]]


Unnamed: 0,0,1,2,3
0,,,,
1,1.011148,1.006657,1.002071,2.00732
2,0.991984,1.01379,0.982285,0.013805


array([1.00156619, 1.01022375, 0.99217818, 1.01056224])

array([9.18101691e-05, 1.27185162e-05, 9.78760647e-05, 9.93525931e-01])

In [38]:
import joblib
import pandas as pd
import numpy as np
# Show every column when displaying the wide frames below.
pd.set_option('display.max_columns', None)

# NOTE: joblib.load unpickles the file; only load artifacts from a trusted source.
a = joblib.load('../../model/spot/spot.pkl')
# display(a)

b = pd.read_csv('../../data/df_57_test.csv')

# First 1440 training rows with every column sorted independently (ascending),
# so values on one row no longer belong to the same original record.
c = (
    pd.read_csv('../../data/df_57_train.csv')
    .iloc[:1440, :]
    .apply(lambda column: np.sort(column.values))
)
display(c[1420:1425])
# b[b['timestamp']==1647851040]
pd.concat([a, b])

Unnamed: 0,timestamp,node-1,node-2,node-3,node-4,node-5,node-6,adservice-grpc,adservice-http,cartservice-grpc,checkoutservice-grpc,currencyservice-grpc,emailservice-grpc,frontend-http,paymentservice-grpc,productcatalogservice-grpc,recommendationservice-grpc,shippingservice-grpc,adservice-0,adservice-1,adservice-2,adservice2-0,cartservice-0,cartservice-1,cartservice-2,cartservice2-0,checkoutservice-0,checkoutservice-2,checkoutservice-1,checkoutservice2-0,currencyservice-0,currencyservice-1,currencyservice-2,currencyservice2-0,emailservice-0,emailservice-1,emailservice-2,emailservice2-0,frontend-0,frontend-1,frontend-2,frontend2-0,paymentservice-0,paymentservice-1,paymentservice-2,paymentservice2-0,productcatalogservice-0,productcatalogservice-1,productcatalogservice-2,productcatalogservice2-0,recommendationservice-0,recommendationservice-1,recommendationservice-2,recommendationservice2-0,shippingservice-0,shippingservice-1,shippingservice-2,shippingservice2-0
1420,1647704400,10.288596,17.668171,3.514067,6.834962,4.002519,120.43158,2.482756,2.992487,0.628988,2.116125,2.801665,11.553298,3.352926,2.11407,1.120628,0.247135,2.143075,13.534303,9.975641,2.047882,6.30274,19.132264,25.997789,4.035892,34.37684,3.441171,3.68948,1.374209,2.88677,2.240806,2.649328,3.133372,6.935195,1.378585,0.967829,6.75823,1.163112,1.722924,2.025688,2.048095,3.321147,7.363784,1.075427,10.298277,8.07323,1.556089,2.085067,1.576245,2.577916,2.167836,2.540141,1.491159,2.96608,5.937822,0.949028,3.895386,1.194343
1421,1647704460,10.587191,17.740073,3.559521,6.854973,4.687398,120.478554,2.489434,3.146414,0.631103,2.14291,2.825866,11.853773,3.36137,2.282814,1.122555,0.252205,2.150945,13.861307,10.007733,2.115132,6.321392,20.657408,26.033026,4.362815,34.650992,3.447369,4.06551,1.382055,2.896628,2.242492,2.660287,3.199138,7.073347,1.392314,0.975721,6.762632,1.1663,1.728709,2.09519,2.056286,3.355581,7.373756,1.078458,10.315915,8.073925,1.580985,2.155689,1.598083,2.579283,2.251584,2.647551,1.501114,2.972723,5.939368,0.959352,3.907196,1.220354
1422,1647704520,10.632755,18.073617,3.648901,6.911777,5.350826,120.54326,2.49318,3.146414,0.697429,2.157457,2.839922,12.884161,3.414633,2.330131,1.122775,0.254086,2.193245,20.353657,10.0095,2.121554,6.398748,21.963951,26.47039,5.816694,38.046035,3.455416,4.906934,1.383519,2.903362,2.293456,2.692758,3.222373,7.074934,1.408394,1.034201,6.76873,1.167034,1.790104,2.184882,2.059836,3.467763,7.378961,1.082757,10.399708,8.121273,1.608481,2.317493,1.60063,2.730813,2.278583,2.678202,1.543671,2.975411,5.945875,0.977046,3.932398,1.224467
1423,1647704580,10.736151,18.565692,3.98538,7.10869,5.601629,120.582344,2.497644,3.146414,0.823897,2.180527,2.866504,13.166401,3.682009,2.410951,1.132257,0.259431,2.193245,21.75345,10.012212,2.191416,6.406354,22.990228,28.071124,9.102433,38.162236,3.462418,4.925174,1.389148,3.076888,2.297804,2.70709,3.261193,7.147564,1.416261,1.040422,6.773348,1.170809,1.800762,2.304612,2.073735,3.660097,7.389831,1.093188,10.414104,8.153691,1.639986,2.436317,1.690157,2.756256,2.283125,2.718316,1.579214,2.978193,5.95015,0.986362,3.93632,1.234607
1424,1647704640,11.33552,19.280146,4.097373,7.18635,6.45845,120.704562,2.502479,3.206555,0.975117,2.510717,2.931644,13.696468,3.771056,2.430466,1.167964,0.260247,2.215035,21.90378,10.023356,2.205591,6.606059,23.034755,28.312529,10.374169,39.899026,3.495534,5.064123,1.391404,3.171856,2.299611,2.719122,3.279971,7.238109,1.419584,1.041093,6.776275,1.172054,1.839823,2.408118,2.076822,3.711115,7.395222,1.106128,10.414404,8.156026,1.648628,2.56607,1.691796,2.815467,2.299448,2.766581,1.583075,2.99471,5.956815,0.992916,3.99424,1.243834


Unnamed: 0,node-1,node-2,node-3,node-4,node-5,node-6,adservice-grpc,adservice-http,cartservice-grpc,checkoutservice-grpc,currencyservice-grpc,emailservice-grpc,frontend-http,paymentservice-grpc,productcatalogservice-grpc,recommendationservice-grpc,shippingservice-grpc,adservice-0,adservice-1,adservice-2,adservice2-0,cartservice-0,cartservice-1,cartservice-2,cartservice2-0,checkoutservice-0,checkoutservice-2,checkoutservice-1,checkoutservice2-0,currencyservice-0,currencyservice-1,currencyservice-2,currencyservice2-0,emailservice-0,emailservice-1,emailservice-2,emailservice2-0,frontend-0,frontend-1,frontend-2,frontend2-0,paymentservice-0,paymentservice-1,paymentservice-2,paymentservice2-0,productcatalogservice-0,productcatalogservice-1,productcatalogservice-2,productcatalogservice2-0,recommendationservice-0,recommendationservice-1,recommendationservice-2,recommendationservice2-0,shippingservice-0,shippingservice-1,shippingservice-2,shippingservice2-0,timestamp
0,5.970492,4.846319,1.543327,3.296055,1.265563,5.413027,2.663438,3.388596,16.904552,3.7216,3.070546,17.893649,6.215221,2.849988,2.197074,2.829826,4.618239,3.173672,2.060109,6.636468,6.441051,4.860549,7.866446,5.842802,8.596506,3.553921,4.629388,1.494479,3.444216,2.418639,2.757558,3.466058,7.592834,1.538366,1.091965,6.858473,1.241108,1.952903,2.222247,2.203252,3.563599,7.467568,1.12557,10.472545,8.18556,1.693511,2.377719,1.651773,2.901386,2.474299,2.650779,1.554337,3.034862,6.048302,1.031941,4.054468,1.271409,
0,0.397707,0.460924,0.989055,0.745922,0.386684,0.485784,1.807864,0.054279,1.556354,0.005583,1.003493,0.577462,1.297389,1.124526,1.244716,1.323449,3.414971,0.588618,0.624361,9.003374,0.527968,0.689002,0.869738,0.839579,0.371252,0.48528,2.897809,0.872904,0.864698,2.197722,0.855823,1.255147,0.53673,0.223533,0.338794,0.43225,0.290287,0.890812,0.581819,1.123314,0.979799,0.300269,0.698144,2.062348,3.241713,0.623883,1.930109,1.590086,0.913723,2.094687,0.247519,0.695059,0.406759,0.551529,0.535862,0.415418,0.79995,1651298000.0


In [36]:
import pprint as pp
# Names of every monitored entity, grouped by level. The list order is kept
# exactly as given (note checkoutservice-2 precedes checkoutservice-1).
nodes = ['node-1', 'node-2', 'node-3', 'node-4', 'node-5', 'node-6']
services = [
    'adservice-grpc', 'adservice-http', 'cartservice-grpc',
    'checkoutservice-grpc', 'currencyservice-grpc', 'emailservice-grpc',
    'frontend-http', 'paymentservice-grpc', 'productcatalogservice-grpc',
    'recommendationservice-grpc', 'shippingservice-grpc',
]
pods = [
    'adservice-0', 'adservice-1', 'adservice-2', 'adservice2-0',
    'cartservice-0', 'cartservice-1', 'cartservice-2', 'cartservice2-0',
    'checkoutservice-0', 'checkoutservice-2', 'checkoutservice-1', 'checkoutservice2-0',
    'currencyservice-0', 'currencyservice-1', 'currencyservice-2', 'currencyservice2-0',
    'emailservice-0', 'emailservice-1', 'emailservice-2', 'emailservice2-0',
    'frontend-0', 'frontend-1', 'frontend-2', 'frontend2-0',
    'paymentservice-0', 'paymentservice-1', 'paymentservice-2', 'paymentservice2-0',
    'productcatalogservice-0', 'productcatalogservice-1', 'productcatalogservice-2', 'productcatalogservice2-0',
    'recommendationservice-0', 'recommendationservice-1', 'recommendationservice-2', 'recommendationservice2-0',
    'shippingservice-0', 'shippingservice-1', 'shippingservice-2', 'shippingservice2-0',
]

# Pretty-print a mapping that gives every entity the placeholder value -1.
pp.pprint(dict.fromkeys(nodes + services + pods, -1))

{'adservice-0': -1,
 'adservice-1': -1,
 'adservice-2': -1,
 'adservice-grpc': -1,
 'adservice-http': -1,
 'adservice2-0': -1,
 'cartservice-0': -1,
 'cartservice-1': -1,
 'cartservice-2': -1,
 'cartservice-grpc': -1,
 'cartservice2-0': -1,
 'checkoutservice-0': -1,
 'checkoutservice-1': -1,
 'checkoutservice-2': -1,
 'checkoutservice-grpc': -1,
 'checkoutservice2-0': -1,
 'currencyservice-0': -1,
 'currencyservice-1': -1,
 'currencyservice-2': -1,
 'currencyservice-grpc': -1,
 'currencyservice2-0': -1,
 'emailservice-0': -1,
 'emailservice-1': -1,
 'emailservice-2': -1,
 'emailservice-grpc': -1,
 'emailservice2-0': -1,
 'frontend-0': -1,
 'frontend-1': -1,
 'frontend-2': -1,
 'frontend-http': -1,
 'frontend2-0': -1,
 'node-1': -1,
 'node-2': -1,
 'node-3': -1,
 'node-4': -1,
 'node-5': -1,
 'node-6': -1,
 'paymentservice-0': -1,
 'paymentservice-1': -1,
 'paymentservice-2': -1,
 'paymentservice-grpc': -1,
 'paymentservice2-0': -1,
 'productcatalogservice-0': -1,
 'productcatalogservic

In [31]:
# axis=0 sorts every column independently.
# For this input the result is [[1, 2, 3], [2, 2, 4], [3, 5, 6]].
a = np.array([
    [1, 2, 3],
    [3, 2, 4],
    [2, 5, 6],
])
np.sort(a, axis=0)

array([[1, 2, 3],
       [2, 3, 4],
       [2, 5, 6]])

In [6]:
import pandas as pd
import numpy as np
# Toy frame with a gap in column 'c'.
# np.nan is used because the np.NAN alias was removed in NumPy 2.0.
a = pd.DataFrame([[2, 3, 1], [2, 3, np.nan], [2, 3, np.nan],
                 [2, 3, 1]], columns=['a', 'b', 'c'])
display(a)
# a=a.diff()
# Fill gaps forward first, then backward to cover any leading NaN.
a['c']= a['c'].ffill().bfill()
a
# a[['a','b']].apply(lambda x: x.mean(),axis=1)

Unnamed: 0,a,b,c
0,2,3,1.0
1,2,3,
2,2,3,
3,2,3,1.0


Unnamed: 0,a,b,c
0,2,3,1.0
1,2,3,1.0
2,2,3,1.0
3,2,3,1.0


In [85]:
import joblib
# Inspect the fitted mean stored at position 24 of the offline scaler.
# NOTE: joblib.load unpickles the file; only load artifacts from a trusted source.
std_scaler = joblib.load('../../model/scaler/offline_std_scaler3.pkl')
mean = std_scaler.mean_
mean[24]

2.421316481204662