# 数据可视化


In [81]:
import os
import pandas as pd
import warnings
from tqdm.notebook import tqdm, trange
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import plotly_express as px
import plotly as py
import plotly.io as pio
from plotly.offline import download_plotlyjs, init_notebook_mode
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation / copy warnings
# raised by the pandas and plotly calls further down.
warnings.filterwarnings(action="ignore")

# Switch plotly into offline notebook mode (connected=True is passed through).
init_notebook_mode(connected=True)
# Register tqdm's pandas integration.
tqdm.pandas()


## 读取数据

### 标签数据

In [82]:
# Load the ground-truth fault labels, one CSV per day
# (2022-03-20 and 2022-03-21).
label_data1, label_data2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/training_data_with_faults/groundtruth/groundtruth-k8s-1-2022-03-20.csv',
        '../data/training_data_with_faults/groundtruth/groundtruth-k8s-1-2022-03-21.csv',
    )
)



In [83]:
# Stack both days of labels into one frame; each input keeps its own row
# index here, so index values repeat until the index is reset.
label_frames = [label_data1, label_data2]
label_data = pd.concat(label_frames)
label_data


Unnamed: 0,timestamp,level,cmdb_id,failure_type
0,1647738546,pod,shippingservice-1,k8s容器读io负载
1,1647741299,pod,emailservice-0,k8s容器读io负载
2,1647743133,node,node-1,node 内存消耗
3,1647744727,node,node-1,node 内存消耗
4,1647746396,pod,shippingservice-1,k8s容器内存负载
...,...,...,...,...
38,1647847538,pod,emailservice-1,k8s容器网络延迟
39,1647838805,service,paymentservice,k8s容器网络延迟
40,1647836321,service,productcatalogservice,k8s容器网络延迟
41,1647835475,pod,recommendationservice-2,k8s容器网络丢包


In [84]:
# Order the labels by level, then component, then time, and renumber the rows
# 0..n-1. Chaining returns a new frame instead of mutating in place.
label_data = (
    label_data
    .sort_values(by=['level', 'cmdb_id', 'timestamp'])
    .reset_index(drop=True)
)
# The timestamps are epoch seconds; convert them to datetimes so they can be
# used directly on the plot x-axes.
label_data['datetime'] = pd.to_datetime(label_data['timestamp'], unit='s')

# to_csv does not create missing directories, so make sure the target folder
# exists first (same pattern the metric export cells use).
label_csv_path = '../data/label/label1.csv'
os.makedirs(os.path.dirname(label_csv_path), exist_ok=True)
label_data.to_csv(label_csv_path, index=False)
label_data


Unnamed: 0,timestamp,level,cmdb_id,failure_type,datetime
0,1647743133,node,node-1,node 内存消耗,2022-03-20 02:25:33
1,1647744727,node,node-1,node 内存消耗,2022-03-20 02:52:07
2,1647749271,node,node-1,node 磁盘读IO消耗,2022-03-20 04:07:51
3,1647784337,node,node-1,node 磁盘空间消耗,2022-03-20 13:52:17
4,1647823965,node,node-1,node节点CPU故障,2022-03-21 00:52:45
...,...,...,...,...,...
75,1647800799,service,recommendationservice,k8s容器进程中止,2022-03-20 18:26:39
76,1647757395,service,shippingservice,k8s容器cpu负载,2022-03-20 06:23:15
77,1647761243,service,shippingservice,k8s容器进程中止,2022-03-20 07:27:23
78,1647827637,service,shippingservice,k8s容器读io负载,2022-03-21 01:53:57


### 读取业务指标

In [95]:
# Service-level metric CSVs that are actually loaded: the 2022-03-20 and
# 2022-03-21 "cloudbed1" recordings from the with-faults data set.
#
# Alternative inputs, currently disabled (read them the same way and add the
# resulting frames to the concat list below to include them):
#   '../data/training_data_normal/cloudbed-1/metric/service/metric_service.csv'
#   '../data/training_data_normal/cloudbed-2/metric/service/metric_service.csv'
#   '../data/training_data_normal/cloudbed-3/metric/service/metric_service.csv'
#   '../data/training_data_with_faults/tar/2022-03-20-cloudbed2/metric/service/metric_service.csv'
#   '../data/training_data_with_faults/tar/2022-03-21-cloudbed2/metric/service/metric_service.csv'
#   '../data/training_data_with_faults/tar/2022-03-20-cloudbed3/metric/service/metric_service.csv'
#   '../data/training_data_with_faults/tar/2022-03-21-cloudbed3/metric/service/metric_service.csv'
#   '../data/training_data_with_faults/tar/2022-03-24-cloudbed3/metric/service/metric_service.csv'
service_metric_csvs = (
    '../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/service/metric_service.csv',
    '../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/service/metric_service.csv',
)
service_metric_data1, service_metric_data2 = map(pd.read_csv, service_metric_csvs)

# Stack the per-day frames; the row index of each input is kept as-is.
service_metric_data = pd.concat([service_metric_data1, service_metric_data2])
service_metric_data


In [97]:
def create_shapes(starts, _min, _max, sequence_type=None, xref=None, yref=None,
                  width=timedelta(minutes=10)):
    """Build plotly rectangle-shape dicts that highlight time windows.

    One semi-transparent rectangle is produced per entry in ``starts``; it
    spans ``[start, start + width]`` horizontally and ``[_min, _max]``
    vertically.

    Parameters
    ----------
    starts : iterable
        Window start times. Each element must support ``element + width``
        (e.g. ``datetime`` / ``pandas.Timestamp`` values).
    _min, _max :
        Lower and upper y-coordinates of every rectangle.
    sequence_type : str, optional
        ``"true"`` draws blue rectangles; ``None`` or any other value draws
        red ones.
    xref, yref : str, optional
        Axis references to attach to each shape (for subplots). When ``xref``
        is given, both ``"xref"`` and ``"yref"`` keys are written (``yref``
        possibly ``None``). When only ``yref`` is given it is written on its
        own. When neither is given, no reference keys are added.
    width : datetime.timedelta, optional
        Horizontal extent of each rectangle. Defaults to 10 minutes.

    Returns
    -------
    list of dict
        Shape dictionaries in the same order as ``starts``.
    """
    # Only the literal string "true" selects blue; everything else is red.
    color = "blue" if sequence_type == "true" else "red"

    shapes = []
    for start in starts:
        shape = {
            "type": "rect",
            "x0": start,
            "y0": _min,
            "x1": start + width,
            "y1": _max,
            "fillcolor": color,
            "opacity": 0.2,
            # Zero-width outline: only the translucent fill is visible.
            "line": {
                "width": 0,
            },
        }
        if xref is not None:
            shape["xref"] = xref
            shape["yref"] = yref
        elif yref is not None:
            # Honour a y-axis reference even when no x-axis reference is
            # supplied, instead of silently discarding it.
            shape["yref"] = yref

        shapes.append(shape)

    return shapes

In [98]:
# Output locations are the same for every service, so create them once up
# front instead of on every loop iteration.
# processed_data_path = '../data/training_data_normal/processed_service_metric_data/'
processed_data_path = '../data/training_data_with_faults/tar/processed_service_metric_data/'
# visualization_path = '../result/visualization/normal/service/'
visualization_path = '../result/visualization/with_faults/service/'
os.makedirs(processed_data_path, exist_ok=True)
os.makedirs(visualization_path, exist_ok=True)

# One subplot row per metric, top to bottom: (column, trace mode, colour).
# 'markers' rows are drawn as size-1 points, 'lines' rows as width-1 lines.
service_panels = [
    ('rr', 'markers', 'rgb(255, 127, 14, 1)'),
    ('sr', 'markers', 'rgb(0, 204, 150, 1)'),
    ('mrt', 'lines', 'rgb(31, 119, 180, 1)'),
    ('count', 'lines', 'rgb(0, 0, 0, 1)'),
]

# Split the metrics per service. Each frame is time-ordered, re-indexed and
# given a 'datetime' column derived from the epoch-second 'timestamp'. New
# frames are built rather than mutating the groupby slices in place.
processed_service_metric_data = [
    (
        service_name,
        service_data
        .sort_values(by='timestamp')
        .reset_index(drop=True)
        .assign(datetime=lambda frame: pd.to_datetime(frame['timestamp'], unit='s')),
    )
    for service_name, service_data in service_metric_data.groupby('service')
]

for service_name, service_data in tqdm(processed_service_metric_data):
    service_data.to_csv(processed_data_path + service_name + '_metrics.csv', index=False)

    # Labels are matched on the part of the service name before the first '-'.
    label_data_i = label_data[(label_data['level'] == 'service') & (
        label_data['cmdb_id'] == service_name.split('-')[0])]

    fig = make_subplots(rows=len(service_panels), cols=1, shared_xaxes=True)
    shapes = []

    for row, (metric, mode, color) in enumerate(service_panels, start=1):
        if mode == 'markers':
            trace_style = dict(marker=dict(color=color, size=1))
        else:
            trace_style = dict(line=dict(color=color, width=1))

        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(
            go.Scatter(x=service_data['datetime'], y=service_data[metric],
                       name=metric, mode=mode, **trace_style),
            row=row, col=1)
        # Shade each labelled fault window from 0 up to this metric's maximum,
        # bound to the axes of the current subplot row.
        shapes += create_shapes(label_data_i['datetime'], _min=0,
                                _max=service_data[metric].max(),
                                xref=f'x{row}', yref=f'y{row}')

    # fig.update_layout(title_text=service_name)
    fig.update_layout(title_text=service_name, shapes=shapes)
    pio.write_html(fig, file=visualization_path + service_name + '.html')
    

  0%|          | 0/11 [00:00<?, ?it/s]

### 性能指标

In [100]:
# Node-level KPI metrics: one CSV per day (0320 and 0321) from the with-faults
# "cloudbed1" directories, stacked into a single frame.
#
# Alternative input, currently disabled:
# node_metric_data = pd.read_csv(
#     '../data/training_data_normal/cloudbed-1/metric/node/kpi_cloudbed1_metric_0319.csv')
node_metric_frames = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/training_data_with_faults/tar/2022-03-20-cloudbed1/metric/node/kpi_cloudbed1_metric_0320.csv',
        '../data/training_data_with_faults/tar/2022-03-21-cloudbed1/metric/node/kpi_cloudbed1_metric_0321.csv',
    )
]
node_metric_data1, node_metric_data2 = node_metric_frames
node_metric_data = pd.concat(node_metric_frames)

node_metric_data

Unnamed: 0,timestamp,cmdb_id,kpi_name,value
0,1647619200,node-1,ping.can_connect,1.000000e+00
1,1647619200,node-2,ping.can_connect,1.000000e+00
2,1647619200,node-4,system.swap.total,0.000000e+00
3,1647619200,node-3,system.net.bytes_rcvd,7.264655e+06
4,1647619200,node-3,system.load.5,1.240000e+00
...,...,...,...,...
492592,1647705540,node-3,system.fs.inodes.free,1.197572e+10
492593,1647705540,node-3,system.disk.used,1.078189e+10
492594,1647705540,node-3,system.disk.total,1.406688e+10
492595,1647705540,node-3,system.disk.pct_usage,4.143000e+01


In [101]:
# Derive a 'datetime' column from the epoch-second 'timestamp' column.
node_metric_data = node_metric_data.assign(
    datetime=pd.to_datetime(node_metric_data['timestamp'], unit='s'))
# Materialise one (kpi_name, frame) pair per distinct KPI.
processed_node_metric_data = [
    (kpi_name, kpi_frame)
    for kpi_name, kpi_frame in node_metric_data.groupby('kpi_name')
]
len(processed_node_metric_data)


59

In [102]:
# Output locations are the same for every KPI, so create them once up front
# instead of on every loop iteration.
# processed_data_path = '../data/training_data_normal/processed_node_metric_data/'
processed_data_path = '../data/training_data_with_faults/tar/processed_node_metric_data/'
# visualization_path = '../result/visualization/normal/node/'
visualization_path = '../result/visualization/with_faults/node/'
os.makedirs(processed_data_path, exist_ok=True)
os.makedirs(visualization_path, exist_ok=True)

for kpi_index, (kpi_name, kpi_data) in enumerate(tqdm(processed_node_metric_data)):
    # Order by node then time and renumber the rows. A new frame is built
    # rather than mutating the groupby slice in place, then stored back so the
    # list still holds the sorted frames after the loop.
    kpi_data = kpi_data.sort_values(by=['cmdb_id', 'timestamp']).reset_index(drop=True)
    processed_node_metric_data[kpi_index] = (kpi_name, kpi_data)
    kpi_data.to_csv(processed_data_path + kpi_name + '_metrics.csv', index=False)

    # One subplot row per node that reports this KPI.
    node_kpi_data = list(kpi_data.groupby('cmdb_id'))

    fig = make_subplots(rows=len(node_kpi_data), cols=1, shared_xaxes=True)
    shapes = []
    for row, (node_name, node_data) in enumerate(node_kpi_data, start=1):
        node_data = node_data.sort_values(by='timestamp').reset_index(drop=True)

        label_data_i = label_data[(label_data['level'] == 'node') & (
            label_data['cmdb_id'] == node_name)]

        # Shade each labelled fault window for this node from 0 up to the
        # node's maximum value, bound to the axes of its subplot row.
        shapes += create_shapes(
            label_data_i['datetime'], _min=0, _max=node_data['value'].max(),
            xref=f'x{row}', yref=f'y{row}')
        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(
            go.Scatter(x=node_data['datetime'], y=node_data['value'], name=node_name,
                       line=dict(color='rgb(31, 119, 180, 1)', width=1), mode='lines'),
            row=row, col=1)

    # fig.update_layout(title_text=kpi_name)
    fig.update_layout(title_text=kpi_name, shapes=shapes)
    pio.write_html(fig, file=visualization_path + kpi_name + '.html')


  0%|          | 0/59 [00:00<?, ?it/s]