In [1]:
import numpy as np
import pandas as pd
import random
import os

In [2]:
# 随机选取一个时间戳找出其属性值并计算异常
def compute_anomaly(path):
    """Pick a random timestamp from a KPI series and derive an anomalous total KPI.

    The CSV at ``path`` is read with its first row as header. The timestamp is
    taken from the first column and the values from the ``KPI`` column.

    Args:
        path: Path of the KPI CSV file.

    Returns:
        A ``(timestamp, total_anomaly)`` tuple. ``timestamp`` is the first-column
        value of a randomly chosen row; ``total_anomaly`` is
        ``int(mean + 5 * std)`` of the ``KPI`` values in the 288 rows
        surrounding that row (144 before it, the row itself, 143 after it).

    Raises:
        ValueError: If the file has fewer than 300 rows, so that no row has a
            full window around it.
    """
    # Handing the path to pandas lets it open *and close* the file; the
    # previous explicit open() was never closed.
    df = pd.read_csv(path)
    length = len(df)
    if length < 300:
        raise ValueError(
            'KPI series needs at least 300 rows, got %d' % length)

    # Stay away from both ends so the surrounding window is always complete.
    rand_idx = random.randint(150, length - 150)
    # Read the cell directly from its column: going through a whole row
    # (df.loc[rand_idx][0]) upcasts the timestamp to float whenever KPI is a
    # float column, and relies on deprecated positional Series lookup.
    timestamp = df.iloc[rand_idx, 0]
    # 288 rows; per the original author's note the series is sampled every
    # 5 minutes, which makes this a one-day window.
    window = df.iloc[rand_idx - 144 : rand_idx + 144]

    # Series.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    kpi = window['KPI'].to_numpy(dtype=float)
    total_anomaly = int(np.mean(kpi) + 5 * np.std(kpi))    # anomalous total KPI at this timestamp
    return timestamp, total_anomaly
# Draw one random timestamp and its anomalous total KPI from the week-2 KPI series.
path = '../data/KPI_week2.csv'
timestamp, total_anomaly = compute_anomaly(path)

In [3]:
# 分发异常
def distribute_anomaly(timestamp, root_cause, total_anomaly):
    """Inject an anomaly into one timestamp's leaf KPI table and save the result.

    Reads ``../data/origin_week2/<timestamp>.csv`` (no header row; columns are
    read as ``i, e, c, p, l, kpi``), rewrites the ``kpi`` of every leaf row
    that belongs to one of the root causes, and writes the whole table to
    ``../data/anomaly/<timestamp>.csv`` without header or index.

    Args:
        timestamp: Value used to build both file names via ``str(timestamp)``.
        root_cause: List of root causes, each a list of attribute values. A
            leaf row belongs to a root cause when every value of the cause
            appears among the row's five attribute values.
        total_anomaly: Total anomaly; it is split evenly (truncated to int)
            across the root causes.

    Raises:
        ZeroDivisionError: If ``root_cause`` is empty.
        ValueError: If the leaves of a root cause have a KPI sum of zero, so
            the anomaly cannot be spread proportionally.
    """
    print(timestamp, root_cause)
    root_cause_anomaly = int(total_anomaly / len(root_cause))    # share of the total anomaly per root cause

    attr_cols = ['i', 'e', 'c', 'p', 'l']
    src_path = '../data/origin_week2/' + str(timestamp) + '.csv'
    dst_path = '../data/anomaly/' + str(timestamp) + '.csv'
    # Passing the path lets pandas manage (and close) the file handle.
    df = pd.read_csv(src_path, names=attr_cols + ['kpi'])
    # Only the kpi column is rewritten below, so the attribute view can be
    # taken once for all root causes.
    attrs = df[attr_cols]

    for cause in root_cause:
        # A row is a descendant leaf when it contains every value of the cause.
        is_leaf = pd.Series(True, index=df.index)
        for value in set(cause):
            is_leaf &= attrs.eq(value).any(axis=1)
        locs = df.index[is_leaf]
        if len(locs) == 0:
            continue

        # Original value of the root cause = sum over its leaves. Later causes
        # see the values already modified by earlier ones.
        root_cause_origin_kpi = sum(df.loc[locs, 'kpi'].tolist())
        if root_cause_origin_kpi == 0:
            raise ValueError(
                'root cause %r has zero total KPI at timestamp %s; '
                'cannot distribute the anomaly proportionally' % (cause, timestamp))

        # Ripple effect: every leaf receives a share proportional to its own KPI.
        for idx in locs:
            leaf_origin_kpi = df.at[idx, 'kpi']
            leaf_anomaly_kpi = leaf_origin_kpi + int(
                (root_cause_anomaly - root_cause_origin_kpi) * leaf_origin_kpi / root_cause_origin_kpi)
            # NOTE(review): leaf_anomaly_kpi already contains leaf_origin_kpi, so
            # the original KPI is added twice here; the net effect is roughly
            # leaf + root_cause_anomaly * leaf / root_cause_origin_kpi. Kept
            # as in the original - confirm this is the intended formula.
            # Writing the cell in place replaces the per-leaf rebuild of the
            # whole frame with pd.concat.
            df.at[idx, 'kpi'] = leaf_origin_kpi + leaf_anomaly_kpi

    # to_csv opens and closes the target itself; the extra open(path3, 'w')
    # that preceded it only leaked a file handle.
    df.to_csv(dst_path, header=None, index=None)

In [4]:
# 一共5层，每层1~5种root cause,共25种
def find_root_causes(root_cause_dict):
    """Generate 25 injected-anomaly cases and record their root causes.

    For every layer ``i`` in 0..4 and every count ``j`` in 0..4 one case is
    produced: a random timestamp is drawn with ``compute_anomaly``, ``j + 1``
    random rows of that timestamp's leaf table are picked, and the values of
    the same ``i + 1`` randomly chosen attribute columns of each picked row
    form one root cause. The case is then materialised with
    ``distribute_anomaly``.

    Args:
        root_cause_dict: Dict that is filled in place, mapping each timestamp
            to its list of root causes. Timestamps already present in it are
            not reused.

    Raises:
        RuntimeError: If no unused timestamp is found after 100 draws.
        ValueError: If a leaf table has fewer rows than root causes requested.
    """
    layers, kinds = 5, 5
    n_attrs = 5          # the first five columns are attributes, the sixth is the KPI
    max_redraws = 100
    for i in range(layers):
        for j in range(kinds):
            # Draw a timestamp that has not been used yet. A repeated
            # timestamp would overwrite both the dict entry and the anomaly
            # file of an earlier case, silently leaving fewer than 25 cases.
            for _ in range(max_redraws):
                timestamp, total_anomaly = compute_anomaly('../data/KPI_week2.csv')
                if timestamp not in root_cause_dict:
                    break
            else:
                raise RuntimeError(
                    'no unused timestamp found after %d draws' % max_redraws)

            path = '../data/origin_week2/' + str(timestamp) + '.csv'
            # Passing the path lets pandas close the file after reading.
            df = pd.read_csv(path, names=['i', 'e', 'c', 'p', 'l', 'kpi'])
            length = len(df)

            # Pick j + 1 distinct rows (every row is eligible, including the
            # last one) and i + 1 distinct attribute columns shared by all of
            # them.
            rand_rows = random.sample(range(length), j + 1)
            attr_positions = random.sample(range(n_attrs), i + 1)

            # NOTE(review): distinct rows can still yield identical attribute
            # combinations, in which case the same root cause appears twice in
            # the list and its leaves are modified twice - confirm whether
            # duplicates should be filtered.
            root_cause = [
                [df.iat[row, col] for col in attr_positions]
                for row in rand_rows
            ]
            root_cause_dict[timestamp] = root_cause

            # Spread the anomaly over the leaves of the chosen root causes.
            distribute_anomaly(timestamp, root_cause, total_anomaly)
            
# Maps timestamp -> list of root causes; filled in place by find_root_causes.
root_cause_dict = {}
find_root_causes(root_cause_dict)

1536771600000 [['l2']]
1536602100000 [['i17'], ['i38']]
1536567600000 [['e05'], ['e11'], ['e09']]
1536796800000 [['p35'], ['p12'], ['p02'], ['p21']]
1536670800000 [['e10'], ['e04'], ['e09'], ['e10'], ['e08']]
1536693900000 [['e11', 'i14']]
1536639600000 [['c1', 'p08'], ['c1', 'p23']]
1536574200000 [['p23', 'i12'], ['p15', 'i06'], ['p12', 'i06']]
1536893400000 [['c1', 'p17'], ['c5', 'p32'], ['c5', 'p16'], ['c5', 'p30']]
1536805800000 [['p19', 'e04'], ['p07', 'e04'], ['p06', 'e04'], ['p29', 'e04'], ['p16', 'e11']]
1536813300000 [['c5', 'e01', 'i39']]
1536574800000 [['l3', 'i41', 'p11'], ['l3', 'i02', 'p16']]
1536693000000 [['p07', 'c3', 'l3'], ['p01', 'c5', 'l2'], ['p10', 'c1', 'l3']]
1536648000000 [['e12', 'l3', 'p30'], ['e04', 'l3', 'p01'], ['e04', 'l3', 'p09'], ['e08', 'l3', 'p16']]
1536644700000 [['i23', 'l3', 'p03'], ['i30', 'l3', 'p03'], ['i05', 'l3', 'p35'], ['i14', 'l3', 'p15'], ['i13', 'l3', 'p17']]
1536722400000 [['i12', 'c5', 'e08', 'l3']]
1536881100000 [['p07', 'c1', 'l3', 'i

In [5]:
# Echo every generated case, then persist the timestamp / root-cause table.
for k, v in root_cause_dict.items():
    print(k, v)
root_cause_set = pd.DataFrame(
    {
        'timestamp': list(root_cause_dict.keys()),
        'root_cause': list(root_cause_dict.values()),
    },
    columns=['timestamp', 'root_cause'],
)
root_cause_set.to_csv('../data/timestamps_root_causes.csv', index=None)

1536771600000 [['l2']]
1536602100000 [['i17'], ['i38']]
1536567600000 [['e05'], ['e11'], ['e09']]
1536796800000 [['p35'], ['p12'], ['p02'], ['p21']]
1536670800000 [['e10'], ['e04'], ['e09'], ['e10'], ['e08']]
1536693900000 [['e11', 'i14']]
1536639600000 [['c1', 'p08'], ['c1', 'p23']]
1536574200000 [['p23', 'i12'], ['p15', 'i06'], ['p12', 'i06']]
1536893400000 [['c1', 'p17'], ['c5', 'p32'], ['c5', 'p16'], ['c5', 'p30']]
1536805800000 [['p19', 'e04'], ['p07', 'e04'], ['p06', 'e04'], ['p29', 'e04'], ['p16', 'e11']]
1536813300000 [['c5', 'e01', 'i39']]
1536574800000 [['l3', 'i41', 'p11'], ['l3', 'i02', 'p16']]
1536693000000 [['p07', 'c3', 'l3'], ['p01', 'c5', 'l2'], ['p10', 'c1', 'l3']]
1536648000000 [['e12', 'l3', 'p30'], ['e04', 'l3', 'p01'], ['e04', 'l3', 'p09'], ['e08', 'l3', 'p16']]
1536644700000 [['i23', 'l3', 'p03'], ['i30', 'l3', 'p03'], ['i05', 'l3', 'p35'], ['i14', 'l3', 'p15'], ['i13', 'l3', 'p17']]
1536722400000 [['i12', 'c5', 'e08', 'l3']]
1536881100000 [['p07', 'c1', 'l3', 'i