In [1]:
import numpy as np
import pandas as pd
import random
import os

In [3]:
# Pick a random timestamp and compute the total KPI anomaly to inject at it
def compute_anomaly(path):
    """Choose a random row of a KPI series and size the anomaly for it.

    Parameters
    ----------
    path : str
        CSV file with a header row. The first column is returned as the
        timestamp; the column named ``KPI`` supplies the KPI values.

    Returns
    -------
    tuple
        ``(timestamp, total_anomaly)`` where ``timestamp`` is the first-column
        value of the chosen row and ``total_anomaly`` is
        ``int(mean + 5 * std)`` of the KPI values in the 288-row window
        centred on that row.

    Raises
    ------
    ValueError
        If the file has fewer than 300 rows, so no row is at least 150 rows
        away from both ends.
    """
    # Passing the path lets pandas open *and close* the file itself.
    df = pd.read_csv(path)
    length = len(df)
    if length < 300:
        raise ValueError(
            'need at least 300 rows to pick a timestamp away from both ends, got '
            + str(length)
        )

    # Pick a random row, staying away from the head and tail of the series
    rand_idx = random.randint(150, length - 150)
    # Read the cell directly so the timestamp keeps its column dtype
    # (going through a whole row can upcast an integer timestamp to float).
    timestamp = df.iloc[rand_idx, 0]
    # 144 rows on each side; per the original author's note the data is sampled
    # every 5 minutes over 14 days, so 288 rows is one day of data.
    temp_df = df.iloc[rand_idx - 144 : rand_idx + 144]

    # Total KPI anomaly at this timestamp: mean + 5 standard deviations.
    # np.asarray replaces Series.as_matrix(), which was removed in pandas 1.0.
    kpi = np.asarray(temp_df['KPI'], dtype=float)
    total_anomaly = int(np.mean(kpi) + 5 * np.std(kpi))
    return timestamp, total_anomaly
# Run compute_anomaly once on the week-2 KPI file; the chosen timestamp and
# its anomaly size are kept in the notebook globals below.
path = '../data/KPI_week2.csv'
timestamp, total_anomaly = compute_anomaly(path)

In [4]:
# Distribute the anomaly over the leaves covered by the root causes
def distribute_anomaly(timestamp, root_cause, total_anomaly,
                       origin_dir='../data/origin/', anomaly_dir='../data/anomaly/'):
    """Spread ``total_anomaly`` evenly over the leaf rows matched by ``root_cause``.

    Reads ``<origin_dir>/<timestamp>.csv`` (no header; columns are taken as
    ``i, e, c, p, l, kpi``), adds ``int(total_anomaly / n_matched)`` to the
    ``kpi`` of every matched row and writes the result, without header or
    index, to ``<anomaly_dir>/<timestamp>.csv``.

    Parameters
    ----------
    timestamp
        Value whose ``str()`` is the CSV file name (without extension).
    root_cause : list of list
        Each inner list is a set of attribute values. A leaf row matches a
        cause when every value of the cause appears among the row's
        ``i, e, c, p, l`` values.
    total_anomaly : int
        Total amount to distribute.
    origin_dir, anomaly_dir : str, optional
        Input and output directories. The defaults are the paths that were
        previously hard-coded. ``anomaly_dir`` is created if missing.

    Raises
    ------
    ValueError
        If no leaf row matches any root cause (previously a bare
        ``ZeroDivisionError``).

    Notes
    -----
    A leaf matched by several causes is counted and incremented once, so
    every affected leaf receives the same share.
    """
    print(timestamp, root_cause)
    file_name = str(timestamp) + '.csv'
    src_path = os.path.join(origin_dir, file_name)
    dst_path = os.path.join(anomaly_dir, file_name)

    columns = ['i', 'e', 'c', 'p', 'l', 'kpi']
    # Passing the path lets pandas open and close the file itself.
    df = pd.read_csv(src_path, names=columns)

    # Build each leaf's attribute-value set once instead of doing five
    # df.loc lookups per row for every cause.
    leaf_sets = [
        set(values)
        for values in df[columns[:-1]].itertuples(index=False, name=None)
    ]

    locs = []           # indices of the descendant leaf rows, in first-match order
    seen = set()
    for cause in root_cause:
        wanted = set(cause)
        for idx, leaf_attrs in zip(df.index, leaf_sets):
            if idx not in seen and wanted.issubset(leaf_attrs):
                seen.add(idx)
                locs.append(idx)

    print(locs)
    if not locs:
        raise ValueError(
            'no leaf in ' + src_path + ' matches root cause ' + repr(root_cause)
        )

    anomaly = int(total_anomaly / len(locs))
    # Update in place; the original rebuilt the whole frame with pd.concat
    # once per matched row.
    df.loc[locs, 'kpi'] = df.loc[locs, 'kpi'] + anomaly

    if anomaly_dir:
        os.makedirs(anomaly_dir, exist_ok=True)
    df.to_csv(dst_path, header=False, index=False)

In [5]:
# 5 layers in total, with 1 to 5 root causes per layer: 25 cases overall
def find_root_causes(root_cause_dict):
    """Generate 25 synthetic anomaly cases and record their root causes.

    For every layer ``i`` in ``0..4`` and every ``j`` in ``0..4`` this picks a
    random timestamp with ``compute_anomaly``, builds ``j + 1`` root causes of
    ``i + 1`` attribute values each from randomly chosen rows of that
    timestamp's origin file, stores them in ``root_cause_dict`` and calls
    ``distribute_anomaly`` to write the anomalous copy of the file.

    Parameters
    ----------
    root_cause_dict : dict
        Filled in place: ``timestamp -> list of root causes`` (each root
        cause is a list of attribute values).

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` if an origin file has fewer than
        ``j + 1`` rows.
    """
    layers, kinds = 5, 5
    kpi_path = '../data/KPI_week2.csv'
    max_resamples = 100
    for i in range(layers):
        for j in range(kinds):
            # Pick a random timestamp and compute its anomaly size
            timestamp, total_anomaly = compute_anomaly(kpi_path)
            # A repeated timestamp would overwrite an earlier case (both its
            # dict entry and its anomaly file), so redraw a bounded number of
            # times. If every redraw collides, the overwrite still happens,
            # as it did before.
            for _ in range(max_resamples):
                if timestamp not in root_cause_dict:
                    break
                timestamp, total_anomaly = compute_anomaly(kpi_path)

            path = '../data/origin/' + str(timestamp) + '.csv'
            # Passing the path lets pandas open and close the file itself.
            df = pd.read_csv(path, names=['i', 'e', 'c', 'p', 'l', 'kpi'])
            length = len(df)

            # Build the root causes from randomly chosen attribute values.
            # range(length) makes the last row eligible too; the original
            # range(length - 1) could never pick it.
            rand_rows = random.sample(range(length), j + 1)
            # Random order of the five attribute columns; the first i + 1 of
            # them define this layer's root-cause dimensions.
            attr_cols = random.sample(range(5), 5)[:i + 1]
            # .iat is explicit positional access (integer Series[...] lookup
            # by position is deprecated in recent pandas).
            root_cause = [
                [df.iat[row, col] for col in attr_cols]
                for row in rand_rows
            ]
            root_cause_dict[timestamp] = root_cause

            # Distribute the anomaly
            distribute_anomaly(timestamp, root_cause, total_anomaly)
            
# Maps each chosen timestamp to its list of root causes; filled in place by
# find_root_causes, which also writes the anomaly CSV files.
# NOTE(review): no random seed is set in the visible cells, so every run
# produces different cases - confirm that is intended.
root_cause_dict = {}
find_root_causes(root_cause_dict)

1536611400000 [['e03']]
[40, 60, 80, 108, 116, 142, 148, 182, 183, 188, 200, 237, 258, 302, 306, 312, 315, 361, 366, 391, 441, 462, 469, 502, 584, 622, 664, 972, 978, 1006, 1116, 1128]
1536680400000 [['p08'], ['p24']]
[69, 100, 119, 399, 412, 427, 442, 471, 486, 490, 513, 523, 547, 602, 612, 640, 654, 663, 674, 846, 928, 972, 975, 1022, 1037, 1074, 1113, 1175, 1230, 1254, 1346, 1563, 1573, 1577, 1695, 1782, 1815, 90, 95, 97, 108, 217, 230, 240, 263, 329, 346, 369, 373, 379, 405, 448, 466, 495, 503, 511, 521, 545, 648, 660, 689, 725, 754, 783, 799, 863, 976, 987, 993, 1038, 1043, 1064, 1086, 1119, 1223, 1235, 1319, 1401, 1420, 1431, 1465, 1472, 1477, 1506, 1512, 1544, 1565, 1583, 1616, 1629, 1645, 1721, 1801, 1829, 1858, 1874]
1536811500000 [['i21'], ['i05'], ['i13']]
[248, 257, 290, 324, 398, 404, 581, 602, 624, 659, 863, 877, 907, 935, 1061, 1092, 1262, 1375, 1384, 1493, 1510, 1596, 1681, 1920, 1975, 2002, 2212, 2405, 2483, 2509, 2580, 2631, 2640, 2645, 2713, 2723, 2774, 2777, 2942, 2

1536556500000 [['l3', 'i38']]
[10, 11, 12, 14, 15, 20, 23, 27, 39, 97, 102, 111, 161, 175, 224, 286, 290, 356, 368, 370, 433, 434, 449, 455, 492, 528, 533, 593, 596, 599, 610, 614, 615, 662, 671, 674, 675, 679, 686, 753, 758, 793, 799, 803, 807, 835, 863, 867, 869, 870, 875, 877, 878, 881, 914, 927, 934, 948, 958, 962, 981, 989, 1023, 1027, 1032, 1047, 1091, 1093, 1095, 1101, 1157, 1165, 1212, 1220, 1221, 1223, 1232, 1238, 1275, 1297, 1299, 1310, 1357, 1374, 1375, 1425, 1436, 1503, 1504, 1507, 1524, 1561, 1575, 1627, 1633, 1645, 1646, 1715, 1717, 1718, 1719, 1771, 1773, 1774, 1780, 1786, 1788, 1834, 1890, 1891, 1893, 1905, 1944, 1950, 1951, 1954, 1955, 1959, 1977, 1987, 1990, 1994, 2033, 2066, 2120, 2148, 2180, 2185, 2190, 2206, 2227, 2248, 2260, 2261, 2265, 2278, 2282, 2311, 2315, 2317, 2365, 2388, 2402, 2407, 2411, 2416, 2418, 2426, 2444, 2457, 2458, 2464, 2476, 2515, 2521, 2559, 2582, 2583, 2585, 2596, 2600, 2611, 2627, 2644, 2658, 2660, 2663, 2683, 2720, 2725, 2736, 2751, 2758, 277

1536588300000 [['l3', 'e03', 'p29'], ['l2', 'e08', 'p14'], ['l3', 'e10', 'p19'], ['l3', 'e11', 'p07'], ['l3', 'e10', 'p15']]
[830, 1765, 1977, 1383, 1572, 1808, 1, 35, 71, 495, 1026, 2198, 2381, 2586, 48, 341, 815, 1297, 2023, 5, 486, 821, 923, 2014]
1536615000000 [['e11', 'p11', 'c1', 'i38']]
[249]
1536727200000 [['e01', 'i03', 'l3', 'p05'], ['e12', 'i38', 'l3', 'p10']]
[1612, 1477]
1536510600000 [['i06', 'c3', 'e11', 'l2'], ['i38', 'c1', 'e10', 'l3'], ['i14', 'c5', 'e12', 'l3']]
[1485, 1516, 1625, 1658, 1816, 1, 9, 41, 42, 44, 136, 331, 381, 582, 585, 675, 718, 719, 791, 799, 804, 842, 949, 994, 1004, 1034, 1079, 1157, 1158, 1160, 1163, 1211, 1212, 1248, 1251, 1316, 1408, 1416, 1453, 1494, 1525, 1673, 1680, 1730, 1812, 1847, 74, 221, 249, 327, 467, 593, 819, 987, 1116, 1185, 1441, 1450, 1488, 1519, 1605, 1634, 1651, 1764]
1536689100000 [['e10', 'i06', 'l2', 'c5'], ['e03', 'i13', 'l3', 'c5'], ['e11', 'i14', 'l3', 'c5'], ['e03', 'i34', 'l3', 'c5']]
[9, 90, 174, 182, 294, 311, 314, 355,

In [6]:
# Show every generated case (timestamp followed by its root causes)
for k, v in root_cause_dict.items():
    print(k, v)

# Save the timestamp -> root-cause mapping as a two-column CSV
# (columns: root_cause, timestamp; no index column)
root_cause_set = pd.DataFrame({
    'root_cause': list(root_cause_dict.values()),
    'timestamp': list(root_cause_dict.keys()),
})
root_cause_set.to_csv('../data/timestamps_root_causes.csv', index=None)

1536611400000 [['e03']]
1536680400000 [['p08'], ['p24']]
1536811500000 [['i21'], ['i05'], ['i13']]
1536762000000 [['i09'], ['i38'], ['i02'], ['i07']]
1536753600000 [['e01'], ['e08'], ['e04'], ['e07'], ['e11']]
1536556500000 [['l3', 'i38']]
1536776400000 [['e01', 'c4'], ['e08', 'c3']]
1536877500000 [['e05', 'i06'], ['e10', 'i38'], ['e08', 'i02']]
1536544800000 [['p03', 'e09'], ['p07', 'e04'], ['p04', 'e10'], ['p35', 'e09']]
1536483000000 [['i24', 'l3'], ['i38', 'l1'], ['i06', 'l2'], ['i06', 'l1'], ['i39', 'l3']]
1536730500000 [['p29', 'l3', 'e08']]
1536558300000 [['l2', 'c3', 'i06'], ['l3', 'c3', 'i06']]
1536752100000 [['p15', 'i39', 'c5'], ['p23', 'i34', 'c5'], ['p05', 'i38', 'c1']]
1536580800000 [['i42', 'l3', 'c3'], ['i02', 'l3', 'c1'], ['i14', 'l3', 'c5'], ['i46', 'l3', 'c1']]
1536588300000 [['l3', 'e03', 'p29'], ['l2', 'e08', 'p14'], ['l3', 'e10', 'p19'], ['l3', 'e11', 'p07'], ['l3', 'e10', 'p15']]
1536615000000 [['e11', 'p11', 'c1', 'i38']]
1536727200000 [['e01', 'i03', 'l3', 'p05