In [1]:
import numpy as np
import pandas as pd
import random
import os

In [2]:
# 随机选取一个时间戳找出其属性值并计算异常
def compute_anomaly(path, half_window=144, margin=150, n_sigma=5):
    """Pick a random row of a KPI CSV and size the anomaly to inject at it.

    The anomaly is ``int(mean + n_sigma * std)`` of the ``KPI`` column over the
    ``2 * half_window`` rows around the chosen row (population standard
    deviation). According to the original author's note the rows are 5 minutes
    apart, so the default window of 288 rows spans one day.

    Args:
        path: CSV file with a header row. Its first column is used as the
            timestamp and it must contain a ``KPI`` column.
        half_window: number of rows taken before the chosen row; the window
            is ``[row - half_window, row + half_window)``.
        margin: rows excluded at both ends of the file when choosing the row,
            so the window never runs off the data. Must be >= ``half_window``.
        n_sigma: number of standard deviations added to the window mean.

    Returns:
        Tuple ``(timestamp, total_anomaly)``: the first-column value of the
        chosen row and the integer anomaly for that row.

    Raises:
        ValueError: if ``margin < half_window`` or the file has fewer than
            ``2 * margin`` rows.
    """
    if margin < half_window:
        raise ValueError('margin must be >= half_window')

    # Context manager so the handle is released even if parsing fails.
    with open(path, 'rb') as handle:
        df = pd.read_csv(handle)

    length = len(df)
    if length < 2 * margin:
        raise ValueError(
            'need at least %d rows to pick a timestamp, got %d' % (2 * margin, length)
        )

    # Stay away from the head and tail so the full window is always available.
    rand_idx = random.randint(margin, length - margin)

    # Read the scalar straight from the column: going through a row Series
    # would upcast an integer timestamp to float when KPI is a float column.
    timestamp = df.iloc[rand_idx, 0]

    window = np.asarray(
        df['KPI'].iloc[rand_idx - half_window: rand_idx + half_window], dtype=float
    )
    total_anomaly = int(window.mean() + n_sigma * window.std())
    return timestamp, total_anomaly
# Run compute_anomaly once on the KPI CSV; the randomly chosen timestamp and its
# anomaly size are bound to the module-level names `timestamp` and `total_anomaly`.
path = '../data/KPI_week2.csv'
timestamp, total_anomaly = compute_anomaly(path)

In [3]:
# 分发异常
def distribute_anomaly(timestamp, root_cause, total_anomaly,
                       origin_dir='../data/origin/', anomaly_dir='../data/anomaly/'):
    """Spread ``total_anomaly`` evenly over the leaves covered by ``root_cause``.

    Reads ``<origin_dir>/<timestamp>.csv`` (no header; columns i, e, c, p, l,
    kpi), adds ``int(total_anomaly / number_of_matches)`` to the ``kpi`` of
    every matching leaf and writes the result, again without header or index,
    to ``<anomaly_dir>/<timestamp>.csv``.

    A leaf matches a cause when every attribute value of the cause appears
    among the leaf's five attribute values. A leaf matched by several causes
    is listed once per match and therefore receives the share once per match.

    Args:
        timestamp: identifies the input and output file name.
        root_cause: list of causes, each a list of attribute values.
        total_anomaly: amount to distribute over the matching leaves.
        origin_dir: directory holding the untouched per-timestamp files.
        anomaly_dir: directory receiving the modified files; created if it
            does not exist.

    If no leaf matches, nothing is added and the data is written unchanged
    (previously this case raised ZeroDivisionError).
    """
    print(timestamp, root_cause)
    columns = ['i', 'e', 'c', 'p', 'l', 'kpi']
    attr_cols = columns[:-1]
    file_name = str(timestamp) + '.csv'

    # Let pandas open and close the file itself instead of leaking a handle.
    df = pd.read_csv(os.path.join(origin_dir, file_name), names=columns)

    # Build each leaf's attribute set once rather than once per cause.
    leaf_sets = [
        set(attrs) for attrs in df[attr_cols].itertuples(index=False, name=None)
    ]
    # Indices of descendant leaves, cause by cause (duplicates kept on purpose).
    locs = [
        idx
        for cause in root_cause
        for idx, attrs in zip(df.index, leaf_sets)
        if set(cause).issubset(attrs)
    ]
    print(locs)

    if locs:
        anomaly = int(total_anomaly / len(locs))
        # In-place scalar updates replace the former concat-per-row rebuild.
        for idx in locs:
            df.at[idx, 'kpi'] += anomaly

    os.makedirs(anomaly_dir, exist_ok=True)
    df.to_csv(os.path.join(anomaly_dir, file_name), header=False, index=False)

In [16]:
# 一共5层，每层1~5种root cause,共25种
def find_root_causes(root_cause_dict, layers=5, kinds=5, kpi_path='../data/KPI_week2.csv'):
    """Generate ``layers * kinds`` synthetic anomaly cases and record their root causes.

    For every depth ``1..layers`` (number of attributes per cause) and every
    count ``1..kinds`` (number of causes in the case) this:

    1. draws a random timestamp and anomaly size with ``compute_anomaly``,
    2. builds each cause from a random leaf row of that timestamp's origin
       file, keeping the values of ``depth`` randomly chosen attribute columns,
    3. stores the list of causes in ``root_cause_dict[timestamp]``,
    4. injects the anomaly with ``distribute_anomaly``.

    Args:
        root_cause_dict: dict filled in place, timestamp -> list of causes.
        layers: largest number of attributes per cause; at most 5 because the
            leaves have five attribute columns (``random.sample`` raises
            ValueError beyond that).
        kinds: largest number of causes per case.
        kpi_path: KPI CSV handed to ``compute_anomaly``.

    Raises:
        RuntimeError: if no unused timestamp is found after repeated draws.
    """
    attr_cols = ['i', 'e', 'c', 'p', 'l']
    columns = attr_cols + ['kpi']
    max_draws = 100

    for depth in range(1, layers + 1):
        for n_causes in range(1, kinds + 1):
            # Timestamps are drawn at random; a repeat would silently overwrite
            # an earlier case (dict entry and anomaly file), so redraw instead.
            for _ in range(max_draws):
                timestamp, total_anomaly = compute_anomaly(kpi_path)
                if timestamp not in root_cause_dict:
                    break
            else:
                raise RuntimeError(
                    'could not draw an unused timestamp in %d attempts' % max_draws
                )

            # Passing the path lets pandas close the file when it is done.
            df = pd.read_csv('../data/origin/' + str(timestamp) + '.csv', names=columns)

            # Combine attribute values of random leaves into the root causes.
            root_cause = []
            for _ in range(n_causes):
                row = random.randrange(len(df))
                picked = random.sample(attr_cols, depth)
                root_cause.append([df.at[row, col] for col in picked])
            root_cause_dict[timestamp] = root_cause

            # Inject the anomaly into the leaves covered by these causes.
            distribute_anomaly(timestamp, root_cause, total_anomaly)
            
# Module-level mapping timestamp -> list of root causes; find_root_causes fills
# it in place (and, through distribute_anomaly, writes the per-timestamp files).
root_cause_dict = {}
find_root_causes(root_cause_dict)

1536795900000 [['e03']]
[16, 36, 135, 177, 186, 198, 202, 211, 266, 281, 294, 317, 328, 348, 404, 410, 417, 426, 469, 511, 553, 595, 618, 699, 721, 732, 739, 907, 933, 961, 983, 998, 1000, 1026, 1036, 1070, 1082, 1134, 1141, 1160, 1185, 1206, 1222, 1267, 1315, 1365, 1374, 1413, 1428, 1429, 1464, 1480, 1520, 1552, 1567, 1615, 1660, 1666, 1697, 1721, 1736, 1765, 1791, 1793, 1829, 1839, 1899, 1965, 1998, 2028, 2075, 2076, 2105, 2106, 2109, 2115, 2117, 2120, 2122, 2126, 2173]
1536447000000 [['e08'], ['e12']]
[6, 10, 11, 13, 21, 30, 37, 38, 46, 75, 78, 83, 84, 87, 92, 93, 94, 96, 103, 108, 109, 119, 123, 139, 150, 157, 178, 183, 188, 190, 191, 196, 197, 209, 216, 243, 246, 248, 250, 252, 256, 258, 259, 267, 278, 290, 292, 293, 296, 307, 316, 317, 318, 321, 322, 326, 333, 334, 345, 359, 373, 374, 380, 390, 393, 403, 409, 415, 418, 429, 432, 434, 439, 443, 459, 463, 466, 467, 470, 486, 500, 501, 505, 526, 533, 544, 560, 561, 572, 585, 587, 589, 593, 621, 622, 631, 632, 641, 647, 654, 658, 659

1536744600000 [['e01'], ['l3'], ['p17'], ['e10'], ['c1']]
[8, 10, 11, 17, 33, 43, 44, 51, 56, 60, 63, 64, 68, 71, 96, 115, 117, 119, 132, 144, 145, 154, 155, 160, 161, 166, 169, 173, 174, 176, 177, 179, 180, 184, 198, 210, 216, 225, 236, 238, 250, 257, 263, 275, 285, 287, 309, 310, 314, 316, 323, 326, 329, 352, 355, 368, 391, 393, 394, 397, 409, 423, 425, 428, 433, 437, 445, 446, 454, 467, 472, 485, 509, 513, 519, 529, 532, 540, 545, 546, 555, 568, 589, 591, 597, 602, 603, 605, 617, 626, 629, 648, 650, 665, 674, 675, 676, 687, 695, 715, 720, 730, 733, 739, 742, 743, 752, 764, 766, 768, 778, 781, 789, 800, 803, 805, 807, 810, 815, 816, 820, 822, 825, 833, 834, 836, 855, 858, 859, 862, 879, 884, 891, 893, 896, 902, 903, 904, 908, 922, 924, 925, 926, 927, 929, 966, 973, 974, 976, 978, 980, 989, 994, 996, 998, 1003, 1005, 1036, 1038, 1041, 1048, 1051, 1062, 1064, 1073, 1076, 1084, 1107, 1120, 1149, 1153, 1156, 1159, 1165, 1167, 1174, 1205, 1211, 1214, 1218, 1224, 1230, 1238, 1239, 1240, 12

1536708000000 [['l3', 'p06']]
[97, 98, 107, 115, 140, 162, 400, 587, 640, 663, 681, 730, 799, 833, 839, 1036, 1135, 1297, 1306, 1430, 1446, 1461, 1579, 1642, 1678, 1707, 1737, 1836, 1926, 2005, 2086, 2098, 2132]
1536540600000 [['e05', 'p16'], ['c1', 'p21']]
[235, 1072, 1252, 1, 153, 186, 203, 337, 433, 438, 463, 505, 627, 657, 686, 701, 772, 782, 807, 1114, 1173, 1406, 1446, 1495, 1497, 1576, 1660, 1681, 1806, 1822, 1876, 1904, 2020, 2053, 2127, 2139, 2168, 2181, 2200]
1536434100000 [['l2', 'e07'], ['l3', 'e10'], ['i47', 'p35']]
[257, 259, 287, 323, 374, 618, 728, 752, 884, 1179, 1200, 7, 9, 12, 13, 24, 80, 81, 86, 87, 101, 110, 114, 115, 138, 152, 155, 172, 173, 174, 183, 195, 197, 251, 270, 299, 302, 328, 329, 357, 359, 360, 369, 389, 408, 415, 475, 476, 477, 509, 512, 541, 542, 543, 544, 549, 573, 575, 600, 604, 623, 627, 630, 638, 653, 703, 729, 739, 761, 772, 774, 779, 786, 787, 793, 833, 865, 886, 889, 890, 920, 921, 924, 925, 949, 955, 960, 987, 989, 990, 992, 1019, 1020, 1036, 

In [17]:
# Echo every timestamp with its root causes, then save the mapping as a
# two-column table (root_cause, timestamp) without the index.
for k, v in root_cause_dict.items():
    print(k, v)
root_cause_set = pd.DataFrame({
    'root_cause': list(root_cause_dict.values()),
    'timestamp': list(root_cause_dict.keys()),
})
root_cause_set.to_csv('../data/timestamps_root_causes.csv', index = None)

1536795900000 [['e03']]
1536447000000 [['e08'], ['e12']]
1536705000000 [['p21'], ['c1'], ['p10']]
1536605100000 [['p08'], ['e04'], ['i06'], ['l3']]
1536744600000 [['e01'], ['l3'], ['p17'], ['e10'], ['c1']]
1536708000000 [['l3', 'p06']]
1536540600000 [['e05', 'p16'], ['c1', 'p21']]
1536434100000 [['l2', 'e07'], ['l3', 'e10'], ['i47', 'p35']]
1536779700000 [['i06', 'l4'], ['p22', 'l3'], ['l3', 'e08'], ['c4', 'e10']]
1536617700000 [['i34', 'e04'], ['e01', 'p12'], ['c1', 'p17'], ['e09', 'p16'], ['l3', 'p08']]
1536867300000 [['e04', 'i43', 'l3']]
1536394500000 [['p19', 'l3', 'c5'], ['e01', 'p08', 'l3']]
1536687600000 [['c1', 'e07', 'l2'], ['c5', 'p19', 'e01'], ['p19', 'e08', 'i13']]
1536614700000 [['i02', 'p11', 'l3'], ['c5', 'i17', 'p07'], ['c5', 'i06', 'e09'], ['c5', 'i39', 'e04']]
1536836700000 [['c1', 'l3', 'i46'], ['c5', 'i17', 'e01'], ['l3', 'c5', 'i16'], ['p05', 'e09', 'i38'], ['i02', 'p35', 'l3']]
1536636900000 [['c3', 'i06', 'p17', 'l3']]
1536544800000 [['c5', 'l3', 'i16', 'p03'], 