In [1]:
from __future__ import division
import networkx as nx
import pickle
import math
import matplotlib.pyplot as plt
import pandas as pd
import json
import pickle
import datetime
import numpy as np

In [2]:
# Load the pickled topology cases; the next cell iterates over Topologies and
# indexes each element by string keys such as 'y' and 'root_cause'.
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so
# only run this cell against a data file from a trusted source.
# NOTE(review): the file name is spelled 'topoloies' -- confirm it matches the
# file on disk before "correcting" the path.
with open('../data/issue_topoloies.pkl', 'rb') as f:
    Topologies = pickle.load(f)

In [3]:
# Keep only the cases that are labelled y == 1 and carry a specific root cause
# (a 'root_cause' entry that is present and is not the catch-all value 'All').
all_cases = []
for case in Topologies:
    if not case['y'] == 1:
        continue
    if 'root_cause' not in case:
        continue
    if case['root_cause'] != 'All':
        all_cases.append(case)

# A showcase using the clues introduced in the paper

In [4]:
def get_weight_from_final_fail_change(topology):
    """Derive anomaly weights from the rise in each edge's final fail count.

    For every edge in ``topology['edges_info']`` the anomaly is the last value
    of ``edge['FailCount']`` minus the minimum of ``edge['YesterFailCount']``,
    floored at 0.

    Returns a dict containing:
      '<src>-<des>'            anomaly of that edge (a repeated pair keeps the
                               value of the last such edge)
      '<node>_self'            max incoming anomaly minus max outgoing anomaly
                               of the node, floored at 0
      '<node>_back_1'          sum of the anomalies of the node's outgoing edges
      '<node>_back_2'          max anomaly among the node's incoming edges
      '<node>_has_no_outedge'  True when the node is the 'src' of no edge

    Every edge endpoint must be listed in ``topology['nodes']``; otherwise the
    per-node lookup raises KeyError.
    """
    # Positions inside each node's running statistics list.
    OUT_SUM, IN_MAX, OUT_COUNT, OUT_MAX = 0, 1, 2, 3

    weight = {}
    # CLUES PROVIDED IN THE PAPER
    stats = {name: [0, 0, 0, 0] for name in topology['nodes']}

    for edge in topology['edges_info']:
        # Increase of today's final fail count over yesterday's minimum, never negative.
        rise = max(edge['FailCount'][-1] - min(edge['YesterFailCount']), 0)
        src, des = edge['src'], edge['des']
        weight[src + '-' + des] = rise

        src_stats = stats[src]
        des_stats = stats[des]
        src_stats[OUT_SUM] += rise
        des_stats[IN_MAX] = max(rise, des_stats[IN_MAX])
        src_stats[OUT_COUNT] += 1
        src_stats[OUT_MAX] = max(rise, src_stats[OUT_MAX])

    for name, node_stats in stats.items():
        weight[name + '_self'] = max(node_stats[IN_MAX] - node_stats[OUT_MAX], 0)
        weight[name + '_back_1'] = node_stats[OUT_SUM]
        weight[name + '_back_2'] = node_stats[IN_MAX]
        weight[name + '_has_no_outedge'] = (node_stats[OUT_COUNT] == 0)

    return weight

In [5]:
def get_anomaly_graph(topology, get_weight, logbase=10, backward_factor=0.3, additional_clues=None):
    """Build the weighted directed graph that is later ranked with PageRank.

    Parameters
    ----------
    topology : mapping with 'nodes' and 'edges_info' (each edge has 'src' and 'des').
    get_weight : callable taking ``topology`` and returning a dict with the keys
        '<src>-<des>', '<node>_self', '<node>_back_1', '<node>_back_2' and
        '<node>_has_no_outedge'.
    logbase : base of the logarithm applied to every raw weight as log(w + 1).
    backward_factor : multiplier applied to the log-scaled weight of reverse edges.
    additional_clues : optional mapping {(u, v): raw_weight} of extra edges.

    Edges written, in this order:
      * src -> des for every edge, weight log(w_edge + 1);
      * des -> src for every edge, weight
        backward_factor * log(max(back_1[src] - w_edge, extra) + 1), where
        extra is back_2[src] - w_edge when des has no outgoing edge, else 0;
      * node -> node for every listed node, weight log(self[node] + 1);
      * u -> v for every additional clue, weight log(clue + 1).
    Whenever the edge already exists, the larger of the old and new weight is kept.

    Returns the resulting ``nx.DiGraph``.
    """
    graph = nx.DiGraph()
    nodes = topology['nodes']
    weights = get_weight(topology)

    def scaled(raw):
        # Log-compress a raw weight; the +1 keeps a zero weight at zero.
        return math.log(raw + 1, logbase)

    def connect(u, v, candidate):
        # Insert u -> v, never lowering the weight of an edge that is already there.
        if graph.has_edge(u, v):
            candidate = max(candidate, graph.get_edge_data(u, v)['weight'])
        graph.add_edge(u, v, weight=candidate)

    for edge in topology['edges_info']:
        src = edge['src']
        des = edge['des']
        own = weights[src + '-' + des]

        connect(src, des, scaled(own))

        # Reverse edge: what src sends elsewhere (and, when des has no outgoing
        # edge, what src receives) beyond this edge's own anomaly.
        # The referenced paper originally used backward_factor * weight[src-des] here.
        elsewhere = weights[src + '_back_1'] - own
        if weights[des + '_has_no_outedge']:
            upstream = weights[src + '_back_2'] - own
        else:
            upstream = 0
        connect(des, src, backward_factor * scaled(max(elsewhere, upstream)))

    for node in nodes:
        connect(node, node, scaled(weights[node + '_self']))

    if additional_clues is not None:
        for pair in additional_clues:
            connect(pair[0], pair[1], scaled(additional_clues[pair]))

    return graph

In [6]:
# Predict a root cause for every case: build its anomaly graph, rank the nodes
# with PageRank (no personalization vector is passed) and keep the top node.
for case in all_cases:
    anomalyGraph = get_anomaly_graph(
        case,
        get_weight_from_final_fail_change,
        logbase=10,
        backward_factor=0.3,
    )

    anomaly_score = nx.pagerank(anomalyGraph)
    # Highest score first; the stable sort keeps tied nodes in their original order.
    anomaly_score_sorted = sorted(
        anomaly_score.items(),
        key=lambda scored: scored[1],
        reverse=True,
    )

    top_node, _top_score = anomaly_score_sorted[0]
    case['pred'] = top_node

In [7]:
# Flag each case as right when its predicted node equals the labelled root cause.
for case in all_cases:
    predicted, labelled = case['pred'], case['root_cause']
    case['right'] = predicted == labelled

In [8]:
# Tally how many predictions were right versus wrong.
right_flags = pd.Series([case['right'] for case in all_cases])
right_flags.value_counts()

True     376
False     26
dtype: int64

In [9]:
# Top-1 accuracy: share of cases whose predicted node equals the labelled root cause.
# Computed from all_cases instead of retyping the counts printed by the previous
# cell, so the figure stays correct when the data or the scoring changes.
n_right = sum(1 for case in all_cases if case['right'])
# float() keeps this a true division regardless of interpreter version;
# an empty case list yields NaN instead of raising ZeroDivisionError.
n_right / float(len(all_cases)) if all_cases else float('nan')

0.9353233830845771