In [116]:
# Import libraries
import pgmpy.models
import pgmpy.inference
import numpy as np
from enum import Enum

In [117]:
numSlices = 24

class NodeNames(Enum):
    MITM = "MITM"
    SRM = "SRM"
    UC = "UC"
    UPS = "UPS"
    IMD = "IMD"
    MC = "MC"
    CC = "CC"

PySmile Network

In [118]:
import pysmile
import pysmile_license
import os
import pandas as pd

In [119]:
DEBUG = False

evidenceNodes = [NodeNames.IMD.value, NodeNames.MC.value, NodeNames.CC.value]

classNodesHandles = dict()

In [120]:
def print_node_info(net, node_handle):
    print("Node id/name: " + net.get_node_id(node_handle) + "/" +
    net.get_node_name(node_handle))
    print(" Outcomes: " + " ".join(net.get_outcome_ids(node_handle)))
    parent_ids = net.get_parent_ids(node_handle)
    if len(parent_ids) > 0:
        print(" Parents: " + " ".join(parent_ids))
    child_ids = net.get_child_ids(node_handle)
    if len(child_ids) > 0:
        print(" Children: " + " ".join(child_ids))
    print_cpt_matrix(net, node_handle)
    
def print_cpt_matrix(net, node_handle):
    cpt = net.get_node_definition(node_handle)
    parents = net.get_parents(node_handle)
    dim_count = 1 + len(parents)
    dim_sizes = [0] * dim_count
    for i in range(0, dim_count - 1):
        dim_sizes[i] = net.get_outcome_count(parents[i])
    dim_sizes[len(dim_sizes) - 1] = net.get_outcome_count(node_handle)
    coords = [0] * dim_count
    for elem_idx in range(0, len(cpt)):
        index_to_coords(elem_idx, dim_sizes, coords)
        outcome = net.get_outcome_id(node_handle, coords[dim_count - 1])
        out_str = " P(" + outcome
        if dim_count > 1:
            out_str += " | "
            for parent_idx in range(0, len(parents)):
                if parent_idx > 0:
                    out_str += ","
                parent_handle = parents[parent_idx]
                out_str += net.get_node_id(parent_handle) + "=" + \
                net.get_outcome_id(parent_handle, coords[parent_idx])
        prob = cpt[elem_idx]
        out_str += ")=" + str(prob)
        print(out_str)
    
        
def index_to_coords(index, dim_sizes, coords):
    prod = 1
    for i in range(len(dim_sizes) - 1, -1, -1):
        coords[i] = int(index / prod) % dim_sizes[i]
        prod *= dim_sizes[i]

def pint_time_cpt_marix(net, nodeHandle):
    timeCPT = net.get_node_temporal_definition(nodeHandle, 1)
    
        
def plot_time_CPT(net, nodeHandle):    
    cpt = net.get_node_temporal_definition(nodeHandle, 1)
    print(len(cpt))
    print("###")
    
def print_net_info(net, unrolled = True):
    for n in net.get_all_nodes():
        print_node_info(net, n)
        if not unrolled and net.get_node_id(n) == NodeNames.UPS.value:
            plot_time_CPT(net, n)
            
def calc_stat(confMatrix, outcome, type='P'):
    TP = confMatrix[outcome][outcome]
    FP = 0
    TN = 0
    FN = 0
    for i in range(0, len(confMatrix)):
        for j in range(0, len(confMatrix[i])):
            if i == outcome and j != outcome:
                FP += confMatrix[i][j]
            if j == outcome and i != outcome:
                FN += confMatrix[i][j]
            if i != outcome and j != outcome:
                TN += confMatrix[i][j] 
                
    if type == 'P':
        if TP + FP == 0: return float('nan')
        return TP / (TP + FP)
    if type == 'A':
        if TP + TN + FP + FN == 0: return float('nan')
        return  (TP+TN) / (TP + TN + FP + FN)
    if type == 'R':
        if TP + FN == 0: return float('nan')
        return TP / (TP + FN)
    if type == 'F':
        if (2*TP)+FP+FN == 0: return float('nan')
        return (2*TP)/((2*TP)+FP+FN)        

def print_validator_results(net, originalSliceCount, validator, nodeId): 
    nodeHandle = classNodesHandles[nodeId]
    outcomeCount = net.get_outcome_count(nodeHandle)
    accMtrx = np.zeros((outcomeCount, originalSliceCount))
    precMtrx = np.zeros((outcomeCount, originalSliceCount))
    recMtrx = np.zeros((outcomeCount, originalSliceCount))
    fMtrx = np.zeros((outcomeCount, originalSliceCount))
    
    
    for slice in range(1, originalSliceCount):
        if DEBUG: print("### Slice " + str(slice) + " ###")
        nodeHandle = classNodesHandles[nodeId + "_" + str(slice)]
        cm = validator.get_confusion_matrix(nodeHandle)
        for i in range(0, outcomeCount):
            acc = calc_stat(cm, i, 'A')
            accMtrx[i][slice] = acc
            prec = calc_stat(cm, i, 'P')
            precMtrx[i][slice] = prec
            rec = calc_stat(cm, i, 'R')
            recMtrx[i][slice] = rec
            f = calc_stat(cm, i, 'F')
            fMtrx[i][slice] = f
            
            if DEBUG:
                print("Accuracy for " + nodeId + str(i) + ": " + str(acc))
                print("Precision for " + nodeId + str(i) + ": " + str(prec))
                print("Recall for " + nodeId + str(i) + ": " + str(rec))    
        if DEBUG:    
            print("** Confusion Matrix **")
            for i in range(0, outcomeCount):
                print(cm[i])
            print("")
    
    for i in range(0, outcomeCount):
        avgAcc = np.nanmean(accMtrx[i])
        print("Average Accuracy for " + nodeId + str(i) + ": " + str(avgAcc))
        avgPrec = np.nanmean(precMtrx[i])
        print("Average Precision for " + nodeId + str(i) + ": " + str(avgPrec))
        avgRec = np.nanmean(recMtrx[i])
        print("Average Recall for " + nodeId + str(i) + ": " + str(avgRec))
        avgF = np.nanmean(fMtrx[i])
        print("Average F-score for " + nodeId + str(i) + ": " + str(avgF))
        print("") 
    

In [123]:
# Create and read the DBN from file
net = pysmile.Network()
ds = pysmile.learning.DataSet()

net.read_file(os.getcwd() + "/../../../Genie-DBN/DBN-MITM.xdsl")
df = pd.read_csv(os.getcwd() + "/outTest.csv")
ds.read_pandas_dataframe(df)
   
unrolledNet = net.unroll().unrolled     
matching = ds.match_network(unrolledNet)
validator = pysmile.learning.Validator(unrolledNet, ds, matching)
# Set class nodes (those that will not be considered as evidence nodes)
for elem in NodeNames.__members__:
    if elem not in evidenceNodes:
        classNodesHandles[elem] = unrolledNet.get_node(elem)
        validator.add_class_node(classNodesHandles[elem])
        for slice in range(1, net.get_slice_count()):
            elemCat = elem + "_" + str(slice)
            classNodesHandles[elemCat] = unrolledNet.get_node(elemCat)
            validator.add_class_node(classNodesHandles[elemCat])
# Test the predctions on the class nodes            
validator.test()
print_validator_results(unrolledNet, net.get_slice_count(), validator, NodeNames.UPS.value)


if DEBUG:
    print_net_info(unrolledNet, unrolled=True)


    

Average Accuracy for UPS0: 0.46996000000000004
Average Precision for UPS0: 0.9582255282449331
Average Recall for UPS0: 0.4678112160573201
Average F-score for UPS0: 0.5190996251179999

Average Accuracy for UPS1: 0.46996000000000004
Average Precision for UPS1: 0.003221598706078759
Average Recall for UPS1: 0.875
Average F-score for UPS1: 0.006402630039061074



# On training set

Average Accuracy for UPS0: 0.7604799999999998
Average Precision for UPS0: 0.7003425965853435
Average Recall for UPS0: 0.6701652713135376
Average F-score for UPS0: 0.6781124034589371

Average Accuracy for UPS1: 0.7604799999999998
Average Precision for UPS1: 0.3861519273016367
Average Recall for UPS1: 0.43121426837065663
Average F-score for UPS1: 0.3913844159722906