In [1]:
# Standard-library names used by RWPC (time, random, ceil) are imported
# explicitly so the class does not depend on the star imports re-exporting them.
import random
import time
from math import ceil

import numpy as np
import pandas as pd

from Utilities.G2Dependency import *
from Utilities.ClassicMM import *
from ipynb.fs.defs.RandomWalk import RandomWalk
from ipynb.fs.defs.GraphReader import GraphReader

In [2]:
class RWPC():
    """
    Candidate parents-and-children (CPC) search for a single target node.

    A ``RandomWalk`` model proposes candidate nodes (growing phase) and a
    conditional-independence filter based on the fitted G2 dependency object
    removes candidates for which a separating subset is found (pruning phase).

    Nodes are dictionaries carrying at least a ``'name'`` key of the form
    ``"Node<k>@<suffix>"``; only the part before ``'@'`` is used to match the
    names returned by the random walk.
    """

    def __init__(self, alpha, delta, max_neighbours, n_iter):
        """
        Parameters
        ----------
        alpha : forwarded to ``RandomWalk`` and used as the ``alpha`` argument
            of every dependency test.
        delta, max_neighbours : forwarded unchanged to ``RandomWalk``.
        n_iter : iteration count handed to ``RandomWalk.evaluate_node_index``.
        """
        self._RW = RandomWalk(alpha, delta, max_neighbours)
        self._alpha = alpha
        self._G2Dep = None   # set by fit()
        self._arrX = None    # set by fit()
        self._n_iter = n_iter

    def fit(self, X):
        """
        Fit training dataset X: train the random walk, keep a reference to its
        G2 dependency object and build the node array with ``arrayX``.
        """
        self._RW.fit(X)
        self._G2Dep = self._RW.get_G2Dep()
        self._arrX = arrayX(X)

    def __rw_node_v2(self, t_index, t_iter, threshold, CPC):
        """
        Split ``CPC`` according to the random-walk scores of node ``t_index``.

        A node is selected when its bare name appears in the scores returned by
        ``RandomWalk.evaluate_node_index`` with a value strictly greater than
        ``t_iter * golden_ratio / len(self._arrX)``.

        Parameters
        ----------
        t_index : zero-based node index; scores are read under ``"Node<t_index+1>"``.
        t_iter : iteration count passed to the random walk.
        threshold : unused; kept so existing call sites stay valid.
        CPC : list of node dicts to split. It is NOT modified.

        Returns
        -------
        list
            ``[selected, remaining]``. ``selected`` is in reverse ``CPC`` order
            (the order the original pop-from-the-end loop produced);
            ``remaining`` keeps ``CPC`` order.
        """
        # Random-walk results are keyed by bare node name, so drop the "@..." part.
        candidate_names = {node['name'].split('@')[0] for node in CPC}
        evaluation = self._RW.evaluate_node_index(t_index, t_iter)
        scores = evaluation["Node" + str(t_index + 1)]

        g_ratio = (1 + 5 ** (1 / 2)) / 2
        cutoff = t_iter * g_ratio / len(self._arrX)
        selected_names = {
            name for name, score in scores.items()
            if name in candidate_names and score > cutoff
        }

        selected = [node for node in reversed(CPC)
                    if node['name'].split('@')[0] in selected_names]
        remaining = [node for node in CPC
                     if node['name'].split('@')[0] not in selected_names]
        return [selected, remaining]

    def __exist_dseparator(self, TargetNode, Xi, Z, X, alpha):
        """
        Look for a subset of ``Z`` for which the dependency test between
        ``TargetNode`` and ``Xi`` returns 0.

        Returns
        -------
        list
            ``[True, subset]`` for the first subset whose test returns 0,
            otherwise ``[False, []]``. ``X`` is accepted but not used.
        """
        # NOTE(review): the range stops at id 2**len(Z) - 2, so the highest
        # subset id is never tested and nothing at all is tested when Z is
        # empty. Kept as in the original; confirm against getZsubset whether
        # the bound should be 2**len(Z).
        for subset_id in range(0, (2 ** len(Z)) - 1):
            subsetZ = getZsubset(bin(subset_id), Z)
            if self._G2Dep.dependency(TargetNode, Xi, subsetZ, alpha) == 0:
                return [True, subsetZ]
        return [False, []]

    def __classic_filtering_phase(self, TargetNode, CPC, X, alpha):
        """
        Keep the nodes of ``CPC`` for which no separating subset is found.

        Nodes are examined from the last to the first; each one is tested
        against the nodes that have not been examined yet. ``CPC`` itself is
        left untouched (the work is done on a copy).

        Returns
        -------
        list
            ``CPC`` unchanged when it has at most one node, otherwise the kept
            nodes in examination order.
        """
        if len(CPC) <= 1:
            return CPC

        pending = list(CPC)
        filtered_cpc = []
        while pending:
            evaluated_node = pending.pop()
            found, _ = self.__exist_dseparator(TargetNode, evaluated_node, pending, X, alpha)
            if not found:
                filtered_cpc.append(evaluated_node)
        return filtered_cpc

    def __rw_community_selection_phase_v2(self, TargetNode, CPC, X, alpha, n_iter):
        """
        One pruning pass: group ``CPC`` into node sets and filter each set.

        With more than five candidates the sets are built by repeatedly drawing
        a random node and attaching the remaining candidates selected by its
        random walk; otherwise all candidates form a single set. Each set is
        then passed through the classic filtering phase.

        Returns
        -------
        list
            The surviving nodes of every set, or ``CPC`` unchanged when it has
            at most one node.
        """
        if len(CPC) <= 1:
            return CPC

        # Order candidates by their unconditional dependency value, highest first.
        CPC = sorted(CPC, key=lambda x: self._G2Dep.dependency(TargetNode, x, [], alpha), reverse=True)
        size_sets = ceil(np.log(len(CPC)) * 2)
        print("Size set for community selection phase will be of maximum: "+str(size_sets))

        nodes_sets = []
        if len(CPC) > 5:
            CPC_copy = list(CPC)
            while len(CPC_copy) > 0:
                if len(CPC_copy) == 1:
                    nodes_sets.append([CPC_copy.pop(0)])
                    break

                # NOTE(review): with the standard-library `random`, randint is
                # inclusive, so this yields -1..len-1 and -1 also picks the last
                # node (it is drawn twice as often). Left unchanged because the
                # module bound to `random` is not visible here - confirm.
                rand_index = random.randint(0, len(CPC_copy)) - 1
                # "Node<k>@..." -> zero-based index k-1
                rand_true_index = int(CPC_copy[rand_index]['name'].split('@')[0].split('Node')[1]) - 1
                rand_node = CPC_copy.pop(rand_index)
                rw_results = self.__rw_node_v2(rand_true_index, n_iter, size_sets, CPC_copy)
                nodes_sets.append([rand_node] + rw_results[0])
                CPC_copy = rw_results[1]
        else:
            nodes_sets.append(CPC)

        pre_cpc = []
        for n_set in nodes_sets:
            print([x['name'] for x in n_set])
            if n_set:
                print("n_set of length: "+str(len(n_set)))
            pre_cpc.extend(self.__classic_filtering_phase(TargetNode, n_set, X, alpha))
        return pre_cpc

    def evaluate(self, index):
        """
        Compute the CPC of node ``"Node<index+1>"``.

        The growing phase keeps the nodes selected by the random walk of the
        target; the pruning phase repeats the community selection pass until a
        pass removes nothing.

        Returns
        -------
        dict
            ``{'CPC': list of node dicts, 'time': elapsed process CPU seconds}``.

        Raises
        ------
        RuntimeError
            If ``fit`` has not been called.
        IndexError
            If no fitted node is named ``"Node<index+1>"``.
        """
        if self._arrX is None:
            raise RuntimeError("RWPC.evaluate() requires fit() to be called first")

        # time.clock() was removed in Python 3.8; process_time() reports process
        # CPU time, which is what clock() measured on Unix.
        start = time.process_time()
        X = self._arrX

        target_name = "Node" + str(index + 1)
        matches = [x for x in X if x['name'].split('@')[0] == target_name]
        if not matches:
            raise IndexError("no fitted node named " + target_name)
        TargetNode = matches[0]

        Universe = arrayUniverse(TargetNode['name'], X)
        print("Entering Growing Phase")
        print("MMPC_beggining: \n"+str(len(Universe)))

        CPC = self.__rw_node_v2(index, self._n_iter, -1, Universe)[0]

        print("Entering Pruning Phase")
        print("Size of Micro Universe: "+str(len(CPC)))

        # Repeat pruning until a pass leaves the candidate count unchanged.
        n_deleted = -1
        while n_deleted != 0:
            old_len = len(CPC)
            CPC = self.__rw_community_selection_phase_v2(TargetNode, CPC, X, self._alpha, self._n_iter)
            n_deleted = old_len - len(CPC)

        stop = time.process_time()
        return {'CPC': CPC, 'time': stop - start}

In [3]:
# Load the Alarm10 sample; fields are separated by two spaces and there is no header row.
X = pd.read_csv(
    "/home/a20114261/sdelrio/alarm_datasets/Alarm10/Alarm10_s1000_v4.txt",
    sep='  ',
    header=None,
    engine='python',
)

In [4]:
# Same settings as before, spelled out by parameter name.
RWPC_model = RWPC(
    alpha=0.05,
    delta=0.0,
    max_neighbours=1,
    n_iter=10000,
)

Random Walk successful instantiation with:
-Alpha: 0.05
-Delta: 0.0
-Max Neighbours per run: 1


In [5]:
# Train the random walk on X and build the node array / G2 dependency object used by evaluate().
RWPC_model.fit(X)

Succesful instantiation of Dep object
Succesfully fitted array of 370 columns
and 1000 rows per column.


In [10]:
%%time
t_index = 103
CPC = RWPC_model.evaluate(t_index)

print("==================================")
print("Expected PC: ")
print(graph_reader.getPC(t_index))
print("Result is:")
print([x['name'] for x in CPC['CPC']])
print("Size of results:")
print(len(CPC['CPC']))
print("Distance is:")
print(graph_reader.getDistance(t_index,[x['name'] for x in CPC['CPC']]))

Entering Growing Phase
MMPC_beggining: 
369
Entering Pruning Phase
Size of Micro Universe: 4
Size set for community selection phase will be of maximum: 3
['Node77@3', 'Node66@3', 'Node38@4', 'Node42@3']
n_set of length: 4
Size set for community selection phase will be of maximum: 3
['Node77@3', 'Node66@3', 'Node42@3']
n_set of length: 3
Expected PC: 
['Node77', 'Node102', 'Node259']
Result is:
['Node42@3', 'Node66@3', 'Node77@3']
Size of results:
3
Distance is:
{'distance-mb': 1.0000337768737348, 'distance-pc': 1.0000334097426615}
CPU times: user 3.02 s, sys: 53.7 ms, total: 3.07 s
Wall time: 2.87 s


['Node76@4', 'Node96@4', 'Node78@4', 'Node133@4', 'Node23@4', 'Node39@4']

In [7]:
# Reader for the Alarm10 graph file; the evaluation cell uses it for getPC() and getDistance().
# NOTE(review): this cell sits below the cell that uses `graph_reader`, so a top-to-bottom
# run hits a NameError - it should be moved above the evaluation cell.
graph_reader = GraphReader('/home/a20114261/sdelrio/Alarm10_graph.txt')