In [1]:
#import tensorflow as tf
import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import chi2
from math import floor
from sklearn.svm import LinearSVC
import operator
from functools import reduce
from Utilities.G2Dependency import *
from Utilities.ClassicMM import *
from ipynb.fs.defs.GraphReader import GraphReader
import sys
import os
from random import shuffle
from multiprocessing import Semaphore
import time

In [2]:
import multiprocessing.pool
from contextlib import closing
from functools import partial

class NoDaemonProcess(multiprocessing.Process):
    # make 'daemon' attribute always return False
    def _get_daemon(self):
        return False
    def _set_daemon(self, value):
        pass
    daemon = property(_get_daemon, _set_daemon)

# We sub-class multiprocessing.pool.Pool instead of multiprocessing.Pool
# because the latter is only a wrapper function, not a proper class.
class Pool(multiprocessing.pool.Pool):
    Process = NoDaemonProcess

In [3]:
class MMPC():
    
    def init(self,s):
        global semaphore
        semaphore = s
        return
    
   
    
    
    def __init__(self, alpha):
        
        self._alpha = alpha
        self._arrX= None
        return
        
    def fit(self, X):
        """
        Fit training dataset X
        """
        x_heads = []
        for i in range(0,len(X.keys())):
            max_v=1
            for row in X[X.keys()[i]]:
                if (row>max_v-1):
                    max_v+=row-(max_v-1)
            x_heads.append("Node"+str(i+1)+"@"+str(max_v))
        X.columns = x_heads
        
        self._arrX = arrayX(X)
        
        print("Finished fitting of X in MMPC object")
        return

        
    def evaluate(self, index):
        start = time.clock()
        CPC=[]
        alpha = self._alpha
        X = self._arrX
        TargetNode = [x for x in self._arrX if x['name'].split('@')[0] == ("Node"+str(index+1))][0]
        Universe = arrayUniverse(TargetNode['name'], self._arrX)
        
        
        CPC=[]
        print("Entering Phase I")
        print("MMPC_beggining: \n"+str(len(Universe)))
        while len(Universe)>0:
            t_start = time.time()
            CPC_old = list(CPC) # copy
            maxminheur=self.__mm_heuristic(TargetNode,CPC,list(Universe),X,alpha)
            F = maxminheur[0]
            assocF = maxminheur[1]
            Universe = maxminheur[2]
            if assocF > 0:
                CPC.append(F)
                indF=Universe.index(F)
                Universe.pop(indF)
            #if (len(CPC)==len(CPC_old)) or (len(CPC)>0.3*(len(Universe)-1)):
            if (len(CPC)==len(CPC_old)):
                break
            print("\nUniverse actual size:")
            print(len(Universe))
            print("CPC actual size:")
            print(len(CPC))
            print("CPC contents:")
            print([x['name'] for x in CPC])
            print("Time elapsed was:",time.time()-t_start)

        # Phase 2: Backward
        print("\nEntering Phase II")
        CPC=CPC[::-1]
        if len(CPC)>1:
            Z=list(CPC)
            for i in range(len(CPC)-1,-1,-1):
                # index is i
                Z.pop(i)
                #print("Analyzing D-separator for ",CPC[i]['name'])
                if self.__exist_dseparator(TargetNode,CPC[i],Z,X,alpha)[0] == True:
                    #print("it did exist! removing from cpc.")
                    CPC.pop(i)
        stop = time.clock()
        results = {}
        results['CPC'] = CPC
        results['time'] = stop-start
        
        return results
        
    def __exist_dseparator(self, TargetNode,Xi, Z, X, alpha):
        flagExist = False
        dsepSet=[]
        #counter=0
        #print_names(Z)
        for i in range((2**len(Z))-1,0,-1): # modded 20190226 00:00
            IDsubsetZ_dec = i
            IDsubsetZ_bin = bin(IDsubsetZ_dec)
            subsetZ = getZsubset(IDsubsetZ_bin,Z)
            # no cache
            #print("from exist dseparator")
            dep = self.__get_g2dep(TargetNode,Xi,subsetZ, alpha)
            #print(subsetZ)
            #print(dep)
            if (dep==0):
                flagExist = True
                dsepSet = subsetZ
                break
        #print("Module exist d-separator: ",counter)
        return [flagExist,dsepSet]

In [4]:
class MMPC(MMPC):
    def __mm_heuristic(self, TargetNode, CPC, Universe, X, alpha):
        F=[]
        assocF=-1
        Z = CPC
        fixedCondVars = []
        if (len(CPC)>0):
            Z = CPC[0:-1]           # all but the last one   
            fixedCondVars = [CPC[-1]] # we use last one
        for i in range(len(Universe)-1,-1,-1):
            if (len(Universe[i])==0):
                continue
            assoc = self.__min_assoc(TargetNode,Universe[i],Z,fixedCondVars,X,alpha)
            if (assoc>assocF):
                assocF = assoc
                F = Universe[i]
            if (assoc==0):
                Universe.pop(i)
        return [F,assocF,Universe]
    
    def __get_g2dep(self, TargetNode, Xi, fixedCondVars, alpha):
        """
        Instantiate new G2Dep object to calculate the dependency
        between two atributes and any conditional variables
        selected
        """
        
        old_stdout = sys.stdout
        sys.stdout = open(os.devnull, "w") # muting output
        
        G2Dep_object = G2Dependency(self._alpha, len(self._arrX[0]['data']))
        
        sys.stdout = old_stdout # unmuting output
        return G2Dep_object.dependency(TargetNode, Xi, fixedCondVars, alpha)
    
    
    def Dep_min_assoc(self,params_arr):
        lesser = 999
        counter = 0
        for e in params_arr:
            #print(len(e))
            if (semaphore.get_value()==1):
                result = self.__get_g2dep(*e)
                counter += 1
                if result == 0:
                    semaphore.acquire()
                    lesser = result
                    semaphore.release()
                    semaphore.release()
                    return [counter,result]
                else:
                    if lesser>result:
                        lesser = result
            else:
                return [counter,lesser]
        return [counter,lesser]
    
    
    
    def __min_assoc(self, TargetNode, Xi,Z, fixedCondVars, X, alpha):
        min_assoc=999
        if len(Z)==0:
            min_assoc = self.__get_g2dep(TargetNode, Xi, fixedCondVars, alpha)
            subsetZ_min_assoc = fixedCondVars        
        else:
            #print(2**len(Z)-1)
            
            if (len(Z)>=3):
                n_processes = 4
                s = Semaphore(1)
                p = Pool(n_processes,initializer=self.init,initargs=(s,))
                params_arr = [[TargetNode,Xi,fixedCondVars+getZsubset(bin(index),Z),alpha] for index in list(range(1,2**len(Z)))]
                batch_arr = []
                step = int(2**len(Z)/n_processes)
                for n in range(0,n_processes):
                    batch_arr.append(params_arr[n:n+step])
                results = p.map(partial(self.Dep_min_assoc),batch_arr)
                p.close()
                p.join()
                #print(results)
                for r in results:
                    if r[1]>min_assoc:
                        min_assoc = r[1]
                        if min_assoc == 0:
                            return min_assoc
                return min_assoc
            
            
            
            
            for IDsubsetZ_dec in range(0,2**len(Z)):
                IDsubsetZ_bin = bin(IDsubsetZ_dec)
                subsetZ = getZsubset(IDsubsetZ_bin,Z)            
                subsetZ_assoc=self.__get_g2dep(TargetNode, Xi, fixedCondVars+subsetZ,alpha)
                #counter+=1
                #print(subsetZ_assoc[IDsubsetZ_dec])
                if subsetZ_assoc < min_assoc:
                    min_assoc = subsetZ_assoc
                    subsetZ_min_assoc = fixedCondVars + subsetZ
                    if (min_assoc==0):
                        break
        return min_assoc
    
    def shuffle_rows(self):
        shuffle(self._arrX)
        
        return

In [5]:
MMPC_model = MMPC(0.05)

In [6]:
X = pd.read_csv("/home/a20114261//sdelrio/alarm_datasets/Alarm10/Alarm10_s500_v7.txt",delimiter='  ',header=None, engine='python')

In [11]:
len(X.columns)

370

In [7]:
MMPC_model.fit(X)

Finished fitting of X in MMPC object


In [8]:
def mmpc_parallel(params):
    MMPC_mode, node_index = params
    return MMPC_model.evaluate(node_index)

In [13]:
%%capture
t_start = time.time()
cpcs = Pool(4).map(mmpc_parallel, [[MMPC_model,index] for index in list(range(1,len(X.columns)))])
t_finish = time.time()-t_start

In [15]:
t_finish/60

52.1971186876297

In [10]:
cpcs[0]

{'CPC': [{'name': 'Node21@4',
   'data': [1,
    0,
    0,
    0,
    0,
    0,
    1,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    1,
    0,
    0,
    0,
    3,
    0,
    0,
    0,
    0,
    0,
    0,
    3,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
   

In [17]:
%%time
cpc = MMPC_model.evaluate(1)
print("Results")
print(len([x['name'] for x in cpc['CPC']]))
print([x['name'] for x in cpc['CPC']])

Entering Phase I
MMPC_beggining: 
369

Universe actual size:
8
CPC actual size:
1
CPC contents:
['Node27@2']
Time elapsed was: 0.5831630229949951

Universe actual size:
7
CPC actual size:
2
CPC contents:
['Node27@2', 'Node29@3']
Time elapsed was: 0.02416825294494629

Universe actual size:
6
CPC actual size:
3
CPC contents:
['Node27@2', 'Node29@3', 'Node25@4']
Time elapsed was: 0.12342309951782227

Universe actual size:
5
CPC actual size:
4
CPC contents:
['Node27@2', 'Node29@3', 'Node25@4', 'Node1@4']
Time elapsed was: 0.07274436950683594
[[2, 0.8702538974028231], [2, 0.8904308292974297], [2, 1], [2, 1]]
[[2, 0.9994769917006887], [2, 1], [2, 1], [2, 1]]
[[2, 0.9481386552286999], [2, 1], [2, 1], [2, 1]]
[[2, 1], [2, 1], [2, 1], [2, 1]]
[[2, 1], [2, 1], [2, 1], [2, 1]]

Universe actual size:
4
CPC actual size:
5
CPC contents:
['Node27@2', 'Node29@3', 'Node25@4', 'Node1@4', 'Node239@2']
Time elapsed was: 0.7349390983581543
[[4, 0.9915331238199223], [4, 0.9966861767199289], [4, 0.9966861767

In [16]:
cpc

dict_keys(['CPC', 'time'])

In [32]:
%%time
cpc = MMPC_model.evaluate(1)
print("Results")
print(len([x['name'] for x in cpc]))
print([x['name'] for x in cpc])

Entering Phase I
MMPC_beggining: 
369

Universe actual size:
14
CPC actual size:
1
CPC contents:
['Node29@3']
Time elapsed was: 3.107295274734497

Universe actual size:
12
CPC actual size:
2
CPC contents:
['Node29@3', 'Node27@2']
Time elapsed was: 0.2096247673034668

Universe actual size:
11
CPC actual size:
3
CPC contents:
['Node29@3', 'Node27@2', 'Node26@3']
Time elapsed was: 0.5045194625854492

Universe actual size:
10
CPC actual size:
4
CPC contents:
['Node29@3', 'Node27@2', 'Node26@3', 'Node25@4']
Time elapsed was: 1.5386831760406494

Universe actual size:
9
CPC actual size:
5
CPC contents:
['Node29@3', 'Node27@2', 'Node26@3', 'Node25@4', 'Node23@4']
Time elapsed was: 4.577632904052734

Universe actual size:
8
CPC actual size:
6
CPC contents:
['Node29@3', 'Node27@2', 'Node26@3', 'Node25@4', 'Node23@4', 'Node1@4']
Time elapsed was: 8.32193636894226

Universe actual size:
7
CPC actual size:
7
CPC contents:
['Node29@3', 'Node27@2', 'Node26@3', 'Node25@4', 'Node23@4', 'Node1@4', 'Node

KeyboardInterrupt: 