In [2]:
from __future__ import division
########import##########
from multiprocessing.dummy import Pool
from collections import namedtuple
import numpy as np
import math
import datetime
import h5py
import sys
import networkx as nx

  from ._conv import register_converters as _register_converters


In [7]:
def _smallest_positive_eigenvalue(matrix, tol=1e-10):
    """Return the smallest eigenvalue of a symmetric matrix that exceeds ``tol``.

    Returns 0 when no eigenvalue is larger than ``tol``.
    """
    # eigvalsh is the symmetric solver: it returns real eigenvalues already
    # sorted in ascending order and does not compute eigenvectors.
    eigenvalues = np.linalg.eigvalsh(matrix)
    above_tol = eigenvalues[eigenvalues > tol]
    return above_tol[0] if above_tol.size else 0


def SuperDiffusionHappens(N, p1, p2, iteration):
    """Generate random two-layer multiplexes labeled with super-diffusion presence.

    Each layer is an Erdos-Renyi graph. An instance is labeled 1 when the
    characteristic eigenvalue of the averaged Laplacian 0.5 * (L1 + L2) is
    greater than or equal to that of both single layers, and 0 otherwise.

    Args:
        N: number of nodes per layer.
        p1: connection probability used for layer one.
        p2: connection probability used for layer two.
        iteration: number of random instances to generate.

    Returns:
        A list with ``iteration`` tuples ``(p1, p2, Adj1, Adj2, SD)`` where
        ``Adj1``/``Adj2`` are the dense adjacency matrices of the two layers
        and ``SD`` is 1 if super-diffusion is present, 0 otherwise.
    """
    # list that will contain the labeled instances
    data = []

    for _ in range(iteration):
        # create the multiplex (layer one is drawn first, then layer two)
        G1 = nx.erdos_renyi_graph(N, p1)
        G2 = nx.erdos_renyi_graph(N, p2)

        L1 = nx.laplacian_matrix(G1).toarray()
        L2 = nx.laplacian_matrix(G2).toarray()
        # Laplacian of the superposition of the two layers
        LSup = 0.5 * (L1 + L2)

        # NOTE(review): the value used for each layer is the smallest
        # eigenvalue above 1e-10. For a disconnected layer this is not the
        # second-smallest eigenvalue (which would be ~0) - confirm this is
        # the intended definition of algebraic connectivity.
        lambda1 = _smallest_positive_eigenvalue(L1)
        lambda2 = _smallest_positive_eigenvalue(L2)
        lambdaSup = _smallest_positive_eigenvalue(LSup)

        # SD == 1 -> super-diffusion is present; SD == 0 otherwise
        label = 1 if lambdaSup >= max(lambda1, lambda2) else 0
        data.append((
            p1,
            p2,
            nx.adjacency_matrix(G1).toarray(),
            nx.adjacency_matrix(G2).toarray(),
            label,
        ))

    return data
    

In [8]:
######## initialization ########

# Work lists handed to the pool: one entry (a list of SDParam) per thread.
params = list()

# Shared accumulator that every thread extends with its labeled instances.
data = list()

# Number of nodes in each layer of the multiplex.
N = 50

# Number of worker threads; set this to the number of threads actually available.
numThreads = 6

# Record type used to pass one unit of work to the diffusion function in a thread.
SDParam = namedtuple("SDParam", ["N", "P1", "P2", "iterations", "id_th"])

In [9]:
def callSuperDiff(param):
    """Worker entry point: run SuperDiffusionHappens for each job and store the results.

    ``param`` is an iterable of jobs; the first four items of each job are
    passed positionally to SuperDiffusionHappens. The instances it returns
    are appended to the module-level ``data`` list. Nothing is returned.
    """
    for job in param:
        n_nodes, prob_one, prob_two, n_iter = job[0], job[1], job[2], job[3]
        generated = SuperDiffusionHappens(n_nodes, prob_one, prob_two, n_iter)
        data.extend(generated)

In [17]:
def Generation(p_samples, p_min, p_max, num_samples, name):
    """Generate labeled random multiplex instances and store them in an HDF5 file.

    Builds a p_samples x p_samples grid of (p1, p2) connection probabilities,
    splits it across ``numThreads`` worker threads, generates ``num_samples``
    instances per grid point through ``callSuperDiff`` and writes the result
    to ``<name>.h5``.

    Args:
        p_samples: number of different connection probabilities per layer.
        p_min: minimum connection probability used.
        p_max: maximum connection probability used.
        num_samples: number of instances generated for each (p1, p2) pair.
        name: base name of the .h5 file; also used as the dataset name prefix.

    Side effects:
        Clears and refills the module-level ``params`` and ``data`` lists,
        prints progress information and writes the datasets ``<name>_p1``,
        ``<name>_p2``, ``<name>_A1``, ``<name>_A2`` and ``<name>_Y``.
    """
    # Reset the shared accumulators in place. Without this, a second call
    # would replay the previous parameter lists and overflow the arrays below.
    del params[:]
    del data[:]

    # generate p_samples^2 combinations of (p1, p2)
    probabilities = np.linspace(p_min, p_max, p_samples)
    P1, P2 = np.meshgrid(probabilities, probabilities)
    listZipped = list(zip(P1.ravel(), P2.ravel()))

    print("length meshgrid: ", len(listZipped))

    # number of combinations that every thread needs to process
    # (int() keeps the slice bounds integral even if ceil returns a float)
    fract = int(math.ceil(len(listZipped) / numThreads))
    print("meshgrid per thread: ", fract)

    # one list of SDParam per thread; trailing threads may get an empty list
    for i in range(numThreads):
        params.append([
            SDParam(N=N, P1=p1, P2=p2, iterations=num_samples, id_th=i)
            for p1, p2 in listZipped[i * fract:(i + 1) * fract]
        ])

    # make the pool of worker threads
    pool = Pool(numThreads)

    print("Number of processes: ", numThreads)
    start = datetime.datetime.now()

    try:
        # start the workers; each one extends the shared ``data`` list
        pool.map(callSuperDiff, params)
    finally:
        # always release the worker threads, even if a worker raised
        pool.close()
        pool.join()

    elapsedTime = datetime.datetime.now() - start

    # size everything from what was actually collected so that no
    # uninitialised row can end up in the output file
    total = len(data)
    print("generation of ", total, " ", name, " examples took ", str(elapsedTime))

    # data structures used for collecting the instances
    data_p1 = np.empty(shape=(total,))        # connection probability of layer 1
    data_p2 = np.empty(shape=(total,))        # connection probability of layer 2
    data_A1 = np.zeros(shape=(total, N, N))   # adjacency matrix of layer 1
    data_A2 = np.zeros(shape=(total, N, N))   # adjacency matrix of layer 2
    data_Y = np.zeros(shape=(total,))         # presence 1 (or not 0) of super-diffusion

    # NOTE: records arrive in thread-completion order, so the row order is not
    # reproducible between runs; each row carries its own p1/p2.
    for i, dat in enumerate(data):
        data_p1[i] = dat[0]
        data_p2[i] = dat[1]
        data_A1[i] = dat[2]
        data_A2[i] = dat[3]
        data_Y[i] = dat[4]

    # write p1, p2, A1, A2 and Y in different datasets; the context manager
    # closes the file even if one of the writes fails
    with h5py.File(name + '.h5', 'w') as hf:
        print(hf.name)
        print(hf.create_dataset(name+'_p1',  data=data_p1))
        print(hf.create_dataset(name+'_p2',  data=data_p2))
        print(hf.create_dataset(name+'_A1',  data=data_A1))
        print(hf.create_dataset(name+'_A2',  data=data_A2))
        print(hf.create_dataset(name+'_Y', data=data_Y))

In [None]:
# Start from empty shared accumulators so earlier runs do not leak into this one.
params = []
data = []

# Example run: a 50 x 50 grid of (p1, p2) connection probabilities, both
# ranging from 0.01 to 0.99, with 5 random instances per combination,
# stored under the name "train".
Generation(50, 0.01, 0.99, 5, "train")