In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, roc_curve, auc

In [2]:
# Command to use the full width of the screen
#from IPython.core.display import display, HTML
#display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
def lcls(x, links):
    """Compute a local-community-links (LCL) score for each candidate link.

    Parameters
    ----------
    x : 2-D array
        Bipartite adjacency matrix; non-zero entries are treated as links
        between a row node and a column node.
    links : integer array of shape (n, 2)
        Candidate links, one ``(row, column)`` index pair per line.

    Returns
    -------
    numpy.ndarray of shape (n,)
        For each candidate ``(r, c)``: the sum of ``x`` over the sub-matrix
        spanned by the row-neighbours of column ``c`` and the
        column-neighbours of row ``r``.  When the candidate link is itself
        present in ``x``, its own end points are left out of both
        neighbourhoods.
    """
    n_links = links.shape[0]
    scores = np.zeros(n_links)

    # Only nodes that occur in at least one candidate need a neighbour list.
    wanted_rows = set(np.unique(links[:, 0]).tolist())
    wanted_cols = set(np.unique(links[:, 1]).tolist())
    cols_of_row = {
        r: np.argwhere(x[r, :]).ravel()
        for r in range(x.shape[0]) if r in wanted_rows
    }
    rows_of_col = {
        c: np.argwhere(x[:, c]).ravel()
        for c in range(x.shape[1]) if c in wanted_cols
    }

    for k in range(n_links):
        row, col = links[k, 0], links[k, 1]
        neigh_rows = rows_of_col[col]
        neigh_cols = cols_of_row[row]
        if x[row, col]:
            # An existing link must not count its own end points.
            neigh_rows = neigh_rows[neigh_rows != row]
            neigh_cols = neigh_cols[neigh_cols != col]
        scores[k] = np.sum(x[np.ix_(neigh_rows, neigh_cols)])
    return scores

In [4]:
def create_LCL(S, links, l_param):
    """Add up to ``l_param`` new links to ``S``, favouring high LCL scores.

    Parameters
    ----------
    S : 2-D array
        Adjacency (sub-)matrix.  It is modified in place: the selected
        positions are set to 1.
    links : pair of index arrays ``(rows, cols)``
        Candidate positions, in the layout returned by ``np.nonzero``.
    l_param : int
        Maximum number of candidates to turn into links.

    Returns
    -------
    None

    Notes
    -----
    Candidates are drawn without replacement with probability proportional
    to their ``lcls`` score.  At most as many candidates as have a non-zero
    score are drawn.  If every score is zero the draw is uniform instead,
    and all candidates are taken when there are no more than ``l_param``.
    """
    n_candidates = links[0].size
    if n_candidates == 0:
        return

    lcl = lcls(S, np.array(links).T)
    total = np.sum(lcl)

    if total == 0:
        # Test the zero total explicitly rather than dividing 0/0 and
        # detecting the resulting NaNs, which raises a RuntimeWarning.
        if l_param < n_candidates:
            rand_edge = np.random.choice(n_candidates, size=l_param, replace=False)
        else:
            rand_edge = np.arange(n_candidates)
    else:
        p_lcl = lcl / total
        # Sampling without replacement cannot return more items than there
        # are non-zero probabilities.
        n_draw = min(l_param, np.count_nonzero(p_lcl))
        rand_edge = np.random.choice(n_candidates, size=n_draw, p=p_lcl, replace=False)

    S[links[0][rand_edge], links[1][rand_edge]] = 1

In [5]:
def BALCL(init_nodes_x, final_nodes_x, init_nodes_y, final_nodes_y, m_param_x, m_param_y, l_param, Initial_Connectivity='all-to-all', LCL=True):
    """Grow a random bipartite adjacency matrix by preferential attachment,
    optionally followed by local-community-link (LCL) closure steps.

    Parameters
    ----------
    init_nodes_x, final_nodes_x : int
        Initial and final number of row nodes.
    init_nodes_y, final_nodes_y : int
        Initial and final number of column nodes.
    m_param_x : int
        Number of existing columns each newly added row links to.
    m_param_y : int
        Number of existing rows each newly added column links to.
    l_param : int
        Maximum number of extra links added by each ``create_LCL`` call.
    Initial_Connectivity : {'all-to-all', 'my_random'}
        How the initial block is wired: fully connected, or each entry set
        with probability 0.5 and every empty initial row/column then given
        one random link.
    LCL : bool
        If true, ``create_LCL`` is run on the grown sub-matrix after each
        node is added.

    Returns
    -------
    numpy.ndarray of shape (final_nodes_x, final_nodes_y)
        The 0/1 adjacency matrix of the grown network.

    Raises
    ------
    AssertionError
        If the size / ``m_param`` preconditions below are violated.
    ValueError
        If ``Initial_Connectivity`` is not a recognised mode, or if
        ``np.random.choice`` cannot draw the requested number of distinct
        neighbours.
    """
    assert init_nodes_x > 1
    assert init_nodes_y > 1
    assert init_nodes_x < final_nodes_x
    assert init_nodes_y < final_nodes_y
    # NOTE(review): m_param_x counts *columns* drawn for a new row, yet it is
    # bounded by init_nodes_x here (and m_param_y by init_nodes_y); confirm
    # whether these two bounds were meant to be swapped.
    assert init_nodes_x >= m_param_x
    assert init_nodes_y >= m_param_y
    if Initial_Connectivity not in ('all-to-all', 'my_random'):
        # An unknown mode would leave the seed network empty and only fail
        # later with an opaque "probabilities contain NaN" error.
        raise ValueError(
            "Initial_Connectivity must be 'all-to-all' or 'my_random', got {!r}".format(Initial_Connectivity)
        )

    # INITIAL NETWORK
    A = np.zeros((final_nodes_x, final_nodes_y))
    if Initial_Connectivity == 'all-to-all':
        A[:init_nodes_x, :init_nodes_y] = 1
    else:  # 'my_random'
        for i in range(init_nodes_x):
            for j in range(init_nodes_y):
                A[i, j] = int(np.random.random() + 0.5)
            if not A[i].any():
                # Guarantee every initial row has at least one link.
                A[i, np.random.choice(range(init_nodes_y))] = 1
        for j in range(init_nodes_y):
            if not A[:, j].any():
                # Guarantee every initial column has at least one link.
                A[np.random.choice(range(init_nodes_x)), j] = 1

    def _attach_row(row, n_cols):
        # Link new row `row` to m_param_x of the first n_cols columns,
        # chosen with probability proportional to column degree.
        col_degree = np.sum(A[:, :n_cols], axis=0)
        col_degree /= np.sum(col_degree)
        targets = np.random.choice(n_cols, size=m_param_x, p=col_degree, replace=False)
        A[row, targets] = 1

    def _attach_col(col, n_rows):
        # Link new column `col` to m_param_y of the first n_rows rows,
        # chosen with probability proportional to row degree.
        row_degree = np.sum(A[:n_rows, :], axis=1)
        row_degree /= np.sum(row_degree)
        sources = np.random.choice(n_rows, size=m_param_y, p=row_degree, replace=False)
        A[sources, col] = 1

    def _close_local_links(n_rows, n_cols):
        # Optional LCL step on the top-left n_rows x n_cols block.  The slice
        # is a view, so create_LCL writes straight into A.
        if LCL:
            S = A[:n_rows, :n_cols]
            create_LCL(S, np.nonzero(S == 0), l_param)

    # NETWORK EVOLUTION
    new_rows = np.arange(init_nodes_x, final_nodes_x)
    new_cols = np.arange(init_nodes_y, final_nodes_y)
    n_new_rows = len(new_rows)
    n_new_cols = len(new_cols)

    if n_new_rows >= n_new_cols:
        # Add `per_col` rows, then one column, and repeat.
        per_col = n_new_rows // n_new_cols
        for i, col in enumerate(new_cols):
            for row in new_rows[i * per_col:(i + 1) * per_col]:
                _attach_row(row, col)              # column `col` does not exist yet
                _close_local_links(row + 1, col)
            _attach_col(col, row + 1)
            _close_local_links(row + 1, col + 1)
        # Rows left over once every column exists.  All final_nodes_y columns,
        # including the last one added, are valid attachment targets.
        for row in new_rows[n_new_cols * per_col:]:
            _attach_row(row, final_nodes_y)
            _close_local_links(row + 1, final_nodes_y)
    else:
        # Add `per_row` columns, then one row, and repeat.
        per_row = n_new_cols // n_new_rows
        for i, row in enumerate(new_rows):
            for col in new_cols[i * per_row:(i + 1) * per_row]:
                _attach_col(col, row)              # row `row` does not exist yet
                _close_local_links(row, col + 1)
            _attach_row(row, col + 1)
            _close_local_links(row + 1, col + 1)
        # Columns left over once every row exists.  All final_nodes_x rows,
        # including the last one added, are valid attachment sources.
        for col in new_cols[n_new_rows * per_row:]:
            _attach_col(col, final_nodes_x)
            _close_local_links(final_nodes_x, col + 1)

    return A

In [6]:
# Smoke test: grow a 20 x 16 network from a random 7 x 6 seed block,
# one preferential link per new node and up to two LCL links per step.
testBALCL = BALCL(
    init_nodes_x=7,
    final_nodes_x=20,
    init_nodes_y=6,
    final_nodes_y=16,
    m_param_x=1,
    m_param_y=1,
    l_param=2,
    Initial_Connectivity='my_random',
    LCL=True,
)
testBALCL

array([[1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0.],
       [0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0.],
       [1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1.],
       [1., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0.],
       [0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [1., 1., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.,

# Link prediction

In [7]:
def get_links_to_del(x, fraction=0.1, loops=1):
    """Draw random subsets of the existing links of ``x`` for removal.

    Parameters
    ----------
    x : array
        Adjacency matrix; non-zero entries are the existing links.
    fraction : float
        Share of the existing links drawn in each subset (rounded down).
    loops : int
        Number of independent subsets to draw.

    Returns
    -------
    numpy.ndarray
        Array stacking ``loops`` subsets; each subset lists the index pairs
        of the drawn links, sampled without replacement.
    """
    existing_links = np.argwhere(x)
    n_existing = existing_links.shape[0]
    n_removed = int(n_existing * fraction)

    subsets = []
    for _ in range(loops):
        chosen = np.random.choice(n_existing, size=n_removed, replace=False)
        subsets.append(existing_links[chosen])
    return np.array(subsets)

In [8]:
def delete_links(x, links_to_del):
    """Return a copy of adjacency matrix ``x`` with the given links set to 0.

    ``links_to_del`` is a sequence of ``(row, column)`` index pairs.  The
    input matrix itself is left untouched.
    """
    pruned = np.copy(x)
    rows = [link[0] for link in links_to_del]
    cols = [link[1] for link in links_to_del]
    if rows:
        pruned[rows, cols] = 0
    return pruned

In [9]:
def evaluate_predictions(true, pred, plot=False):
    """Score predictions by area under the precision-recall and ROC curves.

    Parameters
    ----------
    true : array-like
        Ground-truth labels passed to ``precision_recall_curve`` / ``roc_curve``.
    pred : array-like
        Prediction scores, one per label.
    plot : bool
        If true, draw both curves on a single set of axes and show the figure.

    Returns
    -------
    (aupr, auroc) : tuple
        Area under the precision-recall curve and under the ROC curve, both
        computed with ``auc``.
    """
    pr_precision, pr_recall, _pr_thresholds = precision_recall_curve(true, pred)
    false_pos_rate, true_pos_rate, _roc_thresholds = roc_curve(true, pred)
    areas = (auc(pr_recall, pr_precision), auc(false_pos_rate, true_pos_rate))

    if not plot:
        return areas

    fig, ax = plt.subplots()
    ax.plot(pr_recall, pr_precision, 'go', label='Precision-Recall curve')
    ax.plot(false_pos_rate, true_pos_rate, '.', label='ROC curve')
    plt.legend()
    plt.show()
    return areas

In [10]:
def cross_val_from_notebook(adjacency, links_to_del, loops=1, raw_output=False, verbose=False):
    """Repeatedly hide links and score how well ``lcls`` recovers them.

    For each of ``loops`` trials the links in ``links_to_del[trial]`` are
    removed from a copy of ``adjacency``, every zero entry of the pruned
    matrix is scored with ``lcls``, and the scores are compared with the
    entries of the original matrix at those positions.

    Parameters
    ----------
    adjacency : 2-D array
        The full adjacency matrix.
    links_to_del : sequence
        One collection of ``(row, column)`` pairs per trial.
    loops : int
        Number of trials to run.
    raw_output : bool
        If true, return the flattened labels and scores of all trials
        instead of per-trial areas under the curves.
    verbose : bool
        If true, print progress and intermediate arrays.

    Returns
    -------
    numpy.ndarray of shape (loops, 2), or a tuple of two flat arrays
        With ``raw_output`` false: one ``evaluate_predictions`` result per
        trial.  Rows of trials that were not run (the loop stops at the first
        trial whose pruned matrix has no zero entry) stay at zero.  With
        ``raw_output`` true: ``(labels, scores)``.
    """
    collected_truth = []
    collected_scores = []
    auc_table = None if raw_output else np.zeros((loops, 2))

    for trial in range(loops):
        if verbose:
            print('Trial {} of {}'.format(trial+1, loops))
        x_ = delete_links(adjacency, links_to_del[trial])
        missing = np.argwhere(x_ == 0)
        n_missing = missing.shape[0]
        if verbose:
            print("missing", missing, n_missing)

        if n_missing <= 0:
            # Nothing left to predict: stop the whole experiment.
            if verbose:
                print("WTF?")
            break

        true_present = adjacency[missing[:, 0], missing[:, 1]]
        scores = lcls(x_, missing)
        if verbose:
            print("adjacency")
            print(adjacency)
            print("x_")
            print(x_)
            print("missing:")
            print(missing, n_missing)
            print("true_present", true_present)
            print("scores", scores)

        if raw_output:
            collected_truth.append(true_present)
            collected_scores.append(scores)
        else:
            auc_table[trial] = evaluate_predictions(true_present, scores)

    if raw_output:
        return np.array(collected_truth).flatten(), np.asarray(collected_scores).flatten()

    if verbose:
        print("results")
        print(auc_table)
    return auc_table

In [11]:
def running_all_parameters(init_nodes_x, final_nodes_x, init_nodes_y, final_nodes_y, l, fraction, loops, Initial_Connectivity, LCL):
    """Sweep ``m_param_x`` / ``m_param_y`` and record mean link-prediction scores.

    For every ``m_param_x`` in ``range(init_nodes_x)`` and ``m_param_y`` in
    ``range(init_nodes_y)`` a network is generated with ``BALCL``, a
    ``fraction`` of its links is hidden ``loops`` times, and the
    cross-validation results are averaged over the trials.

    Returns
    -------
    numpy.ndarray of shape (init_nodes_x, init_nodes_y, 2)
        Entry ``[m_param_x, m_param_y]`` holds the trial-averaged pair
        returned by ``cross_val_from_notebook``.
    """
    grid_shape = (init_nodes_x, init_nodes_y)
    mean_scores = np.zeros(grid_shape + (2,))
    # np.ndindex walks the grid in the same row-major order as nested loops.
    for m_param_x, m_param_y in np.ndindex(*grid_shape):
        network = BALCL(init_nodes_x, final_nodes_x, init_nodes_y, final_nodes_y,
                        m_param_x, m_param_y, l, Initial_Connectivity, LCL)
        hidden_links = get_links_to_del(network, fraction, loops)
        trial_scores = cross_val_from_notebook(network, hidden_links, loops)
        mean_scores[m_param_x, m_param_y, :] = np.mean(trial_scores, axis=0)
    return mean_scores

In [12]:
# Sweep m_param_x in 0..4 and m_param_y in 0..3 on 32 x 28 networks grown
# from a random 5 x 4 seed block; hide 10% of the links, 5 trials per setting.
testRAP = running_all_parameters(
    init_nodes_x=5,
    final_nodes_x=32,
    init_nodes_y=4,
    final_nodes_y=28,
    l=2,
    fraction=0.1,
    loops=5,
    Initial_Connectivity='my_random',
    LCL=True,
)

  after removing the cwd from sys.path.


In [13]:
# Show the sweep result: entry [m_param_x, m_param_y] is the trial-averaged (AUPR, AUROC) pair.
testRAP

array([[[0.92486937, 0.9988087 ],
        [0.95244039, 0.99932097],
        [0.97928737, 0.99218275],
        [0.82289703, 0.9965902 ]],

       [[0.83466412, 0.98203058],
        [0.43675167, 0.84350239],
        [0.28501789, 0.83010372],
        [0.36715991, 0.84294056]],

       [[0.61068755, 0.99059939],
        [0.22755726, 0.81135381],
        [0.34300683, 0.79348739],
        [0.23068266, 0.76132549]],

       [[0.94267022, 0.99891561],
        [0.32482165, 0.822454  ],
        [0.234961  , 0.73769142],
        [0.23000564, 0.76137948]],

       [[0.51753237, 0.9779042 ],
        [0.34815082, 0.87247184],
        [0.35172607, 0.82774115],
        [0.26856666, 0.76525419]]])

In [14]:
# Sanity check: one (AUPR, AUROC) pair per (m_param_x, m_param_y) combination.
testRAP.shape

(5, 4, 2)