In [3]:
import networkx as nx
import numpy as np
import os
import time
import pandas as pd
from itertools import chain
from collections import defaultdict
from gensim.models import Word2Vec
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score



# Pipeline

### Data Loader

We need to load networks into memory. Usually networks are organized as pairs of nodes. And sometimes different edges have different weights. Hence, we use networkx.DiGraph to store such structure information and attributes.

In [4]:
def load_data(file_name):
    """
    read edges from an edge file
    """
    edges = list()
    df = pd.read_csv(file_name)
    for idx, row in df.iterrows():
        user_id, friends = row["user_id"], eval(row["friends"])
        for friend in friends:
            # add each friend relation as an edge
            ########## begin ##########
            edges.append((user_id, friend))
            ########## end ##########
    edges = sorted(edges)
    
    return edges

def generate_false_edges(true_edges, num_false_edges=5):
    """
    generate false edges given true edges
    """
    nodes = list(set(chain.from_iterable(true_edges)))
    true_edges = set(true_edges)
    false_edges = set()
    while len(false_edges) < num_false_edges:
        # randomly sample two different nodes and check whether the pair exisit or not
        ########## begin ##########
        head, tail = np.random.choice(nodes, 2)
        if head != tail and (head, tail) not in true_edges and (head, tail) not in false_edges:
            false_edges.add((head, tail))
        ########## end ##########

    false_edges = sorted(false_edges)    
    return false_edges

def construct_graph_from_edges(edges):
    """
    generate a directed graph object given true edges
    DiGraph documentation: https://networkx.github.io/documentation/stable/reference/classes/digraph.html
    """
    # convert a list of edges {(u, v)} to a list of edges with weights {(u, v, w)}
    edge_weight = defaultdict(float)
    for e in edges:
        edge_weight[e] += 1.0
    weighed_edge_list = list()
    for e in sorted(edge_weight.keys()):
        weighed_edge_list.append((e[0], e[1], edge_weight[e]))
        
    graph = nx.DiGraph()
    graph.add_weighted_edges_from(weighed_edge_list)
    
    print("number of nodes:", graph.number_of_nodes())
    print("number of edges:", graph.number_of_edges())
    
    return graph

Firstly, we need to load edges into memory and use the networkx.DiGraph structure to store the graph.

In [6]:
user_file = "data/user.csv"
edges = load_data(user_file)
graph = construct_graph_from_edges(edges)

number of nodes: 3930
number of edges: 15688


### Random Walk Generator

Random walk generators or random walkers yield random walks that contain both local and higher-order neighborhood information. However, naive non-uniform sampling is very slow, which requires O(n) time complexity. Here alias sampling can reduce the time complexity to O(1) with O(n) space. If you are interested, please see the following blog.

In [8]:
def alias_setup(probs):
    """
    compute utility lists for non-uniform sampling from discrete distributions.
    details: https://lips.cs.princeton.edu/the-alias-method-efficient-sampling-with-many-discrete-outcomes/
    """
    K = len(probs)
    q = np.zeros(K)
    J = np.zeros(K, dtype=np.int)

    smaller = list()
    larger = list()
    for kk, prob in enumerate(probs):
        q[kk] = K * prob
        if q[kk] < 1.0:
            smaller.append(kk)
        else:
            larger.append(kk)

    while len(smaller) > 0 and len(larger) > 0:
        small = smaller.pop()
        large = larger.pop()

        J[small] = large
        q[large] = q[large] + q[small] - 1.0
        if q[large] < 1.0:
            smaller.append(large)
        else:
            larger.append(large)

    return J, q

def get_alias_node(graph, node):
    """
    get the alias node setup lists for a given node.
    """
    # get the unnormalized probabilities with the first-order information
    unnormalized_probs = list()
    for nbr in graph.neighbors(node):
        unnormalized_probs.append(graph[node][nbr]['weight'])
    unnormalized_probs = np.array(unnormalized_probs)
    if len(unnormalized_probs) > 0:
        normalized_probs = unnormalized_probs / unnormalized_probs.sum()
    else:
        normalized_probs = unnormalized_probs
        
    return alias_setup(normalized_probs)
    
def get_alias_edge(graph, src, dst, p=1, q=1):
    """
    get the alias edge setup lists for a given edge.
    """
    # get the unnormalized probabilities with the second-order information
    unnormalized_probs = list()
    for dst_nbr in graph.neighbors(dst):
        if dst_nbr == src: # distance is 0
            unnormalized_probs.append(graph[dst][dst_nbr]['weight'] / p)
        elif graph.has_edge(dst_nbr, src): # distance is 1
            unnormalized_probs.append(graph[dst][dst_nbr]['weight'])
        else: # distance is 2
            unnormalized_probs.append(graph[dst][dst_nbr]['weight'] / q)
    unnormalized_probs = np.array(unnormalized_probs)
    if len(unnormalized_probs) > 0:
        normalized_probs = unnormalized_probs / unnormalized_probs.sum()
    else:
        normalized_probs = unnormalized_probs

    return alias_setup(normalized_probs)

def preprocess_transition_probs(graph, p=1, q=1):
    """
    preprocess transition probabilities for guiding the random walks.
    """
    alias_nodes = dict()
    for node in graph.nodes():
        alias_nodes[node] = get_alias_node(graph, node)

    alias_edges = dict()
    for edge in graph.edges():
        alias_edges[edge] = get_alias_edge(graph, edge[0], edge[1], p=p, q=q)

    return alias_nodes, alias_edges

After that, we can use preprocess transition probabilities with the help of alias sampling.

In [9]:
alias_nodes, alias_edges = preprocess_transition_probs(graph, p=2, q=2)

The difference between DeepWalk and node2vec is how to generate random walks. The former only consider the first-order information while the latter also involves the second-order information.

In [10]:
def alias_draw(J, q):
    """
    draw sample from a non-uniform discrete distribution using alias sampling.
    """
    K = len(J)

    kk = int(np.floor(np.random.rand() * K))
    if np.random.rand() < q[kk]:
        return kk
    else:
        return J[kk]

    
def fallback(walk, fetch_last_num=1):
    if len(walk) > fetch_last_num:
        walk.pop()
        fetched = []
        for i in range(fetch_last_num):
            fetched.append(walk[-1-i])
        return walk, fetched
    else:
        return [], [None for _ in range(fetch_last_num)]

# generate the first order random walk
def generate_first_order_random_walk(graph, alias_nodes, 
                                     walk_length=10, start_node=None, verbose=False, force=True, max_tried=10):
    """
    simulate a random walk starting from start node and considering the first order information.
    """
    if start_node == None:
        start_node = np.random.choice(graph.nodes())
    walk = [start_node]
    cur = start_node
    num_tried = 0
    
    while len(walk) < walk_length:
        cur_nbrs = list(graph.neighbors(cur))
        if len(cur_nbrs) > 0:
            # sample the next node based on alias_nodes
            cur = cur_nbrs[alias_draw(*alias_nodes[cur])]
            walk.append(cur)
        else:
            if force:
                walk, fetched = fallback(walk, fetch_last_num=1)
                cur = fetched[0]
                if len(walk) == 0:
                    start_node = np.random.choice(graph.nodes())
                    walk = [start_node]
                    cur = start_node
                num_tried += 1
                if num_tried > max_tried: break
            else:
                break
    if verbose: print(f'walk of lenght {len(walk)} generated with {num_tried} trails')
    return walk
    
# second order random walk
def generate_second_order_random_walk(graph, alias_nodes, alias_edges, 
                                      walk_length=10, start_node=None, verbose=False, force=True, max_tried=10):
    """
    simulate a random walk starting from start node and considering the second order information.
    """
    if start_node == None:
        start_node = np.random.choice(graph.nodes())
    walk = [start_node]
    
    prev = None
    cur = start_node
    num_tried = 0
    
    while len(walk) < walk_length:
        cur_nbrs = list(graph.neighbors(cur))
        if len(cur_nbrs) > 0:
            if prev is None:
                # sample the next node based on alias_nodes
                prev, cur = cur, cur_nbrs[alias_draw(*alias_nodes[cur])]
            else:
                # sample the next node based on alias_edges
                prev, cur = cur, cur_nbrs[alias_draw(*alias_edges[(prev, cur)])]
            walk.append(cur)
        else:
            if force:
                walk, (cur, prev) = fallback(walk, fetch_last_num=2)
                if len(walk) == 0:
                    start_node = np.random.choice(graph.nodes())
                    walk = [start_node]
                    cur = start_node
                    prev = None
                num_tried += 1
                if num_tried > max_tried: break
            else:
                break
    if verbose: print(f'walk of lenght {len(walk)} generated with {num_tried} trails')
    return walk

Let's try to generate a first-order random walk and a second-order random walk.

In [38]:
### 行100步途中遇到的所有nodes, if we found 100 nodes, will return
generate_first_order_random_walk(graph, alias_nodes=alias_nodes,
                                 start_node="N6ZTMIue-2b30CJv2tyPGg", walk_length=100, force=True, verbose=True)

walk of lenght 100 generated with 5 trails


['N6ZTMIue-2b30CJv2tyPGg',
 'iDlkZO2iILS8Jwfdy7DP9A',
 'AtDUaCAPuSKR6E6QaSmmow',
 'UYcmGbelzRa0Q6JqzLoguw',
 '9Ms5wpxVloadWFvDbb77kg',
 'jRyO2V1pA4CdVVqCIOPc1Q',
 'xhhE0txKwQtRzgQVVdKkvg',
 's-591-mtIyP7F1Lffw98jw',
 'bLbSNkLggFnqwNNzzq-Ijw',
 'xFSLb_pxXta5G4oaRB1ylQ',
 'm-BZLIIh5PCAKnzH0qj_0Q',
 'jRyO2V1pA4CdVVqCIOPc1Q',
 'm-BZLIIh5PCAKnzH0qj_0Q',
 'IGBilULpgNHfKitLfrs7Hw',
 '2EuPAGalYnP7eSxPgFCNDg',
 'BQkC6RneYfvfG6wRe0hsvQ',
 'bLbSNkLggFnqwNNzzq-Ijw',
 'VgG_4NU41eZbpidLyfk3vw',
 'bLbSNkLggFnqwNNzzq-Ijw',
 '2JDWKHpR0g4QA4MHmFDprg',
 'yyDp7MZ2st7p0fOQuFYpcA',
 'w7RP1MZad3YO7Vjgp3s3Dg',
 'DWpDDBJvO_fLqhwEwY2joQ',
 '9bvlW5Gos1kxzvcM3c-12w',
 'DWpDDBJvO_fLqhwEwY2joQ',
 'H-ROZEeDUP5j4DjLXn8iOA',
 'zoyb93pskMhDgXZgv-DKtA',
 'AmMd7xpnaf8axS_roCBFRw',
 'bLbSNkLggFnqwNNzzq-Ijw',
 'q-v8elVPvKz0KvK69QSj1Q',
 'PkeDOqXbgEOkR-aKUHoQ_A',
 '_iQh8_WAt-FBmjGB_fJQ5w',
 'PkeDOqXbgEOkR-aKUHoQ_A',
 'BfnYXZEfWNdmW9VbWVeg7Q',
 'PkeDOqXbgEOkR-aKUHoQ_A',
 'BfnYXZEfWNdmW9VbWVeg7Q',
 '68rI8SriEjPYvGk81K1Dhw',
 

In [28]:
generate_second_order_random_walk(graph, alias_nodes=alias_nodes, alias_edges=alias_edges,
                                  start_node="N6ZTMIue-2b30CJv2tyPGg", walk_length=100, force=True, verbose=True)

walk of lenght 78 generated with 11 trails


['N6ZTMIue-2b30CJv2tyPGg',
 'iDlkZO2iILS8Jwfdy7DP9A',
 'N7E-CfqdME28dakWdEKNvw',
 'gqL5KBs2oS7qobnyd99iKg',
 'zTK1nPD2Hpa-ksSXsE-JzQ',
 'MQwSyZ2MZ6N7rtAmphZCow',
 'wUgRsMwL-BCreuMBgmFdWg',
 'AmMd7xpnaf8axS_roCBFRw',
 'F2av57ztcbYiPADtT-YpdA',
 '0d7gFJUi4cV3I7j1dHn9fQ',
 'F2av57ztcbYiPADtT-YpdA',
 'AmMd7xpnaf8axS_roCBFRw',
 'xhhE0txKwQtRzgQVVdKkvg',
 'JXHrhT72U6sZJQSkFfNzjw',
 'xhhE0txKwQtRzgQVVdKkvg',
 'Bj_MarPEKBe2xN12YimekQ',
 'bLbSNkLggFnqwNNzzq-Ijw',
 'doGsaahbqD7ePHP19UsDsg',
 'UYcmGbelzRa0Q6JqzLoguw',
 'renPzRDqMZpMaHiCD_e1_A',
 'xhhE0txKwQtRzgQVVdKkvg',
 'renPzRDqMZpMaHiCD_e1_A',
 'xFSLb_pxXta5G4oaRB1ylQ',
 'bLbSNkLggFnqwNNzzq-Ijw',
 'xFSLb_pxXta5G4oaRB1ylQ',
 'renPzRDqMZpMaHiCD_e1_A',
 'xhhE0txKwQtRzgQVVdKkvg',
 'IGBilULpgNHfKitLfrs7Hw',
 'renPzRDqMZpMaHiCD_e1_A',
 's-591-mtIyP7F1Lffw98jw',
 '2EuPAGalYnP7eSxPgFCNDg',
 'xhhE0txKwQtRzgQVVdKkvg',
 'Bj_MarPEKBe2xN12YimekQ',
 'JcNSd3dXmIMVHP2CUpvMHA',
 'kjaUSiRWhR9bF9KxOMbVvg',
 'SVH0qJBvYGLxrt6YuUdyyw',
 'Go1C5ZgO0jackKNTHxNHJw',
 

### Network Embedding Algorithms

In [39]:
def build_deepwalk(graph, alias_nodes, node_dim=10, num_walks=10, walk_length=10):
    """
    build a deepwalk model
    """
    print("building a DeepWalk model...", end="\t")
    st = time.time()
    np.random.seed(0)
    nodes = list(graph.nodes())
    walks = list()
    # generate random walks
    for walk_iter in range(num_walks):
        np.random.shuffle(nodes)
        for node in nodes:
            walks.append(generate_first_order_random_walk(graph, alias_nodes, walk_length=walk_length, start_node=node))
        
    walk_lens = [len(w) for w in walks]
    if len(walk_lens) > 0:
        avg_walk_len = sum(walk_lens) / len(walk_lens)
    else:
        avg_walk_len = 0.0
    print("number of walks: %d\taverage walk length: %.4f" % (len(walks), avg_walk_len), end="\t")
    
    # train a skip-gram model for these walks
    model = Word2Vec(walks, size=node_dim, window=3, min_count=0, sg=1, workers=os.cpu_count(), iter=10)
    print("trainig time: %.4f" % (time.time()-st))
    
    return model

def build_node2vec(graph, alias_nodes, alias_edges, node_dim=10, num_walks=10, walk_length=10, seed=1):
    """
    build a node2vec model
    """
    print("building a node2vec model...", end="\t")
    st = time.time()
    np.random.seed(0)
    nodes = list(graph.nodes())
    walks = list()
    # generate random walks
    for walk_iter in range(num_walks):
        np.random.shuffle(nodes)
        for node in nodes:
            walks.append(generate_second_order_random_walk(graph, alias_nodes, alias_edges, walk_length=walk_length, start_node=node))
            
    walk_lens = [len(w) for w in walks]
    if len(walk_lens) > 0:
        avg_walk_len = sum(walk_lens) / len(walk_lens)
    else:
        avg_walk_len = 0.0    
    print("number of walks: %d\taverage walk length: %.4f" % (len(walks), avg_walk_len), end="\t")
    
    # train a skip-gram model for these walks
    model = Word2Vec(walks, size=node_dim, window=3, min_count=0, sg=1, workers=os.cpu_count(), iter=10, seed=1)
    print("trainig time: %.4f" % (time.time()-st))
    
    return model

And we can build a DeepWalk model and a node2vec model. Here we set p=q=0.5 so that the walker will not go very far away from the start node.

In [40]:
deepwalk = build_deepwalk(graph, alias_nodes, node_dim=10, num_walks=10, walk_length=10)

building a DeepWalk model...	number of walks: 39300	average walk length: 8.5928	

TypeError: __init__() got an unexpected keyword argument 'size'

In [11]:
node2vec = build_node2vec(graph, alias_nodes, alias_edges, node_dim=10, num_walks=10, walk_length=10)

building a node2vec model...	number of walks: 39300	average walk length: 9.8948	trainig time: 13.6509


### Scorer

In [12]:
def get_cosine_sim(model, u, v):
    """
    get the cosine similarity between two nodes
    """
    try:
        u = model.wv[u]
        v = model.wv[v]
        return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
    except:
        return 0

def get_auc_score(model, true_edges, false_edges):
    """
    get the auc score
    """
    y_true = [1] * len(true_edges) + [0] * len(false_edges)
    
    y_score = list()
    for e in true_edges:
        y_score.append(get_cosine_sim(model, e[0], e[1]))
    for e in false_edges:
        y_score.append(get_cosine_sim(model, e[0], e[1]))
    
    return roc_auc_score(y_true, y_score)

### Try them over a Real-life Network

Let's see the node embeddings of three nodes, and cosine similarities of two edges.

In [13]:
print("node embedding (\"N6ZTMIue-2b30CJv2tyPGg\"):",
      deepwalk.wv["N6ZTMIue-2b30CJv2tyPGg"])
print("node embedding (\"N7E-CfqdME28dakWdEKNvw\"):",
      deepwalk.wv["N7E-CfqdME28dakWdEKNvw"])
print("node embedding (\"MmlJSLDg-IFaeXb5wdJbgg\"):",
      deepwalk.wv["MmlJSLDg-IFaeXb5wdJbgg"])
print("true edge (\"N6ZTMIue-2b30CJv2tyPGg\", \"N7E-CfqdME28dakWdEKNvw\"):",
      get_cosine_sim(deepwalk, "N6ZTMIue-2b30CJv2tyPGg", "N7E-CfqdME28dakWdEKNvw"))
print("false edge (\"N6ZTMIue-2b30CJv2tyPGg\", \"MmlJSLDg-IFaeXb5wdJbgg\"):",
      get_cosine_sim(deepwalk, "N6ZTMIue-2b30CJv2tyPGg", "MmlJSLDg-IFaeXb5wdJbgg"))

node embedding ("N6ZTMIue-2b30CJv2tyPGg"): [-0.36232373 -0.72744584 -0.47010353  0.17939933 -0.04121303  0.92036295
  0.8701034  -0.97390413  0.69072515  0.7475717 ]
node embedding ("N7E-CfqdME28dakWdEKNvw"): [ 0.5813705  -1.162643   -0.00645171 -0.22482896  0.17096843  0.9237789
  1.5884651  -1.3143146   1.1850793   1.0500975 ]
node embedding ("MmlJSLDg-IFaeXb5wdJbgg"): [ 1.0545577   0.11629698  0.23369302 -3.375658    0.02999506  2.4090235
  0.42507368 -1.683669    0.5016573  -0.13335882]
true edge ("N6ZTMIue-2b30CJv2tyPGg", "N7E-CfqdME28dakWdEKNvw"): 0.87795585
false edge ("N6ZTMIue-2b30CJv2tyPGg", "MmlJSLDg-IFaeXb5wdJbgg"): 0.3330828


# Link Prediction

Link prediction is a task to prediction unseen edges based on graph information. Let's use cross validation to check their performance in the link prediction task.

In [14]:
def case(kfold=5, node_dim=10, num_walks=10, walk_length=5, p=0.5, q=0.5):
    np.random.seed(0)

    deepwalk_auc_scores = list()
    node2vec_auc_scores = list()
    kf = KFold(n_splits=kfold, shuffle=True)
    for k, (train_idx, valid_idx) in enumerate(kf.split(edges)):
        # split edges into training and validation
        train_edges = [edges[idx] for idx in train_idx]
        valid_edges = [edges[idx] for idx in valid_idx]
        # generate the same validation size of false edges
        false_edges = generate_false_edges(edges, num_false_edges=len(valid_edges))

        # construct the graph and preprocess transition probabilities
        graph = construct_graph_from_edges(train_edges)
        alias_nodes, alias_edges = preprocess_transition_probs(graph, p=p, q=q)

        # build models and get auc scores
        model = build_deepwalk(graph, alias_nodes,
                               node_dim=node_dim, num_walks=num_walks, walk_length=walk_length)
        deepwalk_auc_scores.append(get_auc_score(model, valid_edges, false_edges))

        model = build_node2vec(graph, alias_nodes, alias_edges,
                               node_dim=node_dim, num_walks=num_walks, walk_length=walk_length)
        node2vec_auc_scores.append(get_auc_score(model, valid_edges, false_edges))

    deepwalk_auc_scores = np.array(deepwalk_auc_scores)
    node2vec_auc_scores = np.array(node2vec_auc_scores)
    print("DeepWalk: avg auc score: %.4f\tstd: %.4f" % (deepwalk_auc_scores.mean(), deepwalk_auc_scores.std()))
    print("node2vec: avg auc score: %.4f\tstd: %.4f" % (node2vec_auc_scores.mean(), node2vec_auc_scores.std()))

In [15]:
case(kfold=5, node_dim=2, num_walks=2, walk_length=2, p=0.5, q=0.5)

number of nodes: 3574
number of edges: 12550
building a DeepWalk model...	number of walks: 7148	average walk length: 2.0000	trainig time: 1.2047
building a node2vec model...	number of walks: 7148	average walk length: 2.0000	trainig time: 1.2289
number of nodes: 3610
number of edges: 12550
building a DeepWalk model...	number of walks: 7220	average walk length: 2.0000	trainig time: 1.1850
building a node2vec model...	number of walks: 7220	average walk length: 2.0000	trainig time: 1.2523
number of nodes: 3639
number of edges: 12550
building a DeepWalk model...	number of walks: 7278	average walk length: 2.0000	trainig time: 1.2255
building a node2vec model...	number of walks: 7278	average walk length: 2.0000	trainig time: 1.2693
number of nodes: 3582
number of edges: 12551
building a DeepWalk model...	number of walks: 7164	average walk length: 2.0000	trainig time: 1.2548
building a node2vec model...	number of walks: 7164	average walk length: 2.0000	trainig time: 1.2015
number of nodes: 360

In [16]:
case(kfold=5, node_dim=5, num_walks=5, walk_length=5, p=0.5, q=0.5)

number of nodes: 3574
number of edges: 12550
building a DeepWalk model...	number of walks: 17870	average walk length: 4.2996	trainig time: 2.4550
building a node2vec model...	number of walks: 17870	average walk length: 4.9248	trainig time: 5.3298
number of nodes: 3610
number of edges: 12550
building a DeepWalk model...	number of walks: 18050	average walk length: 4.3637	trainig time: 2.5155
building a node2vec model...	number of walks: 18050	average walk length: 4.9461	trainig time: 5.2827
number of nodes: 3639
number of edges: 12550
building a DeepWalk model...	number of walks: 18195	average walk length: 4.3013	trainig time: 2.5971
building a node2vec model...	number of walks: 18195	average walk length: 4.9227	trainig time: 5.6862
number of nodes: 3582
number of edges: 12551
building a DeepWalk model...	number of walks: 17910	average walk length: 4.3045	trainig time: 2.4990
building a node2vec model...	number of walks: 17910	average walk length: 4.9412	trainig time: 5.6011
number of no

In [17]:
case(kfold=5, node_dim=5, num_walks=5, walk_length=5, p=0.25, q=0.25)

number of nodes: 3574
number of edges: 12550
building a DeepWalk model...	number of walks: 17870	average walk length: 4.2996	trainig time: 2.5157
building a node2vec model...	number of walks: 17870	average walk length: 4.9193	trainig time: 5.3296
number of nodes: 3610
number of edges: 12550
building a DeepWalk model...	number of walks: 18050	average walk length: 4.3637	trainig time: 2.6554
building a node2vec model...	number of walks: 18050	average walk length: 4.9467	trainig time: 5.2629
number of nodes: 3639
number of edges: 12550
building a DeepWalk model...	number of walks: 18195	average walk length: 4.3013	trainig time: 2.6548
building a node2vec model...	number of walks: 18195	average walk length: 4.9294	trainig time: 5.5435
number of nodes: 3582
number of edges: 12551
building a DeepWalk model...	number of walks: 17910	average walk length: 4.3045	trainig time: 2.5526
building a node2vec model...	number of walks: 17910	average walk length: 4.9456	trainig time: 5.4681
number of no

In [18]:
case(kfold=5, node_dim=5, num_walks=5, walk_length=5, p=1, q=1)

number of nodes: 3574
number of edges: 12550
building a DeepWalk model...	number of walks: 17870	average walk length: 4.2996	trainig time: 2.4469
building a node2vec model...	number of walks: 17870	average walk length: 4.9239	trainig time: 5.3298
number of nodes: 3610
number of edges: 12550
building a DeepWalk model...	number of walks: 18050	average walk length: 4.3637	trainig time: 2.6087
building a node2vec model...	number of walks: 18050	average walk length: 4.9474	trainig time: 5.2453
number of nodes: 3639
number of edges: 12550
building a DeepWalk model...	number of walks: 18195	average walk length: 4.3013	trainig time: 2.7338
building a node2vec model...	number of walks: 18195	average walk length: 4.9265	trainig time: 6.2089
number of nodes: 3582
number of edges: 12551
building a DeepWalk model...	number of walks: 17910	average walk length: 4.3045	trainig time: 2.5516
building a node2vec model...	number of walks: 17910	average walk length: 4.9487	trainig time: 5.8662
number of no

In [19]:
case(kfold=5, node_dim=10, num_walks=10, walk_length=10, p=0.5, q=0.5)

number of nodes: 3574
number of edges: 12550
building a DeepWalk model...	number of walks: 35740	average walk length: 8.3403	trainig time: 5.3430
building a node2vec model...	number of walks: 35740	average walk length: 9.6946	trainig time: 11.5061
number of nodes: 3610
number of edges: 12550
building a DeepWalk model...	number of walks: 36100	average walk length: 8.5036	trainig time: 5.2952
building a node2vec model...	number of walks: 36100	average walk length: 9.8084	trainig time: 11.0941
number of nodes: 3639
number of edges: 12550
building a DeepWalk model...	number of walks: 36390	average walk length: 8.4013	trainig time: 5.5094
building a node2vec model...	number of walks: 36390	average walk length: 9.7810	trainig time: 11.5867
number of nodes: 3582
number of edges: 12551
building a DeepWalk model...	number of walks: 35820	average walk length: 8.3423	trainig time: 5.2447
building a node2vec model...	number of walks: 35820	average walk length: 9.7650	trainig time: 11.5482
number o

In [20]:
case(kfold=5, node_dim=10, num_walks=10, walk_length=10, p=0.25, q=0.25)

number of nodes: 3574
number of edges: 12550
building a DeepWalk model...	number of walks: 35740	average walk length: 8.3403	trainig time: 5.2807
building a node2vec model...	number of walks: 35740	average walk length: 9.6844	trainig time: 11.4601
number of nodes: 3610
number of edges: 12550
building a DeepWalk model...	number of walks: 36100	average walk length: 8.5036	trainig time: 5.4052
building a node2vec model...	number of walks: 36100	average walk length: 9.7904	trainig time: 11.0065
number of nodes: 3639
number of edges: 12550
building a DeepWalk model...	number of walks: 36390	average walk length: 8.4013	trainig time: 5.7579
building a node2vec model...	number of walks: 36390	average walk length: 9.7606	trainig time: 11.8396
number of nodes: 3582
number of edges: 12551
building a DeepWalk model...	number of walks: 35820	average walk length: 8.3423	trainig time: 5.2714
building a node2vec model...	number of walks: 35820	average walk length: 9.7490	trainig time: 11.4608
number o