In [1]:
import random

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from sklearn.linear_model import LogisticRegression

from kce.evaluate import pre_process, node_classification_pipeline
from kce.walk_based.corewalk import CoreWalkLinear, CoreWalkPower, CoreWalkSigmoid
from kce.walk_based.deepwalk import DeepWalk


In [2]:
# Directory prefix (relative to the notebook) that holds the input graphs.
path_to_data = "../data/"

In [3]:
# Single seed for the notebook's global random number generators.
seed = 11

# The seed was previously assigned but never applied. Seed both global
# generators so a fresh "Restart & Run All" starts from the same RNG state.
# NOTE(review): which generator the kce walkers actually draw from is not
# visible here, and any component that owns a private RNG would need the seed
# passed to it explicitly -- confirm against the kce sources.
random.seed(seed)
np.random.seed(seed)

In [4]:
# Read the DBLP graph from its GML file and hand it straight to the project's
# pre_process step; only the pre-processed graph is kept.
graph = pre_process(nx.read_gml(f"{path_to_data}dblp.gml"))


In [5]:
# Core number of every node, then the largest one found in the graph.
core_numbers = nx.core_number(graph)
max_k = max(core_numbers.values())
print(max_k)

26


In [6]:
# Graph size as "<node count> <edge count>".
print(f"{len(graph.nodes)} {len(graph.edges)}")

27199 66832


In [8]:
def plot_nwalks(k_max, model):
    """Plot the number of random walks `model` assigns to each core index.

    Args:
        k_max: Largest core index; the curve covers 1..k_max inclusive.
        model: Object exposing `_n_walks(k, k_max)`; it is called once per
            core index and its return value is used as the y value.
    """
    core_indices = range(1, k_max + 1)
    walks_per_core = [model._n_walks(core, k_max) for core in core_indices]

    plt.plot(core_indices, walks_per_core)
    plt.xlabel("Core index")
    plt.ylabel("Number of random walks")
    plt.grid()
    plt.show()

In [9]:
def disp_res(res):
    """Print the timing and F1 entries of a result mapping, one per line.

    Args:
        res: Mapping that provides the keys "rw_gen_time",
            "embed_train_time", "micro_f1" and "macro_f1".
    """
    # (label, key) pairs in output order; labels are printed verbatim.
    report_rows = (
        ("RW gen time: ", "rw_gen_time"),
        ("Embedding training time: ", "embed_train_time"),
        ("Micro F1", "micro_f1"),
        ("Macro F1: ", "macro_f1"),
    )
    for label, key in report_rows:
        print(label, res[key])

## Train the embeddings

In [10]:
# Keyword arguments passed unchanged to every model constructed below.
args = {"out_dim": 100, "n_walks": 20, "walk_length": 20, "win_size": 5}

# Vanilla DeepWalk plus the three CoreWalk variants, all built from `args`.
dw = DeepWalk(**args)
# NOTE(review): the 20 in `coef` presumably mirrors n_walks above -- confirm.
cwl = CoreWalkLinear(coef=20 / max_k, **args)
cwp = CoreWalkPower(pow=2, **args)
cws = CoreWalkSigmoid(**args)

### Vanilla DeepWalk

In [29]:
# Fresh classifier for this run: L2 penalty, L-BFGS solver, one-vs-rest.
model = LogisticRegression(
    penalty="l2",
    solver="lbfgs",
    multi_class="ovr",
)
# Run the node-classification pipeline with the vanilla DeepWalk model.
resdw = node_classification_pipeline(graph, dw, model)

In [35]:
# Timings and F1 scores of the vanilla DeepWalk run.
disp_res(res=resdw)

RW gen time:  731.1468095779419
Embedding training time:  247.66105604171753
Micro F1 0.5767156862745098
Macro F1:  0.48405795964523557


### Linearly scaled DeepWalk

In [None]:
# Fresh classifier for this run: L2 penalty, L-BFGS solver, one-vs-rest.
model = LogisticRegression(
    penalty="l2",
    solver="lbfgs",
    multi_class="ovr",
)
# Run the node-classification pipeline with the CoreWalkLinear model.
rescwl = node_classification_pipeline(graph, cwl, model)

In [None]:
# Walks-per-core curve of the CoreWalkLinear model, then its run metrics.
plot_nwalks(k_max=max_k, model=cwl)
disp_res(res=rescwl)

### Quadratically scaled DeepWalk

In [None]:
# Fresh classifier for this run: L2 penalty, L-BFGS solver, one-vs-rest.
model = LogisticRegression(
    penalty="l2",
    solver="lbfgs",
    multi_class="ovr",
)
# Run the node-classification pipeline with the CoreWalkPower model.
rescwp = node_classification_pipeline(graph, cwp, model)

In [None]:
# Walks-per-core curve of the CoreWalkPower model, then its run metrics.
plot_nwalks(k_max=max_k, model=cwp)
disp_res(res=rescwp)

### Sigmoid-scaled DeepWalk

In [None]:
# Fresh classifier for this run: L2 penalty, L-BFGS solver, one-vs-rest.
model = LogisticRegression(
    penalty="l2",
    solver="lbfgs",
    multi_class="ovr",
)
# Run the node-classification pipeline with the CoreWalkSigmoid model.
rescws = node_classification_pipeline(graph, cws, model)

In [None]:
# Walks-per-core curve of the CoreWalkSigmoid model, then its run metrics.
plot_nwalks(k_max=max_k, model=cws)
disp_res(res=rescws)