## Load Embedding Methods and Datasets 

In [60]:
from semb.methods import load as load_method
from semb.methods import get_method_ids
# Walk through every registered embedding method: print its id, then load it.
for method_id in get_method_ids():
    print(method_id)
    load_method(method_id)

graphwave
degree2
drne
node2vec
degree
role2vec
line
degree1
struc2vec
xnetmf
multilens
segk
riwalk


These are the `method_id`s of the available embedding methods.

In [61]:
from semb.datasets import load as load_dataset
from semb.datasets import get_dataset_ids
# Walk through every registered dataset: print its id, then load it.
for dataset_id in get_dataset_ids():
    print(dataset_id)
    load_dataset(dataset_id)

BlogCatalog
ICEWS
Facebook
DD6
PPI
airports


These are the `dataset_id`s of the available datasets.

## Load Dataset

In [62]:
# Get the airports datasets.
# load_dataset() returns an object that is called to create a data provider.
DataProvider = load_dataset("airports")

# Create the provider once and reuse it for both calls below
# (previously a fresh provider was constructed for each call).
airports_provider = DataProvider()

# Descriptors of the datasets offered by this provider.
Datasets = airports_provider.get_datasets()

# Load the first dataset; the result is inspected in the next cell.
dataset_graph = airports_provider.load_dataset(Datasets[0])

In [63]:
# Display the loaded dataset object (its repr is shown as the cell output).
dataset_graph

<networkx.classes.graph.Graph at 0x7fb5eec14b00>

Note that the airports data provider contains three datasets:

Datasets\[0\] represents the BR-air traffic Dataset

Datasets\[1\] represents the EU-air traffic Dataset

Datasets\[2\] represents the US-air traffic Dataset

In [64]:
# Example code for getting one of the other datasets.
# NOTE: this rebinds `DataProvider`, which previously referred to the
# airports provider; `dataset_graph` is not affected.
DataProvider = load_dataset("Facebook")

# Create the provider once and reuse it for both calls below
# (previously a fresh provider was constructed for each call).
facebook_provider = DataProvider()

# Descriptors of the datasets offered by this provider.
Facebook_dataset = facebook_provider.get_datasets()

# Load the first dataset; the result is inspected in the next cell.
Facebook_graph = facebook_provider.load_dataset(Facebook_dataset[0])

In [65]:
# Display the loaded Facebook dataset object (its repr is shown as the cell output).
Facebook_graph

<networkx.classes.graph.Graph at 0x7fb5eec14710>

## Get Embedding Result Using struc2vec

In [66]:
# Look up the struc2vec implementation; the returned object is called
# below with a graph to create a method instance.
EmbMethodClass = load_method("struc2vec")

# Hyper-parameters used for this run, gathered in one place.
struc2vec_params = dict(
    num_walks=10,
    walk_length=80,
    window_size=10,
    dim=128,
    opt1=True,
    opt2=True,
    opt3=True,
    until_layer=2,
)

# Initialise the method with the graph, then train it.
struc2vec = EmbMethodClass(dataset_graph, **struc2vec_params)
struc2vec.train()

# get_embeddings() returns a dictionary with node_id as key and the
# embedding of that node as value.
dict_struc2vec_emb = struc2vec.get_embeddings()

rm /Users/mark/GoogleDrive/UM/S4/GEMS/Git/StrucEmbeddingLibrary/semb/methods/struc2vec/pickles/weights_distances-layer-*.pickle


In [67]:
# The `__PARAMS__` attribute of a method class lists its tunable
# hyper-parameters; here they are shown for struc2vec.
# NOTE(review): the values look like defaults (they differ from the
# arguments passed to the constructor in the previous cell) — confirm.
EmbMethodClass.__PARAMS__

{'dim': 128,
 'walk_length': 80,
 'num_walks': 10,
 'window_size': 10,
 'until_layer': None,
 'iter': 5,
 'workers': 1,
 'weighted': False,
 'directed': False,
 'opt1': False,
 'opt2': False,
 'opt3': False}

In [68]:
# Peek at the structure of the returned embeddings: a dictionary whose keys
# are node_ids and whose values are the embeddings stored as lists.
# Only the first (node_id, embedding) pair is displayed.
list(dict_struc2vec_emb.items())[:1]

[(7,
  [0.2464672178030014,
   0.5599985718727112,
   0.08907857537269592,
   0.2639751136302948,
   0.09001614898443222,
   -0.08490040153265,
   -0.25430381298065186,
   -0.31040051579475403,
   0.45050930976867676,
   0.5215111970901489,
   -0.43628236651420593,
   0.7037710547447205,
   0.3541853129863739,
   -0.014188420958817005,
   -0.24672721326351166,
   -0.16237299144268036,
   0.11043626815080643,
   0.017458433285355568,
   -0.21678726375102997,
   0.29864057898521423,
   -0.18566207587718964,
   -0.81545490026474,
   0.22579821944236755,
   0.011725102551281452,
   -0.18718162178993225,
   -0.34135153889656067,
   0.025627631694078445,
   0.21843190491199493,
   -0.22646835446357727,
   0.21700456738471985,
   0.16845807433128357,
   0.20939825475215912,
   -0.6749688386917114,
   -0.25944414734840393,
   0.10627547651529312,
   0.02292950078845024,
   -0.2502276301383972,
   -0.07110105454921722,
   0.35667163133621216,
   -0.2711651921272278,
   -0.17586614191532135,
   

## Use xNetMF to get the embedding

In [69]:
# Look up the xNetMF implementation by its method id.
EmbMethodClassXnetmf = load_method("xnetmf")

In [70]:
# Tunable hyper-parameters exposed by the xNetMF method class.
EmbMethodClassXnetmf.__PARAMS__

{'dim': 128, 'max_layer': 2, 'discount': 0.1, 'gamma': 1}

In [71]:
# Initialise xNetMF with the graph loaded from the airports provider,
# passing each of its hyper-parameters explicitly.
xnetmf = EmbMethodClassXnetmf(
    dataset_graph,
    dim=128,
    max_layer=2,
    discount=0.1,
    gamma=1,
)

In [72]:
# Train xNetMF (timing information is printed while it runs), then collect
# the embeddings for use in the evaluation cells.
xnetmf.train()
dict_xnetmf_emb = xnetmf.get_embeddings()

max degree:  80
got k hop neighbors in time:  0.0725870132446289
got degree sequences in time:  0.024322032928466797
computed representation in time:  0.011445999145507812


## Graph input as nx.Graph()

In [73]:
# networkx is not imported by any earlier cell shown in this notebook, so
# import it here to keep this cell runnable on a fresh kernel.
import networkx as nx

# Build a graph directly with networkx: a complete graph on 100 nodes.
complete_graph = nx.complete_graph(100)

In [74]:
# Run xNetMF on the graph built with networkx, using the same
# hyper-parameters as before.
# NOTE: this rebinds `xnetmf`; the embeddings of the dataset graph remain
# available in `dict_xnetmf_emb`.
xnetmf = EmbMethodClassXnetmf(
    complete_graph,
    dim=128,
    max_layer=2,
    discount=0.1,
    gamma=1,
)
xnetmf.train()
complete_graph_emb = xnetmf.get_embeddings()

# Preview a single node_id -> embedding entry instead of dumping the
# embeddings of all 100 nodes into the cell output.
dict(list(complete_graph_emb.items())[:1])

max degree:  99
got k hop neighbors in time:  0.1983051300048828
got degree sequences in time:  0.01099705696105957
computed representation in time:  0.009054183959960938


{0: [-1.0,
  -3.20532487364298e-24,
  2.5849394142282096e-24,
  5.492996255234946e-25,
  -3.980806697911443e-24,
  -5.686866711302062e-25,
  2.1842738050228372e-24,
  -5.1698788284564195e-26,
  -2.7917345673664666e-24,
  -1.0339757656912839e-25,
  1.4992648602523617e-24,
  4.1746771539785586e-24,
  -1.3053944041852459e-24,
  -1.809457589959747e-25,
  1.3958672836832333e-24,
  4.048661357534933e-24,
  -1.7060600133906185e-24,
  -1.1632227364026945e-25,
  1.996865697491292e-24,
  6.591595506281935e-25,
  4.607654505861784e-24,
  1.349015256800347e-25,
  -2.1971985020939783e-24,
  -2.1131879711315614e-24,
  1.2924697071141049e-26,
  -1.0985992510469891e-24,
  -1.2409728672212772e-24,
  -2.455692443516799e-25,
  3.812785635986609e-25,
  1.0178198943523575e-24,
  1.4734154661100796e-24,
  -5.1698788284564195e-26,
  -3.051440199139707e-24,
  -1.042053701360747e-24,
  -1.2246150474906145e-24,
  3.2634860104631147e-25,
  -1.5638883456080668e-24,
  5.169878828456419e-25,
  3.2222885385488527e-2

In other words, the only step that requires manual work from the user is reading the graph into networkx form. networkx provides many [built-in functions](https://networkx.org/documentation/stable/reference/readwrite/index.html) for reading graphs from different formats.

## Load Evaluation Library and Perform Evaluation

In [76]:
from semb.evaluations.classification import *
from semb.evaluations.clustering import *
from semb.evaluations.utils import *

### Perform Classification

In [78]:
# Read the node labels for the Brazil airports graph with get_label(fn).
# The function prints a summary of how many labels were read and how often
# each label occurs.
dict_labels = get_label("./sample-data/labels/airport_Brazil_label.txt")

Read in 131 node labels.
>>> Label 0 appears 32 times
>>> Label 1 appears 32 times
>>> Label 3 appears 35 times
>>> Label 2 appears 32 times


In [79]:
# Evaluate the struc2vec embeddings against the node labels.
# The result holds 'overall' mean/std values per metric and 'detailed'
# per-run metrics (presumably one entry per cross-validation fold — confirm).
perform_classification(dict_struc2vec_emb, dict_labels)

{'overall': {'accuracy': {'mean': 0.7783, 'std': 0.0791},
  'f1_macro': {'mean': 0.7711, 'std': 0.0804},
  'f1_micro': {'mean': 0.7783, 'std': 0.0791},
  'auc_micro': {'mean': 0.9271, 'std': 0.0308},
  'auc_macro': {'mean': 0.9361, 'std': 0.0287}},
 'detailed': {0: {'accuracy': 0.8148,
   'f1_macro': 0.805,
   'f1_micro': 0.8148,
   'auc_micro': 0.9328,
   'auc_macro': 0.9389},
  1: {'accuracy': 0.6538,
   'f1_macro': 0.6542,
   'f1_micro': 0.6538,
   'auc_micro': 0.8713,
   'auc_macro': 0.8886},
  2: {'accuracy': 0.7308,
   'f1_macro': 0.7045,
   'f1_micro': 0.7308,
   'auc_micro': 0.9403,
   'auc_macro': 0.9348},
  3: {'accuracy': 0.8846,
   'f1_macro': 0.8769,
   'f1_micro': 0.8846,
   'auc_micro': 0.965,
   'auc_macro': 0.979},
  4: {'accuracy': 0.8077,
   'f1_macro': 0.8148,
   'f1_micro': 0.8077,
   'auc_micro': 0.926,
   'auc_macro': 0.9393}}}

In [80]:
# Same evaluation for the xNetMF embeddings, so the two methods can be
# compared on identical labels.
perform_classification(dict_xnetmf_emb, dict_labels)

{'overall': {'accuracy': {'mean': 0.7028, 'std': 0.0865},
  'f1_macro': {'mean': 0.6845, 'std': 0.089},
  'f1_micro': {'mean': 0.7028, 'std': 0.0865},
  'auc_micro': {'mean': 0.9015, 'std': 0.0463},
  'auc_macro': {'mean': 0.9102, 'std': 0.0456}},
 'detailed': {0: {'accuracy': 0.6296,
   'f1_macro': 0.5916,
   'f1_micro': 0.6296,
   'auc_micro': 0.8967,
   'auc_macro': 0.9004},
  1: {'accuracy': 0.5769,
   'f1_macro': 0.5673,
   'f1_micro': 0.5769,
   'auc_micro': 0.822,
   'auc_macro': 0.8336},
  2: {'accuracy': 0.8077,
   'f1_macro': 0.7939,
   'f1_micro': 0.8077,
   'auc_micro': 0.9596,
   'auc_macro': 0.9641},
  3: {'accuracy': 0.7692,
   'f1_macro': 0.7446,
   'f1_micro': 0.7692,
   'auc_micro': 0.9329,
   'auc_macro': 0.9488},
  4: {'accuracy': 0.7308,
   'f1_macro': 0.725,
   'f1_micro': 0.7308,
   'auc_micro': 0.8964,
   'auc_macro': 0.904}}}

### Perform Clustering

In [81]:
# Cluster the struc2vec embeddings and score the clusters against the
# node labels; the result reports 'purity' and 'nmi' under 'overall'.
perform_clustering(dict_struc2vec_emb, dict_labels)



{'overall': {'purity': [0.6641221374045801], 'nmi': [0.4676404566911835]}}

In [82]:
# Same clustering evaluation for the xNetMF embeddings.
perform_clustering(dict_xnetmf_emb, dict_labels)



{'overall': {'purity': [0.48091603053435117], 'nmi': [0.26573864312927054]}}

## Perform Centrality Correlation

In [83]:
from semb.evaluations.centrality_correlation import *
# Score the struc2vec embeddings against the clustering coefficient of the
# nodes, using euclidean similarity. A single number is returned.
centrality_correlation(
    dataset_graph,
    dict_struc2vec_emb,
    centrality='clustering_coeff',
    similarity='euclidean',
)

0.9420442902918806

In [52]:
from semb.evaluations.centrality_correlation import *
# Intentional failure: 'pr' is not an accepted centrality keyword, so the
# call is rejected with a message listing the allowed choices.
# The error is caught and printed so that the notebook still runs from top
# to bottom; the broad `except` is deliberate because this cell only
# demonstrates the error message.
try:
    centrality_correlation(
        dataset_graph,
        dict_struc2vec_emb,
        centrality='pr',
        similarity='euclidean',
    )
except Exception as err:
    print("{}: {}".format(type(err).__name__, err))

MethodKeywordUnAllowedException: Please choose centrality from ['degree', 'pagerank', 'betweeness', 'clustering_coeff']

In [84]:
from semb.evaluations.centrality_correlation import *
# Same evaluation with a valid keyword: 'pagerank' is one of the accepted
# centrality choices. A single number is returned.
centrality_correlation(
    dataset_graph,
    dict_struc2vec_emb,
    centrality='pagerank',
    similarity='euclidean',
)

0.973746069127092