## Load Embedding Methods and Datasets 

In [18]:
from semb.methods import get_method_ids, load as load_method

# Print every id reported by get_method_ids() and pass each one to
# load_method(), so each listed method is loaded once in this cell.
for mid in get_method_ids():
    print(mid)
    load_method(mid)

graphwave
degree2
drne
node2vec
degree
role2vec
line
degree1
struc2vec
xnetmf
multilens
segk
riwalk


These are the method IDs (`method_id`) of the available embedding methods.

In [19]:
from semb.datasets import get_dataset_ids, load as load_dataset

# Print every id reported by get_dataset_ids() and pass each one to
# load_dataset(), so each listed dataset provider is loaded once in this cell.
for did in get_dataset_ids():
    print(did)
    load_dataset(did)

BlogCatalog
ICEWS
Facebook
DD6
PPI
airports


These are the dataset IDs (`dataset_id`) of the available datasets.

## Load Dataset

In [20]:
# Get the airports datasets.
# load_dataset("airports") returns a callable provider; it is instantiated
# once to list the datasets it offers and once more to load the first of
# them. `dataset_graph` is the graph used by the embedding and evaluation
# cells further down.
DataProvider = load_dataset("airports")
Datasets = DataProvider().get_datasets()
dataset_graph = DataProvider().load_dataset(Datasets[0])

Note that the airports provider contains three datasets:

Datasets\[0\] represents the BR-air traffic Dataset

Datasets\[1\] represents the EU-air traffic Dataset

Datasets\[2\] represents the US-air traffic Dataset

In [21]:
# Example: the same three steps load any other dataset id, here "Facebook".
# NOTE: this rebinds DataProvider, which previously referred to the airports
# provider; `dataset_graph` (the airports graph) is left untouched.
DataProvider = load_dataset("Facebook")
Facebook_dataset = DataProvider().get_datasets()
Facebook_graph = DataProvider().load_dataset(Facebook_dataset[0])

## Get Embedding Result Using struc2vec

In [22]:
# Look up the embedding-method class registered under the id "struc2vec".
EmbMethodClass = load_method("struc2vec")

# Instantiate the method on the graph loaded above. Hyper-parameters are
# passed as keyword arguments; the tunable ones are listed in
# EmbMethodClass.__PARAMS__.
struc2vec = EmbMethodClass(
    dataset_graph,
    num_walks=10,
    walk_length=80,
    window_size=10,
    dim=128,
    opt1=True,
    opt2=True,
    opt3=True,
    until_layer=2,
)

# Train the embedding method.
struc2vec.train()

# Retrieve the result with get_embeddings(): a dictionary whose keys are
# node ids and whose values are the corresponding embeddings.
dict_struc2vec_emb = struc2vec.get_embeddings()

rm /Users/mark/GoogleDrive/UM/S4/GEMS/Git/StrucEmbeddingLibrary/semb/methods/struc2vec/pickles/weights_distances-layer-*.pickle


In [29]:
# Display the tunable hyper-parameters of the loaded embedding method
# (here struc2vec). The values shown are presumably the defaults used when
# a parameter is not passed to the constructor -- TODO confirm.
EmbMethodClass.__PARAMS__

{'dim': 128,
 'walk_length': 80,
 'num_walks': 10,
 'window_size': 10,
 'until_layer': None,
 'iter': 5,
 'workers': 1,
 'weighted': False,
 'directed': False,
 'opt1': False,
 'opt2': False,
 'opt3': False}

## Load Evaluation Library and Perform Evaluation

In [24]:
# Bring the evaluation helpers used below (get_label, perform_classification,
# perform_clustering) into the notebook namespace.
# NOTE(review): wildcard imports hide which module each name comes from;
# consider explicit imports once the defining modules are confirmed.
from semb.evaluations.classification import *
from semb.evaluations.clustering import *
from semb.evaluations.utils import *

### Perform Classification

In [25]:
# Read the node labels for the Brazil airports graph with get_label(fn).
# The path is relative to the notebook's working directory; the call also
# prints a summary of how often each label occurs.
dict_labels = get_label("./sample-data/labels/airport_Brazil_label.txt")

Read in 131 node labels.
>>> Label 0 appears 32 times
>>> Label 1 appears 32 times
>>> Label 3 appears 35 times
>>> Label 2 appears 32 times


In [26]:
# Evaluate the struc2vec embeddings on node classification against the labels.
# The displayed result is a dict with an 'overall' entry (mean/std per metric)
# and a 'detailed' entry keyed 0-4 (presumably one per fold -- TODO confirm).
perform_classification(dict_struc2vec_emb, dict_labels)

{'overall': {'accuracy': {'mean': 0.786, 'std': 0.0759},
  'f1_macro': {'mean': 0.7791, 'std': 0.0752},
  'f1_micro': {'mean': 0.786, 'std': 0.0759},
  'auc_micro': {'mean': 0.9288, 'std': 0.0255},
  'auc_macro': {'mean': 0.9413, 'std': 0.0182}},
 'detailed': {0: {'accuracy': 0.8148,
   'f1_macro': 0.805,
   'f1_micro': 0.8148,
   'auc_micro': 0.9374,
   'auc_macro': 0.9418},
  1: {'accuracy': 0.6538,
   'f1_macro': 0.6542,
   'f1_micro': 0.6538,
   'auc_micro': 0.8817,
   'auc_macro': 0.9083},
  2: {'accuracy': 0.7692,
   'f1_macro': 0.7448,
   'f1_micro': 0.7692,
   'auc_micro': 0.9438,
   'auc_macro': 0.9578},
  3: {'accuracy': 0.8846,
   'f1_macro': 0.8769,
   'f1_micro': 0.8846,
   'auc_micro': 0.9556,
   'auc_macro': 0.9585},
  4: {'accuracy': 0.8077,
   'f1_macro': 0.8148,
   'f1_micro': 0.8077,
   'auc_micro': 0.9255,
   'auc_macro': 0.9401}}}

### Perform Clustering

In [27]:
# Evaluate the struc2vec embeddings on clustering against the same labels.
# The displayed result reports 'purity' and 'nmi' under the 'overall' key.
perform_clustering(dict_struc2vec_emb, dict_labels)



{'overall': {'purity': [0.6793893129770993], 'nmi': [0.4854751062047489]}}

## Perform Centrality Correlation

In [28]:
from semb.evaluations.centrality_correlation import *

# Run centrality_correlation on the airports graph and its struc2vec
# embeddings, using the 'clustering_coeff' centrality and the 'euclidean'
# similarity. The call displays a single score.
centrality_correlation(
    dataset_graph,
    dict_struc2vec_emb,
    centrality='clustering_coeff',
    similarity='euclidean',
)

0.9379255572546902