## Load Embedding Methods and Datasets 

In [3]:
from semb.methods import get_method_ids, load as load_method

# Go through every id returned by get_method_ids(): echo the id first,
# then load the corresponding embedding method.
for mid in get_method_ids():
    print(mid)
    load_method(mid)

graphwave
degree2
drne
node2vec
degree
role2vec
line
degree1
struc2vec
xnetmf
multilens
segk
riwalk


In [4]:
from semb.datasets import get_dataset_ids, load as load_dataset

# Go through every id returned by get_dataset_ids(): echo the id first,
# then load the corresponding dataset provider.
for did in get_dataset_ids():
    print(did)
    load_dataset(did)

airports


## Get Embedding Result Using struc2vec

In [22]:
# Load the embedding-method class for the "struc2vec" method id.
EmbMethodClass = load_method("struc2vec")

# Load the airports data provider and instantiate it once, so that listing the
# datasets and loading the graph go through the same provider instance.
AirportDataProvider = load_dataset("airports")
airport_provider = AirportDataProvider()
airport_datasets = airport_provider.get_datasets()
# NOTE(review): index 0 is assumed to be the Brazil airport network -- confirm
# against the order returned by get_datasets().
brazil_airport_graph = airport_provider.load_dataset(airport_datasets[0])

# Initialize the embedding method with the graph and its hyperparameters.
# Keyword names match the keys listed in EmbMethodClass.__PARAMS__.
struc2vec = EmbMethodClass(
    brazil_airport_graph,
    num_walks=10,
    walk_length=80,
    window_size=10,
    dim=128,
    opt1=True,
    opt2=True,
    opt3=True,
    until_layer=2,
)
struc2vec.train()

# get_embeddings() returns a dictionary keyed by node_id whose values are the
# node embeddings.
dict_struc2vec_emb = struc2vec.get_embeddings()

rm /Users/mark/GoogleDrive/UM/S4/GEMS/Git/StrucEmbeddingLibrary/semb/methods/struc2vec/pickles/weights_distances-layer-*.pickle


In [27]:
# Show the tunable parameters of the loaded embedding method.
# NOTE(review): the displayed values look like defaults -- confirm.
EmbMethodClass.__PARAMS__

{'dim': 128,
 'walk_length': 80,
 'num_walks': 10,
 'window_size': 10,
 'until_layer': None,
 'iter': 5,
 'workers': 1,
 'weighted': False,
 'directed': False,
 'opt1': False,
 'opt2': False,
 'opt3': False}

## Load Evaluation Library and Perform Evaluation

In [28]:
# Wildcard imports of the evaluation helpers. The cells below call get_label,
# perform_classification and perform_clustering, which are not imported
# anywhere else in the visible cells, so they presumably come from these modules.
# TODO(review): replace with explicit imports once the owning module of each
# name is confirmed.
from semb.evaluations.classification import *
from semb.evaluations.clustering import *
from semb.evaluations.utils import *

### Perform Classification

In [29]:
# Read the node labels from the label file with get_label(fn).
# The result is passed, together with the embeddings, to the classification
# and clustering evaluations below.
dict_labels = get_label("./sample-data/labels/airport_Brazil_label.txt")

Read in 131 node labels.
>>> Label 0 appears 32 times
>>> Label 1 appears 32 times
>>> Label 3 appears 35 times
>>> Label 2 appears 32 times


In [31]:
# Evaluate the struc2vec embeddings against the node labels.
# The displayed result reports accuracy, F1 (macro/micro) and AUC (micro/macro):
# mean/std under 'overall' and individual values under 'detailed' keys 0-4
# (presumably one per cross-validation fold -- confirm).
perform_classification(dict_struc2vec_emb, dict_labels)

{'overall': {'accuracy': {'mean': 0.7633, 'std': 0.0787},
  'f1_macro': {'mean': 0.7548, 'std': 0.0765},
  'f1_micro': {'mean': 0.7633, 'std': 0.0787},
  'auc_micro': {'mean': 0.9182, 'std': 0.0327},
  'auc_macro': {'mean': 0.9224, 'std': 0.0301}},
 'detailed': {0: {'accuracy': 0.7778,
   'f1_macro': 0.7515,
   'f1_micro': 0.7778,
   'auc_micro': 0.9204,
   'auc_macro': 0.9298},
  1: {'accuracy': 0.6154,
   'f1_macro': 0.6209,
   'f1_micro': 0.6154,
   'auc_micro': 0.858,
   'auc_macro': 0.866},
  2: {'accuracy': 0.7692,
   'f1_macro': 0.7448,
   'f1_micro': 0.7692,
   'auc_micro': 0.9413,
   'auc_macro': 0.926},
  3: {'accuracy': 0.8462,
   'f1_macro': 0.8421,
   'f1_micro': 0.8462,
   'auc_micro': 0.9527,
   'auc_macro': 0.9561},
  4: {'accuracy': 0.8077,
   'f1_macro': 0.8148,
   'f1_micro': 0.8077,
   'auc_micro': 0.9186,
   'auc_macro': 0.9339}}}

### Perform Clustering

In [32]:
# Cluster the struc2vec embeddings and compare against the node labels.
# The displayed result reports 'purity' and 'nmi' under 'overall'.
perform_clustering(dict_struc2vec_emb, dict_labels)



{'overall': {'purity': [0.6412213740458015], 'nmi': [0.4771373196787525]}}

## Perform Centrality Correlation

In [None]:
# Import only the function this cell calls instead of every public name in the
# module, so it is clear where centrality_correlation comes from.
from semb.evaluations.centrality_correlation import centrality_correlation

# Run the centrality-correlation evaluation on the Brazil airport graph and its
# struc2vec embeddings, selecting the 'clustering_coeff' centrality and the
# 'euclidean' similarity options.
centrality_correlation(
    brazil_airport_graph,
    dict_struc2vec_emb,
    centrality='clustering_coeff',
    similarity='euclidean',
)