## Load Embedding Methods and Datasets 

In [19]:
from semb.methods import load as load_method
from semb.methods import get_method_ids
# Print the id of every registered embedding method and load each one in turn.
for method_id in get_method_ids():
    print(method_id)
    load_method(method_id)

graphwave
degree2
drne
node2vec
degree
role2vec
line
degree1
struc2vec
xnetmf
multilens
segk
riwalk


These are the `method_id` values of the available embedding methods.

In [20]:
from semb.datasets import load as load_dataset
from semb.datasets import get_dataset_ids
# Print the id of every registered dataset and load each one in turn.
for dataset_id in get_dataset_ids():
    print(dataset_id)
    load_dataset(dataset_id)

BlogCatalog
ICEWS
Facebook
DD6
PPI
airports


These are the `dataset_id` values of the available datasets.

## Load Dataset

In [21]:
# Get the airports datasets and load the first one as a graph.
# load_dataset() returns the provider class registered under the given id.
DataProvider = load_dataset("airports")

# Instantiate the provider once and reuse that instance for both calls,
# instead of constructing a fresh provider for each call.
airports_provider = DataProvider()
Datasets = airports_provider.get_datasets()
dataset_graph = airports_provider.load_dataset(Datasets[0])

Note that the `airports` dataset id provides three datasets:

Datasets\[0\] represents the BR-air traffic Dataset

Datasets\[1\] represents the EU-air traffic Dataset

Datasets\[2\] represents the US-air traffic Dataset

In [22]:
# Example code for getting one of the other datasets (here: Facebook).
# NOTE: this rebinds DataProvider to the Facebook provider class.
DataProvider = load_dataset("Facebook")

# Instantiate the provider once and reuse that instance for both calls,
# instead of constructing a fresh provider for each call.
facebook_provider = DataProvider()
Facebook_dataset = facebook_provider.get_datasets()
Facebook_graph = facebook_provider.load_dataset(Facebook_dataset[0])

## Get Embedding Result Using struc2vec

In [23]:
# Look up the embedding-method class registered under the id "struc2vec"
EmbMethodClass = load_method("struc2vec")

# Hyper-parameter values for this run, kept together in one mapping
struc2vec_params = {
    "num_walks": 10,
    "walk_length": 80,
    "window_size": 10,
    "dim": 128,
    "opt1": True,
    "opt2": True,
    "opt3": True,
    "until_layer": 2,
}

# Initialize the method with the loaded graph, then train it
struc2vec = EmbMethodClass(dataset_graph, **struc2vec_params)
struc2vec.train()

# get_embeddings() returns a dictionary whose keys are node_ids and whose
# values are the corresponding embeddings
dict_struc2vec_emb = struc2vec.get_embeddings()

rm /Users/mark/GoogleDrive/UM/S4/GEMS/Git/StrucEmbeddingLibrary/semb/methods/struc2vec/pickles/weights_distances-layer-*.pickle


In [24]:
# __PARAMS__ lists the tunable hyper-parameters of an embedding method class.
# Here it is displayed for struc2vec, the class held in EmbMethodClass.
# NOTE(review): the values shown look like the defaults (several differ from
# the ones passed to the constructor earlier) -- confirm against the library.
EmbMethodClass.__PARAMS__

{'dim': 128,
 'walk_length': 80,
 'num_walks': 10,
 'window_size': 10,
 'until_layer': None,
 'iter': 5,
 'workers': 1,
 'weighted': False,
 'directed': False,
 'opt1': False,
 'opt2': False,
 'opt3': False}

In [25]:
# Peek at the structure of the returned embeddings by displaying only the
# first (node_id, embedding) pair.
# The key is the node_id; the value is the embedding as a list of floats.
list(dict_struc2vec_emb.items())[:1]

[(7,
  [0.08278467506170273,
   0.07463587075471878,
   0.11284095048904419,
   -0.38170960545539856,
   -0.44573143124580383,
   -0.3695821166038513,
   0.43749380111694336,
   0.05801921710371971,
   -0.06056896224617958,
   -0.25539231300354004,
   -0.28077754378318787,
   0.6167539358139038,
   -0.25711169838905334,
   -0.19481824338436127,
   -0.11672115325927734,
   0.31561076641082764,
   0.033818796277046204,
   0.17662350833415985,
   -0.20374169945716858,
   -0.061856288462877274,
   -0.01047682948410511,
   -0.34680068492889404,
   -0.047683726996183395,
   -0.3888072967529297,
   -0.3265305757522583,
   0.1867995709180832,
   -0.46184617280960083,
   0.16948775947093964,
   0.18607781827449799,
   -0.20094233751296997,
   0.23065787553787231,
   -0.17936761677265167,
   -0.008089302107691765,
   -0.00910465233027935,
   0.040275901556015015,
   0.15548869967460632,
   -0.28457310795783997,
   0.2947252094745636,
   0.02417849563062191,
   -0.19586694240570068,
   -0.0436645

## Load Evaluation Library and Perform Evaluation

In [26]:
from semb.evaluations.classification import *
from semb.evaluations.clustering import *
from semb.evaluations.utils import *

### Perform Classification

In [27]:
# Read the node labels from a label file with the get_label(fn) function.
# The file used here is the Brazil airport label file from ./sample-data;
# the result is kept in dict_labels for the evaluations that follow.
dict_labels = get_label("./sample-data/labels/airport_Brazil_label.txt")

Read in 131 node labels.
>>> Label 0 appears 32 times
>>> Label 1 appears 32 times
>>> Label 3 appears 35 times
>>> Label 2 appears 32 times


In [28]:
# Run the classification evaluation on the struc2vec embeddings and the labels
perform_classification(dict_struc2vec_emb, dict_labels)

{'overall': {'accuracy': {'mean': 0.7863, 'std': 0.0622},
  'f1_macro': {'mean': 0.7774, 'std': 0.0612},
  'f1_micro': {'mean': 0.7863, 'std': 0.0622},
  'auc_micro': {'mean': 0.9234, 'std': 0.0288},
  'auc_macro': {'mean': 0.9314, 'std': 0.0261}},
 'detailed': {0: {'accuracy': 0.7778,
   'f1_macro': 0.747,
   'f1_micro': 0.7778,
   'auc_micro': 0.9255,
   'auc_macro': 0.9362},
  1: {'accuracy': 0.6923,
   'f1_macro': 0.7035,
   'f1_micro': 0.6923,
   'auc_micro': 0.8728,
   'auc_macro': 0.8881},
  2: {'accuracy': 0.7692,
   'f1_macro': 0.7448,
   'f1_micro': 0.7692,
   'auc_micro': 0.9359,
   'auc_macro': 0.9297},
  3: {'accuracy': 0.8846,
   'f1_macro': 0.8769,
   'f1_micro': 0.8846,
   'auc_micro': 0.961,
   'auc_macro': 0.97},
  4: {'accuracy': 0.8077,
   'f1_macro': 0.8148,
   'f1_micro': 0.8077,
   'auc_micro': 0.9216,
   'auc_macro': 0.9332}}}

### Perform Clustering

In [29]:
# Run the clustering evaluation on the struc2vec embeddings and the labels
perform_clustering(dict_struc2vec_emb, dict_labels)



{'overall': {'purity': [0.6641221374045801], 'nmi': [0.4661859253253555]}}

## Perform Centrality Correlation

In [30]:
from semb.evaluations.centrality_correlation import *
# Run the centrality-correlation evaluation on the graph and its struc2vec
# embeddings, selecting the 'clustering_coeff' centrality and the
# 'euclidean' similarity options.
centrality_correlation(
    dataset_graph,
    dict_struc2vec_emb,
    centrality='clustering_coeff',
    similarity='euclidean',
)

0.9482621398890526