## Load Embedding Methods and Datasets 

In [13]:
from semb.methods import get_method_ids, load as load_method

# Walk over every registered method id: print it, then load that method.
for method_id in get_method_ids():
    print(method_id)
    load_method(method_id)

graphwave
degree2
drne
node2vec
degree
role2vec
line
degree1
struc2vec
xnetmf
multilens
segk
riwalk


These are the `method_id`s of the available embedding methods.

In [2]:
from semb.datasets import get_dataset_ids, load as load_dataset

# Walk over every registered dataset id: print it, then load that dataset provider.
for dataset_id in get_dataset_ids():
    print(dataset_id)
    load_dataset(dataset_id)

BlogCatalog
ICEWS
Facebook
DD6
PPI
airports


These are the `dataset_id`s of the available datasets.

## Load Dataset

In [3]:
# Get the airports datasets.
# load_dataset returns a provider class; it is instantiated to list the
# available datasets and again to load the first one as the working graph.
DataProvider = load_dataset("airports")
Datasets = DataProvider().get_datasets()
dataset_graph = DataProvider().load_dataset(Datasets[0])

Note that there are three datasets in the airports dataset.

Datasets\[0\] represents the BR-air traffic Dataset

Datasets\[1\] represents the EU-air traffic Dataset

Datasets\[2\] represents the US-air traffic Dataset

In [4]:
# Example code for getting one of the other datasets (here: Facebook).
# Note: this rebinds DataProvider to the Facebook provider class;
# dataset_graph is not touched by this cell.
DataProvider = load_dataset("Facebook")
Facebook_dataset = DataProvider().get_datasets()
Facebook_graph = DataProvider().load_dataset(Facebook_dataset[0])

In [27]:
# Display the loaded object; the recorded output shows it is a networkx Graph.
Facebook_graph

<networkx.classes.graph.Graph at 0x7fb609fa16d8>

In [None]:
# TODO: this example cell was left unfinished -- complete it or remove the cell.

## Get Embedding Result Using struc2vec

In [5]:
# Look up the class that implements the struc2vec embedding method
EmbMethodClass = load_method("struc2vec")

# Hyper-parameters for this run, gathered in one place
struc2vec_params = dict(
    num_walks=10,
    walk_length=80,
    window_size=10,
    dim=128,
    opt1=True,
    opt2=True,
    opt3=True,
    until_layer=2,
)

# Initialise the method with the graph and the hyper-parameters, then train it
struc2vec = EmbMethodClass(dataset_graph, **struc2vec_params)
struc2vec.train()

# get_embeddings() returns a dictionary mapping each node_id to its embedding
dict_struc2vec_emb = struc2vec.get_embeddings()

rm /Users/mark/GoogleDrive/UM/S4/GEMS/Git/StrucEmbeddingLibrary/semb/methods/struc2vec/pickles/weights_distances-layer-*.pickle


In [6]:
# __PARAMS__ on a method class shows the tunable hyper-parameters of that
# embedding method. Here, for example, list them for struc2vec.
# (The values shown are presumably the defaults -- confirm against the library.)
EmbMethodClass.__PARAMS__

{'dim': 128,
 'walk_length': 80,
 'num_walks': 10,
 'window_size': 10,
 'until_layer': None,
 'iter': 5,
 'workers': 1,
 'weighted': False,
 'directed': False,
 'opt1': False,
 'opt2': False,
 'opt3': False}

In [7]:
# Peek at the structure of the returned embeddings by showing only the first item.
# The key is the node_id, the value is that node's embedding as a list.
list(dict_struc2vec_emb.items())[:1]

[(7,
  [0.08667778223752975,
   0.3175113797187805,
   0.11984732002019882,
   0.08551565557718277,
   0.22592230141162872,
   -0.04005485400557518,
   0.04822433739900589,
   0.1058964803814888,
   -0.08130078762769699,
   0.5915679931640625,
   -0.2566221058368683,
   0.16664597392082214,
   0.5967034697532654,
   0.13798509538173676,
   -0.043815065175294876,
   -0.13155382871627808,
   0.15757058560848236,
   -0.004484705161303282,
   -0.24765531718730927,
   0.08927403390407562,
   -0.05873528867959976,
   -0.6947894096374512,
   0.36224365234375,
   -0.058237623423337936,
   0.04533327370882034,
   0.0029460384976118803,
   0.08026771247386932,
   -0.2877187132835388,
   -0.4461440145969391,
   0.7142013311386108,
   0.12455303966999054,
   0.2233635038137436,
   -0.314730703830719,
   -0.15512503683567047,
   0.4236886203289032,
   -0.4948304295539856,
   0.09092126786708832,
   -0.14706477522850037,
   -0.025397535413503647,
   -0.2967643439769745,
   0.13861005008220673,
   -0

## Use xNetMF to get the embedding

In [17]:
# Look up the class that implements the xNetMF embedding method.
EmbMethodClassXnetmf = load_method("xnetmf")

In [15]:
# Show the tunable hyper-parameters exposed by the xNetMF method class.
EmbMethodClassXnetmf.__PARAMS__

{'dim': 128, 'max_layer': 2, 'discount': 0.1, 'gamma': 1}

In [18]:
# Initialise xNetMF on dataset_graph, spelling out each hyper-parameter explicitly.
xnetmf = EmbMethodClassXnetmf(
    dataset_graph,
    dim=128,
    max_layer=2,
    discount=0.1,
    gamma=1,
)

In [21]:
# Train xNetMF (it prints timing information while running), then fetch the
# resulting embeddings for use in the evaluation cells.
xnetmf.train()
dict_xnetmf_emb = xnetmf.get_embeddings()

max degree:  80
got k hop neighbors in time:  0.04142189025878906
got degree sequences in time:  0.012885332107543945
computed representation in time:  0.008544921875


## Load Evaluation Library and Perform Evaluation

In [8]:
from semb.evaluations.classification import *
from semb.evaluations.clustering import *
from semb.evaluations.utils import *

### Perform Classification

In [9]:
# Read the node labels from a label file with the get_label(fn) function.
# The path is relative, so it is resolved against the current working directory.
dict_labels = get_label("./sample-data/labels/airport_Brazil_label.txt")

Read in 131 node labels.
>>> Label 0 appears 32 times
>>> Label 1 appears 32 times
>>> Label 3 appears 35 times
>>> Label 2 appears 32 times


In [10]:
# Classify nodes from their struc2vec embeddings against the loaded labels.
# The result has 'overall' mean/std per metric and 'detailed' scores keyed 0-4
# (presumably one entry per fold -- confirm against the library).
perform_classification(dict_struc2vec_emb, dict_labels)

{'overall': {'accuracy': {'mean': 0.7937, 'std': 0.0629},
  'f1_macro': {'mean': 0.7921, 'std': 0.0579},
  'f1_micro': {'mean': 0.7937, 'std': 0.0629},
  'auc_micro': {'mean': 0.9171, 'std': 0.033},
  'auc_macro': {'mean': 0.923, 'std': 0.0344}},
 'detailed': {0: {'accuracy': 0.8148,
   'f1_macro': 0.805,
   'f1_micro': 0.8148,
   'auc_micro': 0.9195,
   'auc_macro': 0.9333},
  1: {'accuracy': 0.6923,
   'f1_macro': 0.7035,
   'f1_micro': 0.6923,
   'auc_micro': 0.8565,
   'auc_macro': 0.8561},
  2: {'accuracy': 0.7692,
   'f1_macro': 0.7601,
   'f1_micro': 0.7692,
   'auc_micro': 0.9349,
   'auc_macro': 0.9355},
  3: {'accuracy': 0.8846,
   'f1_macro': 0.8769,
   'f1_micro': 0.8846,
   'auc_micro': 0.9551,
   'auc_macro': 0.9555},
  4: {'accuracy': 0.8077,
   'f1_macro': 0.8148,
   'f1_micro': 0.8077,
   'auc_micro': 0.9196,
   'auc_macro': 0.9348}}}

In [22]:
# Same classification evaluation, this time on the xNetMF embeddings.
perform_classification(dict_xnetmf_emb, dict_labels)

{'overall': {'accuracy': {'mean': 0.7026, 'std': 0.0771},
  'f1_macro': {'mean': 0.6841, 'std': 0.0785},
  'f1_micro': {'mean': 0.7026, 'std': 0.0771},
  'auc_micro': {'mean': 0.9009, 'std': 0.0463},
  'auc_macro': {'mean': 0.91, 'std': 0.0459}},
 'detailed': {0: {'accuracy': 0.6667,
   'f1_macro': 0.6293,
   'f1_micro': 0.6667,
   'auc_micro': 0.8957,
   'auc_macro': 0.8976},
  1: {'accuracy': 0.5769,
   'f1_macro': 0.5673,
   'f1_micro': 0.5769,
   'auc_micro': 0.822,
   'auc_macro': 0.8336},
  2: {'accuracy': 0.8077,
   'f1_macro': 0.7939,
   'f1_micro': 0.8077,
   'auc_micro': 0.961,
   'auc_macro': 0.9662},
  3: {'accuracy': 0.7308,
   'f1_macro': 0.7049,
   'f1_micro': 0.7308,
   'auc_micro': 0.93,
   'auc_macro': 0.947},
  4: {'accuracy': 0.7308,
   'f1_macro': 0.725,
   'f1_micro': 0.7308,
   'auc_micro': 0.896,
   'auc_macro': 0.9056}}}

### Perform Clustering

In [11]:
# Cluster the struc2vec embeddings and score them against the labels;
# the result reports 'purity' and 'nmi' under 'overall'.
perform_clustering(dict_struc2vec_emb, dict_labels)



{'overall': {'purity': [0.6793893129770993], 'nmi': [0.4854751062047489]}}

In [23]:
# Same clustering evaluation, this time on the xNetMF embeddings.
perform_clustering(dict_xnetmf_emb, dict_labels)



{'overall': {'purity': [0.4732824427480916], 'nmi': [0.29277522884736923]}}

## Perform Centrality Correlation

In [12]:
from semb.evaluations.centrality_correlation import *
# Run the centrality-correlation evaluation on the struc2vec embeddings, using
# the 'clustering_coeff' centrality and 'euclidean' similarity. A single score
# is returned (see the cell output).
centrality_correlation(dataset_graph, 
                       dict_struc2vec_emb, 
                       centrality='clustering_coeff', 
                       similarity='euclidean')

0.9438543649290735

In [25]:
from semb.evaluations.centrality_correlation import *

# 'pr' is not an accepted centrality keyword; the accepted values are
# 'degree', 'pagerank', 'betweeness' and 'clustering_coeff'. This cell appears
# to be kept as a demonstration of the resulting error, so the exception is
# caught and printed instead of propagating: an uncaught exception would stop
# "Restart Kernel -> Run All" before the following cells are executed.
try:
    centrality_correlation(dataset_graph,
                           dict_struc2vec_emb,
                           centrality='pr',
                           similarity='euclidean')
except Exception as err:  # MethodKeywordUnAllowedException in the recorded run
    # Reproduce the "<ExceptionName>: <message>" line without aborting the run.
    print("{}: {}".format(type(err).__name__, err))

MethodKeywordUnAllowedException: Please choose centrality from ['degree', 'pagerank', 'betweeness', 'clustering_coeff']

In [26]:
from semb.evaluations.centrality_correlation import *
# Run the centrality-correlation evaluation on the struc2vec embeddings with
# the valid keyword 'pagerank' and 'euclidean' similarity.
centrality_correlation(dataset_graph, 
                       dict_struc2vec_emb, 
                       centrality='pagerank', 
                       similarity='euclidean')

0.9758972882556818