# Load Embedding Methods and Datasets


In [1]:
from semb.methods import load as load_method
from semb.methods import get_method_ids
# Walk over every registered embedding method: print its method_id, then load it.
# The loaded object is discarded here — presumably a smoke test that each
# method can be imported; confirm against semb.methods.load.
for method_id in get_method_ids():
    print(method_id)
    load_method(method_id)

graphwave
degree2
drne


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


node2vec
degree
role2vec
line
degree1
struc2vec
xnetmf
multilens
segk
riwalk




These are the `method_id`s of the available embedding methods.

In [2]:
from semb.datasets import load as load_dataset
from semb.datasets import get_dataset_ids
# Walk over every registered dataset: print its dataset_id, then load it.
# The loaded object is discarded here — presumably a smoke test that each
# dataset provider can be imported; confirm against semb.datasets.load.
for dataset_id in get_dataset_ids():
    print(dataset_id)
    load_dataset(dataset_id)

BlogCatalog
ICEWS
Facebook
DD6
PPI
airports


These are the `dataset_id`s of the available datasets.

# Load Dataset

In [3]:
# Look up the provider registered under the "airports" dataset_id
DataProvider = load_dataset("airports")

# Ask a provider instance which datasets it offers
Datasets = DataProvider().get_datasets()

# Load the first listed dataset; the result is the graph used for embedding below
first_airports_dataset = Datasets[0]
dataset_graph = DataProvider().load_dataset(first_airports_dataset)

Note that the airports provider contains three datasets:

Datasets[0] represents the BR-air traffic Dataset

Datasets[1] represents the EU-air traffic Dataset

Datasets[2] represents the US-air traffic Dataset

In [4]:
# The same steps work for any other dataset_id; here "Facebook" is used.
# Note that DataProvider is rebound to the Facebook provider by this cell.
DataProvider = load_dataset("Facebook")

# List the datasets offered by the Facebook provider
Facebook_dataset = DataProvider().get_datasets()

# Load the first listed dataset
first_facebook_dataset = Facebook_dataset[0]
Facebook_graph = DataProvider().load_dataset(first_facebook_dataset)

# Get Embedding Result Using struc2vec

In [5]:
# load_method returns the class that implements the requested embedding method
EmbMethodClass = load_method("struc2vec")

# Hyper-parameters for this run, collected in one place so they are easy to tune.
# Every key also appears in EmbMethodClass.__PARAMS__.
struc2vec_params = dict(
    num_walks=10,
    walk_length=80,
    window_size=10,
    dim=128,
    opt1=True,
    opt2=True,
    opt3=True,
    until_layer=2,
)

# Initialise the method with the graph loaded earlier, then train it
struc2vec = EmbMethodClass(dataset_graph, **struc2vec_params)
struc2vec.train()

# get_embeddings() returns a dictionary whose keys are node_ids and whose
# values are the corresponding embeddings
dict_struc2vec_emb = struc2vec.get_embeddings()

rm /Users/mark/GoogleDrive/UM/S4/GEMS/Git/StrucEmbeddingLibrary/semb/methods/struc2vec/pickles/weights_distances-layer-*.pickle


In [6]:
# __PARAMS__ lists the tunable hyper-parameters of an embedding method,
# mapped to what are presumably their default values (TODO confirm).
# Here it is displayed for struc2vec; the same attribute is read from any
# class returned by load_method.
EmbMethodClass.__PARAMS__

{'dim': 128,
 'walk_length': 80,
 'num_walks': 10,
 'window_size': 10,
 'until_layer': None,
 'iter': 5,
 'workers': 1,
 'weighted': False,
 'directed': False,
 'opt1': False,
 'opt2': False,
 'opt3': False}

In [7]:
# Inspect the structure of the returned embeddings by showing only the first entry
# (printing the whole dictionary would flood the output).
# Each key is a node_id and each value is that node's embedding as a list of floats.
list(dict_struc2vec_emb.items())[:1]

[(7,
  [-0.4232161343097687,
   -0.12211604416370392,
   -0.41314584016799927,
   0.31680214405059814,
   0.22780832648277283,
   0.16798822581768036,
   0.3995887339115143,
   0.2065640687942505,
   0.43101632595062256,
   0.1483970731496811,
   0.3616706430912018,
   -0.16688808798789978,
   0.36248403787612915,
   -0.05844362452626228,
   -0.05580306053161621,
   0.48257818818092346,
   -0.10159474611282349,
   -0.17438405752182007,
   0.43539392948150635,
   0.2525126338005066,
   -0.036251772195100784,
   -0.19434180855751038,
   -0.1814160943031311,
   -0.19217780232429504,
   0.25638559460639954,
   -0.11004286259412766,
   0.10969080030918121,
   0.2960025370121002,
   -0.17926089465618134,
   -0.3447902798652649,
   0.3123073875904083,
   0.17308878898620605,
   -0.5789359211921692,
   -0.08227355778217316,
   -0.1508754938840866,
   -0.2065512239933014,
   -0.26680704951286316,
   -0.10857447981834412,
   0.1815626472234726,
   0.11504188925027847,
   0.37738704681396484,
   

# Load Evaluation Library and Perform Evaluation

In [8]:
# NOTE(review): star imports hide where names come from. get_label and
# perform_classification, used in the cells below, are provided by these
# modules — consider importing them explicitly once their defining modules
# are confirmed.
from semb.evaluations.classification import *
from semb.evaluations.clustering import *
from semb.evaluations.utils import *

## Perform Classification

In [9]:
# Location of the label file for the Brazil airports dataset (relative to the
# directory the notebook is run from)
label_path = "./sample-data/labels/airport_Brazil_label.txt"

# get_label(fn) reads the node labels stored in the file fn
dict_labels = get_label(label_path)

Read in 131 node labels.
>>> Label 0 appears 32 times
>>> Label 1 appears 32 times
>>> Label 3 appears 35 times
>>> Label 2 appears 32 times


In [10]:
# Evaluate the struc2vec embeddings against the node labels. The result has an
# 'overall' entry (mean/std of each metric) and a 'detailed' entry with the
# metrics of each individual run.
classification_result = perform_classification(dict_struc2vec_emb, dict_labels)
classification_result

{'overall': {'accuracy': {'mean': 0.771, 'std': 0.0545},
  'f1_macro': {'mean': 0.7633, 'std': 0.0566},
  'f1_micro': {'mean': 0.771, 'std': 0.0545},
  'auc_micro': {'mean': 0.9183, 'std': 0.0278},
  'auc_macro': {'mean': 0.9211, 'std': 0.0277}},
 'detailed': {0: {'accuracy': 0.7778,
   'f1_macro': 0.7515,
   'f1_micro': 0.7778,
   'auc_micro': 0.9223,
   'auc_macro': 0.9218},
  1: {'accuracy': 0.6923,
   'f1_macro': 0.7035,
   'f1_micro': 0.6923,
   'auc_micro': 0.8664,
   'auc_macro': 0.8688},
  2: {'accuracy': 0.7308,
   'f1_macro': 0.7045,
   'f1_micro': 0.7308,
   'auc_micro': 0.9413,
   'auc_macro': 0.9383},
  3: {'accuracy': 0.8462,
   'f1_macro': 0.8421,
   'f1_micro': 0.8462,
   'auc_micro': 0.9433,
   'auc_macro': 0.9487},
  4: {'accuracy': 0.8077,
   'f1_macro': 0.8148,
   'f1_micro': 0.8077,
   'auc_micro': 0.9181,
   'auc_macro': 0.9281}}}