#Packages for Drive Files

In [None]:
# import packages
## for mount drive purpose
import os
from google.colab import drive

#Mount Drive

In [None]:
# mount drive
drive.mount('/content/drive/', force_remount=True)
os.chdir('/content/drive/My Drive/Colab_Notebooks/Graph_ML/semi_dr.shen')

Mounted at /content/drive/


# import ipynb packages

In [None]:
!pip install import-ipynb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting import-ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.4


In [None]:
!ls

 citeseer
 citeseer.edges
 citeseer.node_labels
 cora
 cora.edges
 cora.node_labels
 Graph_Embed_Cluster_Semi_supervise.ipynb
 Graph_Embed_Cluster_Semi_supervise_test_10.ipynb
 Graph_Embed_node2vec.ipynb
'GraphEncoder(Dr.Shen).ipynb'
 pecanpy_test
 PubMed
 PubMed.edges
 PubMed.node_labels
 Results.ipynb
 run.ipynb
 semi_supervised_learning.ipynb
 test_cases.ipynb
 test_LDA_supervise.ipynb
 test_node2vec.ipynb


In [None]:
import import_ipynb
from test_cases import Model, Case

# Test Cases 

## Test Form

In [None]:
# importing the module
import pandas as pd

# Template of the result table: one row per (Laplacian, DiagA, Correlation)
# setting. Accuracy and Time(s) hold the same constant placeholder in every row.
# All flags are real booleans so the three flag columns share a single dtype.
list_of_data = [
        [False, False, False, 0, 0.95],
        [False, False, True, 0, 0.95],
        [False, True, True, 0, 0.95],
        [False, True, False, 0, 0.95],
        [True, False, False, 0, 0.95],
        [True, False, True, 0, 0.95],
        [True, True, False, 0, 0.95],
        [True, True, True, 0, 0.95]
        ]
df = pd.DataFrame(list_of_data,
index=['set_01','set_02','set_03','set_04','set_05','set_06','set_07','set_08'],
columns=['Laplacian','DiagA', 'Correlation', 'Accuracy', 'Time(s)'])

# NOTE: from here on `df` is a pandas Styler (display-only), not a DataFrame.
df = df.style.format({
  'Time(s)': '{:0.2f}',
})

display(df)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Time(s)
set_01,False,False,False,0,0.95
set_02,False,False,True,0,0.95
set_03,False,True,True,0,0.95
set_04,False,True,False,0,0.95
set_05,True,False,False,0,0.95
set_06,True,False,True,0,0.95
set_07,True,True,False,0,0.95
set_08,True,True,True,0,0.95


## Real datasets

In [None]:
import math

class RealDataSet:
  """
    Graph dataset loaded from two comma-separated text files.

    * node file: one ``node_id,label`` pair per line
    * edge file: one ``node_i,node_j`` or ``node_i,node_j,weight`` entry per line

    Node ids in the files are 1-based and are shifted to 0-based indices when
    loaded. Blank lines in either file are ignored.
  """
  def __init__(self, edg_file, node_file):
    self.X = None  # edge list array, one row per edge: [node_i, node_j, weight]
    self.n = None  # number of nodes
    self.Y = None  # (n, 1) integer label column; -1 marks an unknown label
    self.edg_file = edg_file
    self.node_file = node_file

  def get_initial_values(self):
    """
      Read both files and return a deep copy of this object with
      X (edge list), Y (labels) and n (node count) filled in.
      The object the method is called on is left unchanged.
    """
    realSet = copy.deepcopy(self)

    label_dict, map_new_old_keys = self.read_node_file(self.node_file)
    n = self.get_n(label_dict)

    # a non-empty map means the node ids were compacted, so the edge
    # endpoints have to be translated through the same map
    if map_new_old_keys:
      X = self.read_edge_file_with_remap(self.edg_file, n, map_new_old_keys)
    else:
      X = self.read_edge_file(self.edg_file, n)

    realSet.X = X
    realSet.Y = self.get_labels(label_dict, n)
    realSet.n = n
    return realSet

  def read_node_file(self, filename):
    """
      Read the ``node_id,label`` file.

      the node in the node file start with node 1 not node 0, so every id is
      stored as ``id - 1``. If the id on the first line is not 1, the ids are
      treated as external identifiers (for example PMID for pubmed data) and
      are renumbered 0..k-1 in ascending id order.

      Returns (label_dict, map_new_old_keys):
        label_dict       -- {node index: label string}
        map_new_old_keys -- {original id - 1: new node index}; empty when no
                            renumbering was needed
    """
    re_map = False
    label_dict = {}
    map_new_old_keys = {}
    line_count = 0
    # `with` closes the file even if a malformed line raises
    with open(filename, "r") as labels:
      for l in labels:
        l = l.strip()
        if not l:
          continue  # tolerate blank / trailing empty lines
        line_count += 1
        (node_i, label_i) = l.split(",")
        if line_count == 1 and int(node_i) != 1:
          re_map = True
        label_dict[int(node_i)-1] = label_i
    # if node not start with 0, there is an id for the node, for example PMID for pubmed data
    # need to map the pubmed id back to a serie of node IDs starting from 0
    if re_map:
      old_keys = sorted(label_dict)
      map_new_old_keys = {old: new for new, old in enumerate(old_keys)}
      label_dict = {new: label_dict[old] for new, old in enumerate(old_keys)}

    return label_dict, map_new_old_keys

  def get_n(self, label_dict):
    """
      get the number of nodes: n
      the keys start with 0, so n is max + 1.
    """
    return max(label_dict) + 1

  def _read_edges(self, filename, to_index):
    """
      Parse the edge file into an array of [node_i, node_j, weight] rows.

      ``to_index`` converts a 1-based node id read from the file into the
      node index stored in the result. Edges without a weight column get
      weight 1.
    """
    edg_list = []
    with open(filename, "r") as edges:
      for l in edges:
        l = l.strip()
        if not l:
          continue  # tolerate blank / trailing empty lines
        elements = l.split(",")
        if len(elements) > 2:
          (node_i, node_j, w) = elements
          weight = float(w)
        else:
          (node_i, node_j) = elements
          weight = 1
        edg_list.append([to_index(int(node_i)), to_index(int(node_j)), weight])
    return np.array(edg_list)

  def read_edge_file(self, filename, n):
    """
      Read the edge file into an edge-list array.
      NOTE: the node in the node file start with node 1 not node 0
      ``n`` is accepted for interface compatibility and is not used.
    """
    return self._read_edges(filename, lambda node_id: node_id - 1)

  def read_edge_file_with_remap(self, filename, n, map_new_old_keys):
    """
      for the ids that are remaped from the node file,
      need to remap id for edge list as well.
      Raises KeyError if an edge references an id missing from the node file.
      ``n`` is accepted for interface compatibility and is not used.
    """
    return self._read_edges(filename, lambda node_id: map_new_old_keys[node_id - 1])

  def check_label(self, label_dict, n):
    """
      the input label_dict start with key 0

      Returns (check, unlabeld_node_idx): ``check`` is False when at least one
      index in range(n) has no label; the list holds those indices.
    """
    # membership is tested on the dict itself: O(1) per node instead of a list scan
    unlabeld_node_idx = [node_idx for node_idx in range(n) if node_idx not in label_dict]
    check = len(unlabeld_node_idx) == 0
    if not check:
      print("There are node(s) not labeled")
    return check, unlabeld_node_idx

  def get_labels(self, label_dict, n):
    """
      Build the (n, 1) integer label column. Nodes without a label get -1.
    """
    check, unlabeld_node_idx = self.check_label(label_dict, n)
    Y = np.zeros((n,1), dtype=int)
    for node_idx in sorted(label_dict):
      Y[node_idx][0] = int(label_dict[node_idx])
    if not check:
      for idx in unlabeld_node_idx:
        Y[idx][0] = -1

    return Y

  def split_sets(self, test_ratio):
    """
      Return a deep copy of this dataset with ``floor(n * test_ratio)`` randomly
      chosen labels hidden (set to -1) for testing.

      The copy gains: Y (labels with the test rows masked), Y_ori (labels
      before masking), Y_test (true labels of the test rows), test_idx and
      train_idx (row indices of each split).

      NOTE: this resets NumPy's global random seed to 0, so the split is the
      same on every call.
    """
    DataSet = copy.deepcopy(self)
    Y_ori = DataSet.Y
    Y = np.copy(Y_ori)

    Y_1st_dim = Y.shape[0]

    np.random.seed(0)
    indices = np.random.permutation(Y_1st_dim)  #randomly permute the 1st indices

    # Generate indices for splits
    test_ind_split_point = math.floor(Y_1st_dim*test_ratio)
    test_idx, train_idx = indices[:test_ind_split_point], indices[test_ind_split_point:]

    # get the Y_test label (fancy indexing copies, so masking below does not affect it)
    Y_test = Y[test_idx]
    # mark the test position as unknown: -1
    Y[test_idx, 0] = -1

    DataSet.Y = Y
    DataSet.Y_ori = Y_ori
    DataSet.Y_test = Y_test
    DataSet.test_idx = test_idx
    DataSet.train_idx = train_idx
    return DataSet

def edge_list_to_adjacency_matrix(edg_list, n):
  """
    Expand an edge list into a dense n x n adjacency matrix.

    Every row of ``edg_list`` is (row index, column index, weight). Only the
    entry [row, column] is written, so the reverse direction must be listed
    separately if it is wanted. When an edge is listed more than once the
    weight of its last occurrence is kept.
  """
  adjacency = np.zeros((n,n))
  for src, dst, weight in edg_list:
    adjacency[int(src), int(dst)] = weight
  return adjacency



### Citeseer

In [None]:
edg_file = "citeseer/citeseer.edges"
node_file = "citeseer/citeseer.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
citeseer = RlDataSet.get_initial_values()
test_case = citeseer.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 8.690e+02 1.000e+00]
 [1.000e+00 5.970e+02 1.000e+00]
 [1.000e+00 2.206e+03 1.000e+00]
 ...
 [3.196e+03 3.197e+03 1.000e+00]
 [3.227e+03 3.228e+03 1.000e+00]
 [3.242e+03 3.243e+03 1.000e+00]]


In [None]:
print(test_case.Y)

[[ 2]
 [-1]
 [ 5]
 ...
 [ 3]
 [ 4]
 [ 4]]


In [None]:
print(len(test_case.Y))

3264


In [None]:
test_case.n

3264

In [None]:
Run(test_case, "su", Learner = 0)

acc:  0.6779140830039978
--- embed 0.04220867156982422 seconds ---
--- train 15.417465686798096 seconds ---
--- total 15.463039875030518 seconds ---


(0.6779140830039978,
 15.417465686798096,
 0.04220867156982422,
 15.463039875030518)

In [None]:
Run(test_case, "su", Learner = 1)

acc:  0.6656441717791411
--- embed 0.03812909126281738 seconds ---
--- train 0.00874018669128418 seconds ---
--- total 0.052065372467041016 seconds ---


(0.6656441717791411,
 0.00874018669128418,
 0.03812909126281738,
 0.052065372467041016)

In [None]:
# Convert on a copy: test_case.X stays an edge list, so this cell can be re-run
# and the cells above still see the data they were written for.
node2vec_case = copy.deepcopy(test_case)
node2vec_case.X = edge_list_to_adjacency_matrix(test_case.X, test_case.n)
Run(node2vec_case, "su", Learner = 0, emb_opt = "Node2Vec")

### Cora

In [None]:
edg_file = "cora/cora.edges"
node_file = "cora/cora.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
cora = RlDataSet.get_initial_values()
test_case = cora.split_sets(0.2)

In [None]:
print(test_case.X)

[[0.000e+00 8.000e+00 1.000e+00]
 [0.000e+00 4.350e+02 1.000e+00]
 [0.000e+00 5.440e+02 1.000e+00]
 ...
 [2.707e+03 7.740e+02 1.000e+00]
 [2.707e+03 1.389e+03 1.000e+00]
 [2.707e+03 2.344e+03 1.000e+00]]


In [None]:
print(test_case.Y)

[[3]
 [6]
 [5]
 ...
 [2]
 [1]
 [3]]


In [None]:
test_case.n

2708

In [None]:
Run(test_case, "su", Learner = 0)

acc:  0.8207023739814758
--- embed 0.043190717697143555 seconds ---
--- train 18.946336269378662 seconds ---
--- total 18.99651789665222 seconds ---


(0.8207023739814758,
 18.946336269378662,
 0.043190717697143555,
 18.99651789665222)

In [None]:
Run(test_case, "su", Learner = 1)

acc:  0.7929759704251387
--- embed 0.04282498359680176 seconds ---
--- train 0.04377627372741699 seconds ---
--- total 0.09116196632385254 seconds ---


(0.7929759704251387,
 0.04377627372741699,
 0.04282498359680176,
 0.09116196632385254)

### PubMed

In [None]:
edg_file = "PubMed/PubMed.edges"
node_file = "PubMed/PubMed.node_labels"

In [None]:
RlDataSet = RealDataSet(edg_file, node_file)
PbMed = RlDataSet.get_initial_values()
test_case = PbMed.split_sets(0.2)

In [None]:
print(test_case.X)

[[8964 2235    1]
 [8964 5975    1]
 [8964 1603    1]
 ...
 [8953  749    1]
 [8953 2175    1]
 [8953 5033    1]]


In [None]:
print(test_case.Y)

[[ 1]
 [ 1]
 [ 1]
 ...
 [ 2]
 [-1]
 [ 3]]


In [None]:
test_case.n

19717

In [None]:
Run(test_case, "su", Learner = 0)

acc:  0.7707329392433167
--- embed 0.26154255867004395 seconds ---
--- train 82.69898128509521 seconds ---
--- total 82.96822571754456 seconds ---


(0.7707329392433167, 82.69898128509521, 0.26154255867004395, 82.96822571754456)

In [None]:
Run(test_case, "su", Learner = 1)

acc:  0.7674359624651281
--- embed 0.2641258239746094 seconds ---
--- train 0.015927791595458984 seconds ---
--- total 0.2916078567504883 seconds ---


(0.7674359624651281,
 0.015927791595458984,
 0.2641258239746094,
 0.2916078567504883)

## Graph Encoder test case


In [None]:
class Encoder_case:
  """
    Minimal container for a hand-written graph test case.

    A -- graph data, stored as attribute X
    Y -- labels
    n -- number of nodes
  """
  def __init__(self, A,Y,n):
    # store on the instance (not on the class) so that separate cases
    # do not overwrite each other's data
    self.X = A
    self.Y = Y
    self.n = n

### Case 1

$$
A =
\begin{bmatrix}
0 & 1 & 1 & 1 & 0\\
1 & 0 & 1 & 1 & 1\\
1 & 1 & 0 & 1 & 1\\
1 & 1 & 1 & 0 & 1\\
0 & 1 & 1 & 1 & 0
\end{bmatrix}
$$

Labels = [0,0,0,1,1] 


In [None]:
# Build the 5 x 5 adjacency matrix of Case 1: start from all ones, then
# remove the edge between node 0 and node 4 (both directions) ...
A = np.ones((5,5))
A[0,4] = 0
A[4,0] = 0
# ... and clear the diagonal (no self loops).
np.fill_diagonal(A, 0)

# Labels as a (5, 1) column: nodes 0-2 are class 0, nodes 3-4 are class 1.
Y = np.array([[0,0,0,1,1]]).reshape((5,1))

print(A)
print(Y)

# NOTE(review): this rebinds the name `Encoder_case` from the class to an
# instance, so the class is no longer reachable under that name afterwards.
Encoder_case = Encoder_case(A,Y,5)

[[0. 1. 1. 1. 0.]
 [1. 0. 1. 1. 1.]
 [1. 1. 0. 1. 1.]
 [1. 1. 1. 0. 1.]
 [0. 1. 1. 1. 0.]]
[[0]
 [0]
 [0]
 [1]
 [1]]


#### Laplacian = False, Correlation = False, DiagA = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = False, DiagA = False)
print(Dataset.X)
print(Dataset.Y)
print(Dataset.n)

[array([[0., 1., 1.],
       [0., 2., 1.],
       [0., 3., 1.],
       [1., 0., 1.],
       [1., 2., 1.],
       [1., 3., 1.],
       [1., 4., 1.],
       [2., 0., 1.],
       [2., 1., 1.],
       [2., 3., 1.],
       [2., 4., 1.],
       [3., 0., 1.],
       [3., 1., 1.],
       [3., 2., 1.],
       [3., 4., 1.],
       [4., 1., 1.],
       [4., 2., 1.],
       [4., 3., 1.]])]
[[0]
 [0]
 [0]
 [1]
 [1]]
5


In [None]:
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = False)
print(Z)
print(W)

[[1.33333333 1.        ]
 [1.33333333 2.        ]
 [1.33333333 2.        ]
 [2.         1.        ]
 [1.33333333 1.        ]]
[[0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### Laplacian = False, Correlation = True, DiagA = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = False, DiagA = False)
print(Dataset.X)
print(Dataset.Y)
print(Dataset.n)

[array([[0., 1., 1.],
       [0., 2., 1.],
       [0., 3., 1.],
       [1., 0., 1.],
       [1., 2., 1.],
       [1., 3., 1.],
       [1., 4., 1.],
       [2., 0., 1.],
       [2., 1., 1.],
       [2., 3., 1.],
       [2., 4., 1.],
       [3., 0., 1.],
       [3., 1., 1.],
       [3., 2., 1.],
       [3., 4., 1.],
       [4., 1., 1.],
       [4., 2., 1.],
       [4., 3., 1.]])]
[[0]
 [0]
 [0]
 [1]
 [1]]
5


In [None]:
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = True)
print(Z)
print(W)

[[0.8        0.6       ]
 [0.5547002  0.83205029]
 [0.5547002  0.83205029]
 [0.89442719 0.4472136 ]
 [0.8        0.6       ]]
[[0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### Laplacian = True, Correlation = False, DiagA = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = True, DiagA = False)
print(Dataset.X)
print(Dataset.Y)
print(Dataset.n)

[array([[0.        , 1.        , 0.14433757],
       [0.        , 2.        , 0.14433757],
       [0.        , 3.        , 0.14433757],
       [1.        , 0.        , 0.14433757],
       [1.        , 2.        , 0.125     ],
       [1.        , 3.        , 0.125     ],
       [1.        , 4.        , 0.14433757],
       [2.        , 0.        , 0.14433757],
       [2.        , 1.        , 0.125     ],
       [2.        , 3.        , 0.125     ],
       [2.        , 4.        , 0.14433757],
       [3.        , 0.        , 0.14433757],
       [3.        , 1.        , 0.125     ],
       [3.        , 2.        , 0.125     ],
       [3.        , 4.        , 0.14433757],
       [4.        , 1.        , 0.14433757],
       [4.        , 2.        , 0.14433757],
       [4.        , 3.        , 0.14433757]])]
[[0]
 [0]
 [0]
 [1]
 [1]]
5


In [None]:
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = False)
print(Z)
print(W)

[[0.19245009 0.14433757]
 [0.17955838 0.26933757]
 [0.17955838 0.26933757]
 [0.26289171 0.14433757]
 [0.19245009 0.14433757]]
[[0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


#### Laplacian = True, Correlation = True, DiagA = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = True, DiagA = False)
print(Dataset.X)
print(Dataset.Y)
print(Dataset.n)

[array([[0.        , 1.        , 0.14433757],
       [0.        , 2.        , 0.14433757],
       [0.        , 3.        , 0.14433757],
       [1.        , 0.        , 0.14433757],
       [1.        , 2.        , 0.125     ],
       [1.        , 3.        , 0.125     ],
       [1.        , 4.        , 0.14433757],
       [2.        , 0.        , 0.14433757],
       [2.        , 1.        , 0.125     ],
       [2.        , 3.        , 0.125     ],
       [2.        , 4.        , 0.14433757],
       [3.        , 0.        , 0.14433757],
       [3.        , 1.        , 0.125     ],
       [3.        , 2.        , 0.125     ],
       [3.        , 4.        , 0.14433757],
       [4.        , 1.        , 0.14433757],
       [4.        , 2.        , 0.14433757],
       [4.        , 3.        , 0.14433757]])]
[[0]
 [0]
 [0]
 [1]
 [1]]
5


In [None]:
# This section covers Laplacian = True with Correlation = True; the call
# previously passed Correlation = False and so repeated the preceding section.
# NOTE: re-run this cell -- the output saved with the notebook was produced
# with Correlation = False.
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = True)
print(Z)
print(W)

[[0.19245009 0.14433757]
 [0.17955838 0.26933757]
 [0.17955838 0.26933757]
 [0.26289171 0.14433757]
 [0.19245009 0.14433757]]
[[0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.33333333 0.        ]
 [0.         0.5       ]
 [0.         0.5       ]]


### test encoder_1

In [None]:
A = np.array([
 [0, 0, 1, 0, 0, 0, 0, 0],
 [0, 0, 0, 1, 0, 0, 0, 0],
 [1, 0, 0, 1, 0, 0, 0, 0],
 [0, 1, 1, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0]])
print(A)

[[0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [1 0 0 1 0 0 0 0]
 [0 1 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]


In [None]:
Y = np.array([[1,1,0,1,2,1,1,1]]).reshape((8,1))
print(Y)

[[1]
 [1]
 [0]
 [1]
 [2]
 [1]
 [1]
 [1]]


In [None]:
# The Case 1 cell rebinds `Encoder_case` to an instance, so calling it directly
# raises TypeError on a top-to-bottom run. Recover the class first; this also
# works when the class-definition cell has just been re-run.
encoder_case_cls = Encoder_case if isinstance(Encoder_case, type) else type(Encoder_case)
Encoder_case = encoder_case_cls(A,Y,8)

#### DiagA = False, Correlation = False, Laplacian = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = False, DiagA = False)
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = False)
print(Z)
print(W)

[[2.         0.         0.        ]
 [0.         0.33333333 0.        ]
 [0.         0.66666667 0.        ]
 [2.         0.33333333 0.        ]
 [0.         0.         0.        ]
 [0.         0.         0.        ]
 [0.         0.         0.        ]
 [0.         0.         0.        ]]
[[0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [1.         0.         0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.         1.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]]


#### DiagA = True, Correlation = False, Laplacian = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = False, DiagA = True)
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = False)
print(Z)
print(W)

[[2.         0.16666667 0.        ]
 [0.         0.5        0.        ]
 [1.         0.66666667 0.        ]
 [2.         0.5        0.        ]
 [0.         0.         1.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]]
[[0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [1.         0.         0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.         1.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]]


#### DiagA = True, Correlation = True, Laplacian = False

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = False, DiagA = True)
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = True)
print(Z)
print(W)

[[0.99654576 0.08304548 0.        ]
 [0.         1.         0.        ]
 [0.83205029 0.5547002  0.        ]
 [0.9701425  0.24253563 0.        ]
 [0.         0.         1.        ]
 [0.         1.         0.        ]
 [0.         1.         0.        ]
 [0.         1.         0.        ]]
[[0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [1.         0.         0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.         1.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]]


#### DiagA = True, Correlation = True, Laplacian = True

In [None]:
Dataset = DataPreprocess(Encoder_case, Laplacian = True, DiagA = True)
Z, W = graph_encoder_embed(Dataset.X[0], Dataset.Y, Dataset.n, Correlation = True)
print(Z)
print(W)

[[0.99426272 0.10696564 0.        ]
 [0.         1.         0.        ]
 [0.79475691 0.60692789 0.        ]
 [0.95822122 0.28602815 0.        ]
 [0.         0.         1.        ]
 [0.         1.         0.        ]
 [0.         1.         0.        ]
 [0.         1.         0.        ]]
[[0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [1.         0.         0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.         1.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]
 [0.         0.16666667 0.        ]]


## Supervised Learning, Clustering, Semi-supervised learning 

In [None]:
n = 3000
case = Case(n)

In [None]:
# get all combinations of different emb settings

sets_no = 8
L_set = [True, False]
Diag_set = [True, False]
Corre_set = [True, False]
comb = [L_set, Diag_set, Corre_set]

# Cartesian product of the three flags, each entry ordered
# [Laplacian, DiagA, Correlation] with the last flag varying fastest.
comb_set = [
  [laplacian, diag_a, correlation]
  for laplacian in comb[0]
  for diag_a in comb[1]
  for correlation in comb[2]
]

print(comb_set)
print(len(comb_set))

[[True, True, True], [True, True, False], [True, False, True], [True, False, False], [False, True, True], [False, True, False], [False, False, True], [False, False, False]]
8


In [None]:
def average_restuls(case_num, comb_set, learner_no, n_runs = 10):
  """
    Average the metrics returned by ``Run`` over repeated runs, once for every
    embedding setting.

    case_num   -- test case; a fresh deep copy is passed to each run, so the
                  original object is never handed to ``Run``
    comb_set   -- iterable of (Laplacian, DiagA, Correlation) settings
    learner_no -- forwarded to ``Run`` as ``Learner``
    n_runs     -- number of repetitions averaged per setting (default 10)

    Returns one row per setting:
    [Laplacian, DiagA, Correlation, accuracy, train time, embed time, total time],
    where the last four values are means over ``n_runs`` runs.

    Raises ValueError if ``n_runs`` is smaller than 1.
  """
  if n_runs < 1:
    raise ValueError("n_runs must be at least 1")

  results = []
  for comb in comb_set:
    acc_final, train_time_final, emb_time_final, total_time_final = 0,0,0,0
    for _ in range(n_runs):
      test_case = copy.deepcopy(case_num)
      acc, train_time, emb_time, total_time = Run(test_case, "su", Learner = learner_no, Laplacian = comb[0], DiagA = comb[1], Correlation = comb[2])
      acc_final += acc
      train_time_final += train_time
      emb_time_final += emb_time
      total_time_final += total_time

    acc_final /= n_runs
    train_time_final /= n_runs
    emb_time_final /= n_runs
    total_time_final /= n_runs

    # list(...) lets a setting be given as a tuple as well as a list
    result = list(comb) + [acc_final, train_time_final, emb_time_final, total_time_final]
    results.append(result)

  return results

def plot(results):
  """
    Display the averaged results as a formatted table.

    results -- list of rows
               [Laplacian, DiagA, Correlation, Accuracy, Train_Time(s), Emb_Time(s), Total_Time(s)]

    Rows are labelled set_01, set_02, ... in input order; the number of labels
    follows the number of rows, so any number of settings can be shown.
  """
  set_labels = ['set_{:02d}'.format(i) for i in range(1, len(results) + 1)]
  summary = pd.DataFrame(results,
  index=set_labels,
  columns=['Laplacian','DiagA', 'Correlation', 'Accuracy', 'Train_Time(s)', 'Emb_Time(s)', 'Total_Time(s)'])

  # formatting only changes how the timing columns are rendered
  styled = summary.style.format({
    'Emb_Time(s)': '{:0.2f}',
    'Train_Time(s)': '{:0.5f}',
    'Total_Time(s)': '{:0.2f}'
  })

  display(styled)

### Supervised

#### GNN

##### case 10

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


In [None]:
print(case_10.bd)

0.13


In [None]:
acc, train_time, emb_time, total_time = Run(case_10, "su", Learner = 0, Laplacian = False, DiagA = False, Correlation = False)

acc:  0.95333331823349
--- embed 3.70628023147583 seconds ---
--- train 21.000762462615967 seconds ---
--- total 29.746941089630127 seconds ---


In [None]:
print(train_time)
print(emb_time)
print(total_time)

21.000762462615967
3.70628023147583
29.746941089630127


In [None]:
acc, train_time, emb_time, total_time  = Run(case_10, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = False)

acc:  0.9583333134651184
--- embed 3.509427785873413 seconds ---
--- train 12.733009815216064 seconds ---
--- total 21.42648410797119 seconds ---


In [None]:
acc, train_time, emb_time, total_time = Run(case_10, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = True)

acc:  0.9516666531562805
--- embed 3.5233070850372314 seconds ---
--- train 11.92277455329895 seconds ---
--- total 23.51697850227356 seconds ---


In [None]:
acc, train_time, emb_time, total_time = Run(case_10, "su", Learner = 0, Laplacian = True, DiagA = True, Correlation = True)

acc:  0.95333331823349
--- embed 3.6451563835144043 seconds ---
--- train 21.002015590667725 seconds ---
--- total 40.55085849761963 seconds ---


In [None]:
results = average_restuls(case_10, comb_set, 0)

acc:  0.95333331823349
--- embed 3.419264078140259 seconds ---
--- train 21.11810326576233 seconds ---
--- total 40.59562110900879 seconds ---
acc:  0.95333331823349
--- embed 3.4034903049468994 seconds ---
--- train 12.623661756515503 seconds ---
--- total 31.16509985923767 seconds ---
acc:  0.9549999833106995
--- embed 3.418227434158325 seconds ---
--- train 12.319038391113281 seconds ---
--- total 30.31583523750305 seconds ---
acc:  0.95333331823349
--- embed 3.3595666885375977 seconds ---
--- train 12.580832242965698 seconds ---
--- total 31.665188550949097 seconds ---
acc:  0.949999988079071
--- embed 3.4427661895751953 seconds ---
--- train 12.615651845932007 seconds ---
--- total 30.693434953689575 seconds ---
acc:  0.9516666531562805
--- embed 3.4687247276306152 seconds ---
--- train 12.653229475021362 seconds ---
--- total 31.406693696975708 seconds ---
acc:  0.9516666531562805
--- embed 3.4163429737091064 seconds ---
--- train 21.00416111946106 seconds ---
--- total 39.331142

In [None]:
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.952167,16.79191,3.41,35.29
set_02,True,True,False,0.48,14.96333,3.42,33.36
set_03,True,False,True,0.951833,16.69279,3.46,36.04
set_04,True,False,False,0.48,15.7976,3.44,35.08
set_05,False,True,True,0.953,15.7844,3.32,24.09
set_06,False,True,False,0.954833,15.64148,3.31,23.85
set_07,False,False,True,0.953,12.97473,3.45,21.31
set_08,False,False,False,0.954667,16.57532,3.43,24.88


In [None]:
acc, train_time, emb_time, total_time = Run(case_10, "su", Learner = 0, Laplacian = True, DiagA = True, Correlation = False)

acc:  0.47999998927116394
--- embed 3.3850369453430176 seconds ---
--- train 19.963889122009277 seconds ---
--- total 37.73133563995361 seconds ---
[[0.00030246 0.00040755 0.00028513]
 [0.00039492 0.00035844 0.00031217]
 [0.00030466 0.00030033 0.00037867]
 ...
 [0.00026587 0.00030015 0.00041224]
 [0.00033185 0.00027881 0.00032703]
 [0.00032185 0.00036483 0.00036568]]
[[0.00033925 0.00037717 0.00030801]
 [0.00034452 0.00039691 0.00030621]
 [0.00028007 0.00031384 0.00035417]
 ...
 [0.00035683 0.00024665 0.00028031]
 [0.00031754 0.00026905 0.00038784]
 [0.00034029 0.0003661  0.00028912]]
[1 1 2 1 1 0 2 0 2 2 2 1 0 2 2 1 2 2 2 2 1 1 2 2 1 2 1 1 0 1 1 2 1 1 2 0 1
 2 0 2 1 0 2 2 2 0 1 1 1 0 0 2 1 0 0 2 2 1 2 2 2 0 2 2 2 2 2 2 0 1 1 1 0 0
 2 0 0 1 2 1 2 2 2 0 2 2 0 1 1 1 2 0 2 2 1 1 2 2 2 2 1 2 0 2 0 1 2 2 2 0 2
 2 2 1 0 0 1 1 1 2 1 0 0 2 1 2 0 2 0 0 2 2 0 0 0 0 0 1 2 1 2 2 2 1 2 0 0 0
 2 2 2 1 2 1 1 1 2 0 2 2 0 2 1 2 1 2 2 1 2 2 2 0 0 2 2 0 2 2 2 2 2 0 2 1 2
 0 0 0 2 1 2 1 0 1 2 0 2 1 1 2 1 

In [None]:
acc, train_time, emb_time, total_time = Run(case_10, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = False)

acc:  0.9533333333333334
--- embed 3.450080394744873 seconds ---
--- train 0.035543203353881836 seconds ---
--- total 18.690415382385254 seconds ---
[[0.00030246 0.00040755 0.00028513]
 [0.00039492 0.00035844 0.00031217]
 [0.00030466 0.00030033 0.00037867]
 ...
 [0.00026587 0.00030015 0.00041224]
 [0.00033185 0.00027881 0.00032703]
 [0.00032185 0.00036483 0.00036568]]
[[0.00033925 0.00037717 0.00030801]
 [0.00034452 0.00039691 0.00030621]
 [0.00028007 0.00031384 0.00035417]
 ...
 [0.00035683 0.00024665 0.00028031]
 [0.00031754 0.00026905 0.00038784]
 [0.00034029 0.0003661  0.00028912]]
[1 1 2 1 1 0 2 0 2 2 2 1 0 2 2 1 2 2 2 2 1 1 2 2 1 2 1 1 0 1 1 2 1 1 2 0 1
 2 0 2 1 0 2 2 2 0 1 1 1 0 0 2 1 0 0 2 2 1 2 2 2 0 2 2 2 2 2 2 0 1 1 1 0 0
 2 0 0 1 2 1 2 2 2 0 2 2 0 1 1 1 2 0 2 2 1 1 2 2 2 2 1 2 0 2 0 1 2 2 2 0 2
 2 2 1 0 0 1 1 1 2 1 0 0 2 1 2 0 2 0 0 2 2 0 0 0 0 0 1 2 1 2 2 2 1 2 0 0 0
 2 2 2 1 2 1 1 1 2 0 2 2 0 2 1 2 1 2 2 1 2 2 2 0 0 2 2 0 2 2 2 2 2 0 2 1 2
 0 0 0 2 1 2 1 0 1 2 0 2 1 1 2 1

In [None]:
acc, train_time, emb_time, total_time = Run(case_10, "su", Learner = 0, Laplacian = True, DiagA = True, Correlation = True)

acc:  0.9516666531562805
--- embed 3.454232692718506 seconds ---
--- train 12.05919098854065 seconds ---
--- total 30.275911569595337 seconds ---
[[0.51957154 0.7001005  0.48980068]
 [0.63905609 0.58003098 0.50514491]
 [0.53324723 0.52568135 0.66280201]
 ...
 [0.46232283 0.52192124 0.71683738]
 [0.61117436 0.51350303 0.60231266]
 [0.52882802 0.59944564 0.60083763]]
[[0.57163251 0.63552442 0.51898457]
 [0.56639504 0.65251114 0.50341422]
 [0.50932978 0.57073679 0.64408283]
 ...
 [0.69090353 0.47757942 0.54274323]
 [0.55816818 0.47293868 0.68174577]
 [0.58932617 0.63402448 0.50070713]]
[1 1 2 1 1 0 2 0 2 2 2 1 0 2 2 1 2 2 2 2 1 1 2 2 1 2 1 1 0 1 1 2 1 1 2 0 1
 2 0 2 1 0 2 2 2 0 1 1 1 0 0 2 1 0 0 2 2 1 2 2 2 0 2 2 2 2 2 2 0 1 1 1 0 0
 2 0 0 1 2 1 2 2 2 0 2 2 0 1 1 1 2 0 2 2 1 1 2 2 2 2 1 2 0 2 0 1 2 2 2 0 2
 2 2 1 0 0 1 1 1 2 1 0 0 2 1 2 0 2 0 0 2 2 0 0 0 0 0 1 2 1 2 2 2 1 2 0 0 0
 2 2 2 1 2 1 1 1 2 0 2 2 0 2 1 2 1 2 2 1 2 2 2 0 0 2 2 0 2 2 2 2 2 0 2 1 2
 0 0 0 2 1 2 1 0 1 2 0 2 1 1 2 1 2 

##### case 11

In [None]:
case_11 = case.case_11_fully_known()
case_11.summary()

name:

    SBM with 5 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[2]
 [0]
 [2]
 ...
 [3]
 [3]
 [2]]


In [None]:
print(case_11.bd)

0.2


In [None]:
Run(case_11, "su", Learner = 0, Laplacian = False, DiagA = False, Correlation = False)

acc:  1.0


In [None]:
Run(case_11, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = False)

acc:  1.0


In [None]:
Run(case_11, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = True)

acc:  1.0


In [None]:
Run(case_11, "su", Learner = 0, Laplacian = True, DiagA = True, Correlation = True)

acc:  1.0


In [None]:
results = average_restuls(case_11, comb_set, 0)

acc:  1.0
--- embed 3.597121477127075 seconds ---
--- train 21.016427040100098 seconds ---
--- total 39.95435047149658 seconds ---
acc:  1.0
--- embed 3.617098331451416 seconds ---
--- train 20.986443519592285 seconds ---
--- total 39.91535687446594 seconds ---
acc:  1.0
--- embed 3.751112699508667 seconds ---
--- train 21.053439140319824 seconds ---
--- total 40.036688804626465 seconds ---
acc:  1.0
--- embed 3.536910057067871 seconds ---
--- train 21.01297926902771 seconds ---
--- total 40.42873191833496 seconds ---
acc:  1.0
--- embed 3.733595848083496 seconds ---
--- train 13.186842441558838 seconds ---
--- total 32.31816530227661 seconds ---
acc:  0.9983333349227905
--- embed 3.6090376377105713 seconds ---
--- train 13.274563550949097 seconds ---
--- total 31.815383434295654 seconds ---
acc:  1.0
--- embed 3.5584394931793213 seconds ---
--- train 21.012768030166626 seconds ---
--- total 40.322922468185425 seconds ---
acc:  1.0
--- embed 3.559985876083374 seconds ---
--- train 20.9

In [None]:
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.999833,17.89955,3.61,36.88
set_02,True,True,False,0.195,15.63108,3.56,34.67
set_03,True,False,True,1.0,17.15214,3.71,37.69
set_04,True,False,False,0.195,17.17114,3.7,37.76
set_05,False,True,True,1.0,17.87204,3.55,26.43
set_06,False,True,False,1.0,18.67732,3.6,27.32
set_07,False,False,True,1.0,17.14811,3.74,25.89
set_08,False,False,False,1.0,18.59717,3.71,27.33


##### case 20

In [None]:
case_20 = case.case_20_fully_known()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "su", Learner = 0, Laplacian = False, DiagA = False, Correlation = False)

acc:  0.8799999952316284


In [None]:
Run(case_20, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = False)

acc:  0.8949999809265137


In [None]:
Run(case_20, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = True)

acc:  0.8899999856948853


In [None]:
Run(case_20, "su", Learner = 0, Laplacian = True, DiagA = True, Correlation = True)

acc:  0.8966666460037231


In [None]:
results = average_restuls(case_20, comb_set, 0)

acc:  0.8983333110809326
--- embed 0.2274940013885498 seconds ---
--- train 12.17214846611023 seconds ---
--- total 15.866555452346802 seconds ---
acc:  0.8999999761581421
--- embed 0.22590970993041992 seconds ---
--- train 12.092920541763306 seconds ---
--- total 15.996092319488525 seconds ---
acc:  0.8983333110809326
--- embed 0.23233628273010254 seconds ---
--- train 21.00863552093506 seconds ---
--- total 24.91910743713379 seconds ---
acc:  0.8983333110809326
--- embed 0.22621440887451172 seconds ---
--- train 11.722924947738647 seconds ---
--- total 15.418576955795288 seconds ---
acc:  0.8983333110809326
--- embed 0.22709226608276367 seconds ---
--- train 11.918452501296997 seconds ---
--- total 15.83203911781311 seconds ---
acc:  0.8866666555404663
--- embed 0.2376565933227539 seconds ---
--- train 12.06058382987976 seconds ---
--- total 15.942700147628784 seconds ---
acc:  0.8983333110809326
--- embed 0.23672151565551758 seconds ---
--- train 12.194794178009033 seconds ---
--- t



acc:  0.8949999809265137
--- embed 0.22490143775939941 seconds ---
--- train 11.953272581100464 seconds ---
--- total 15.710629940032959 seconds ---
acc:  0.8966666460037231
--- embed 0.23372340202331543 seconds ---
--- train 20.982940912246704 seconds ---
--- total 24.972780466079712 seconds ---
acc:  0.8949999809265137
--- embed 0.22850799560546875 seconds ---
--- train 21.039648294448853 seconds ---
--- total 25.013603448867798 seconds ---
acc:  0.8916666507720947
--- embed 0.22601842880249023 seconds ---
--- train 21.00651478767395 seconds ---
--- total 24.79054570198059 seconds ---
acc:  0.8949999809265137
--- embed 0.2262735366821289 seconds ---
--- train 12.087382793426514 seconds ---
--- total 16.05409049987793 seconds ---
acc:  0.8949999809265137
--- embed 0.2419745922088623 seconds ---
--- train 12.137262105941772 seconds ---
--- total 16.094321489334106 seconds ---
acc:  0.8983333110809326
--- embed 0.22763991355895996 seconds ---
--- train 20.979283332824707 seconds ---
---

In [None]:
# Show the case_20 / learner-0 sweep as a table: one row per
# Laplacian/DiagA/Correlation set with accuracy and timing columns.
# NOTE(review): in the recorded run the Laplacian=True / Correlation=False sets
# drop to 0.48 accuracy while the rest stay near 0.89 - worth a markdown remark.
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.896333,12.91241,0.23,16.71
set_02,True,True,False,0.48,17.50515,0.23,21.3
set_03,True,False,True,0.894667,15.72584,0.23,19.62
set_04,True,False,False,0.48,15.69895,0.23,19.6
set_05,False,True,True,0.902667,15.67053,0.23,18.83
set_06,False,True,False,0.888167,18.38201,0.23,21.57
set_07,False,False,True,0.904667,16.67513,0.23,19.9
set_08,False,False,False,0.8835,17.52914,0.23,20.71


##### case 21

In [None]:
# Build test case 21 and print its summary (name, n, d, X, Y).
# Per the summary text: DC-SBM, 10 classes, fully known labels, edge-list version
# (X is printed with shape (60974, 3) rather than as a square matrix).
case_21 = case.case_21_fully_known()
case_21.summary()

name:

    DC-SBM with 10 classes and defined probabilities with fully known labels.
    Edge list version. 
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(60974, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2999 2577    1]
 [2999 2877    1]
 [2999 2951    1]]
Y:
(3000, 1)
[[4]
 [0]
 [5]
 ...
 [6]
 [7]
 [5]]


In [None]:
# Inspect the case's `bd` attribute (a single float for this case).
# NOTE(review): `bd` is not explained in this notebook - confirm its meaning in test_cases.
print(case_21.bd)

0.9


In [None]:
# Baseline single run on case_21 with Learner = 0: Laplacian, DiagA and Correlation all off.
Run(case_21, "su", Learner = 0, Laplacian = False, DiagA = False, Correlation = False)

acc:  0.82833331823349


In [None]:
# Single run on case_21 with Learner = 0 and only DiagA switched on.
Run(case_21, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = False)

acc:  0.8299999833106995


In [None]:
# Single run on case_21 with Learner = 0: DiagA and Correlation on, Laplacian off.
Run(case_21, "su", Learner = 0, Laplacian = False, DiagA = True, Correlation = True)

acc:  0.8366666436195374


In [None]:
# Single run on case_21 with Learner = 0 and all three options
# (Laplacian, DiagA, Correlation) switched on.
Run(case_21, "su", Learner = 0, Laplacian = True, DiagA = True, Correlation = True)

acc:  0.8050000071525574


In [None]:
# Sweep the option combinations in `comb_set` on case_21 and keep the collected
# metrics in `results` for the summary table in the next cell.
# The trailing 0 is presumably the Learner index used by the Run cells above - confirm.
# NOTE(review): `average_restuls` is a misspelling of "results"; it is defined
# elsewhere, so any rename has to change the definition and every call together.
results = average_restuls(case_21, comb_set, 0)

acc:  0.8116666674613953
--- embed 0.23595499992370605 seconds ---
--- train 13.567328214645386 seconds ---
--- total 14.51444935798645 seconds ---
acc:  0.8083333373069763
--- embed 0.23884010314941406 seconds ---
--- train 21.0231511592865 seconds ---
--- total 21.92582631111145 seconds ---
acc:  0.8100000023841858
--- embed 0.24170565605163574 seconds ---
--- train 12.741525888442993 seconds ---
--- total 13.641494274139404 seconds ---
acc:  0.8100000023841858
--- embed 0.24010276794433594 seconds ---
--- train 13.241767883300781 seconds ---
--- total 14.156140089035034 seconds ---
acc:  0.8083333373069763
--- embed 0.23739051818847656 seconds ---
--- train 21.071983575820923 seconds ---
--- total 21.96921944618225 seconds ---
acc:  0.8083333373069763
--- embed 0.2401423454284668 seconds ---
--- train 21.014440774917603 seconds ---
--- total 21.928312301635742 seconds ---
acc:  0.8083333373069763
--- embed 0.22916007041931152 seconds ---
--- train 21.044551372528076 seconds ---
--- 



acc:  0.8149999976158142
--- embed 0.25891661643981934 seconds ---
--- train 21.061058282852173 seconds ---
--- total 22.04176950454712 seconds ---
acc:  0.8183333277702332
--- embed 0.2427513599395752 seconds ---
--- train 21.48850393295288 seconds ---
--- total 22.424398183822632 seconds ---
acc:  0.8149999976158142
--- embed 0.24790668487548828 seconds ---
--- train 12.67394733428955 seconds ---
--- total 13.605210781097412 seconds ---
acc:  0.8233333230018616
--- embed 0.23498082160949707 seconds ---
--- train 21.07823348045349 seconds ---
--- total 22.027421951293945 seconds ---
acc:  0.8149999976158142
--- embed 0.23657989501953125 seconds ---
--- train 12.787090063095093 seconds ---
--- total 13.766481161117554 seconds ---
acc:  0.8183333277702332
--- embed 0.23082852363586426 seconds ---
--- train 21.037189960479736 seconds ---
--- total 21.972176551818848 seconds ---
acc:  0.8149999976158142
--- embed 0.2336723804473877 seconds ---
--- train 12.981094360351562 seconds ---
--- 

In [None]:
# Show the case_21 / learner-0 sweep as a table: one row per
# Laplacian/DiagA/Correlation set with accuracy and timing columns.
# NOTE(review): in the recorded run the Laplacian=True / Correlation=False sets
# score 0.618 and 0.476 against roughly 0.81-0.84 elsewhere - worth a markdown remark.
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.809167,17.93527,0.24,18.85
set_02,True,True,False,0.618,16.06782,0.24,16.97
set_03,True,False,True,0.8185,16.16558,0.24,17.12
set_04,True,False,False,0.476167,16.13896,0.24,17.1
set_05,False,True,True,0.833667,17.0183,0.25,17.27
set_06,False,True,False,0.828833,15.21822,0.25,15.47
set_07,False,False,True,0.835333,16.95359,0.25,17.2
set_08,False,False,False,0.8315,17.76558,0.25,18.02


#### LDA

##### case 10

In [None]:
# Build test case 10 and print its summary (name, n, d, X, Y).
# Per the summary text: SBM, 3 classes, fully known labels, 80% train / 20% test.
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


In [None]:
# Baseline single run on case_10 with Learner = 1 (this section's heading is "LDA"):
# Laplacian, DiagA and Correlation all off.
Run(case_10, "su", Learner = 1, Laplacian = False, DiagA = False, Correlation = False)

acc:  0.9583333333333334


In [None]:
# Single run on case_10 with Learner = 1 and only DiagA switched on.
Run(case_10, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = False)

acc:  0.9566666666666667


In [None]:
# Single run on case_10 with Learner = 1: DiagA and Correlation on, Laplacian off.
Run(case_10, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = True)

acc:  0.9533333333333334


In [None]:
# Single run on case_10 with Learner = 1 and all three options switched on.
# The call is the cell's last expression, so its return value is displayed too;
# in the recorded output that is a 4-tuple (accuracy, train, embed, total times).
Run(case_10, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = True)

acc:  0.9533333333333334
--- embed 5.193510055541992 seconds ---
--- train 0.0058062076568603516 seconds ---
--- total 25.464478015899658 seconds ---


(0.9533333333333334,
 0.0058062076568603516,
 5.193510055541992,
 25.464478015899658)

In [None]:
# Sweep the option combinations in `comb_set` on case_10 and keep the collected
# metrics in `results` for the summary table in the next cell.
# The trailing 1 is presumably the Learner index used by the Run cells above - confirm.
# NOTE(review): `average_restuls` is a misspelling of "results"; it is defined
# elsewhere, so any rename has to change the definition and every call together.
results = average_restuls(case_10, comb_set, 1)

acc:  0.9533333333333334
--- embed 3.5747530460357666 seconds ---
--- train 0.005490779876708984 seconds ---
--- total 18.323492288589478 seconds ---
acc:  0.9533333333333334
--- embed 3.564149856567383 seconds ---
--- train 0.0036499500274658203 seconds ---
--- total 17.76686668395996 seconds ---
acc:  0.9533333333333334
--- embed 3.568498134613037 seconds ---
--- train 0.0036590099334716797 seconds ---
--- total 17.678396224975586 seconds ---
acc:  0.9533333333333334
--- embed 3.5280163288116455 seconds ---
--- train 0.0034914016723632812 seconds ---
--- total 18.08701181411743 seconds ---
acc:  0.9533333333333334
--- embed 3.5961172580718994 seconds ---
--- train 0.0034427642822265625 seconds ---
--- total 17.7871732711792 seconds ---
acc:  0.9533333333333334
--- embed 3.5459461212158203 seconds ---
--- train 0.005916595458984375 seconds ---
--- total 17.622737646102905 seconds ---
acc:  0.9533333333333334
--- embed 3.6501457691192627 seconds ---
--- train 0.0032329559326171875 seco

In [None]:
# Show the case_10 / learner-1 sweep as a table: one row per
# Laplacian/DiagA/Correlation set with accuracy and timing columns.
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.953333,0.00392,3.57,17.9
set_02,True,True,False,0.953333,0.00356,3.58,17.87
set_03,True,False,True,0.953333,0.00359,3.66,19.28
set_04,True,False,False,0.953333,0.00368,3.66,19.23
set_05,False,True,True,0.953333,0.00349,3.49,8.09
set_06,False,True,False,0.956667,0.00351,3.47,8.07
set_07,False,False,True,0.953333,0.00369,3.55,8.08
set_08,False,False,False,0.958333,0.00346,3.57,8.12


In [None]:
# All-options run on case_10 with sparce_opt = 'csr' (presumably the scipy sparse
# format used for Z and W - confirm in Run); keep every returned value so the
# next two cells can inspect Z/Z_ori and W/W_ori.
# NOTE(review): this unpacks 8 values, but the earlier bare Run(...) cell shows a
# 4-tuple in its recorded output - Run was presumably redefined between
# executions; verify on a fresh kernel.
# NOTE(review): `sparce_opt` looks like a misspelling of "sparse_opt"; the
# keyword must match Run's signature, which is defined elsewhere.
acc, train_time, emb_time, total_time, Z, W, Z_ori, W_ori = Run(case_10, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = True, sparce_opt = 'csr')

acc:  0.9533333333333334
--- embed 3.5838661193847656 seconds ---
--- train 0.0035517215728759766 seconds ---
--- total 22.104680061340332 seconds ---
[[0.51957154 0.7001005  0.48980068]
 [0.63905609 0.58003098 0.50514491]
 [0.53324723 0.52568135 0.66280201]
 ...
 [0.46232283 0.52192124 0.71683738]
 [0.61117436 0.51350303 0.60231266]
 [0.52882802 0.59944564 0.60083763]]
[[0.57163251 0.63552442 0.51898457]
 [0.56639504 0.65251114 0.50341422]
 [0.50932978 0.57073679 0.64408283]
 ...
 [0.69090353 0.47757942 0.54274323]
 [0.55816818 0.47293868 0.68174577]
 [0.58932617 0.63402448 0.50070713]]
[1 1 2 1 1 0 2 0 2 2 2 1 0 2 2 1 2 2 2 2 1 1 2 2 1 2 1 1 0 1 1 2 1 1 2 0 1
 2 0 2 1 0 2 2 2 0 1 1 1 0 0 2 1 0 0 2 2 1 2 2 2 0 2 2 2 2 2 2 0 1 1 1 0 0
 2 0 0 1 2 1 2 2 2 0 2 2 0 1 1 1 2 0 2 2 1 1 2 2 2 2 1 2 0 2 0 1 2 2 2 0 2
 2 2 1 0 0 1 1 1 2 1 0 0 2 1 2 0 2 0 0 2 2 0 0 0 0 0 1 2 1 2 2 2 1 2 0 0 0
 2 2 2 1 2 1 1 1 2 0 2 2 0 2 1 2 1 2 2 1 2 2 2 0 0 2 2 0 2 2 2 2 2 0 2 1 2
 0 0 0 2 1 2 1 0 1 2 0 2 1 1 2

In [None]:
# Print, one per line: the shape of Z, then Z_ori (a dense array in the recorded
# output), then Z itself (shown in sparse coordinate form) for the 'csr' run.
print(Z.shape, Z_ori, Z, sep="\n")

(3000, 3)
[[0.51957154 0.7001005  0.48980068]
 [0.63905609 0.58003098 0.50514491]
 [0.53324723 0.52568135 0.66280201]
 ...
 [0.46232283 0.52192124 0.71683738]
 [0.61117436 0.51350303 0.60231266]
 [0.52882802 0.59944564 0.60083763]]
  (0, 0)	0.5195715389378506
  (1, 0)	0.6390560884259104
  (2, 0)	0.5332472292758246
  (3, 0)	0.49725584103460374
  (4, 0)	0.4360105076065667
  (5, 0)	0.517217796861308
  (6, 0)	0.6374676270270658
  (7, 0)	0.45386221727566506
  (8, 0)	0.5916039088151546
  (9, 0)	0.5007366256669146
  (10, 0)	0.4351393827205337
  (11, 0)	0.5508805053320266
  (12, 0)	0.6655977323371083
  (13, 0)	0.4885214728176826
  (14, 0)	0.6748981332651581
  (15, 0)	0.5012390185849971
  (16, 0)	0.5175779379036337
  (17, 0)	0.5431564234339152
  (18, 0)	0.6601132250293184
  (19, 0)	0.7028375919709977
  (20, 0)	0.5196157471330455
  (21, 0)	0.5576492834988768
  (22, 0)	0.6133977145325848
  (23, 0)	0.6723062553102196
  (24, 0)	0.6193111528990982
  :	:
  (2975, 2)	0.49261966697922904
  (2976, 2)	0.

In [None]:
# Print, one per line: the shape of W[0], then W_ori (a one-element list holding
# a dense array in the recorded output), then W[0] itself (sparse form) for the
# 'csr' run.
print(W[0].shape, W_ori, W[0], sep="\n")

(3000, 3)
[array([[0.        , 0.00134409, 0.        ],
       [0.00206186, 0.        , 0.        ],
       [0.        , 0.        , 0.00085397],
       ...,
       [0.        , 0.        , 0.00085397],
       [0.        , 0.        , 0.00085397],
       [0.        , 0.        , 0.00085397]])]
  (1, 0)	0.002061855670103093
  (12, 0)	0.002061855670103093
  (19, 0)	0.002061855670103093
  (21, 0)	0.002061855670103093
  (24, 0)	0.002061855670103093
  (27, 0)	0.002061855670103093
  (38, 0)	0.002061855670103093
  (51, 0)	0.002061855670103093
  (55, 0)	0.002061855670103093
  (57, 0)	0.002061855670103093
  (63, 0)	0.002061855670103093
  (67, 0)	0.002061855670103093
  (77, 0)	0.002061855670103093
  (81, 0)	0.002061855670103093
  (89, 0)	0.002061855670103093
  (91, 0)	0.002061855670103093
  (101, 0)	0.002061855670103093
  (123, 0)	0.002061855670103093
  (126, 0)	0.002061855670103093
  (129, 0)	0.002061855670103093
  (130, 0)	0.002061855670103093
  (147, 0)	0.002061855670103093
  (150, 0)	0.00206

In [None]:
# Same all-options run on case_10, this time with sparce_opt = 'csc'; the
# returned values overwrite those of the previous run and feed the next two cells.
# NOTE(review): `sparce_opt` looks like a misspelling of "sparse_opt"; the
# keyword must match Run's signature, which is defined elsewhere.
acc, train_time, emb_time, total_time, Z, W, Z_ori, W_ori = Run(case_10, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = True, sparce_opt = 'csc')

acc:  0.9533333333333334
--- embed 4.638524532318115 seconds ---
--- train 0.006206989288330078 seconds ---
--- total 23.42071294784546 seconds ---
[[0.51957154 0.7001005  0.48980068]
 [0.63905609 0.58003098 0.50514491]
 [0.53324723 0.52568135 0.66280201]
 ...
 [0.46232283 0.52192124 0.71683738]
 [0.61117436 0.51350303 0.60231266]
 [0.52882802 0.59944564 0.60083763]]
[[0.57163251 0.63552442 0.51898457]
 [0.56639504 0.65251114 0.50341422]
 [0.50932978 0.57073679 0.64408283]
 ...
 [0.69090353 0.47757942 0.54274323]
 [0.55816818 0.47293868 0.68174577]
 [0.58932617 0.63402448 0.50070713]]
[1 1 2 1 1 0 2 0 2 2 2 1 0 2 2 1 2 2 2 2 1 1 2 2 1 2 1 1 0 1 1 2 1 1 2 0 1
 2 0 2 1 0 2 2 2 0 1 1 1 0 0 2 1 0 0 2 2 1 2 2 2 0 2 2 2 2 2 2 0 1 1 1 0 0
 2 0 0 1 2 1 2 2 2 0 2 2 0 1 1 1 2 0 2 2 1 1 2 2 2 2 1 2 0 2 0 1 2 2 2 0 2
 2 2 1 0 0 1 1 1 2 1 0 0 2 1 2 0 2 0 0 2 2 0 0 0 0 0 1 2 1 2 2 2 1 2 0 0 0
 2 2 2 1 2 1 1 1 2 0 2 2 0 2 1 2 1 2 2 1 2 2 2 0 0 2 2 0 2 2 2 2 2 0 2 1 2
 0 0 0 2 1 2 1 0 1 2 0 2 1 1 2 1 

In [None]:
# Print, one per line: the shape of Z, then Z_ori (a dense array in the recorded
# output), then Z itself (shown in sparse coordinate form) for the 'csc' run.
print(Z.shape, Z_ori, Z, sep="\n")

(3000, 3)
[[0.51957154 0.7001005  0.48980068]
 [0.63905609 0.58003098 0.50514491]
 [0.53324723 0.52568135 0.66280201]
 ...
 [0.46232283 0.52192124 0.71683738]
 [0.61117436 0.51350303 0.60231266]
 [0.52882802 0.59944564 0.60083763]]
  (0, 0)	0.5195715389378506
  (1, 0)	0.6390560884259104
  (2, 0)	0.5332472292758246
  (3, 0)	0.49725584103460374
  (4, 0)	0.4360105076065667
  (5, 0)	0.517217796861308
  (6, 0)	0.6374676270270658
  (7, 0)	0.45386221727566506
  (8, 0)	0.5916039088151546
  (9, 0)	0.5007366256669146
  (10, 0)	0.4351393827205337
  (11, 0)	0.5508805053320266
  (12, 0)	0.6655977323371083
  (13, 0)	0.4885214728176826
  (14, 0)	0.6748981332651581
  (15, 0)	0.5012390185849971
  (16, 0)	0.5175779379036337
  (17, 0)	0.5431564234339152
  (18, 0)	0.6601132250293184
  (19, 0)	0.7028375919709977
  (20, 0)	0.5196157471330455
  (21, 0)	0.5576492834988768
  (22, 0)	0.6133977145325848
  (23, 0)	0.6723062553102196
  (24, 0)	0.6193111528990982
  :	:
  (2975, 2)	0.49261966697922904
  (2976, 2)	0.

In [None]:
# Print, one per line: the shape of W[0], then W_ori (a one-element list holding
# a dense array in the recorded output), then W[0] itself (sparse form) for the
# 'csc' run.
print(W[0].shape, W_ori, W[0], sep="\n")

(3000, 3)
[array([[0.        , 0.00134409, 0.        ],
       [0.00206186, 0.        , 0.        ],
       [0.        , 0.        , 0.00085397],
       ...,
       [0.        , 0.        , 0.00085397],
       [0.        , 0.        , 0.00085397],
       [0.        , 0.        , 0.00085397]])]
  (1, 0)	0.002061855670103093
  (12, 0)	0.002061855670103093
  (19, 0)	0.002061855670103093
  (21, 0)	0.002061855670103093
  (24, 0)	0.002061855670103093
  (27, 0)	0.002061855670103093
  (38, 0)	0.002061855670103093
  (51, 0)	0.002061855670103093
  (55, 0)	0.002061855670103093
  (57, 0)	0.002061855670103093
  (63, 0)	0.002061855670103093
  (67, 0)	0.002061855670103093
  (77, 0)	0.002061855670103093
  (81, 0)	0.002061855670103093
  (89, 0)	0.002061855670103093
  (91, 0)	0.002061855670103093
  (101, 0)	0.002061855670103093
  (123, 0)	0.002061855670103093
  (126, 0)	0.002061855670103093
  (129, 0)	0.002061855670103093
  (130, 0)	0.002061855670103093
  (147, 0)	0.002061855670103093
  (150, 0)	0.00206

In [None]:
# Same all-options run on case_10, this time with sparce_opt = 'coo'; the
# returned values overwrite those of the previous run and feed the next two cells.
# NOTE(review): `sparce_opt` looks like a misspelling of "sparse_opt"; the
# keyword must match Run's signature, which is defined elsewhere.
acc, train_time, emb_time, total_time, Z, W, Z_ori, W_ori = Run(case_10, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = True, sparce_opt = 'coo')

acc:  0.9533333333333334
--- embed 3.5411553382873535 seconds ---
--- train 0.003939628601074219 seconds ---
--- total 18.111839532852173 seconds ---
[[0.51957154 0.7001005  0.48980068]
 [0.63905609 0.58003098 0.50514491]
 [0.53324723 0.52568135 0.66280201]
 ...
 [0.46232283 0.52192124 0.71683738]
 [0.61117436 0.51350303 0.60231266]
 [0.52882802 0.59944564 0.60083763]]
[[0.57163251 0.63552442 0.51898457]
 [0.56639504 0.65251114 0.50341422]
 [0.50932978 0.57073679 0.64408283]
 ...
 [0.69090353 0.47757942 0.54274323]
 [0.55816818 0.47293868 0.68174577]
 [0.58932617 0.63402448 0.50070713]]
[1 1 2 1 1 0 2 0 2 2 2 1 0 2 2 1 2 2 2 2 1 1 2 2 1 2 1 1 0 1 1 2 1 1 2 0 1
 2 0 2 1 0 2 2 2 0 1 1 1 0 0 2 1 0 0 2 2 1 2 2 2 0 2 2 2 2 2 2 0 1 1 1 0 0
 2 0 0 1 2 1 2 2 2 0 2 2 0 1 1 1 2 0 2 2 1 1 2 2 2 2 1 2 0 2 0 1 2 2 2 0 2
 2 2 1 0 0 1 1 1 2 1 0 0 2 1 2 0 2 0 0 2 2 0 0 0 0 0 1 2 1 2 2 2 1 2 0 0 0
 2 2 2 1 2 1 1 1 2 0 2 2 0 2 1 2 1 2 2 1 2 2 2 0 0 2 2 0 2 2 2 2 2 0 2 1 2
 0 0 0 2 1 2 1 0 1 2 0 2 1 1 2 

In [None]:
# Print, one per line: the shape of Z, then Z_ori (a dense array in the recorded
# output), then Z itself (shown in sparse coordinate form) for the 'coo' run.
print(Z.shape, Z_ori, Z, sep="\n")

(3000, 3)
[[0.51957154 0.7001005  0.48980068]
 [0.63905609 0.58003098 0.50514491]
 [0.53324723 0.52568135 0.66280201]
 ...
 [0.46232283 0.52192124 0.71683738]
 [0.61117436 0.51350303 0.60231266]
 [0.52882802 0.59944564 0.60083763]]
  (0, 0)	0.5195715389378506
  (0, 1)	0.7001005028061394
  (0, 2)	0.4898006756797547
  (1, 0)	0.6390560884259104
  (1, 1)	0.5800309763510874
  (1, 2)	0.5051449121974598
  (2, 0)	0.5332472292758246
  (2, 1)	0.5256813506009793
  (2, 2)	0.6628020142546237
  (3, 0)	0.49725584103460374
  (3, 1)	0.7523076584193281
  (3, 2)	0.43216873514935844
  (4, 0)	0.4360105076065667
  (4, 1)	0.6760287880875464
  (4, 2)	0.5940369642821458
  (5, 0)	0.517217796861308
  (5, 1)	0.7005314852404515
  (5, 2)	0.491672033775302
  (6, 0)	0.6374676270270658
  (6, 1)	0.6155825875146564
  (6, 2)	0.46334987044483145
  (7, 0)	0.45386221727566506
  (7, 1)	0.5183226174209902
  (7, 2)	0.7248108387706896
  (8, 0)	0.5916039088151546
  :	:
  (2991, 2)	0.6019055237428302
  (2992, 0)	0.508884110800889

In [None]:
# Print, one per line: the shape of W[0], then W_ori (a one-element list holding
# a dense array in the recorded output), then W[0] itself (sparse form) for the
# 'coo' run.
print(W[0].shape, W_ori, W[0], sep="\n")

(3000, 3)
[array([[0.        , 0.00134409, 0.        ],
       [0.00206186, 0.        , 0.        ],
       [0.        , 0.        , 0.00085397],
       ...,
       [0.        , 0.        , 0.00085397],
       [0.        , 0.        , 0.00085397],
       [0.        , 0.        , 0.00085397]])]
  (0, 1)	0.0013440860215053765
  (1, 0)	0.002061855670103093
  (2, 2)	0.0008539709649871904
  (3, 1)	0.0013440860215053765
  (4, 1)	0.0013440860215053765
  (5, 1)	0.0013440860215053765
  (6, 1)	0.0013440860215053765
  (7, 2)	0.0008539709649871904
  (8, 1)	0.0013440860215053765
  (11, 2)	0.0008539709649871904
  (12, 0)	0.002061855670103093
  (13, 2)	0.0008539709649871904
  (16, 2)	0.0008539709649871904
  (17, 1)	0.0013440860215053765
  (18, 2)	0.0008539709649871904
  (19, 0)	0.002061855670103093
  (20, 2)	0.0008539709649871904
  (21, 0)	0.002061855670103093
  (24, 0)	0.002061855670103093
  (25, 2)	0.0008539709649871904
  (26, 1)	0.0013440860215053765
  (27, 0)	0.002061855670103093
  (28, 1)	0.0013

##### case 11

In [None]:
# Build test case 11 and print its summary (name, n, d, X, Y).
# Per the summary text: SBM, 5 classes, fully known labels, 80% train / 20% test.
case_11 = case.case_11_fully_known()
case_11.summary()

name:

    SBM with 5 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[2]
 [0]
 [2]
 ...
 [3]
 [3]
 [2]]


In [None]:
# Inspect the case's `bd` attribute (a single float for this case).
# NOTE(review): `bd` is not explained in this notebook - confirm its meaning in test_cases.
print(case_11.bd)

0.2


In [None]:
# Baseline single run on case_11 with Learner = 1: Laplacian, DiagA and Correlation all off.
Run(case_11, "su", Learner = 1, Laplacian = False, DiagA = False, Correlation = False)

acc:  1.0


In [None]:
# Single run on case_11 with Learner = 1 and only DiagA switched on.
Run(case_11, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = False)

acc:  1.0


In [None]:
# Single run on case_11 with Learner = 1: DiagA and Correlation on, Laplacian off.
Run(case_11, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = True)

acc:  1.0


In [None]:
# Single run on case_11 with Learner = 1 and all three options
# (Laplacian, DiagA, Correlation) switched on.
Run(case_11, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = True)

acc:  1.0


In [None]:
# Sweep the option combinations in `comb_set` on case_11 and keep the collected
# metrics in `results` for the summary table in the next cell.
# The trailing 1 is presumably the Learner index used by the Run cells above - confirm.
# NOTE(review): `average_restuls` is a misspelling of "results"; it is defined
# elsewhere, so any rename has to change the definition and every call together.
results = average_restuls(case_11, comb_set, 1)

acc:  1.0
--- embed 3.8129494190216064 seconds ---
--- train 0.006302356719970703 seconds ---
--- total 20.552429914474487 seconds ---
acc:  1.0
--- embed 3.821761131286621 seconds ---
--- train 0.004448890686035156 seconds ---
--- total 19.162704467773438 seconds ---
acc:  1.0
--- embed 4.31722617149353 seconds ---
--- train 0.0046977996826171875 seconds ---
--- total 19.075125694274902 seconds ---
acc:  1.0
--- embed 3.827867269515991 seconds ---
--- train 0.0067119598388671875 seconds ---
--- total 19.20094585418701 seconds ---
acc:  1.0
--- embed 3.84987735748291 seconds ---
--- train 0.004353761672973633 seconds ---
--- total 19.247668743133545 seconds ---
acc:  1.0
--- embed 3.8219213485717773 seconds ---
--- train 0.00437164306640625 seconds ---
--- total 19.1132493019104 seconds ---
acc:  1.0
--- embed 3.8054163455963135 seconds ---
--- train 0.004303455352783203 seconds ---
--- total 19.076492071151733 seconds ---
acc:  1.0
--- embed 3.694176435470581 seconds ---
--- train 0.0

In [None]:
# Show the case_11 / learner-1 sweep as a table: one row per
# Laplacian/DiagA/Correlation set with accuracy and timing columns.
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,1.0,0.00476,3.86,19.25
set_02,True,True,False,1.0,0.00433,3.81,19.14
set_03,True,False,True,1.0,0.00422,3.93,20.66
set_04,True,False,False,1.0,0.0044,3.91,20.62
set_05,False,True,True,1.0,0.00424,3.81,8.76
set_06,False,True,False,1.0,0.00409,3.78,8.75
set_07,False,False,True,1.0,0.00551,4.16,9.14
set_08,False,False,False,1.0,0.00436,3.94,8.97


##### case 20

In [None]:
# Build test case 20 again for the Learner = 1 runs and print its summary.
# Per the summary text: DC-SBM, 3 classes, fully known labels, 80% train / 20% test.
# NOTE(review): case_20 was already built earlier in the notebook. If the
# generator is seeded this rebuild is redundant; if not, the two learners are
# evaluated on different graphs - confirm.
case_20 = case.case_20_fully_known()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[1]
 [0]
 [2]
 ...
 [2]
 [2]
 [2]]


In [None]:
# Inspect the case's `bd` attribute (a 3-element list for this case).
# NOTE(review): `bd` is not explained in this notebook - confirm its meaning in test_cases.
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
# Baseline single run on case_20 with Learner = 1: Laplacian, DiagA and Correlation all off.
Run(case_20, "su", Learner = 1, Laplacian = False, DiagA = False, Correlation = False)

acc:  0.8033333333333333


In [None]:
# Single run on case_20 with Learner = 1 and only DiagA switched on.
Run(case_20, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = False)

acc:  0.81


In [None]:
# Single run on case_20 with Learner = 1: DiagA and Correlation on, Laplacian off.
Run(case_20, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = True)

acc:  0.8683333333333333


In [None]:
# Single run on case_20 with Learner = 1 and all three options
# (Laplacian, DiagA, Correlation) switched on.
Run(case_20, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = True)

acc:  0.8616666666666667


In [None]:
# Sweep the option combinations in `comb_set` on case_20 and keep the collected
# metrics in `results` for the summary table in the next cell.
# The trailing 1 is presumably the Learner index used by the Run cells above - confirm.
# NOTE(review): `average_restuls` is a misspelling of "results"; it is defined
# elsewhere, so any rename has to change the definition and every call together.
results = average_restuls(case_20, comb_set, 1)

acc:  0.8616666666666667
--- embed 0.25098705291748047 seconds ---
--- train 0.003683328628540039 seconds ---
--- total 3.8276798725128174 seconds ---
acc:  0.8616666666666667
--- embed 0.23109841346740723 seconds ---
--- train 0.00341033935546875 seconds ---
--- total 3.7405240535736084 seconds ---
acc:  0.8616666666666667
--- embed 0.2346956729888916 seconds ---
--- train 0.003314495086669922 seconds ---
--- total 3.7338244915008545 seconds ---
acc:  0.8616666666666667
--- embed 0.2519969940185547 seconds ---
--- train 0.003350973129272461 seconds ---
--- total 3.932577610015869 seconds ---
acc:  0.8616666666666667
--- embed 0.23643922805786133 seconds ---
--- train 0.0036618709564208984 seconds ---
--- total 3.7266390323638916 seconds ---
acc:  0.8616666666666667
--- embed 0.23111629486083984 seconds ---
--- train 0.003446340560913086 seconds ---
--- total 3.907248020172119 seconds ---
acc:  0.8616666666666667
--- embed 0.24237775802612305 seconds ---
--- train 0.003309011459350586 



acc:  0.8783333333333333
--- embed 0.2513918876647949 seconds ---
--- train 0.0037262439727783203 seconds ---
--- total 3.841672897338867 seconds ---




acc:  0.8783333333333333
--- embed 0.24879097938537598 seconds ---
--- train 0.003383159637451172 seconds ---
--- total 3.9952142238616943 seconds ---




acc:  0.8783333333333333
--- embed 0.23460149765014648 seconds ---
--- train 0.0030879974365234375 seconds ---
--- total 3.8033370971679688 seconds ---




acc:  0.8783333333333333
--- embed 0.24135541915893555 seconds ---
--- train 0.003627300262451172 seconds ---
--- total 3.981414318084717 seconds ---




acc:  0.8783333333333333
--- embed 0.23695731163024902 seconds ---
--- train 0.003475666046142578 seconds ---
--- total 3.9886748790740967 seconds ---




acc:  0.8783333333333333
--- embed 0.24387550354003906 seconds ---
--- train 0.0033698081970214844 seconds ---
--- total 3.897451400756836 seconds ---




acc:  0.8783333333333333
--- embed 0.2349700927734375 seconds ---
--- train 0.0031473636627197266 seconds ---
--- total 3.960235118865967 seconds ---




acc:  0.8783333333333333
--- embed 0.2505607604980469 seconds ---
--- train 0.004873991012573242 seconds ---
--- total 3.8042216300964355 seconds ---




acc:  0.8783333333333333
--- embed 0.2467048168182373 seconds ---
--- train 0.003174304962158203 seconds ---
--- total 3.9922757148742676 seconds ---




acc:  0.8783333333333333
--- embed 0.23595142364501953 seconds ---
--- train 0.005025625228881836 seconds ---
--- total 4.026124715805054 seconds ---




acc:  0.87
--- embed 0.23995113372802734 seconds ---
--- train 0.0031998157501220703 seconds ---
--- total 3.768972396850586 seconds ---




acc:  0.87
--- embed 0.23319005966186523 seconds ---
--- train 0.004218578338623047 seconds ---
--- total 4.0052971839904785 seconds ---




acc:  0.87
--- embed 0.23841285705566406 seconds ---
--- train 0.0031232833862304688 seconds ---
--- total 3.8501181602478027 seconds ---




acc:  0.87
--- embed 0.2340550422668457 seconds ---
--- train 0.00396728515625 seconds ---
--- total 3.9421684741973877 seconds ---




acc:  0.87
--- embed 0.2538590431213379 seconds ---
--- train 0.003163576126098633 seconds ---
--- total 3.852815628051758 seconds ---




acc:  0.87
--- embed 0.23768281936645508 seconds ---
--- train 0.0034101009368896484 seconds ---
--- total 3.925396680831909 seconds ---




acc:  0.87
--- embed 0.23560214042663574 seconds ---
--- train 0.0031104087829589844 seconds ---
--- total 3.955116033554077 seconds ---




acc:  0.87
--- embed 0.2387380599975586 seconds ---
--- train 0.0034732818603515625 seconds ---
--- total 3.785287857055664 seconds ---




acc:  0.87
--- embed 0.24123930931091309 seconds ---
--- train 0.0031731128692626953 seconds ---
--- total 4.038876056671143 seconds ---




acc:  0.87
--- embed 0.2584724426269531 seconds ---
--- train 0.0033452510833740234 seconds ---
--- total 3.7856481075286865 seconds ---
acc:  0.8683333333333333
--- embed 0.23628616333007812 seconds ---
--- train 0.0032999515533447266 seconds ---
--- total 3.225003480911255 seconds ---
acc:  0.8683333333333333
--- embed 0.23377132415771484 seconds ---
--- train 0.00323486328125 seconds ---
--- total 3.0738112926483154 seconds ---
acc:  0.8683333333333333
--- embed 0.23107028007507324 seconds ---
--- train 0.0032494068145751953 seconds ---
--- total 3.225822925567627 seconds ---
acc:  0.8683333333333333
--- embed 0.23411989212036133 seconds ---
--- train 0.0032422542572021484 seconds ---
--- total 3.2224957942962646 seconds ---
acc:  0.8683333333333333
--- embed 0.24110102653503418 seconds ---
--- train 0.0032701492309570312 seconds ---
--- total 3.0780293941497803 seconds ---
acc:  0.8683333333333333
--- embed 0.23151779174804688 seconds ---
--- train 0.003389120101928711 seconds ---


In [None]:
# Show the case_20 / learner-1 sweep as a table: one row per
# Laplacian/DiagA/Correlation set with accuracy and timing columns.
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.861667,0.0034,0.24,3.83
set_02,True,True,False,0.875,0.00365,0.24,3.85
set_03,True,False,True,0.878333,0.00369,0.24,3.93
set_04,True,False,False,0.87,0.00342,0.24,3.89
set_05,False,True,True,0.868333,0.0035,0.24,3.16
set_06,False,True,False,0.81,0.00331,0.24,3.18
set_07,False,False,True,0.88,0.00327,0.24,3.17
set_08,False,False,False,0.803333,0.0033,0.24,3.18


##### case 21

In [None]:
# Build test case 21 again for the Learner = 1 runs and print its summary.
# Per the summary text: DC-SBM, 10 classes, fully known labels, edge-list version.
# NOTE(review): case_21 was already built earlier in the notebook. If the
# generator is seeded this rebuild is redundant; if not, the two learners are
# evaluated on different graphs - confirm.
case_21 = case.case_21_fully_known()
case_21.summary()

name:

    DC-SBM with 10 classes and defined probabilities with fully known labels.
    Edge list version. 
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(60974, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2999 2577    1]
 [2999 2877    1]
 [2999 2951    1]]
Y:
(3000, 1)
[[4]
 [0]
 [5]
 ...
 [6]
 [7]
 [5]]


In [None]:
# Inspect the case's `bd` attribute (a single float for this case).
# NOTE(review): `bd` is not explained in this notebook - confirm its meaning in test_cases.
print(case_21.bd)

0.9


In [None]:
# Baseline single run on case_21 with Learner = 1: Laplacian, DiagA and Correlation all off.
Run(case_21, "su", Learner = 1, Laplacian = False, DiagA = False, Correlation = False)

acc:  0.8383333333333334


In [None]:
# Single run on case_21 with Learner = 1 and only DiagA switched on.
Run(case_21, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = False)

acc:  0.8316666666666667


In [None]:
# Single run on case_21 with Learner = 1: DiagA and Correlation on, Laplacian off.
Run(case_21, "su", Learner = 1, Laplacian = False, DiagA = True, Correlation = True)

acc:  0.83


In [None]:
# Single run on case_21 with Learner = 1 and all three options
# (Laplacian, DiagA, Correlation) switched on.
Run(case_21, "su", Learner = 1, Laplacian = True, DiagA = True, Correlation = True)

acc:  0.8216666666666667


In [None]:
# Sweep the option combinations in `comb_set` on case_21 and keep the collected
# metrics in `results` for the summary table in the next cell.
# The trailing 1 is presumably the Learner index used by the Run cells above - confirm.
# NOTE(review): `average_restuls` is a misspelling of "results"; it is defined
# elsewhere, so any rename has to change the definition and every call together.
results = average_restuls(case_21, comb_set, 1)

acc:  0.8216666666666667
--- embed 0.32279348373413086 seconds ---
--- train 0.012647628784179688 seconds ---
--- total 1.0624041557312012 seconds ---
acc:  0.8216666666666667
--- embed 0.47625303268432617 seconds ---
--- train 0.011063098907470703 seconds ---
--- total 1.2822730541229248 seconds ---
acc:  0.8216666666666667
--- embed 0.46976280212402344 seconds ---
--- train 0.013624906539916992 seconds ---
--- total 1.9463958740234375 seconds ---
acc:  0.8216666666666667
--- embed 0.5477137565612793 seconds ---
--- train 0.013518333435058594 seconds ---
--- total 2.0885672569274902 seconds ---
acc:  0.8216666666666667
--- embed 0.6068413257598877 seconds ---
--- train 0.014215469360351562 seconds ---
--- total 1.9182448387145996 seconds ---
acc:  0.8216666666666667
--- embed 0.5083954334259033 seconds ---
--- train 0.014401435852050781 seconds ---
--- total 1.8037171363830566 seconds ---
acc:  0.8216666666666667
--- embed 0.5935494899749756 seconds ---
--- train 0.013621807098388672 



acc:  0.8116666666666666
--- embed 0.2333080768585205 seconds ---
--- train 0.008192300796508789 seconds ---
--- total 0.9488792419433594 seconds ---




acc:  0.8116666666666666
--- embed 0.2377326488494873 seconds ---
--- train 0.0056002140045166016 seconds ---
--- total 0.9522075653076172 seconds ---




acc:  0.8116666666666666
--- embed 0.23929643630981445 seconds ---
--- train 0.008352279663085938 seconds ---
--- total 0.9656522274017334 seconds ---




acc:  0.8116666666666666
--- embed 0.23727035522460938 seconds ---
--- train 0.007306098937988281 seconds ---
--- total 0.9923994541168213 seconds ---




acc:  0.8116666666666666
--- embed 0.23441267013549805 seconds ---
--- train 0.005865812301635742 seconds ---
--- total 0.9540493488311768 seconds ---




acc:  0.8116666666666666
--- embed 0.23378658294677734 seconds ---
--- train 0.00877833366394043 seconds ---
--- total 0.9477188587188721 seconds ---




acc:  0.8116666666666666
--- embed 0.23687243461608887 seconds ---
--- train 0.005882978439331055 seconds ---
--- total 0.9971613883972168 seconds ---




acc:  0.8116666666666666
--- embed 0.2317650318145752 seconds ---
--- train 0.007254362106323242 seconds ---
--- total 0.9609098434448242 seconds ---




acc:  0.8116666666666666
--- embed 0.23242735862731934 seconds ---
--- train 0.006545305252075195 seconds ---
--- total 0.9564080238342285 seconds ---




acc:  0.8116666666666666
--- embed 0.2358226776123047 seconds ---
--- train 0.006914377212524414 seconds ---
--- total 0.9805798530578613 seconds ---




acc:  0.8183333333333334
--- embed 0.23067569732666016 seconds ---
--- train 0.005755186080932617 seconds ---
--- total 0.9641084671020508 seconds ---




acc:  0.8183333333333334
--- embed 0.21796107292175293 seconds ---
--- train 0.007372856140136719 seconds ---
--- total 0.9567809104919434 seconds ---




acc:  0.8183333333333334
--- embed 0.2169969081878662 seconds ---
--- train 0.008222818374633789 seconds ---
--- total 0.9605503082275391 seconds ---




acc:  0.8183333333333334
--- embed 0.2193007469177246 seconds ---
--- train 0.006380558013916016 seconds ---
--- total 0.9543807506561279 seconds ---




acc:  0.8183333333333334
--- embed 0.2274487018585205 seconds ---
--- train 0.008347511291503906 seconds ---
--- total 0.9534950256347656 seconds ---




acc:  0.8183333333333334
--- embed 0.22861576080322266 seconds ---
--- train 0.0076177120208740234 seconds ---
--- total 0.9679384231567383 seconds ---




acc:  0.8183333333333334
--- embed 0.22882413864135742 seconds ---
--- train 0.008661985397338867 seconds ---
--- total 0.9839673042297363 seconds ---




acc:  0.8183333333333334
--- embed 0.22600221633911133 seconds ---
--- train 0.008264780044555664 seconds ---
--- total 0.9906904697418213 seconds ---




acc:  0.8183333333333334
--- embed 0.23665904998779297 seconds ---
--- train 0.0062525272369384766 seconds ---
--- total 0.990703821182251 seconds ---




acc:  0.8183333333333334
--- embed 0.23306655883789062 seconds ---
--- train 0.008369207382202148 seconds ---
--- total 0.9781818389892578 seconds ---
acc:  0.83
--- embed 0.2812047004699707 seconds ---
--- train 0.005878448486328125 seconds ---
--- total 0.29306650161743164 seconds ---
acc:  0.83
--- embed 0.2775564193725586 seconds ---
--- train 0.006426811218261719 seconds ---
--- total 0.2889981269836426 seconds ---
acc:  0.83
--- embed 0.29416823387145996 seconds ---
--- train 0.008039236068725586 seconds ---
--- total 0.3073267936706543 seconds ---
acc:  0.83
--- embed 0.2799255847930908 seconds ---
--- train 0.008398056030273438 seconds ---
--- total 0.29305076599121094 seconds ---
acc:  0.83
--- embed 0.2787182331085205 seconds ---
--- train 0.005712270736694336 seconds ---
--- total 0.289111852645874 seconds ---
acc:  0.83
--- embed 0.2833421230316162 seconds ---
--- train 0.005767345428466797 seconds ---
--- total 0.29396510124206543 seconds ---
acc:  0.83
--- embed 0.2837936

In [None]:
plot(results)

Unnamed: 0,Laplacian,DiagA,Correlation,Accuracy,Train_Time(s),Emb_Time(s),Total_Time(s)
set_01,True,True,True,0.821667,0.01524,0.54,1.9
set_02,True,True,False,0.816667,0.014,0.44,1.73
set_03,True,False,True,0.811667,0.00707,0.24,0.97
set_04,True,False,False,0.818333,0.00752,0.23,0.97
set_05,False,True,True,0.83,0.0067,0.28,0.3
set_06,False,True,False,0.831667,0.00759,0.29,0.3
set_07,False,False,True,0.823333,0.00676,0.29,0.3
set_08,False,False,False,0.838333,0.00627,0.28,0.29


### Clustering

#### Case 10

In [None]:
case_10_cluster = case.case_10_cluster()
case_10_cluster.summary()

name:

    SBM with 3 classes for clustering
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(1, 1)
[[3]]


In [None]:
print(case_10_cluster.bd)

0.13


In [None]:
Run(case_10_cluster, "c")

ARI:  0.8036759717543803


#### Case 11

In [None]:
case_11_cluster = case.case_11_cluster()
case_11_cluster.summary()

name:

    SBM with 5 classes for clustering
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(1, 1)
[[5]]


In [None]:
print(case_11_cluster.bd)

0.2


In [None]:
Run(case_11_cluster, "c")

ARI:  1.0


#### case 20

In [None]:
case_20_cluster = case.case_20_cluster()
case_20_cluster.summary()

name:

    DC-SBM with 3 classes for clustering
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(1, 1)
[[3]]


In [None]:
print(case_20_cluster.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20_cluster, "c")

ARI:  0.6710219123774864


#### Case 21

In [None]:
case_21_cluster = case.case_21_cluster()
case_21_cluster.summary()

name:

    DC-SBM with 10 classes for clustering.
    Edge list version. 
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(30487, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2952 2993    1]
 [2975 2980    1]
 [2983 2987    1]]
Y:
(1, 1)
[[10]]


In [None]:
print(case_21_cluster.bd)

0.9


In [None]:
Run(case_21_cluster, "c")

ARI:  0.43355806469613173


### Semi-GNN-learner 0

#### case 10

In [None]:
case_10 = case.case_10()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "se", Learner = 0, LearnerIter = 0)

acc:  0.5623025894165039


#### case 11

In [None]:
case_11 = case.case_11()
case_11.summary()

name:

    SBM with 5 classes and defined probabilities with 95% unknown labels.  
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_11.bd)

0.2


In [None]:
Run(case_11, "se", Learner = 0, LearnerIter = 0)

acc:  0.6410256624221802


#### case 20

In [None]:
case_20 = case.case_20()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "se", Learner = 0, LearnerIter = 0)

acc:  0.6356616616249084


#### case 21

In [None]:
case_21 = case.case_21()
case_21.summary()

name:

    DC-SBM with 10 classes and defined probabilities with 95% unknown labels.
    Edge list version.     
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(30487, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2952 2993    1]
 [2975 2980    1]
 [2983 2987    1]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [ 5]]


In [None]:
print(case_21.bd)

0.9


In [None]:
Run(case_21, "se", Learner = 0, LearnerIter = 0)

acc:  0.33778560161590576


### Semi-LDA-learner 1

#### case 10

In [None]:
case_10 = case.case_10()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "se", Learner = 1, LearnerIter = 10)

acc:  0.763


#### case 11

In [None]:
case_11 = case.case_11()
case_11.summary()

name:

    SBM with 5 classes and defined probabilities with 95% unknown labels.  
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_11.bd)

0.2


In [None]:
Run(case_11, "se", Learner = 1, LearnerIter = 10)

acc:  1.0


#### case 20

In [None]:
case_20 = case.case_20()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "se", Learner = 1, LearnerIter = 10)

acc:  0.9073333333333333


#### case 21

In [None]:
case_21 = case.case_21()
case_21.summary()

name:

    DC-SBM with 10 classes and defined probabilities with 95% unknown labels.
    Edge list version.     
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(30487, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2952 2993    1]
 [2975 2980    1]
 [2983 2987    1]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [ 5]]


In [None]:
print(case_21.bd)

0.9


In [None]:
Run(case_21, "se", Learner = 1, LearnerIter = 10)

acc:  0.8416666666666667


### Semi-GNN-learner 2 - update using y_temp

#### case 10

In [None]:
case_10 = case.case_10()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "se", Learner = 2, LearnerIter = 10)

acc:  0.7223333333333334


#### case 11

In [None]:
case_11 = case.case_11()
case_11.summary()

name:

    SBM with 5 classes and defined probabilities with 95% unknown labels.  
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_11.bd)

0.2


In [None]:
Run(case_11, "se", Learner = 2, LearnerIter = 10)

acc:  1.0


#### Case 20

In [None]:
case_20 = case.case_20()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "se", Learner = 2, LearnerIter = 10)

acc:  0.9053333333333333


#### Case 21

In [None]:
case_21 = case.case_21()
case_21.summary()

name:

    DC-SBM with 10 classes and defined probabilities with 95% unknown labels.
    Edge list version.     
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(30487, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2952 2993    1]
 [2975 2980    1]
 [2983 2987    1]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [ 5]]


In [None]:
print(case_21.bd)

0.9


In [None]:
Run(case_21, "se", Learner = 2, LearnerIter = 10)

acc:  0.83


### Semi-GNN-learner 2 - update using y_temp_one_hot

#### case 10

In [None]:
case_10 = case.case_10()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "se", Learner = 2, LearnerIter = 10)

acc:  0.5123333333333333


#### case 11

In [None]:
case_11 = case.case_11()
case_11.summary()

name:

    SBM with 5 classes and defined probabilities with 95% unknown labels.  
    
n:
<class 'int'>
3000
d:
<class 'int'>
5
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_11.bd)

0.2


In [None]:
Run(case_11, "se", Learner = 2, LearnerIter = 10)

acc:  0.9656666666666667


#### Case 20

In [None]:
case_20 = case.case_20()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with 95% unknown labels.
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "se", Learner = 2, LearnerIter = 10)

acc:  0.907


#### Case 21

In [None]:
case_21 = case.case_21()
case_21.summary()

name:

    DC-SBM with 10 classes and defined probabilities with 95% unknown labels.
    Edge list version.     
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(30487, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2952 2993    1]
 [2975 2980    1]
 [2983 2987    1]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [ 5]]


In [None]:
print(case_21.bd)

0.9


In [None]:
Run(case_21, "se", Learner = 2, LearnerIter = 10)

acc:  0.854


## Node2Vec vs AEE

In [None]:
n = 2000
case = Case(n)

### Node2Vec - Supervised

#### GNN

##### case 10

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "su", Learner = 0, emb_opt = "Node2Vec")

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

acc:  0.39500001072883606
--- 1779.9130690097809 seconds ---


##### Case 20

In [None]:
case_20 = case.case_20_fully_known()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "su", Learner = 0, emb_opt = "Node2Vec")

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

acc:  0.4124999940395355
--- 1042.857544183731 seconds ---


#### LDA

##### case 10

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "su", Learner = 1, emb_opt = "Node2Vec")

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

acc:  0.43
--- 1791.2492997646332 seconds ---


##### Case 20

In [None]:
case_20 = case.case_20_fully_known()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "su", Learner = 1, emb_opt = "Node2Vec")

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

acc:  0.4475
--- 1176.5556297302246 seconds ---


### AEE - Supervised

#### GNN

##### case 10

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "su", Learner = 0, emb_opt = "AEE")

acc:  0.8899999856948853
--- 26.105212450027466 seconds ---


##### Case 20

In [None]:
case_20 = case.case_20_fully_known()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "su", Learner = 0, emb_opt = "AEE")

acc:  0.8274999856948853
--- 9.403448343276978 seconds ---


#### LDA

##### case 10

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_10.bd)

0.13


In [None]:
Run(case_10, "su", Learner = 1, emb_opt = "AEE")

acc:  0.8825
--- 3.7893357276916504 seconds ---


##### Case 20

In [None]:
case_20 = case.case_20_fully_known()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
Run(case_20, "su", Learner = 1, emb_opt = "AEE")

acc:  0.8275
--- 1.3883047103881836 seconds ---


### Encoder old method AEE + Node2Vec

In [None]:
class EncoderEmbedding:
  """Builds train/test node embeddings from a graph dataset.

  Both methods read ``X``, ``Y``, ``train_idx``, ``test_idx``, ``Y_test`` and
  ``d`` from ``dataset`` and return a deep copy of this object carrying
  ``z_train``, ``z_test``, ``y_train``, ``y_test`` and ``k``.
  """

  def AEE(self, dataset):
    """Embed nodes by multiplying adjacency rows with a class-weight matrix.

    Args:
      dataset: object exposing ``X`` (square adjacency matrix), ``Y`` (labels,
        indexable by ``train_idx``), ``train_idx``, ``test_idx``, ``Y_test``
        and ``d`` (number of classes).

    Returns:
      A deep copy of ``self`` with ``z_train``, ``z_test``, ``y_train``,
      ``y_test``, ``k``, ``nk`` (1 x k class counts) and ``w`` (n_train x k
      weights, ``w[i, c] = 1 / nk[c]`` when training node ``i`` has label ``c``).

    Raises:
      ValueError: if a training label lies outside ``[0, d)``. Such a label
        would otherwise index the weight matrix from the end and be counted
        as a different class.
    """
    aee = copy.deepcopy(self)

    X = dataset.X
    Y = dataset.Y
    test_idx = dataset.test_idx
    train_idx = dataset.train_idx

    # Partition the data: both blocks keep only the training columns, because
    # the weight matrix has one row per training node.
    X_test, X_train = X[test_idx,:][:,train_idx], X[train_idx,:][:,train_idx]
    Y_train = Y[train_idx]

    Y_test = dataset.Y_test
    k = dataset.d

    labels = np.asarray(Y_train).ravel().astype(int)
    if labels.size and (labels.min() < 0 or labels.max() >= k):
      raise ValueError(
          "training labels must lie in [0, %d); got values in [%d, %d]"
          % (k, labels.min(), labels.max()))

    # nk: number of training nodes per class; w: one non-zero entry per row.
    nk = np.bincount(labels, minlength=k).astype(float).reshape(1, k)
    w = np.zeros((labels.size, k))
    w[np.arange(labels.size), labels] = 1.0 / nk[0, labels]

    aee.z_train = np.matmul(X_train, w)
    aee.z_test = np.matmul(X_test, w)
    aee.y_train = Y_train.ravel()
    aee.y_test = Y_test.ravel()
    aee.k = k
    aee.nk = nk
    aee.w = w

    return aee

  def NodeToVec(self, dataset):
    """Embed nodes with node2vec, trained on the full adjacency matrix.

    Args:
      dataset: same attributes as for ``AEE``.

    Returns:
      A deep copy of ``self`` with ``z_train``, ``z_test``, ``y_train``,
      ``y_test`` and ``k``; row ``i`` of the embedding belongs to node ``i``.
    """
    n2v = copy.deepcopy(self)

    X = dataset.X
    Y = dataset.Y
    test_idx = dataset.test_idx
    train_idx = dataset.train_idx

    Y_train = Y[train_idx]
    Y_test = dataset.Y_test
    k = dataset.d

    # from_numpy_array supersedes the deprecated from_numpy_matrix.
    G = nx.from_numpy_array(np.asarray(X))
    # use default setting from https://github.com/eliorc/node2vec
    node2vec = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200, workers=4)
    # Embed nodes, use default setting from https://github.com/eliorc/node2vec
    model = node2vec.fit(window=10, min_count=1, batch_words=4)
    # model.wv.vectors is ordered by vocabulary index, not by node id, so look
    # every node up by its string key to keep rows aligned with the labels.
    Z = np.array([model.wv[str(node)] for node in range(G.number_of_nodes())])

    n2v.z_train = Z[train_idx]
    n2v.z_test = Z[test_idx]
    n2v.y_train = Y_train.ravel()
    n2v.y_test = Y_test.ravel()
    n2v.k = k

    return n2v

class Hyperperameters:
  """
    Training settings consumed by the GNN classifier.
    The defaults target GNN learning with "Learner" == 2:
      embed using the partial labels, then learn the unknown labels with a
      two-layer neural network.

  """
  def __init__(self):
    # Keras has no scaled-conjugate-gradient optimiser, so the library
    # default optimiser is used together with these settings.
    defaults = (
        ('learning_rate', 0.01),               # initial learning rate
        ('epochs', 100),                       # number of training epochs
        ('hidden', 20),                        # units in the hidden layer
        ('val_split', 0.1),                    # share of training data held out for validation
        ('loss', 'categorical_crossentropy'),  # loss function
    )
    for attr_name, attr_value in defaults:
      setattr(self, attr_name, attr_value)

class GNN:
  """Two-layer Keras classifier trained on precomputed node embeddings.

  ``DataSets`` must expose ``z_train``, ``z_test``, ``y_train``, ``y_test``
  and ``k`` (number of classes), as set by ``EncoderEmbedding``.
  """

  def __init__(self, DataSets):
    # Keep state on the instance: writing these onto the class made every
    # GNN object share (and overwrite) the same data set and model.
    self.DataSets = DataSets
    self.hyperM = Hyperperameters()
    self.model = self.GNN_model()  # model summary: self.model.summary()

  def GNN_model(self):
    """
      Build and compile the GNN model.

      Returns:
        A compiled ``keras.Sequential`` with one hidden ReLU layer of
        ``hyperM.hidden`` units and a softmax output of ``k`` units.
    """
    hyperM = self.hyperM
    DataSets = self.DataSets

    z_train = DataSets.z_train
    k = DataSets.k

    feature_num = z_train.shape[1]

    model = keras.Sequential([
    keras.layers.Flatten(input_shape = (feature_num,)),  # input layer
    keras.layers.Dense(hyperM.hidden, activation='relu'),  # hidden layer -- no tansig activation function in Keras, use relu instead
    keras.layers.Dense(k, activation='softmax') # output layer with one unit per class
    ])

    # Pass the configured optimizer object; compiling with the string 'adam'
    # silently ignored hyperM.learning_rate.
    optimizer = keras.optimizers.Adam(learning_rate = hyperM.learning_rate)

    model.compile(optimizer=optimizer,
                  loss=hyperM.loss,
                  metrics=['accuracy'])

    return model

  def GNN_run(self):
    """
      Train and test directly.
      Do not learn from the unknown labels.

      Returns:
        Accuracy of the trained model on ``z_test`` / ``y_test``.
    """
    hyperM = self.hyperM
    DataSets = self.DataSets
    k = DataSets.k
    z_train = DataSets.z_train
    y_train = DataSets.y_train
    y_test = DataSets.y_test
    z_test = DataSets.z_test
    model = self.model

    # Fix the one-hot width to k so it always matches the output layer, even
    # when the highest class is missing from one of the splits.
    y_train_one_hot = to_categorical(y_train, num_classes=k)
    model.fit(z_train, y_train_one_hot,
          epochs=hyperM.epochs,
          validation_split=hyperM.val_split,
          verbose=0)

    y_test_one_hot = to_categorical(y_test, num_classes=k)
    # set verbose to 0 to silence the output
    test_loss, test_acc = model.evaluate(z_test,  y_test_one_hot, verbose=0)
    return test_acc


#### case 10

In [None]:
case_10 = case.case_10_fully_known()
case_10.summary()

name:

    SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


In [None]:
print(case_10.bd)

0.13


##### LDA

###### AEE

In [None]:
begin = time.time()
Encod = EncoderEmbedding()
aee = Encod.AEE(case_10)
clf = LinearDiscriminantAnalysis()
clf.fit(aee.z_train, aee.y_train)
acc = clf.score(aee.z_test, aee.y_test)
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

0.9075
--- 0.10760641098022461 seconds ---


###### Node2Vec

In [None]:
# Time the Node2Vec embedding plus LDA fit/score on case_10.
begin = time.time()
Encod = EncoderEmbedding()
n2v = Encod.NodeToVec(case_10)
clf = LinearDiscriminantAnalysis()
# NodeToVec stores the labels as y_train / y_test (lowercase).
clf.fit(n2v.z_train, n2v.y_train)
acc = clf.score(n2v.z_test, n2v.y_test)
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

0.435
--- 2213.2506487369537 seconds ---


##### GNN

###### AEE

In [None]:
begin = time.time()
Encod = EncoderEmbedding()
aee = Encod.AEE(case_10)
gnn = GNN(aee)
acc = gnn.GNN_run()
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

0.9075
--- 21.70582938194275 seconds ---


###### Node2Vec

In [None]:
begin = time.time()
Encod = EncoderEmbedding()
n2v = Encod.NodeToVec(case_10)
gnn = GNN(n2v)
acc = gnn.GNN_run()
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

0.3824999928474426
--- 2131.48596739769 seconds ---


#### case 20

In [None]:
case_20 = case.case_20_fully_known()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with fully known labels
    80% for training and 20% for testing
    
n:
<class 'int'>
2000
d:
<class 'int'>
3
X:
(2000, 2000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(2000, 1)
[[ 1]
 [-1]
 [ 2]
 ...
 [ 2]
 [ 2]
 [-1]]


##### LDA

###### AEE

In [None]:
begin = time.time()
Encod = EncoderEmbedding()
aee = Encod.AEE(case_20)
clf = LinearDiscriminantAnalysis()
clf.fit(aee.z_train, aee.y_train)
acc = clf.score(aee.z_test, aee.y_test)
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

0.8
--- 0.21722698211669922 seconds ---


###### Node2Vec

In [None]:
begin = time.time()
Encod = EncoderEmbedding()
n2v = Encod.NodeToVec(case_20)
clf = LinearDiscriminantAnalysis()
clf.fit(n2v.z_train, n2v.y_train)
acc = clf.score(n2v.z_test, n2v.y_test)
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

0.435
--- 1268.2662296295166 seconds ---


##### GNN

###### AEE

In [None]:
begin = time.time()
Encod = EncoderEmbedding()
aee = Encod.AEE(case_20)
gnn = GNN(aee)
acc = gnn.GNN_run()
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

0.8299999833106995
--- 9.782210350036621 seconds ---


###### Node2Vec

In [None]:
begin = time.time()
Encod = EncoderEmbedding()
n2v = Encod.NodeToVec(case_20)
gnn = GNN(n2v)
acc = gnn.GNN_run()
end = time.time()
print(acc)
print("--- %s seconds ---" % (end - begin))

Computing transition probabilities:   0%|          | 0/2000 [00:00<?, ?it/s]

0.4025000035762787
--- 1263.1786060333252 seconds ---


# Others

### Semi

**acc 0.912**

In [None]:
case_20 = case.case_20()
case_20.summary()

name:

    DC-SBM with 3 classes and defined probabilities with 95% unknown labels
    The unkown labels maybe less than 95% in order to make sure that 
    at least there is one node belongs to one label     
    
n:
<class 'int'>
3000
d:
<class 'int'>
3
X:
(3000, 3000)
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]


In [None]:
print(case_20.bd)

[0.9, 0.5, 0.2]


In [None]:
# Semi-supervised run on case_20: preprocess the data, run the GNN, then score
# the returned labels against the original labels.
# NOTE(review): GNN is called here with Learner/LearnerIter keyword arguments
# and a GNN_complete() method, neither of which the GNN class defined in the
# "Encoder old method" cell provides -- presumably a different GNN must be in
# scope for this cell; verify on a fresh kernel.
Dataset = DataPreprocess(case_20.X, case_20.Y)
Dataset = Dataset.semi_supervise_preprocess()
kwargs_for_gnn =  {'Learner': 2, 'LearnerIter': 10}
gnn = GNN(Dataset, **kwargs_for_gnn)
Results = gnn.GNN_complete()

# Accuracy of the returned labels against case_20.Y_ori.
results = metrics.accuracy_score(case_20.Y_ori, Results.Y)
print(results)

0.912


### case 21
DC-SBM, 10 classes

#### Semi

## Semi-LDA-Learner 1

In [None]:
case_21 = case.case_21()
case_21.summary()

name:

    DC-SBM with 10 classes and defined probabilities with 95% unknown labels
    The unkown labels maybe less than 95% in order to make sure that 
    at least there is one node belongs to one label       
    
n:
<class 'int'>
3000
d:
<class 'int'>
10
X:
(30487, 3)
[[   0    3    1]
 [   0  168    1]
 [   0  551    1]
 ...
 [2952 2993    1]
 [2975 2980    1]
 [2983 2987    1]]
Y:
(3000, 1)
[[-1]
 [-1]
 [-1]
 ...
 [-1]
 [-1]
 [-1]]
