# Stochastic block model and graph neural network

V 4.2.1 is the version with only the simplest model Mw

V 4.2 adds a new type of GNN

V 4.1 is the version shared with other people

## Initialization of the program

### Import libraries

In [1]:
# Importing module
import matplotlib.pyplot as plt
import numpy as np
import math
import scipy.special

from sklearn.cluster import KMeans
from tqdm.notebook import trange
from itertools import permutations

### Functions

In [2]:
# Show the adjacency matrix
def show(A):
    """Render the matrix ``A`` as a 7.5 x 7.5 inch greyscale image, axes hidden."""
    plt.figure(figsize=(7.5, 7.5))
    plt.imshow(A, interpolation='nearest', cmap='Greys')
    plt.axis('off')
    plt.show()

# Generation of a random graph for the SBM problem with k communities
def random_graph_generator(n, p, q, k, graph_num, use_MSE=False, shuffle=True):
    """Sample a stochastic-block-model graph with ``k`` equally sized clusters.

    Parameters
    ----------
    n : int
        Number of nodes; must be divisible by ``k``.
    p : float
        Probability of an edge between two nodes of the same cluster.
    q : float
        Probability of an edge between two nodes of different clusters.
    k : int
        Number of clusters.
    graph_num : int
        Seed handed to ``np.random.seed``; the same value always produces the
        same graph. NOTE: this reseeds NumPy's global random state.
    use_MSE : bool
        Selects the label encoding, see "Returns".
    shuffle : bool
        If true, apply one random permutation to the rows/columns of the
        adjacency matrix and to the labels.

    Returns
    -------
    A : ndarray of shape (n, n)
        Symmetric 0/1 adjacency matrix without self loops.
    edge_index : ndarray of shape (2, E)
        Row and column indices of the non-zero entries of ``A``.
    labels_list : list of ndarray
        With ``use_MSE`` true: ``[y, -y]`` where ``y = 2 * cluster - 1``
        (this is a -1/+1 labelling only when ``k == 2``).
        Otherwise: one (n, k) one-hot array for each of the ``k!`` relabelings
        of the clusters, in the order produced by ``itertools.permutations``.

    Raises
    ------
    ValueError
        If ``n`` is not divisible by ``k``.
    """
    if n % k != 0:
        raise ValueError(
            f"n ({n}) must be divisible by the number of clusters k ({k})"
        )

    np.random.seed(graph_num)

    # Number of nodes per cluster
    n_c = n // k

    # Ground-truth cluster of every node: 0,...,0,1,...,1,...,k-1
    labels = np.repeat(np.arange(k), n_c)

    # Adjacency matrix and the strict upper-triangle indices of one block
    A = np.zeros((n, n))
    triu_inds = np.triu_indices(n_c, k=1)

    # Fill the upper triangle block by block. The order of the random draws
    # is part of the contract: it fixes which graph a given seed produces.
    for i in range(k):
        for j in range(i, k):
            if j == i:
                # Edges inside cluster i (strict upper triangle of the block)
                cluster_i = np.random.choice(
                    [0, 1], size=(n_c * (n_c - 1) // 2), p=[1 - p, p])
                A[triu_inds[0] + i * n_c, triu_inds[1] + i * n_c] = cluster_i
            else:
                # Edges between cluster i and cluster j
                between_clusters_i_j = np.random.choice(
                    [0, 1], size=(n_c, n_c), p=[1 - q, q])
                A[n_c * i: n_c * (i + 1), n_c * j: n_c * (j + 1)] = between_clusters_i_j

    # Mirror the upper triangle into the lower one
    A = A + A.T

    if shuffle:
        # Same permutation for rows, columns and labels
        indices = np.arange(n)
        np.random.shuffle(indices)
        A = A[indices][:, indices]
        labels = labels[indices]

    # Edge list of the (possibly shuffled) graph
    edge_index = np.array(np.nonzero(A))

    if use_MSE:
        labels = labels * 2 - 1
        return A, edge_index, [labels, -labels]

    # One-hot targets for every relabeling of the clusters: row l of the
    # identity is the one-hot code of class l, so indexing it with the
    # permuted labels builds the whole (n, k) target at once.
    one_hot = np.eye(k)
    labels_list = [
        one_hot[np.asarray(perm)[labels]] for perm in permutations(range(k))
    ]

    return A, edge_index, labels_list

# Make predictions
def result(X, W, w, L, n, p, q, k, h, test_graph_num, rand_lab=False,
           istogram=True, use_MSE = False, use_deg=False, shuffle=True):
    """Evaluate the fixed-weight model on freshly generated SBM graphs.

    For every seed in ``range(test_graph_num)`` a graph is drawn with
    ``random_graph_generator(n, p, q, k, seed, ...)``, the model output
    ``M @ X @ W`` is computed (followed by ``L - 1`` rounds of
    ``M @ tanh(.)`` and a final product with ``w``), and its sign is compared
    with the first labelling returned by the generator.

    Parameters
    ----------
    X, W, w : ndarray
        Input features and weights. They must be shaped so that
        ``M @ X @ W`` can be reshaped to ``(n, h)`` and multiplied by ``w``.
    L : int
        Number of propagation steps (``L - 1`` tanh layers are applied).
    n, p, q, k : graph parameters forwarded to ``random_graph_generator``.
    h : int
        Number of hidden features.
    test_graph_num : int
        Number of graphs (seeds ``0 .. test_graph_num - 1``) to evaluate.
    rand_lab, use_deg :
        Accepted for interface compatibility; not used by this function.
    istogram : bool
        If true, show a histogram of the per-graph accuracies.
    use_MSE : bool
        Forwarded to the generator. One-hot labels (``use_MSE=False``) are
        converted to the same ``2 * cluster - 1`` encoding before comparison.
    shuffle : bool
        Forwarded to the generator.

    Returns
    -------
    accuracys : list of float
        Per-graph accuracy in percent, taken as ``max(acc, 100 - acc)`` so
        that a globally flipped labelling counts as correct.
    mean_accuracy : float
        Mean of ``accuracys`` (``nan`` when no graph was evaluated).
    """
    accuracys = []

    for graph_num in trange(test_graph_num):
        # Honour the caller's ``shuffle`` choice instead of forcing True.
        A, edge_index, labels_list = random_graph_generator(
            n, p, q, k, graph_num, use_MSE=use_MSE, shuffle=shuffle)

        # Symmetric normalisation (D + I)^(-1/2) (A + I) (D + I)^(-1/2).
        # D + I is diagonal, so scaling rows and columns by the vector of
        # 1 / sqrt(degree + 1) gives the same matrix without inverting and
        # multiplying full (n, n) matrices.
        scale = np.sqrt(1.0 / (np.sum(A, axis=1) + 1.0))
        M = scale[:, None] * (A + np.eye(n)) * scale[None, :]

        # Forward pass
        results = M @ X @ W

        for _ in range(1, L):
            results = M @ np.reshape(np.tanh(results), (n, h))

        results = np.reshape(results, (n, h)) @ w

        # Negative outputs predict -1; zero and positive outputs predict +1.
        predicted = np.where(results < 0, -1, 1)

        if use_MSE:
            targets = labels_list[0]
        else:
            # One-hot rows -> 2 * cluster - 1, i.e. the encoding used by the
            # MSE branch, so the comparison below is elementwise on (n,).
            targets = np.argmax(labels_list[0], axis=1) * 2 - 1

        corrects = predicted == targets

        accuracy = np.mean(corrects) * 100
        accuracy = max(accuracy, 100 - accuracy)
        accuracys.append(accuracy)

    # Avoid a ZeroDivisionError when no graph was requested.
    mean_accuracy = sum(accuracys) / len(accuracys) if accuracys else float("nan")

    print(f"\nMean accuracy: {mean_accuracy:.2f} %\n\n")

    # Histogram of the per-graph accuracies
    if istogram:
        f = plt.figure()
        f.set_figwidth(20)
        f.set_figheight(8)
        # The histogram bins accuracies, so that is what the x axis shows.
        plt.xlabel('Accuracy (%)')
        plt.ylabel('Number of graphs')
        plt.title('Performance of the model')
        plt.hist(accuracys, bins=20)
        plt.show()

    return accuracys, mean_accuracy

## Generate a SBM problem

### Initialization of the parameters

In [None]:
# ---------------------------------------------------------------------------
# User settings
# ---------------------------------------------------------------------------
n = 500          # number of vertices
p = n / n        # edge probability inside a cluster
q = 0 / n        # edge probability between clusters
k = 2            # number of clusters
h = 1            # number of hidden features
use_MSE = True   # True: MSE loss, False: cross entropy loss
L = 1            # number of hidden layers

# ---------------------------------------------------------------------------
# Derived settings (computed automatically)
# ---------------------------------------------------------------------------

# Parameters a and b, computed from n as entered above
a = n * p
b = n * q

# Round n down to the nearest multiple of k
if n % k != 0:
    n -= n % k

# A single-layer model has no hidden features
if L == 1:
    h = 1

# Signal-to-noise ratio
snr = (a - b) ** 2 / (k * (a + (k - 1) * b))

# Shuffle the rows and columns of the adjacency matrix
# (DO NOT CHANGE IT: experiments without shuffle aren't relevant)
shuffle = True

print(f"Parameter a: {a:.2f}\tParameter b: {b:.2f}\tSNR: {snr:.2f}\tProbability p: {p:.2f}\tProbability q: {q:.2f}\tn: {n}")

In [None]:
# Simplest model results: f = MW
X = np.eye(n)
ones_model = True

# First column of the weight matrix
if ones_model:
    # +1 for the first half of the nodes, -1 for the second half
    W_0 = np.concatenate((np.ones(n // 2), -np.ones(n // 2)), axis=0)
else:
    # n/2 for the first node, -1 for all the others
    W_0 = np.concatenate((np.ones(1) * n / 2, -np.ones(n - 1)), axis=0)

# W must be a copy: np.random.shuffle works in place, so an alias of W_0
# would be scrambled together with it and the first two columns of W would
# come out identical whenever h > 1.
W = W_0.copy()
for _ in range(h - 1):
    np.random.shuffle(W_0)
    W = np.column_stack((W, W_0))

# Output layer: plain average of the h hidden features
w = np.ones(h) / h
print(f"Accuracy prediction: {100 * (1 + p - q)/ 2}")
accuracys, mean_accuracy = result(X, W, w, L, n, p, q, k, h, 1000, use_MSE=use_MSE)

In [None]:
# Evaluate the simplest model on graphs of increasing size n.
# Sizes with n divisible by 4 (n/2 even) and the remaining sizes (n/2 odd)
# are stored in separate lists.
ns = np.arange(50, 1001, 50)
mean_accuracy_odd = []
mean_accuracy_even = []
predictions = []
ns_odd = []
ns_even = []
ones_model = True

for i in ns:
    n = i
    X = np.eye(n)

    # Weight vector of the chosen model
    W = (
        np.concatenate((np.ones(int(n/2)), -1 * np.ones(int(n/2))), axis=0)
        if ones_model
        else np.concatenate((np.ones(1)*n/2, -1 * np.ones(n-1)), axis=0)
    )
    w = np.array([1])

    # Same evaluation for every n: one layer, p = 1, q = 0, two clusters,
    # one hidden feature, 2000 graphs.
    accuracys_i, mean_accuracy_i = result(X, W, w, 1, n, 1, 0, 2, 1, 2000, istogram=False, use_MSE=use_MSE)

    if n % 4 != 0:
        mean_accuracy_odd.append(mean_accuracy_i)
        ns_odd.append(n)
        continue

    mean_accuracy_even.append(mean_accuracy_i)
    ns_even.append(n)

    # Closed-form prediction: accuracy 50 with weight delta_i, 100 otherwise
    delta_i = math.factorial(i // 2) ** 4 / (math.factorial(i // 4) ** 4 * math.factorial(i))
    prediction = 50 * delta_i + 100 * (1 - delta_i)
    predictions.append(prediction)

In [None]:
# Recorded results
# NOTE: the first mean_accuracy_even list is overwritten by the line right
# after it; only the second list is plotted.
mean_accuracy_even = [92.8, 94.152, 96.05, 96.6, 96.65, 95.5, 97.26628571428574, 97.25, 97.65, 97.49969999999999]
mean_accuracy_even = [92.5, 94.1275, 95.75, 96.5, 96.275, 96.15, 96.91857142857133, 97.075, 97.4, 97.24905000000001]
mean_accuracy_odd = [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
predictions = [92.08076694193348, 94.3792212147979, 95.40491332206871, 96.01805031658179, 96.4371001099559, 96.74671905508835, 96.9875092174659, 97.18169547820673, 97.3426002178267, 97.47875911634195]
ns_odd = [50, 150, 250, 350, 450, 550, 650, 750, 850, 950]
ns_even = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]

# Mean accuracy against n: one curve per recorded series
plt.rcParams.update({'font.size': 25})
f_1 = plt.figure(figsize=(20, 10))
plt.title('Accuracy with different n value')
plt.xlabel('n')
plt.ylabel('Mean accuracy')

curves = (
    (ns_odd, mean_accuracy_odd, "Model Mw on graph with n/2 odd"),
    (ns_even, mean_accuracy_even, "Model Mw on graph with n/2 even"),
    (ns_even, predictions, "Predictions for n/2 even"),
)
for xs, ys, curve_label in curves:
    plt.plot(xs, ys, marker='o', label=curve_label, linewidth=3)

plt.legend(loc="lower right")
f_1.savefig('Mean_accuracy_simplest_model_for_different_n.pdf')
plt.show()

In [None]:
# Evaluate the simplest model for different SNR values
# (b is fixed to 1 and a runs over 1..13).
a_values = np.arange(1, 14)
mean_accuracy = []
ones_model = True
SNR = []
random = []
predictions_n = []
predictions_ones = []
h = 1
L = 1

# Initialize X and w
X = np.eye(n)
w = np.ones(h) / h

# Choose the model: first column of the weight matrix
if ones_model:
    # +1 for the first half of the nodes, -1 for the second half
    W_0 = np.concatenate((np.ones(n // 2), -np.ones(n // 2)), axis=0)
else:
    # n/2 for the first node, -1 for all the others
    W_0 = np.concatenate((np.ones(1) * n / 2, -np.ones(n - 1)), axis=0)

# W must be a copy: np.random.shuffle works in place, so an alias of W_0
# would be scrambled together with it and the first two columns of W would
# come out identical whenever h > 1.
W = W_0.copy()
for _ in range(h - 1):
    np.random.shuffle(W_0)
    W = np.column_stack((W, W_0))

# The loop variable is named a directly; the array it iterates over has its
# own name so that it is not rebound inside the loop.
for a in a_values:
    b = 1

    # Edge probabilities for this (a, b)
    p = a / n
    q = b / n

    # SNR, guarded on the values of the current iteration (0 when the
    # denominator vanishes, i.e. there are no edges at all)
    snr_denominator = k * (a + (k - 1) * b)
    snr = ((a - b) ** 2) / snr_denominator if snr_denominator != 0 else 0

    accuracys_i, mean_accuracy_i = result(X, W, w, L, n, p, q, k, h, 1000, istogram=False, use_MSE=use_MSE)
    mean_accuracy.append(mean_accuracy_i)

    # Predicted accuracy for this (p, q), as a fraction
    prediction_n = (1 + p - q) / 2
    prediction_n = prediction_n + (1 - p) / n
    predictions_n.append(prediction_n * 100)

    # Hard-coded reference value for the "random" series
    random.append(51.25)
    SNR.append(snr)

In [None]:
# Recorded results, noted as n = 1000
accuracy_simple_ones = [51.26949999999994, 51.44789999999996, 51.59180000000006, 51.7171, 51.798999999999914, 51.90779999999995, 52.01819999999994, 52.1272, 52.223499999999994]
accuracy_simple_n = [50.11229999999944, 50.15719999999959, 50.20599999999968, 50.25589999999993, 50.305000000000014, 50.35730000000002, 50.40710000000005, 50.456400000000116, 50.507100000000015]
random = [51.25] * 9
predictions_n = [50.0999, 50.14980000000001, 50.1997, 50.24960000000001, 50.299499999999995, 50.34940000000001, 50.399300000000004, 50.449200000000005, 50.4991]
predictions_ones = []
SNR = [0.0, 0.16666666666666666, 0.5, 0.9, 1.3333333333333333, 1.7857142857142858, 2.25, 2.7222222222222223, 3.2]

# Mean accuracy against SNR, one line per series
# NOTE(review): the data is noted as n = 1000 but the figure is saved under a
# name ending in "_500" - confirm which one is right.
f_1 = plt.figure(figsize=(20, 10))
plt.title('Accuracy with different SNR value')
plt.xlabel('SNR')
plt.ylabel('Mean accuracy')

snr_curves = (
    (random, "Accuracy random assignement"),
    (accuracy_simple_ones, "Accuracy model with W = (1,...,1,-1,...,-1)"),
    (accuracy_simple_n, "Accuracy model with W = (n,-1,...,-1)"),
    (predictions_n, "Predicted accuracy of the model with W = (n,-1,...,-1)"),
)
for series, series_label in snr_curves:
    plt.plot(SNR, series, label=series_label)

plt.legend(loc="upper left")
f_1.savefig('Accuracy_trivial_model_500.png')
plt.show()

In [None]:
# Recorded results, noted as n = 1000
accuracy_simple_ones = [51.26949999999994, 51.44789999999996, 51.59180000000006, 51.7171, 51.798999999999914, 51.90779999999995, 52.01819999999994, 52.1272, 52.223499999999994]
accuracy_simple_n = [50.11229999999944, 50.15719999999959, 50.20599999999968, 50.25589999999993, 50.305000000000014, 50.35730000000002, 50.40710000000005, 50.456400000000116, 50.507100000000015]
random = [51.25] * 9
predictions_n = [50.0999, 50.14980000000001, 50.1997, 50.24960000000001, 50.299499999999995, 50.34940000000001, 50.399300000000004, 50.449200000000005, 50.4991]
predictions_ones = []
SNR = [0.0, 0.16666666666666666, 0.5, 0.9, 1.3333333333333333, 1.7857142857142858, 2.25, 2.7222222222222223, 3.2]

# Mean accuracy against SNR with markers and thick lines.
# accuracy_simple_ones is defined above but is not drawn in this figure.
plt.rcParams.update({'font.size': 25})
f_1 = plt.figure(figsize=(20, 10))
plt.title('Accuracy with different SNR value')
plt.xlabel('SNR')
plt.ylabel('Mean accuracy')

snr_curves = (
    (random, "Random assignement"),
    (accuracy_simple_n, "Model w = (n,-1,...,-1)"),
    (predictions_n, "Predicted accuracy"),
)
for series, series_label in snr_curves:
    plt.plot(SNR, series, marker = 'o', label=series_label, linewidth=3)

plt.legend(loc="lower right")
f_1.savefig('Accuracy_trivial_model_1000_predictions.pdf')
plt.show()