Many of the ideas in this tutorial are simplified versions of what is discussed in:

```
    Priebe, C. E., Park, Y., Vogelstein, J. T., Conroy, J. M., Lyzinski, V., Tang, M., ... & Bridgeford, E. (2019). On a two-truths phenomenon in spectral graph clustering. Proceedings of the National Academy of Sciences, 116(13), 5995-6000.
```

Create an affinity network

In [None]:
##############
### Cell 1 ###
##############

## REQUIRES pip install scikit-learn

from drawing_utilities import show_dendrogram
from drawing_utilities import show_partitions
from drawing_utilities import show_kCores
from drawing_utilities import show_kCores_by_partition
from drawing_utilities import show_2D_scatterplot
from partitioning_utilities import get_k_fiedler_eigenvectors_sparse
from partitioning_utilities import get_k_leading_eigenvectors_sparse
from partitioning_utilities import get_colors_from_clusters
import networkx as nx

%load_ext autoreload

# Two equally sized blocks: edge probability a / c inside each block and
# b between the blocks (here within-block edges are more likely).
a, b, c = 0.4, 0.1, 0.4
num_nodes: int = 30
G: nx.Graph = nx.stochastic_block_model(
    [num_nodes, num_nodes],
    [[a, b], [b, c]],
)

# Ground-truth classes: node labels below num_nodes form class 0,
# all remaining nodes form class 1.
partition_dict: dict[int, set[int]] = {
    0: {node for node in G.nodes() if node < num_nodes},
    1: {node for node in G.nodes() if not node < num_nodes},
}
partition: list[set[int]] = list(partition_dict.values())

show_partitions(G, partition, title="Two class affinity network with true classes:\n")

Look at dendrogram

In [None]:
##############
### Cell 2 ###
##############
import networkx as nx
from copy import deepcopy
def largest_component(graph):
    """Return the largest connected component of ``graph`` as a new graph.

    The nodes of the component are relabeled to the consecutive integers
    ``0 .. k-1``, numbered in the order in which they appear in
    ``graph.nodes()``. The input graph is never modified, and the result is
    deep-copied so it shares no attribute data with ``graph``.

    Args:
        graph: An undirected networkx graph (``nx.connected_components`` is
            called on it).

    Returns:
        A new graph containing only the nodes and edges of the largest
        connected component, with contiguous integer labels. When ``graph``
        has no nodes, a deep copy of the (empty) graph is returned.
    """
    if graph.number_of_nodes() == 0:
        # max() over zero components would raise ValueError.
        return deepcopy(graph)

    largest = max(nx.connected_components(graph), key=len)

    # Materialize the component before copying so only the component (not
    # the whole input graph) is duplicated.
    component = deepcopy(graph.subgraph(largest).copy())

    # Build the mapping from the component's own nodes -- not from a
    # notebook-level graph -- so the new labels are always contiguous and the
    # function depends only on its argument.
    members = [node for node in graph.nodes() if node in largest]
    mapping = {old_label: new_label for new_label, old_label in enumerate(members)}
    return nx.relabel_nodes(component, mapping)

# Restrict the affinity network to its largest connected component
# (as returned by largest_component) and draw the dendrogram of that graph.
H = largest_component(G)
show_dendrogram(H)

Look at kcores

In [None]:
##############
### Cell 3 ###
##############
# k-core view of G; when cells are run in order, G is the affinity network
# created in Cell 1.
show_kCores(G)

Get the first and second Fiedler vectors

In [None]:
##############
### Cell 4 ###
##############

%autoreload 2

# Embed the nodes with two Fiedler eigenvectors of the normalized Laplacian
# and assign each node a cluster color (two clusters).
L = nx.linalg.normalized_laplacian_matrix(G)
fiedler_vectors = get_k_fiedler_eigenvectors_sparse(L, 2)
colors = get_colors_from_clusters(fiedler_vectors, num_clusters=2)

# One scatter axis per eigenvector column.
affinity_fiedler_x = fiedler_vectors[:, 0]
affinity_fiedler_y = fiedler_vectors[:, 1]
show_2D_scatterplot(
    affinity_fiedler_x,
    affinity_fiedler_y,
    colors,
    xlabel="values of fiedler eigenvector",
    ylabel="values of second fiedler eigenvector",
    title="Clusters of L for affinity graph",
)


Show graph partitioned into two clusters using the Laplacian embedding

In [None]:
##############
### Cell 5 ###
##############
# Collect the nodes of each cluster by the color assigned to them
# ('y' -> group1, 'c' -> group2).
group1, group2 = (
    {n for n in G if colors[n] == cluster_color}
    for cluster_color in ('y', 'c')
)

show_partitions(
    G,
    partition=(group1, group2),
    title="Affinity network partitioned by L",
)

Repeat but partition using adjacency matrix

In [None]:
##############
### Cell 6 ###
##############
# Embed the nodes with two leading eigenvectors of the adjacency matrix and
# assign each node a cluster color (two clusters). This rebinds `colors`,
# so later cells see the adjacency-based clustering.
A = nx.linalg.adjacency_matrix(G)
leading_vectors = get_k_leading_eigenvectors_sparse(A, 2)
colors = get_colors_from_clusters(leading_vectors, num_clusters=2)

# Axis label typo fixed: "leadin" -> "leading".
show_2D_scatterplot(leading_vectors[:,0],
                    leading_vectors[:,1],
                    colors,
                    xlabel = "values of first leading eigenvector",
                    ylabel = "values of second leading eigenvector",
                    title = "Clusters of A for affinity graph")


In [None]:
##############
### Cell 7 ###
##############
# Collect the nodes of each cluster by the color assigned to them
# ('y' -> group1, 'c' -> group2).
group1, group2 = (
    {n for n in G if colors[n] == cluster_color}
    for cluster_color in ('y', 'c')
)

show_partitions(
    G,
    partition=(group1, group2),
    title="Affinity network partitioned by A",
)

Redraw to see which node classes belong in the core and which belong in the periphery

In [None]:
##############
### Cell 8 ###
##############

# When cells are run in order, `colors` holds the clustering computed from
# the adjacency-matrix eigenvectors in Cell 6.
show_kCores_by_partition(G, colors, title="Affinity graph partitioned by A")

---

Create a core-periphery network

In [None]:
##############
### Cell 9 ###
##############

# Edge probabilities: a inside block 0, c inside block 1, b between blocks.
# With c tiny, block 1 has almost no internal edges.
a, b, c = 0.4, 0.2, 0.01
G: nx.Graph = nx.stochastic_block_model(
    [num_nodes, num_nodes],
    [[a, b], [b, c]],
)

# Ground-truth classes: node labels below num_nodes form class 0,
# all remaining nodes form class 1.
partition_dict: dict[int, set[int]] = {
    0: {node for node in G.nodes() if node < num_nodes},
    1: {node for node in G.nodes() if not node < num_nodes},
}
partition: list[set[int]] = list(partition_dict.values())

core_periphery_layout = nx.spring_layout(G)
show_partitions(
    G,
    pos=core_periphery_layout,
    partition=partition,
    title="Two class core-periphery network with true classes:\n",
)



Look at dendrogram

In [None]:
###############
### Cell 10 ###
###############

# NOTE(review): Cell 2 draws the dendrogram of largest_component(G); here the
# full graph is passed instead. If show_dendrogram needs a connected graph,
# pass largest_component(G) here without rebinding G, since later cells
# still use the full core-periphery graph -- TODO confirm.
show_dendrogram(G,title="Dendrogram of core-periphery network")

Look at kcores

In [None]:
###############
### Cell 11 ###
###############

# k-core view of G; when cells are run in order, G is the core-periphery
# network created in Cell 9.
show_kCores(G)

Let's partition using the normalized Laplacian matrix

In [None]:
###############
### Cell 12 ###
###############

# Embed the nodes with two Fiedler eigenvectors of the normalized Laplacian
# and assign each node a cluster color (two clusters).
L = nx.linalg.normalized_laplacian_matrix(G)
fiedler_vectors = get_k_fiedler_eigenvectors_sparse(L, 2)
colors = get_colors_from_clusters(fiedler_vectors, num_clusters=2)

# One scatter axis per eigenvector column.
core_fiedler_x = fiedler_vectors[:, 0]
core_fiedler_y = fiedler_vectors[:, 1]
show_2D_scatterplot(
    core_fiedler_x,
    core_fiedler_y,
    colors,
    xlabel="values of fiedler eigenvector",
    ylabel="values of second fiedler eigenvector",
    title="Clusters of L for core-periphery graph",
)

Show partition

In [None]:
###############
### Cell 13 ###
###############
# Collect the nodes of each cluster by the color assigned to them
# ('y' -> group1, 'c' -> group2).
group1, group2 = (
    {n for n in G if colors[n] == cluster_color}
    for cluster_color in ('y', 'c')
)

# Spring layout tends to show core periphery structure well
spring_positions_L = nx.spring_layout(G)
show_partitions(
    G,
    pos=spring_positions_L,
    partition=(group1, group2),
    title="Core-periphery network partitioned by L:\n",
)

In [None]:
###############
### Cell 14 ###
###############
# When cells are run in order, `colors` holds the clustering computed from
# the normalized-Laplacian eigenvectors in Cell 12.
show_kCores_by_partition(G,colors,title = "Core-periphery network partitioned by L")

Repeat using the adjacency matrix

In [None]:
###############
### Cell 15 ###
###############

# Embed the nodes with two leading eigenvectors of the adjacency matrix and
# assign each node a cluster color (two clusters). This rebinds `colors`,
# so later cells see the adjacency-based clustering.
A = nx.linalg.adjacency_matrix(G)
leading_vectors = get_k_leading_eigenvectors_sparse(A, 2)
colors = get_colors_from_clusters(leading_vectors, num_clusters=2)

# Axis label typo fixed: "leadin" -> "leading".
show_2D_scatterplot(leading_vectors[:,0],
                    leading_vectors[:,1],
                    colors,
                    xlabel = "values of first leading eigenvector",
                    ylabel = "values of second leading eigenvector",
                    title = "Clusters of A for core-periphery graph")

Show partition by adjacency matrix

In [None]:
###############
### Cell 16 ###
###############

# Collect the nodes of each cluster by the color assigned to them
# ('y' -> group1, 'c' -> group2).
group1, group2 = (
    {n for n in G if colors[n] == cluster_color}
    for cluster_color in ('y', 'c')
)

# Spring layout tends to show core periphery structure well
spring_positions_A = nx.spring_layout(G)
show_partitions(
    G,
    pos=spring_positions_A,
    partition=(group1, group2),
    title="Core-periphery network partitioned by A:\n",
)

Redraw using the shell layout to see which class falls in core and which in periphery

In [None]:
###############
### Cell 17 ###
###############
# When cells are run in order, `colors` holds the clustering computed from
# the adjacency-matrix eigenvectors in Cell 15.
show_kCores_by_partition(G,colors, title="Core-periphery graph partitioned by A")

It looks like most of the yellow nodes are in the 3-core, which means that clustering by the eigenvectors of the adjacency matrix pulls out at least some information about the k-core structure.

---

### Clustering Using More Information

In [None]:
from sklearn.manifold import TSNE
# Pipeline: request num_eigenvectors eigenvectors of the adjacency matrix,
# reduce them to two t-SNE components, then cluster into three groups.
num_eigenvectors: int = 5
A = nx.linalg.adjacency_matrix(G)
# presumably one row per node and one column per eigenvector -- TODO confirm
# against get_k_leading_eigenvectors_sparse
leading_vectors = get_k_leading_eigenvectors_sparse(A, num_eigenvectors)
# random_state is fixed so the t-SNE embedding is repeatable across runs.
tsne: TSNE = TSNE(n_components=2, random_state=42)
tsne_components = tsne.fit_transform(leading_vectors)
# Clusters are computed on the t-SNE components, not on the raw eigenvectors.
colors = get_colors_from_clusters(tsne_components, num_clusters=3)


In [None]:
# The points are placed by the first two leading eigenvectors, while `colors`
# comes from the three clusters found on the t-SNE components.
# Axis label typo fixed: "leadin" -> "leading".
show_2D_scatterplot(leading_vectors[:,0],
                    leading_vectors[:,1],
                    colors,
                    xlabel = "values of first leading eigenvector",
                    ylabel = "values of second leading eigenvector",
                    title = "Clusters of A for core-periphery graph")

In [None]:
# Collect the nodes of each of the three clusters by assigned color
# ('y' -> group1, 'c' -> group2, 'm' -> group3).
group1, group2, group3 = (
    {n for n in G if colors[n] == cluster_color}
    for cluster_color in ('y', 'c', 'm')
)

show_partitions(
    G,
    partition=(group1, group2, group3),
    title="Core-periphery network partitioned by A:\n",
)

In [None]:
# When cells are run in order, `colors` holds the three-cluster assignment
# computed on the t-SNE components.
show_kCores_by_partition(G,colors, title="Core-periphery graph partitioned by A")

I see nothing useful in this plot