# K-means on spherical coordinates

In [1]:
import os
import sys
sys.path.append("/home/rohit/PhD_Work/GM_my_version/Graph_matching/")
from sklearn.cluster import KMeans
import networkx as nx
import numpy as np
from graph_generation.load_graphs_and_create_metadata import dataset_metadata
from graph_matching_tools.metrics import matching
import matplotlib.pyplot as plt
import scipy.io as sio

In [2]:
path_to_graph_folder = '/home/rohit/PhD_Work/GM_my_version/Graph_matching/data/Oasis_full_batch_renamed/'

In [3]:
def get_permutation_matrix_from_dictionary(matching, g_sizes):
    """
    Assemble the bulk assignment matrix from pairwise matching results.

    The output is a square matrix whose side is the sum of ``g_sizes``. The
    block at (graph i, graph j) is filled from ``matching["i,j"]``: for each
    key ``k`` of that entry, the cell (k, matching["i,j"][k]) of the block is
    set to 1. The main diagonal of the whole matrix is then forced to 1.

    :param matching: mapping keyed by the string "i,j" whose values map a node
        number of graph i (anything accepted by ``int``) to the integer index
        of its assigned node in graph j
    :param g_sizes: the list of the size of the different graph
    :return: the full permutation matrix
    """
    total_nodes = int(np.sum(g_sizes))
    perm = np.zeros((total_nodes, total_nodes))

    row_offset = 0
    for src_idx, src_size in enumerate(g_sizes):
        col_offset = 0
        for dst_idx, dst_size in enumerate(g_sizes):
            pair_match = matching["{},{}".format(src_idx, dst_idx)]
            for node in pair_match:
                perm[row_offset + int(node), col_offset + pair_match[node]] = 1
            # Move on to the column block of the next target graph.
            col_offset += dst_size
        # Move on to the row block of the next source graph.
        row_offset += src_size

    # Every node is matched with itself, whatever the input says.
    np.fill_diagonal(perm, 1)
    return perm

In [4]:
def get_all_coords(list_graphs):
    """
    Gather the 'sphere_3dcoords' node attribute of every graph in one array.

    Graphs are visited in the order given, and within a graph the values come
    in the order returned by ``nx.get_node_attributes``.

    :param list_graphs: iterable of networkx graphs
    :return: numpy array with one entry per node that carries the attribute
    """
    per_graph = [
        np.array(list(nx.get_node_attributes(graph, 'sphere_3dcoords').values()))
        for graph in list_graphs
    ]
    # Flatten graph by graph so rows follow the concatenated node ordering.
    stacked = [row for block in per_graph for row in block]

    return np.array(stacked)

In [5]:
def create_perm_from_labels(labels):
    """
    Build the cluster-membership matrix and the induced assignment matrix.

    Labels are remapped to compact column indices (sorted order of the
    distinct label values), so the labelling does not have to be exactly
    ``0..k-1``. When it is, the result is identical to indexing columns
    by the raw label.

    :param labels: one cluster label per node (1-D sequence or array)
    :return: tuple ``(P, U)`` where ``U`` has shape (n_nodes, n_distinct_labels)
        with a single 1 per row, and ``P = U @ U.T`` has a 1 wherever two
        nodes share a label
    """
    labels = np.asarray(labels)
    distinct, columns = np.unique(labels, return_inverse=True)
    # Keep the inverse index 1-D regardless of the numpy version in use.
    columns = columns.reshape(-1)

    U = np.zeros((len(labels), len(distinct)))
    U[np.arange(len(labels)), columns] = 1

    return U @ U.T, U

In [6]:
def get_labels_from_k_means(k, coords):
    """
    Cluster ``coords`` with K-means and return the label of each sample.

    :param k: number of clusters passed to ``KMeans`` as ``n_clusters``
    :param coords: samples to cluster, passed unchanged to ``KMeans.fit``
    :return: the fitted model's ``labels_`` attribute
    """
    # Fixed seed so repeated runs give the same clustering.
    model = KMeans(n_clusters=k, random_state=0)
    model.fit(coords)

    return model.labels_

In [7]:
def remove_dummy_nodes(graph):
    """
    Delete, in place, every node whose 'is_dummy' attribute equals True.

    :param graph: graph exposing ``nodes(data=True)`` and ``remove_nodes_from``
    :return: the same graph object, after removal
    """
    # Collect first, remove afterwards: the node view must not change while
    # it is being iterated. `== True` is kept on purpose so the comparison
    # behaves exactly as before for non-bool flag values.
    dummies = [
        node for node, attrs in graph.nodes(data=True)
        if attrs['is_dummy'] == True
    ]
    graph.remove_nodes_from(dummies)
    return graph

In [8]:
def insert_at(arr, output_size, indices):
    """
    Embed ``arr`` in a zero array of shape ``output_size``.

    For each axis, the positions listed in ``indices`` stay zero and ``arr``
    is written, in order, on the remaining positions.

    :param arr: values to place; its shape must match the number of remaining
        positions on each axis
    :param output_size: shape of the returned array, one entry per axis
    :param indices: one collection of positions per axis to leave at zero;
        each is assumed to contain no duplicates (``assume_unique=True``)
    :return: the padded array
    """
    padded = np.zeros(output_size)

    kept_positions = []
    for axis_len, skipped in zip(output_size, indices):
        every_position = np.arange(axis_len)
        kept_positions.append(
            np.setdiff1d(every_position, skipped, assume_unique=True)
        )

    # np.ix_ builds the open mesh selecting the kept rows/columns/... at once.
    padded[np.ix_(*kept_positions)] = arr
    return padded

In [9]:
trials = np.sort(os.listdir(path_to_graph_folder))

# Number of K-means clusters.
k = 90

path_to_graphs = path_to_graph_folder + '/' + '/modified_graphs/'

# NOTE(review): os.listdir returns entries in arbitrary order, so the node
# ordering of the saved matrices follows whatever order the OS reports.
# The same list is reused for both passes below so they stay aligned.
all_files = os.listdir(path_to_graphs)

# First pass: graphs with their dummy nodes stripped.
all_graphs = [remove_dummy_nodes(nx.read_gpickle(path_to_graphs+"/"+graph)) for graph in all_files]

num_nodes = [nx.number_of_nodes(g) for g in all_graphs]
print(num_nodes)

# One K-means over the coordinates of all nodes of all graphs.
all_coords = get_all_coords(all_graphs)

kmeans_labels = get_labels_from_k_means(k, all_coords)

# P[i, j] == 1 when nodes i and j fall in the same cluster; U is the
# node-to-cluster membership matrix.
P,U = create_perm_from_labels(kmeans_labels)

kmeans_X = {}
kmeans_X['full_assignment_mat'] = P
kmeans_X['U'] = U

sio.savemat(path_to_graph_folder + '/X_kmeans_real_data.mat',kmeans_X)

print('\n',P.shape)


# Add dummy rows and columns to the permutation matrix

# Second pass: reload the same files, this time keeping the dummy nodes.
all_dummy_graphs = [nx.read_gpickle(path_to_graphs+"/"+graph) for graph in all_files]
sizes_dummy = [nx.number_of_nodes(g) for g in all_dummy_graphs]
print('\n',sizes_dummy)


# Flat per-node dummy flags, graphs concatenated in file order.
# NOTE(review): assumes every node carries 'is_dummy' and that attribute
# order matches the node order used for the coordinates above - confirm.
dummy_mask = [
    flag
    for graph in all_dummy_graphs
    for flag in nx.get_node_attributes(graph,'is_dummy').values()
]
dummy_indexes = [i for i, flag in enumerate(dummy_mask) if flag == True]


# Re-insert all-zero rows/columns at the positions of the dummy nodes.
full_size = sum(sizes_dummy)
X_kmeans_dummy = insert_at(kmeans_X['full_assignment_mat'], (full_size, full_size), (dummy_indexes, dummy_indexes))
print('\n',X_kmeans_dummy.shape)

X_kmeans_dummy_dict = {}
X_kmeans_dummy_dict['full_assignment_mat'] = X_kmeans_dummy

sio.savemat(path_to_graph_folder + '/X_kmeans_real_data_dummy.mat',X_kmeans_dummy_dict)

[86, 88, 83, 87, 94, 91, 94, 83, 94, 88, 92, 89, 92, 94, 89, 85, 91, 89, 83, 96, 101, 87, 89, 91, 83, 82, 89, 82, 87, 86, 93, 89, 89, 81, 91, 89, 91, 92, 91, 86, 87, 83, 93, 92, 89, 89, 91, 81, 88, 90, 89, 96, 89, 94, 90, 93, 90, 89, 86, 91, 89, 89, 85, 85, 84, 88, 94, 86, 81, 83, 81, 84, 87, 83, 91, 86, 96, 91, 86, 78, 82, 79, 95, 95, 86, 80, 85, 85, 80, 96, 89, 94, 87, 91, 86, 83, 85, 97, 83, 86, 91, 90, 85, 81, 89, 92, 95, 91, 88, 92, 83, 88, 95, 84, 84, 87, 80, 93, 86, 88, 77, 92, 92, 94, 94, 97, 101, 86, 86, 79, 87, 93, 85, 89]
/n (11832, 11832)
/n [101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,

In [None]:
# Persist the precision and recall scores for the current k.
# NOTE(review): prec_scores and rec_scores are not assigned in any cell
# visible here - confirm they are computed before this cell is run.
score_suffix = '_score_k_' + str(k) + '.gpickle'
nx.write_gpickle(prec_scores, 'kmeans_prec' + score_suffix)
nx.write_gpickle(rec_scores, 'kmeans_rec' + score_suffix)

In [None]:
# k_70 = nx.read_gpickle('kmeans_score_k_70.gpickle')
# k_90 = nx.read_gpickle('kmeans_score_k_90.gpickle')
# k_110 = nx.read_gpickle('kmeans_score_k_110.gpickle')

In [None]:
def score_mean_std(scores):
    """
    Summarise each entry of a score dictionary by its mean and standard deviation.

    :param scores: mapping whose values are collections of numeric scores
    :return: tuple ``(means, stds)`` of numpy arrays, one element per entry of
        ``scores``, in the dictionary's iteration order
    """
    # Only the values matter; the keys are not used.
    means = [np.mean(samples) for samples in scores.values()]
    spreads = [np.std(samples) for samples in scores.values()]

    return np.array(means), np.array(spreads)

In [None]:
# k_70_mean, k_70_std  = score_mean_std(k_70)
# k_90_mean, k_90_std  = score_mean_std(k_90)
# k_110_mean, k_110_std = score_mean_std(k_110)

In [None]:
# Plot mean F1 score against kappa for each k, with a +/- one standard
# deviation band around every curve.
# NOTE(review): k_70/k_90/k_110 and their *_mean/*_std arrays are only
# assigned in cells that are commented out in this file - restore those
# cells before running this one.
plt.figure(figsize=(14, 8))

curves = (
    ('k = 70', k_70, k_70_mean, k_70_std),
    ('k = 90', k_90, k_90_mean, k_90_std),
    ('k = 110', k_110, k_110_mean, k_110_std),
)
for curve_label, curve_scores, curve_mean, curve_std in curves:
    kappas = list(curve_scores.keys())
    plt.plot(kappas, curve_mean, label=curve_label)
    plt.fill_between(kappas, curve_mean - curve_std, curve_mean + curve_std, alpha=0.2)

plt.xlabel('kappa',fontweight="bold")
plt.ylabel('F1 score',fontweight="bold")
plt.legend(loc = 'lower left')
plt.title('kmeans on simulation for different kappa values',fontweight="bold")
plt.gca().yaxis.grid(True)
# The x axis is reversed so kappa decreases from left to right.
plt.gca().invert_xaxis()
plt.show()