In [4]:
import os
import sys
sys.path.append("/home/rohit/PhD_Work/GM_my_version/Graph_matching/")
from sklearn.cluster import KMeans
import networkx as nx
import numpy as np
from graph_generation.load_graphs_and_create_metadata import dataset_metadata
from graph_matching_tools.metrics import matching
import matplotlib.pyplot as plt
import scipy.io as sio
from os.path import exists

In [5]:
# Both dataset folders sit under the same simulated-graph data directory.
path_to_simu_graph_root = '/home/rohit/PhD_Work/GM_my_version/Graph_matching/data/simu_graph/'
# Simulated graphs, organised as <trial>/<noise folder>/ (see the evaluation loop).
path_to_graph_folder = path_to_simu_graph_root + 'simu_test_single_noise/'
# Matching folder tree whose graphs carry an 'is_dummy' node attribute.
path_to_dummy_graph_folder = path_to_simu_graph_root + 'simu_with_dummy/'

In [6]:
def get_permutation_matrix_from_dictionary(matching, g_sizes):
    """
    Assemble the bulk (all graphs vs. all graphs) assignment matrix.

    The matrix is made of one block per ordered pair of graphs; the block for
    graphs (i, j) starts at the row offset of graph i and the column offset of
    graph j, the offsets being the cumulated sizes of the preceding graphs.

    :param matching: mapping whose key "i,j" gives, for the pair of graphs
        (i, j), a mapping from a node of graph i to its assigned node in graph j
    :param g_sizes: the number of nodes of each graph, in graph order
    :return: square array of side sum(g_sizes) with a 1 at every matched pair
        of nodes and on the whole diagonal, 0 elsewhere
    """
    total_size = int(np.sum(g_sizes))
    bulk = np.zeros((total_size, total_size))

    row_offset = 0
    for row_graph, row_size in enumerate(g_sizes):
        col_offset = 0
        for col_graph, col_size in enumerate(g_sizes):
            pair_match = matching["{},{}".format(row_graph, col_graph)]
            for node in pair_match:
                # Node keys may not be plain ints, hence the explicit cast.
                bulk[row_offset + int(node), col_offset + pair_match[node]] = 1
            col_offset += col_size
        row_offset += row_size

    # Every node is matched with itself, whatever the dictionary contains.
    np.fill_diagonal(bulk, 1)
    return bulk

In [7]:
# Score the CAO matching result of every (trial, noise folder) run against the
# ground-truth correspondences and collect F1 / precision / recall per noise level.
trials = np.sort(os.listdir(path_to_graph_folder))

# Noise levels expected in the folder names ("noise_<level>,outliers_varied").
noise_levels = (100, 400, 700, 1000, 1300)
scores = {level: [] for level in noise_levels}
prec_scores = {level: [] for level in noise_levels}
rec_scores = {level: [] for level in noise_levels}

for trial in trials:
    trial_dir = path_to_graph_folder + trial
    if not os.path.isdir(trial_dir):
        # A stray file next to the trial folders would make os.listdir fail.
        continue

    print('trial: ', trial)

    for folder in os.listdir(trial_dir):

        if not os.path.isdir(trial_dir + '/' + folder):
            continue

        print('Noise folder: ',folder)

        run_dir = path_to_graph_folder + '/' + trial + '/' + folder
        path_to_graphs = run_dir + '/graphs/'
        path_to_dummy_graphs = path_to_dummy_graph_folder + '/' + trial +'/' + folder + '/0/graphs/'
        path_to_groundtruth_ref = run_dir + '/permutation_to_ref_graph.gpickle'
        path_to_groundtruth = run_dir + '/ground_truth.gpickle'
        path_to_cao_result = run_dir + '/X_cao_cst_o.mat'

        # Folder names look like "noise_100,outliers_varied".
        noise = folder.split(',')[0].split('_')[1]
        noise_level = int(noise)

        # Check for the CAO result first: without it nothing can be scored, and
        # building the ground-truth matrix and reading every graph is expensive.
        if not exists(path_to_cao_result):
            continue

        # NOTE: nx.read_gpickle was removed in NetworkX 3.0; this cell requires
        # an older networkx (or a pickle-based loader).
        graph_meta = dataset_metadata(path_to_graphs, path_to_groundtruth_ref)
        ground_truth = nx.read_gpickle(path_to_groundtruth)
        res = get_permutation_matrix_from_dictionary(ground_truth, graph_meta.sizes)

        # Sorted so that the graphs are read in a deterministic file-name order.
        all_dummy_graphs = [nx.read_gpickle(path_to_dummy_graphs+'/'+g)
                            for g in sorted(os.listdir(path_to_dummy_graphs))]

        X_msync = sio.loadmat(path_to_cao_result)['X']

        # Flatten the per-graph 'is_dummy' flags into one mask over all nodes,
        # then drop the dummy nodes from both axes of the CAO matrix.
        dummy_mask = [is_dummy
                      for graph in all_dummy_graphs
                      for is_dummy in nx.get_node_attributes(graph,'is_dummy').values()]
        dummy_indexes = [i for i, is_dummy in enumerate(dummy_mask) if is_dummy]
        X_msync = np.delete(X_msync,dummy_indexes,0) # delete the dummy rows
        X_msync = np.delete(X_msync,dummy_indexes,1) # delete the dummy columns

        print('res shape: ',res.shape)
        print('X shape: ',X_msync.shape)

        f1, prec, rec = matching.compute_f1score(X_msync,res)

        # setdefault keeps a run with an unlisted noise level from raising
        # KeyError after all the loading above has been done.
        scores.setdefault(noise_level, []).append(f1)
        prec_scores.setdefault(noise_level, []).append(prec)
        rec_scores.setdefault(noise_level, []).append(rec)

trial:  0.0
Noise folder:  noise_100,outliers_varied
res shape:  (11434, 11434)
X shape:  (11434, 11434)
Noise folder:  noise_400,outliers_varied
Noise folder:  noise_700,outliers_varied
Noise folder:  noise_1300,outliers_varied
res shape:  (11432, 11432)
X shape:  (11432, 11432)
Noise folder:  noise_1000,outliers_varied
res shape:  (11318, 11318)
X shape:  (11318, 11318)
trial:  0.1
Noise folder:  noise_100,outliers_varied
res shape:  (11337, 11337)
X shape:  (11337, 11337)
Noise folder:  noise_400,outliers_varied
res shape:  (11406, 11406)
X shape:  (11406, 11406)
Noise folder:  noise_700,outliers_varied
Noise folder:  noise_1300,outliers_varied
res shape:  (11248, 11248)
X shape:  (11248, 11248)
Noise folder:  noise_1000,outliers_varied
res shape:  (11358, 11358)
X shape:  (11358, 11358)
trial:  0.2
Noise folder:  noise_100,outliers_varied
Noise folder:  noise_400,outliers_varied
Noise folder:  noise_700,outliers_varied
Noise folder:  noise_1300,outliers_varied
res shape:  (11292, 1

In [10]:
# Precision of every scored run, grouped by noise level.
prec_scores

{100: [0.06940255491618295,
  0.0722003118668761,
  0.06919687576617281,
  0.08012665815255075],
 400: [0.33249761899362795, 0.35625069398376574],
 700: [],
 1000: [0.5803358931064015,
  0.5943148013855296,
  0.6172584181139315,
  0.5847887618063963],
 1300: [0.7009452776744259,
  0.6994513562888791,
  0.7219446167567614,
  0.68002519211172,
  0.6751715856227034]}

In [11]:
# Recall of every scored run, grouped by noise level.
rec_scores

{100: [0.08339594416008694,
  0.0851446874124384,
  0.08223609723609723,
  0.09426311196571033],
 400: [0.394862188291054, 0.41722420711995695],
 700: [],
 1000: [0.6823852691026986,
  0.7068964371067405,
  0.7230928714145982,
  0.6913494371020555],
 1300: [0.8199302973202821,
  0.8120009329062546,
  0.8319503099860984,
  0.7877875358892079,
  0.7978001986192047]}

In [12]:
# Precision per noise level again; this cell repeats an earlier display cell.
prec_scores

{100: [0.06940255491618295,
  0.0722003118668761,
  0.06919687576617281,
  0.08012665815255075],
 400: [0.33249761899362795, 0.35625069398376574],
 700: [],
 1000: [0.5803358931064015,
  0.5943148013855296,
  0.6172584181139315,
  0.5847887618063963],
 1300: [0.7009452776744259,
  0.6994513562888791,
  0.7219446167567614,
  0.68002519211172,
  0.6751715856227034]}

In [None]:
# Hand-entered F1 scores, two runs per noise level (no entry for level 700).
# NOTE(review): these literals agree with the F1 implied by the first two
# precision/recall pairs displayed in the cells above; they are pasted here,
# not computed by the notebook - confirm before reusing.
scores_selected = {
    100: [0.07575848754205779, 0.07814004910410795],
    400: [0.3610063028572132, 0.3843341603488669],
    1000: [0.6272369172851308, 0.6457353013735544],
    1300: [0.7557834176288084, 0.7515356692225564],
}

In [None]:
# Display the hand-entered F1 scores that feed the plot.
scores_selected

In [None]:
def score_mean_std(scores):
    """
    Summarise each entry of a score dictionary by its mean and standard deviation.

    :param scores: mapping from a key to the list of scores recorded for it
    :return: two arrays (means, standard deviations), one value per key, in the
        iteration order of the dictionary
    """
    per_key_values = list(scores.values())
    means = np.array([np.mean(values) for values in per_key_values])
    deviations = np.array([np.std(values) for values in per_key_values])
    return means, deviations

In [None]:
# Mean and standard deviation of the selected F1 scores, one value per key of scores_selected.
CAO_mean, CAO_std = score_mean_std(scores_selected)

In [None]:
# Plot the mean F1 score per kappa with a +/- one standard deviation band.
kappas = list(scores_selected.keys())

fig, ax = plt.subplots(figsize=(16, 9))

ax.plot(kappas, CAO_mean, label='CAO')
ax.fill_between(kappas, CAO_mean - CAO_std, CAO_mean + CAO_std, alpha=0.2)

ax.set_xlabel('kappa', fontweight="bold", fontsize=18)
ax.set_ylabel('F1 score', fontweight="bold", fontsize=18)
ax.set_title('CAO on simulation for different kappa values', fontweight="bold", fontsize=18)
ax.yaxis.grid(True)
# The x axis runs from the largest kappa down to the smallest.
ax.invert_xaxis()
# Single legend call: location code 3 used previously is 'lower left'.
ax.legend(loc='lower left', prop={'size': 15})
plt.show()