### Dataset loading

In [1]:
# Move the working directory up one level; later cells use relative paths
# (e.g. the embedder checkpoint path), which are resolved from there.
%cd ..

/home/houssem.souid/brainiac-1-temporal


In [2]:
from brainiac_temporal.data.peturb_topology import perturb_tgt_graph_topology
from brainiac_temporal.data.datasets import fetch_insecta_dataset
from brainiac_temporal.data.utils import remove_isolated_nodes

In [3]:
# Label of the dataset handled in this section.
dataset_name = "insecta"

# Fetch colony 6 of the insecta dataset, then pass it through
# remove_isolated_nodes to obtain the reference used by the cells below.
dynamic_signal = fetch_insecta_dataset(colony=6)
reference_dataset = remove_isolated_nodes(dynamic_signal)

### Perturbation on topology

Perturb the topology of the reference dataset with several perturbation ratios.

In [4]:
# One call perturbs the reference topology for every listed ratio; the result
# is indexed by ratio in the following cells.
pertubed_graph = perturb_tgt_graph_topology(
    reference_dataset,
    [0.001, 0.01, 0.1, 0.5, 1],
)

Let's pick out the perturbed graph for each ratio and compare a few edge indices with the reference.

In [None]:
# Bind one perturbed graph per ratio, ordered from the smallest ratio (0.001)
# to the largest (1).
g_1, g_2, g_3, g_4, g_5 = (
    pertubed_graph[ratio] for ratio in (0.001, 0.01, 0.1, 0.5, 1)
)

In [None]:
# Edge index of element 0 of the unperturbed reference dataset.
reference_dataset[0].edge_index

tensor([[  0,   0,   0,  ..., 137, 138, 138],
        [  1,   2,   3,  ..., 136, 136, 137]])

In [None]:
# Edge index of element 0 of the graph perturbed with ratio 0.001.
g_1[0].edge_index

tensor([[  0,   1,   7,  ..., 163, 163, 163],
        [ 63,  59,   8,  ..., 135, 136, 138]])

In [None]:
# Edge index of element 0 of the graph perturbed with ratio 0.5.
g_4[0].edge_index

tensor([[  0,   0,   0,  ..., 163, 163, 163],
        [  3,   4,   6,  ..., 158, 161, 162]])

Evaluate the perturbed datasets against the reference.

In [5]:
from brainiac_temporal.metrics import MetricEvaluator

In [6]:
# Evaluator used for the insecta comparisons: all statistics, "dtw" temporal
# aggregation, automatic temporal metrics, no utility metrics, and the privacy
# metric enabled with the embedder loaded from the given checkpoint.
evaluator = MetricEvaluator(
    statistics="all",
    temporal_aggregation="dtw",
    utility_metrics=None,
    temporal_metrics="auto",
    get_privacy_metric=True,
    embedder_path="checkpoints_lp_rm_isolated_nodes/best_insecta.ckpt",
)

In [31]:
# Compare the reference dataset with the graph perturbed at ratio 0.001.
metrics = evaluator(original=reference_dataset, generated=g_1)

spectral
degree
degree_centrality
clustering
closeness_centrality
katz_centrality
eigenvector_centrality
avg_clust_coeff
transitivity
diameter
average_shortest_path_length


  embedder, data, torch.tensor(embeddings)


In [32]:
# Display the metrics computed for g_1 (ratio 0.001) in the previous cell.
metrics

{'spectral': 0.010284821238619344,
 'degree': 3.4550687402713134,
 'degree_centrality': 4.937104414532874,
 'clustering': 0.006348129664761474,
 'closeness_centrality': 0.03206631179037227,
 'katz_centrality': nan,
 'eigenvector_centrality': 0.0029131697921128234,
 'avg_clust_coeff': 0.025759079213563883,
 'transitivity': 0.001647932727574813,
 'diameter': 2.0,
 'average_shortest_path_length': 0.25958345845120306,
 'temporal_correlation': 9.99893701521031,
 'diff_avg_temporal_closeness': 0.0006775067751050301,
 'diff_avg_temporal_clustering_coefficient': 0.0006398292804118411,
 'NNDR_mean': 0.25600001215934753,
 'NNDR_std': 0.2980000078678131}

In [33]:
# Compare the reference dataset with the graph perturbed at ratio 0.5.
# Note that this overwrites the `metrics` computed for g_1.
metrics = evaluator(original=reference_dataset, generated=g_4)

spectral
degree
degree_centrality
clustering
closeness_centrality
katz_centrality
eigenvector_centrality
avg_clust_coeff
transitivity
diameter
average_shortest_path_length


In [34]:
# Display the metrics computed for g_4 (ratio 0.5) in the previous cell.
metrics

{'spectral': 3.8742660065616397,
 'degree': 169.72551369785273,
 'degree_centrality': 4.937104414532874,
 'clustering': 1.466629834402638,
 'closeness_centrality': 2.270710218161507,
 'katz_centrality': nan,
 'eigenvector_centrality': 0.11379823069802994,
 'avg_clust_coeff': 0.2634596200694959,
 'transitivity': 0.39694868593126437,
 'diameter': 0.0,
 'average_shortest_path_length': 1.083854888326082,
 'temporal_correlation': 9.789819609085814,
 'diff_avg_temporal_closeness': 2.0548780487804947,
 'diff_avg_temporal_clustering_coefficient': 0.35075712641797485,
 'NNDR_mean': 0.9200000166893005,
 'NNDR_std': 0.06400000303983688}

### Perturb a featured dataset

In [7]:
from brainiac_temporal.data.utils import remove_isolated_nodes, perturb_featured_dataset
import torch_geometric_temporal as tgt

# Switch to the TwitterTennis dataset; from here on `reference_dataset` refers
# to it instead of the insecta data loaded earlier.
dataset_name = "TwitterTennis"
dynamic_signal = tgt.TwitterTennisDatasetLoader().get_dataset()
reference_dataset = remove_isolated_nodes(dynamic_signal)

In [8]:
# Perturbation ratios to sweep over.
perturbation_ratio = [0.05, 0.1, 0.25, 0.5, 0.75, 0.9]

In [9]:
# In this example the same ratio `p` is applied to both the features and the
# topology; the perturbed datasets are stored keyed by that ratio.
pertubed_graphs = {}
for p in perturbation_ratio:
    pertubed_graphs[p] = perturb_featured_dataset(
        reference_dataset,
        features_perturbation_ratio=p,
        is_binary=False,
        noise_scale=0.1,
        topology_perturbation_ratio=[p],
    )


In [10]:
# Element 0 of the reference TwitterTennis dataset, before any perturbation.
reference_dataset[0]

Data(x=[995, 16], edge_index=[2, 89], edge_attr=[89], y=[995])

In [11]:
# Element 0 of the dataset perturbed with ratio 0.9, for comparison with the
# reference element shown in the previous cell.
pertubed_graphs[0.9][0]

Data(x=[995, 16], edge_index=[2, 111], edge_attr=[111], y=[995])

### Statistical metrics as features 

In [12]:
import numpy as np
import pandas as pd
from brainiac_temporal.metrics.statistics.statistics import GraphStatistics
from torch_geometric_temporal.signal import DynamicGraphTemporalSignal

In [13]:
# This name is reused further down to build the embedder checkpoint path and
# the results file name.
dataset_name = "TwitterTennis_node_degree_spectral_clustering"

# Reload TwitterTennis and pass it through remove_isolated_nodes; unlike the
# earlier cells, the result is kept under the name `dynamic_signal`.
dynamic_signal = tgt.TwitterTennisDatasetLoader().get_dataset()
dynamic_signal = remove_isolated_nodes(dynamic_signal)

In [14]:
# Compute the degree, spectral and clustering statistics of the dataset.
# (The original cell built this GraphStatistics object twice on identical
# lines; one construction is enough.)
evaluator = GraphStatistics(metrics=["degree", "spectral", "clustering"], to_tensors=False)
metrics = evaluator(dynamic_signal)


def _as_feature_column(values_per_entry):
    """Stack one statistic into an array that can be concatenated on axis 2.

    Each entry of ``values_per_entry`` is split with ``np.split`` into
    ``len(entry)`` equal sections along its first axis, and the resulting
    nested lists are stacked into a single array.
    """
    return np.array([np.split(values, len(values)) for values in values_per_entry])


# Same three arrays as before, built through the shared helper instead of
# three copy-pasted comprehensions.
spectral, degree, clustering = (
    _as_feature_column(metrics[name]) for name in ("spectral", "degree", "clustering")
)
feature_vector = np.concatenate((spectral, degree, clustering), axis=2)

# Rebuild the temporal signal with the statistics as features, keeping the
# original edges, edge weights and targets.
reference_dataset = DynamicGraphTemporalSignal(
    edge_indices=dynamic_signal.edge_indices,
    edge_weights=dynamic_signal.edge_weights,
    features=feature_vector,
    targets=dynamic_signal.targets,
)


In [15]:
# Perturbation ratios to sweep over for the statistics-as-features dataset.
perturbation_ratio = [0.05, 0.1, 0.25, 0.5, 0.75, 0.9]

In [16]:
# Perturb features and topology with the same ratio `p`, keyed by that ratio.
# No noise_scale is passed here, so perturb_featured_dataset uses its default.
pertubed_graphs = {}
for p in perturbation_ratio:
    pertubed_graphs[p] = perturb_featured_dataset(
        reference_dataset,
        features_perturbation_ratio=p,
        is_binary=False,
        topology_perturbation_ratio=[p],
    )


Calculate the NNDR privacy score of each perturbed dataset against the reference.

In [18]:
# NNDR calculation method; also used in the name of the results file.
method = "dtw"
evaluator = MetricEvaluator(
    statistics="all",
    temporal_aggregation="dtw",
    utility_metrics=None,
    temporal_metrics="auto",
    get_privacy_metric=True,
    embedder_path="checkpoints_lp_rm_isolated_nodes/best_" + dataset_name + ".ckpt",
    nndr_calculation_method=method,
)

# The cleaned reference is identical for every comparison, so it is computed
# once here instead of once per perturbation ratio.
reference_clean = remove_isolated_nodes(reference_dataset)


def summarize_nndr(generated):
    """Return the rounded mean and std of the NNDR scores of ``generated``.

    ``generated`` is passed through ``remove_isolated_nodes`` and compared with
    the cleaned reference. The result is a dict with the keys ``"NNDR_mean"``
    and ``"NNDR_std"``, both rounded to three decimals.
    """
    # NOTE(review): _compute_nndr is a private method of MetricEvaluator;
    # a public entry point would be preferable if one exists.
    privacy_score = evaluator._compute_nndr(
        original=reference_clean,
        generated=remove_isolated_nodes(generated),
    )
    scores = privacy_score["nndr_score"]
    return {
        "NNDR_mean": float(np.round(np.mean(scores), 3)),
        "NNDR_std": float(np.round(np.std(scores), 3)),
    }


# Baseline: the reference compared with itself.
evaluation_results = {"0 %": summarize_nndr(reference_dataset)}

# One entry per perturbation ratio, labelled as a percentage.
for perturbation_percentage, perturbed_graph in pertubed_graphs.items():
    evaluation_results[str(perturbation_percentage * 100) + " %"] = summarize_nndr(perturbed_graph)
    print(f"Calculated for {str(perturbation_percentage *100)}")

# Convert the dictionary to a DataFrame with one row per perturbation level.
df_results = pd.DataFrame.from_dict(evaluation_results, orient="index")
df_results.index.name = "Perturbation Percentage"

# Save the DataFrame to a CSV file.
# NOTE(review): this is an absolute path at the filesystem root, whereas the
# checkpoint path above is relative to the working directory — confirm that
# "/nndr_results" is intended and exists, otherwise the results are lost here.
df_results.to_csv("/nndr_results/nndr_results_" + dataset_name + "_" + method + ".csv")


  embedder, data, torch.tensor(embeddings)


Calculated for 5.0
