# Community Deception

Connect Google Drive to access the dataset (Google Colab only; uncomment the cell below when running there).

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

## Install PyTorch Geometric

When running on Kaggle, run the following cells to install PyTorch Geometric.

In [2]:
import torch
import os

# Build the PyG wheel tag from the installed torch version and export it as
# the TORCH environment variable, which the install cell below expands.
torch_tag = torch.__version__

# On Colab the version string may already carry a "+cuXXX" suffix; only
# append the CUDA suffix when CUDA is available and no suffix is present.
if torch.cuda.is_available():
    print("CUDA is available")
    print(torch.version.cuda)
    if "+" not in torch_tag:
        cuda_suffix = torch.version.cuda.replace(".", "")
        torch_tag += "+cu" + cuda_suffix

os.environ["TORCH"] = torch_tag

print(os.environ["TORCH"])

2.0.0+cpu


Install torch geometric and optional dependencies:

In [3]:
! pip install torch_geometric
# Optional PyG dependencies. ${TORCH} below is the environment variable
# exported by the previous cell (torch version, plus the CUDA suffix if any).
# Alternative wheel indexes, kept for reference:
# ! pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
# ! pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
# ! pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-${TORCH}.html
! pip install pyg_lib torch_scatter torch_sparse -f https://data.pyg.org/whl/torch-${TORCH}.html

# Graph libraries
# NOTE(review): none of these installs pin a version, so a re-run may pull
# different releases than the ones this notebook was developed against.
# ! pip install cugraph-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
! pip install igraph
! pip install cdlib[C]
! pip install karateclub
# ! pip install graph2vec

Collecting torch_geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.6/661.6 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: torch_geometric
  Building wheel for torch_geometric (pyproject.toml) ... [?25ldone
[?25h  Created wheel for torch_geometric: filename=torch_geometric-2.3.1-py3-none-any.whl size=910454 sha256=2396c01cf398aeaa7533d743798540a0d72187af7a47d125fcf0ba49c9ffb416
  Stored in directory: /root/.cache/pip/wheels/ac/dc/30/e2874821ff308ee67dcd7a66dbde912411e19e35a1addda028
Successfully built torch_geometric
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.3.1
Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html
Collecting py

**IMPORTANT!!!**
After installing the libraries, restart the runtime and then run the cells below.

## Import Libraries

In [45]:
# Import torch and os again to reset the Colab environment after the PyG installation
from IPython.display import FileLink, display
import subprocess
import torch
import os
import gc

# Typing
from typing import List, Tuple, Set, Callable
from collections import Counter, namedtuple

# Deep Learning
from torch_geometric.utils import from_networkx
from torch_geometric.data import Data
from torch_geometric.data import Batch
from torch_geometric.nn import GCNConv, GATConv
from torch_geometric.nn import global_mean_pool
from torch.distributions import MultivariateNormal

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import scipy

# Graph
from karateclub import GL2Vec, Graph2Vec, Node2Vec
from cdlib import algorithms
import cdlib
import networkx as nx
import igraph as ig

# cuGraph
# import cugraph as cnx


# Misc
from statistics import mean
from enum import Enum
from tqdm import trange
import math
import random
import json
import time
import copy


# Plot
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('default')

## Utils

In [46]:
# Only for the notebook
TRAIN = True
# Set to True to test the results with the baselines algorithms
TEST = False

In [47]:
class FilePaths(Enum):
    """
    String paths used by the notebook: dataset directory, log directory,
    test-output directory, the trained model checkpoint and every dataset file.

    Each member's ``value`` is a plain ``str``. The Kaggle layout is the
    active one; the local and Google Colab layouts are kept as comments so
    the three base directories can be switched by hand.
    """
    # ---- Base directories -------------------------------------------------
    # Local:
    #   DATASETS_DIR = 'dataset/data'
    #   LOG_DIR      = 'src/logs/'
    #   TEST_DIR     = 'test/'
    # Google Colab:
    #   DATASETS_DIR = "/content/drive/MyDrive/Sapienza/Tesi/Datasets"
    #   LOG_DIR      = "/content/drive/MyDrive/Sapienza/Tesi/Logs/"
    #   TEST_DIR     = "/content/drive/MyDrive/Sapienza/Tesi/Test/"
    # Kaggle (active):
    DATASETS_DIR = "/kaggle/input/network-community"
    LOG_DIR = "/kaggle/working/logs/"
    TEST_DIR = "/kaggle/working/test/"

    # ---- Trained model ----------------------------------------------------
    # ! Checkpoint loaded for testing (edit this line to test another model).
    # LOG_DIR already ends with a slash, so no separator is inserted here.
    TRAINED_MODEL = (
        f"{LOG_DIR}kar/greedy/lr-0.0001/gamma-0.7/lambda-0.1/alpha-0.7/model.pth"
    )

    # ---- Dataset files ----------------------------------------------------
    # Every dataset is read from its .mtx file except NETS, which uses the
    # .gml file (swap the extension by hand to switch format).
    KAR = f"{DATASETS_DIR}/kar.mtx"
    DOL = f"{DATASETS_DIR}/dol.mtx"
    MAD = f"{DATASETS_DIR}/mad.mtx"
    LESM = f"{DATASETS_DIR}/lesm.mtx"
    POLB = f"{DATASETS_DIR}/polb.mtx"
    WORDS = f"{DATASETS_DIR}/words.mtx"
    NETS = f"{DATASETS_DIR}/nets.gml"
    ERDOS = f"{DATASETS_DIR}/erdos.mtx"
    POW = f"{DATASETS_DIR}/pow.mtx"
    FB_75 = f"{DATASETS_DIR}/fb-75.mtx"

    # The following datasets are too big, scipy cannot load them:
    #   DBLP = DATASETS_DIR + '/dblp.mtx'
    #   ASTR = DATASETS_DIR + '/astr.mtx'
    #   AMZ  = DATASETS_DIR + '/amz.mtx'
    #   YOU  = DATASETS_DIR + '/you.mtx'
    #   ORK  = DATASETS_DIR + '/ork.mtx'


class DetectionAlgorithmsNames(Enum):
    """
    Names of the community detection algorithms.

    Known failures on the NETS dataset, as recorded by the authors:
      - LOUV, INF : each community is a single node
      - WALK, EIG : bipartite graphs
      - GRE       : division by 0
    """
    LOUV = 'louvain'
    WALK = 'walktrap'
    GRE = 'greedy'
    INF = 'infomap'
    LAB = 'label_propagation'
    EIG = 'eigenvector'
    BTW = 'edge_betweenness'
    SPIN = 'spinglass'
    OPT = 'optimal'
    SCD = 'scalable_community_detection'


class SimilarityFunctionsNames(Enum):
    """
    Names of the similarity functions, split in two families:
    community-level (JAC, OVE, SOR) and graph-level (GED, JAC_1, JAC_2).
    """
    # --- Similarity between communities ---
    JAC = 'jaccard'
    OVE = 'overlap'
    SOR = 'sorensen'

    # --- Similarity between graphs ---
    # GED stands for graph edit distance
    GED = 'ged'
    JAC_1 = 'jaccard_1'
    JAC_2 = 'jaccard_2'


class HyperParams(Enum):
    """Hyperparameters for the environment, the agent, training, evaluation
    and synthetic graph generation.

    Values are read with ``HyperParams.NAME.value``.

    Caveats that follow from using an ``Enum`` as a constants container:
    - Members with equal values are aliases of the first one defined (for
      example HIDDEN_SIZE_2 is an alias of HIDDEN_SIZE_1, and MAX_STEPS_MUL
      and TAU1 are aliases of BETA). ``.value`` is unaffected, but ``.name``
      and iteration over the class only report the first member.
    - Inside the class body the names still refer to the raw values, which
      is why expressions such as ``LR[0]`` or ``int(0.05 * N_NODE)`` work.
    """
    # ! REAL GRAPH Graph path (change the following line to change the graph)
    GRAPH_NAME = None # FilePaths.KAR.value
    # ! Define the detection algorithm to use (change the following line to change the algorithm)
    DETECTION_ALG_NAME = DetectionAlgorithmsNames.INF.value
    # Multiplier for the rewiring action number, i.e. (mean_degree * BETA)
    BETA = 2
    # ! Strength of the deception constraint, value between 0 (hard) and 1 (soft)
    TAU = 0.5
    # ° Hyperparameters  Testing ° #
    # ! Weight to balance the penalty in the reward
    # The higher its value the more importance the penalty will have
    LAMBDA = [0.1, 1] # [0.01, 0.1, 1]
    # ! Weight to balance the two metrics in the definition of the penalty
    # The higher its value the more importance the distance between communities
    # will have, compared with the distance between graphs
    ALPHA = [0.8] # [0.3, 0.5, 0.7]
    # Multiplier for the number of maximum steps allowed
    MAX_STEPS_MUL = 2
    
    """ Graph Encoder Parameters """""
    EMBEDDING_DIM = 128 # 256

    """ Agent Parameters"""
    # Network Architecture
    HIDDEN_SIZE_1 = 64
    HIDDEN_SIZE_2 = 64
    # Hyperparameters for the ActorCritic
    # Machine epsilon of float32 (the trailing 0.2 is a previously used value)
    EPS_CLIP = np.finfo(np.float32).eps.item()  # 0.2
    BEST_REWARD = -np.inf
    # ° Hyperparameters  Testing ° #
    # ! Learning rate, it controls how fast the network learns
    LR = [1e-7, 1e-4, 1e-1] # [1e-7, 1e-4, 1e-1]
    # ! Discount factor:
    # - 0: only the reward on the next step is important
    # - 1: a reward in the future is as important as a reward on the next step
    GAMMA = [0.3, 0.9] # [0.9, 0.95]
    
    """ Training Parameters """
    # Number of episodes to collect experience
    MAX_EPISODES = 1000
    # Dictionary for logging.
    # NOTE(review): this is a single dict object stored on the enum member;
    # code that fills the lists through HyperParams.LOG_DICT.value in place
    # would share state between runs — confirm callers copy it first.
    LOG_DICT = {
        # List of rewards per episode
        'train_reward_list': [],
        # Avg reward per episode, with the last value multiplied per 10 if the 
        # goal is reached
        'train_reward_mul': [],
        # Total reward per episode
        'train_reward': [],
        # Number of steps per episode
        'train_steps': [],
        # Average reward per episode
        'train_avg_reward': [],
        # Average Actor loss per episode
        'a_loss': [],
        # Average Critic loss per episode
        'v_loss': [],
        # set max number of training episodes
        'train_episodes': MAX_EPISODES,
    }
    
    """Evaluation Parameters"""
    # ! Change the following parameters according to the hyperparameters to test
    STEPS_EVAL = 100
    # The *_EVAL values take the first entry of the corresponding search list
    LR_EVAL = LR[0]
    GAMMA_EVAL = GAMMA[0]
    LAMBDA_EVAL = LAMBDA[0]
    ALPHA_EVAL = ALPHA[0]
    # Algorithms to evaluate
    ALGS_EVAL = ["Roam",  "Random", "Degree", "Agent"]
    # Metrics for each algorithm
    METRICS_EVAL = ["goal", "nmi", "time", "steps"]
    
    """Graph Generation Parameters"""
    # ! Change the following parameters to modify the graph
    # Number of nodes
    N_NODE = 250
    # Power law exponent for the degree distribution of the created graph.
    TAU1 = 2
    # Power law exponent for the community size distribution in the created graph.
    TAU2 = 1.1
    # Fraction of inter-community edges incident to each node.
    MU = 0.1

    # Desired average degree of nodes in the created graph.
    AVERAGE_DEGREE = int(0.05 * N_NODE)  # 20
    # Minimum degree of nodes in the created graph
    MIN_DEGREE = None  # 30
    # Maximum degree of nodes in the created graph
    MAX_DEGREE = int(0.19 * N_NODE)

    # Minimum size of communities in the graph.
    MIN_COMMUNITY = int(0.05 * N_NODE)
    # Maximum size of communities in the graph.
    MAX_COMMUNITY = int(0.2 * N_NODE)

    # Maximum number of iterations to try to create the community sizes, degree distribution, and community affiliations.
    MAX_ITERS = 5000
    # Seed for the random number generator.
    SEED = 10


class Utils:
    """Class to store utility functions"""

    @staticmethod
    def import_mtx_graph(file_path: str) -> nx.Graph:
        """
        Load a graph from a .mtx or .gml file and store, on every node, the
        number of its neighbours under the 'num_neighbors' attribute.

        Parameters
        ----------
        file_path : str
            Path of the graph file; the extension selects the reader
            (".mtx" -> scipy.io.mmread, ".gml" -> nx.read_gml)

        Returns
        -------
        nx.Graph
            Graph loaded from the file

        Raises
        ------
        ValueError
            If the file extension is neither ".mtx" nor ".gml"
        """
        is_matrix_market = file_path.endswith(".mtx")
        if not is_matrix_market and not file_path.endswith(".gml"):
            raise ValueError("File format not supported")

        if is_matrix_market:
            # The matrix read from the file is handed directly to nx.Graph
            loaded = nx.Graph(scipy.io.mmread(file_path))
        else:
            # Nodes are labelled with the 'id' field of the .gml file
            loaded = nx.read_gml(file_path, label='id')

        for node_id in loaded.nodes:
            neighbour_count = sum(1 for _ in loaded.neighbors(node_id))
            loaded.nodes[node_id]['num_neighbors'] = neighbour_count
        return loaded
    
    @staticmethod
    def generate_lfr_benchmark_graph(
        n: int=HyperParams.N_NODE.value,
        tau1: float=HyperParams.TAU1.value,
        tau2: float=HyperParams.TAU2.value,
        mu: float=HyperParams.MU.value,   
        average_degree: int = HyperParams.AVERAGE_DEGREE.value,
        min_degree: int=HyperParams.MIN_DEGREE.value,
        max_degree: int=HyperParams.MAX_DEGREE.value,
        min_community: int=HyperParams.MIN_COMMUNITY.value,
        max_community: int=HyperParams.MAX_COMMUNITY.value,
        max_iters: int=HyperParams.MAX_ITERS.value,
        seed: int=HyperParams.SEED.value)->Tuple[nx.Graph, str]:
        """
        Generate a LFR benchmark graph for community detection algorithms,
        save its edge list to disk and return the graph ready for PyG.

        Every default comes from the matching ``HyperParams`` member.

        Parameters
        ----------
        n : int, optional
            Number of nodes, by default HyperParams.N_NODE
        tau1 : float, optional
            Power law exponent for the degree distribution, by default
            HyperParams.TAU1
        tau2 : float, optional
            Power law exponent for the community size distribution, by
            default HyperParams.TAU2
        mu : float, optional
            Fraction of inter-community edges incident to each node, by
            default HyperParams.MU
        average_degree : int, optional
            Desired average degree of the nodes, by default
            HyperParams.AVERAGE_DEGREE
        min_degree : int, optional
            Minimum degree of the nodes, by default HyperParams.MIN_DEGREE
        max_degree : int, optional
            Maximum degree of the nodes, by default HyperParams.MAX_DEGREE
        min_community : int, optional
            Minimum size of the communities, by default
            HyperParams.MIN_COMMUNITY
        max_community : int, optional
            Maximum size of the communities, by default
            HyperParams.MAX_COMMUNITY
        max_iters : int, optional
            Maximum number of iterations, by default HyperParams.MAX_ITERS
        seed : int, optional
            Seed for the random number generator, by default HyperParams.SEED

        Returns
        -------
        nx.Graph
            Synthetic graph generated with the LFR benchmark, without the
            'community' node attribute and with every edge weight set to 1
        file_path : str
            Path to the file where the graph is saved
        """
        graph = nx.generators.community.LFR_benchmark_graph(
            n=n,
            tau1=tau1,
            tau2=tau2,
            mu=mu,
            average_degree=average_degree,
            min_degree=min_degree,
            max_degree=max_degree,
            min_community=min_community,
            max_community=max_community,
            max_iters=max_iters,
            seed=seed)
        # ! FOR KAGGLE NOTEBOOK: the output location is hard-coded.
        # Alternative: FilePaths.DATASETS_DIR.value + f"/lfr_benchmark_node-{n}"
        file_path = f"/kaggle/working/lfr_benchmark_node-{n}.mtx"
        # Write to exactly the path that is returned. The path already ends
        # in ".mtx"; appending the extension a second time would leave the
        # returned path pointing at a file that was never created.
        # NOTE(review): the content is a plain edge list written by
        # nx.write_edgelist, not a Matrix Market file, despite the extension;
        # confirm before loading it with a Matrix Market reader.
        nx.write_edgelist(graph, file_path, data=False)

        # Delete community attribute from the nodes to handle PyG compatibility
        for node in graph.nodes:
            graph.nodes[node].pop('community', None)
        # Give every edge a unit weight
        for edge in graph.edges:
            graph.edges[edge]['weight'] = 1
        return graph, file_path
        
    @staticmethod
    def check_dir(path: str):
        """
        Make sure the directory exists, creating it (and any missing parent
        directories) when needed.

        Parameters
        ----------
        path : str
            Path to the directory

        Raises
        ------
        FileExistsError
            If ``path`` exists but is not a directory
        """
        # exist_ok=True performs the "already there?" check atomically inside
        # makedirs, so there is no window between checking and creating in
        # which another process could create the directory first.
        os.makedirs(path, exist_ok=True)
    
    @staticmethod
    def plot_training(
        log: dict, 
        env_name: str, 
        detection_algorithm: str,
        file_path: str,
        window_size: int=int(HyperParams.MAX_EPISODES.value/100)):
        """Save one PNG per training metric, each showing the raw
        per-episode series together with its rolling mean.

        Parameters
        ----------
        log : dict
            Training logs; the keys read are "train_avg_reward",
            "train_steps", "train_reward_mul", "train_reward_list",
            "a_loss" and "v_loss"
        env_name : str
            Name of the environment, used in the plot titles
        detection_algorithm : str
            Name of the detection algorithm, used in the plot titles
        file_path : str
            Directory where the PNG files are saved
        window_size : int, optional
            Size of the rolling window, by default MAX_EPISODES / 100;
            values below 1 are raised to 1
        """
        def draw_raw_and_rolling(
                frame: pd.DataFrame,
                output_path: str,
                raw_column: str,
                rolling_column: str,
                palette: Tuple[str, str]) -> None:
            # Raw series first, rolling mean on top, then save and clear
            sns.set_style("darkgrid")
            sns.lineplot(
                data=frame, x="Episode", y=raw_column, color=palette[0])
            sns.lineplot(
                data=frame, x="Episode", y=rolling_column, color=palette[1],
                estimator="mean", errorbar=None)
            plt.title(
                f"Training on {env_name} graph with {detection_algorithm} algorithm")
            plt.xlabel("Episode")
            plt.ylabel(raw_column)
            plt.savefig(output_path)
            plt.clf()

        window_size = max(window_size, 1)

        # (file name, raw column, rolling column, (raw colour, rolling colour))
        episode_plots = (
            ("/training_reward.png", "Avg Reward", "Rolling_Avg_Reward",
             ("lightsteelblue", "darkblue")),
            ("/training_steps.png", "Steps per Epoch", "Rolling_Steps",
             ("thistle", "purple")),
            ("/training_goal_reward.png", "Goal Reward", "Rolling_Goal_Reward",
             ("darkgray", "black")),
            ("/training_goal_reached.png", "Goal Reached", "Rolling_Goal_Reached",
             ("darkgray", "black")),
        )
        loss_plots = (
            ("/training_a_loss.png", "Actor Loss", "Rolling_Actor_Loss",
             ("palegreen", "darkgreen")),
            ("/training_v_loss.png", "Critic Loss", "Rolling_Critic_Loss",
             ("lightcoral", "darkred")),
        )

        # ---- Per-episode reward / steps / goal metrics ----
        episode_numbers = range(len(log["train_avg_reward"]))
        avg_rewards = log["train_avg_reward"]
        steps_per_episode = log["train_steps"]
        goal_rewards = log["train_reward_mul"]
        # An episode counts as "goal reached" when its last reward is > 1;
        # it then contributes 1/steps, otherwise 0
        goal_reached = []
        for idx in range(len(steps_per_episode)):
            if log["train_reward_list"][idx][-1] > 1:
                goal_reached.append(1/steps_per_episode[idx])
            else:
                goal_reached.append(0)
        episode_frame = pd.DataFrame({
            "Episode": episode_numbers,
            "Avg Reward": avg_rewards,
            "Steps per Epoch": steps_per_episode,
            "Goal Reward": goal_rewards,
            "Goal Reached": goal_reached,
        })
        for _, raw_column, rolling_column, _ in episode_plots:
            episode_frame[rolling_column] = (
                episode_frame[raw_column].rolling(window_size).mean())
        for file_name, raw_column, rolling_column, palette in episode_plots:
            draw_raw_and_rolling(
                episode_frame, file_path+file_name,
                raw_column, rolling_column, palette)

        # ---- Actor / critic losses ----
        loss_frame = pd.DataFrame({
            "Episode": range(len(log["a_loss"])),
            "Actor Loss": log["a_loss"],
            "Critic Loss": log["v_loss"],
        })
        for _, raw_column, rolling_column, _ in loss_plots:
            loss_frame[rolling_column] = (
                loss_frame[raw_column].rolling(window_size).mean())
        for file_name, raw_column, rolling_column, palette in loss_plots:
            draw_raw_and_rolling(
                loss_frame, file_path+file_name,
                raw_column, rolling_column, palette)

        
    ############################################################################
    #                               EVALUATION                                 #
    ############################################################################   
    @staticmethod
    def save_test(
        log: dict, 
        files_path: str, 
        log_name: str, 
        algs: List[str],
        metrics: List[str]):
        """Dump the evaluation log to JSON and save one bar chart per metric
        comparing the mean value obtained by each algorithm.

        Parameters
        ----------
        log : dict
            Evaluation logs: ``log[alg][metric]`` is the list of values that
            is averaged, and ``log['env']`` provides the 'dataset' and
            'detection_alg' names used in the plot titles
        files_path : str
            Directory where the JSON file and the plots are saved
        log_name : str
            Base name (without extension) of the generated files
        algs : List[str]
            List of algorithms names to evaluate
        metrics : List[str]
            List of metrics to evaluate
        """
        # Persist the raw log first
        with open(f"{files_path}/{log_name}.json", "w", encoding="utf-8") as handle:
            json.dump(log, handle, indent=4)

        for metric in metrics:
            column = metric.capitalize()
            # Mean value of the metric for each algorithm
            averages = [mean(log[alg][metric]) for alg in algs]
            df = pd.DataFrame({"Algorithm": algs, column: averages})

            # The goal metric is shown as a percentage
            if metric == "goal":
                df[column] = df[column] * 100

            sns.barplot(
                data=df,
                x="Algorithm",
                y=column,
                palette=sns.color_palette("Set1"))
            plt.title(f"Evaluation on {log['env']['dataset']} graph with {log['env']['detection_alg']} algorithm")
            plt.xlabel("Algorithm")

            # Metric-specific y-axis label; other metrics use the plain name
            if metric == "goal":
                y_label = f"{column} reached %"
            elif metric == "time":
                y_label = f"{column} (s)"
            else:
                y_label = column
            plt.ylabel(y_label)

            plt.savefig(f"{files_path}/{log_name}_{metric}.png")
            plt.clf()

In [48]:
# Create paths
# Make sure the log and test output directories exist before anything writes to them
for output_dir in (FilePaths.LOG_DIR, FilePaths.TEST_DIR):
    Utils.check_dir(output_dir.value)

## Community Algorithms

### Community Detection

In [49]:
class CommunityDetectionAlgorithm(object):
    """Thin wrapper that runs one of the CDLIB community detection
    algorithms, selected by name."""

    def __init__(self, alg_name: str) -> None:
        """
        Store the name of the algorithm to run.

        Parameters
        ----------
        alg_name : str
            The name of the algorithm, one of the values of
            DetectionAlgorithmsNames
        """
        self.alg_name = alg_name

    def compute_community(self, graph: nx.Graph) -> cdlib.NodeClustering:
        """Run the selected algorithm on the graph.

        Parameters
        ----------
        graph : nx.Graph
            Input graph

        Returns
        -------
        cdlib.NodeClustering
            Cdlib NodeClustering object

        Raises
        ------
        ValueError
            If the stored name is not one of the supported algorithms
        """
        names = DetectionAlgorithmsNames
        # Algorithm name -> name of the function in cdlib.algorithms.
        # LAB, BTW, OPT and SCD are declared in the enum but not wired up
        # here (label_propagation returns an EdgeClustering object), so they
        # end up in the ValueError branch.
        cdlib_function_names = {
            names.LOUV.value: "louvain",
            names.WALK.value: "walktrap",
            names.GRE.value: "greedy_modularity",
            names.INF.value: "infomap",
            names.EIG.value: "eigenvector",
            names.SPIN.value: "spinglass",
        }
        function_name = cdlib_function_names.get(self.alg_name)
        if function_name is None:
            raise ValueError('Invalid algorithm name')
        # Resolve the function only once the algorithm is known
        detect = getattr(algorithms, function_name)
        return detect(graph)

### Community Deception Baselines

#### Random Hiding

In [50]:

class RandomHiding():
    """
    Baseline deception strategy: hide a target node by toggling randomly
    chosen edges between the target node and the other nodes of the graph.

    The graph, the target node, the detection algorithm, the similarity
    function and the threshold ``tau`` are all read from ``env``.
    """

    def __init__(
        self, 
        env, 
        steps: int, 
        target_community: List[int]):
        """
        Parameters
        ----------
        env :
            Environment object; the attributes read are original_graph,
            node_target, detection, original_community_structure,
            community_similarity and tau
        steps : int
            Maximum number of edge toggles (rewiring budget)
        target_community : List[int]
            Original community of the target node (target node included)
        """
        self.env = env
        self.graph = self.env.original_graph
        self.steps = steps
        self.target_node = self.env.node_target
        self.target_community = target_community
        self.detection_alg = self.env.detection
        self.original_community_structure = self.env.original_community_structure
        self.possible_edges = self.get_possible_action() 
        
    def get_possible_action(self):
        """
        List every edge that can be toggled: all pairs (target node, other).

        Returns
        -------
        List[tuple]
            Edges currently incident to the target node (a toggle removes
            them), followed by the pairs that are not yet connected (a toggle
            adds them)
        """
        # Materialise the neighbourhood once so each membership test is O(1)
        neighbours = set(self.graph.neighbors(self.target_node))

        # Existing edges of the target node: toggling them removes the edge
        removable_edges = [
            (self.target_node, neighbour)
            for neighbour in self.graph.neighbors(self.target_node)]

        # Pairs that are not connected yet: toggling them adds the edge
        addable_edges = [
            (self.target_node, node)
            for node in self.graph.nodes()
            if node != self.target_node and node not in neighbours]
        return removable_edges + addable_edges
    
    def hide_target_node_from_community(self)->tuple:
        """
        Hide the target node from the target community by rewiring its edges, 
        choosing randomly between adding or removing an edge.

        The loop stops when the goal is reached, when the step budget is
        used up (exactly one step is consumed per toggle), or when every
        candidate edge has been tried.
        
        Returns
        -------
        G_prime: nx.Graph
            Rewired copy of the original graph
        communities :
            Community structure computed by the detection algorithm on the
            returned graph
        """
        graph = self.graph.copy()
        communities = None
        done = False
        while self.steps > 0 and self.possible_edges and not done:
            # Draw a candidate uniformly at random; each edge is used once
            position = random.randrange(len(self.possible_edges))
            edge = self.possible_edges.pop(position)
            if graph.has_edge(*edge):
                # Remove the edge
                graph.remove_edge(*edge)
            else:
                # Add the edge
                graph.add_edge(*edge)
            
            # Compute the new community structure
            communities = self.detection_alg.compute_community(graph)
            new_community = self.get_new_community(communities)

            if self.check_goal(new_community) == 1:
                # The deception constraint is satisfied, stop rewiring
                done = True
            self.steps -= 1

        if communities is None:
            # No toggle was performed (no budget or no candidate edge):
            # still return the community structure of the unchanged graph
            communities = self.detection_alg.compute_community(graph)
        return graph, communities

    def get_new_community(
                self,
                new_community_structure: List[List[int]]) -> List[int]:
        """
        Search the community target in the new community structure after 
        deception. As new community target after the action, we consider the 
        community that contains the target node, if this community satisfies 
        the deception constraint, the episode is finished, otherwise not.

        Parameters
        ----------
        new_community_structure :
            New community structure after deception; its ``communities``
            attribute is iterated, so it is expected to be the object
            returned by the detection algorithm (or None)

        Returns
        -------
        List[int]
            New community target after deception

        Raises
        ------
        ValueError
            If no community contains the target node
        """
        if new_community_structure is None:
            # The agent did not perform any rewiring, i.e. are the same communities
            return self.target_community
        for community in new_community_structure.communities:
            if self.target_node in community:
                return community
        raise ValueError("Community not found")

    def check_goal(self, new_community: List[int]) -> int:
        """
        Check if the goal of hiding the target node was achieved

        Parameters
        ----------
        new_community : List[int]
            New community of the target node (target node included)

        Returns
        -------
        int
            1 if the goal was achieved, 0 otherwise
        """
        # The target node alone in its community is always a success
        if len(new_community) == 1:
            return 1
        # Copy the communities to avoid modifying the original ones
        new_community_copy = new_community.copy()
        new_community_copy.remove(self.target_node)
        old_community_copy = self.target_community.copy()
        old_community_copy.remove(self.target_node)
        # Compute the similarity between the new and the old community,
        # both without the target node
        similarity = self.env.community_similarity(
            new_community_copy,
            old_community_copy
        )
        # Goal reached when the similarity does not exceed the threshold tau
        if similarity <= self.env.tau:
            return 1
        return 0

#### Degree Hiding

In [51]:
class DegreeHiding():
    """
    Baseline deception strategy: hide a target node by toggling the edges
    between the target node and the other nodes, visiting the other
    endpoints in decreasing order of degree.

    The graph, the target node, the detection algorithm, the similarity
    function and the threshold ``tau`` are all read from ``env``.
    """

    def __init__(
            self,
            env,
            steps: int,
            target_community: List[int]):
        """
        Parameters
        ----------
        env :
            Environment object; the attributes read are original_graph,
            node_target, detection, original_community_structure,
            community_similarity and tau
        steps : int
            Maximum number of edge toggles (rewiring budget)
        target_community : List[int]
            Original community of the target node (target node included)
        """
        self.env = env
        self.graph = self.env.original_graph
        self.steps = steps
        self.target_node = self.env.node_target
        self.target_community = target_community
        self.detection_alg = self.env.detection
        self.original_community_structure = self.env.original_community_structure
        self.possible_edges = self.get_possible_action()
        
    def get_possible_action(self):
        """
        List every edge that can be toggled: all pairs (target node, other).

        Returns
        -------
        List[tuple]
            Edges currently incident to the target node (a toggle removes
            them), followed by the pairs that are not yet connected (a toggle
            adds them)
        """
        # Materialise the neighbourhood once so each membership test is O(1)
        neighbours = set(self.graph.neighbors(self.target_node))

        # Existing edges of the target node: toggling them removes the edge
        removable_edges = [
            (self.target_node, neighbour)
            for neighbour in self.graph.neighbors(self.target_node)]

        # Pairs that are not connected yet: toggling them adds the edge
        addable_edges = [
            (self.target_node, node)
            for node in self.graph.nodes()
            if node != self.target_node and node not in neighbours]
        return removable_edges + addable_edges
    
    def hide_target_node_from_community(self) -> tuple:
        """
        Hide the target node from the target community by rewiring its edges, 
        choosing the node with the highest degree between adding or removing an edge.

        Degrees are measured once, on the graph before any rewiring. The
        loop stops when the goal is reached, when the step budget is used
        up, or when every candidate edge has been tried.
        
        Returns
        -------
        G_prime: nx.Graph
            Rewired copy of the original graph
        communities :
            Community structure computed by the detection algorithm on the
            returned graph
        """
        graph = self.graph.copy()
        communities = None
        done = False
        # From the list possible_edges, create a list of tuples 
        # (node1, node2, degree_of_node2)
        ranked_candidates = [
            (edge[0], edge[1], graph.degree(edge[1]))
            for edge in self.possible_edges]
        while self.steps > 0 and ranked_candidates and not done:
            # Choose the edge with the highest degree (first one on ties)
            max_tuple = max(ranked_candidates, key=lambda x: x[2])
            ranked_candidates.remove(max_tuple)
            edge = (max_tuple[0], max_tuple[1])
            
            if graph.has_edge(*edge):
                # Remove the edge
                graph.remove_edge(*edge)
            else:
                # Add the edge
                graph.add_edge(*edge)

            # Compute the new community structure
            communities = self.detection_alg.compute_community(graph)
            new_community = self.get_new_community(communities)

            if self.check_goal(new_community) == 1:
                # The deception constraint is satisfied, stop rewiring
                done = True
            self.steps -= 1

        if communities is None:
            # No toggle was performed (no budget or no candidate edge):
            # still return the community structure of the unchanged graph
            communities = self.detection_alg.compute_community(graph)
        return graph, communities
    
    def get_new_community(
                self,
                new_community_structure: List[List[int]]) -> List[int]:
        """
        Search the community target in the new community structure after 
        deception. As new community target after the action, we consider the 
        community that contains the target node, if this community satisfies 
        the deception constraint, the episode is finished, otherwise not.

        Parameters
        ----------
        new_community_structure :
            New community structure after deception; its ``communities``
            attribute is iterated, so it is expected to be the object
            returned by the detection algorithm (or None)

        Returns
        -------
        List[int]
            New community target after deception

        Raises
        ------
        ValueError
            If no community contains the target node
        """
        if new_community_structure is None:
            # The agent did not perform any rewiring, i.e. are the same communities
            return self.target_community
        for community in new_community_structure.communities:
            if self.target_node in community:
                return community
        raise ValueError("Community not found")

    def check_goal(self, new_community: List[int]) -> int:
        """
        Check if the goal of hiding the target node was achieved

        Parameters
        ----------
        new_community : List[int]
            New community of the target node (target node included)

        Returns
        -------
        int
            1 if the goal was achieved, 0 otherwise
        """
        # The target node alone in its community is always a success
        if len(new_community) == 1:
            return 1
        # Copy the communities to avoid modifying the original ones
        new_community_copy = new_community.copy()
        new_community_copy.remove(self.target_node)
        old_community_copy = self.target_community.copy()
        old_community_copy.remove(self.target_node)
        # Compute the similarity between the new and the old community,
        # both without the target node
        similarity = self.env.community_similarity(
            new_community_copy,
            old_community_copy
        )
        # Goal reached when the similarity does not exceed the threshold tau
        if similarity <= self.env.tau:
            return 1
        return 0


#### Roam Hiding

In [52]:
class RoamHiding():
    """Given a network and a source node v, our objective is to conceal the
    importance of v by decreasing its centrality without compromising its
    influence over the network.

    From the article "Hiding Individuals and Communities in a Social Network".
    """
    def __init__(
        self,
        graph: nx.Graph,
        target_node: int,
        edge_budget: int,
        detection_alg: str) -> None:
        """
        Parameters
        ----------
        graph : nx.Graph
            Graph to rewire; it is modified in place by ``roam_heuristic``
        target_node : int
            Source node v to hide
        edge_budget : int
            Budget b: one edge removal plus at most b - 1 edge additions
        detection_alg : str
            Name passed to ``CommunityDetectionAlgorithm``
        """
        self.graph = graph
        self.target_node = target_node
        self.edge_budget = edge_budget
        self.detection_alg = CommunityDetectionAlgorithm(detection_alg)

    def roam_heuristic(self, budget: int) -> tuple:
        """
        The ROAM heuristic given a budget b:
            - Step 1: Remove the link between the source node, v, and its
            neighbour of choice, v0;
            - Step 2: Connect v0 to b − 1 nodes of choice, who are neighbours
            of v but not of v0 (if there are fewer than b − 1 such neighbours,
            connect v0 to all of them).

        Here v0 is the neighbour of v with the highest degree, and the nodes
        connected to v0 in step 2 are picked in ascending order of degree.

        Parameters
        ----------
        budget : int
            Not used by the computation: the limit actually applied is
            ``self.edge_budget``. Kept so existing callers keep working.

        Returns
        -------
        graph : nx.Graph
            The graph after the ROAM heuristic (same object as ``self.graph``).
        new_community_structure
            Result of ``self.detection_alg.compute_community`` on that graph.
        """
        # ° --- Step 1 --- ° #
        target_node_neighbours = list(self.graph.neighbors(self.target_node))
        if not target_node_neighbours:
            print("No neighbours for the target node", self.target_node)
            return self.graph, self.detection_alg.compute_community(self.graph)

        # Choose v0 as the neighbour of the target node with the most
        # connections (max() keeps the first one in case of ties)
        v0 = max(target_node_neighbours, key=lambda v: self.graph.degree[v])
        # Remove the edge between v and v0
        self.graph.remove_edge(self.target_node, v0)

        # ° --- Step 2 --- ° #
        v0_neighbours = set(self.graph.neighbors(v0))
        # Neighbours of v that are not neighbours of v0. v0 itself has to be
        # excluded explicitly: it is no longer adjacent to itself, so it would
        # otherwise be selected and produce a self-loop (v0, v0).
        candidates = [
            node for node in target_node_neighbours
            if node != v0 and node not in v0_neighbours
        ]
        # If there are fewer than b-1 candidates, connect v0 to all of them.
        # The instance budget is not overwritten, so the object can be reused.
        n_new_edges = max(0, min(self.edge_budget - 1, len(candidates)))
        # Ascending order of degree, evaluated once before any edge is added
        candidates.sort(key=lambda node: self.graph.degree[node])
        for node in candidates[:n_new_edges]:
            self.graph.add_edge(v0, node)

        new_community_structure = self.detection_alg.compute_community(self.graph)
        return self.graph, new_community_structure

### Community Deception Metrics & Algorithms

#### Deception Score

In [53]:

class DeceptionScore(object):
    """Deception score of a community detection algorithm."""

    def __init__(self, community_target: List[int]) -> None:
        """
        Parameters
        ----------
        community_target : List[int]
            Nodes of the community that should be hidden.
        """
        self.community_target = community_target

    @staticmethod
    def recall(g_i: List[int], community_target: List[int]) -> float:
        """Calculate recall score of a community g_i

        Parameters
        ----------
        g_i : List[int]
            Community found by a community detection algorithm.
        community_target : List[int]
            Target community.

        Returns
        -------
        float
            Recall score of g_i: shared members divided by the size of
            ``community_target``.
        """
        # Number of members in g_i that are also in our community
        members_in_g_i = len(set(community_target) & set(g_i))
        return members_in_g_i / len(community_target)

    @staticmethod
    def precision(g_i: List[int], community_target: List[int]) -> float:
        """Calculate precision score of a community g_i

        Parameters
        ----------
        g_i : List[int]
            Community found by a community detection algorithm.
        community_target : List[int]
            Target community.

        Returns
        -------
        float
            Precision score of g_i: shared members divided by the size of
            ``g_i``.
        """
        # Number of members in g_i that are also in our community
        members_in_g_i = len(set(community_target) & set(g_i))
        return members_in_g_i / len(g_i)

    def compute_deception_score(
            self,
            community_structure: List[List[int]],
            connected_components: int) -> float:
        """Calculate deception score of a community detection algorithm.

        Deprecated: a ``DeprecationWarning`` is emitted on every call.
        ``get_deception_score`` is the newer version of this score.

        Parameters
        ----------
        community_structure : List(List(int))
            Community structure found by a community detection algorithm.
        connected_components : int
            Number of connected components in the graph.

        Returns
        -------
        deception_score : float
            Deception score of a community detection algorithm.
        """
        # The method used to be wrapped with ``@DeprecationWarning``, which
        # replaced it with an exception instance and made it impossible to
        # call; a runtime warning keeps it usable while flagging it.
        import warnings
        warnings.warn(
            "compute_deception_score is deprecated, use get_deception_score",
            DeprecationWarning,
            stacklevel=2,
        )
        target = set(self.community_target)
        # Communities that share at least one node with the community target
        intersecting = [g_i for g_i in community_structure if target & set(g_i)]

        recall = max([self.recall(g_i, self.community_target) for g_i in community_structure])
        precision = sum([self.precision(g_i, self.community_target) for g_i in intersecting])

        # Ideal situation occurs when each member of the community target is
        # placed in a different community and the value of the maximum recall
        # is lower possible.
        assert len(self.community_target) - 1 > 0, "Community target must have at least 2 members."
        community_spread = 1 - (connected_components - 1) / (len(self.community_target) - 1)

        # Ideal situation occurs when each member of the community structure
        # contains little percentage of the community target.
        assert len(intersecting) > 0, "Community structure must have at least 1 member."
        community_hiding = 0.5 * (1 - recall) + 0.5 * (1 - precision / len(intersecting))

        # Deception score is the product of community spread and community hiding.
        return community_spread * community_hiding

    def get_deception_score(self, graph, community_structure: List[List[int]]):
        """
        New version of the deception score, based on the repository:
            - https://github.com/vfionda/BHC/tree/main

        Parameters
        ----------
        graph
            Graph on which the target community lives; the subgraph induced
            by the target community is used to count connected components.
        community_structure : List[List[int]]
            Communities found by a community detection algorithm.

        Returns
        -------
        dec_score
            Product of the "spread over components" term and the "spread over
            communities" term computed below.
        """
        target = set(self.community_target)
        number_communities = len(community_structure)

        # Number of the target community members in the various communities
        member_for_community = np.zeros(number_communities, dtype=int)
        for i, community in enumerate(community_structure):
            member_for_community[i] = sum(1 for node in community if node in target)

        # Ratio of the target community members in the various communities
        ratio_community_members = [
            members_for_c / len(com)
            for (members_for_c, com) in zip(member_for_community, community_structure)
        ]

        # In how many communities are the members of the target spread?
        spread_members = sum([1 if mc > 0 else 0 for mc in member_for_community])

        second_part = 1 / 2 * ((spread_members - 1) / number_communities) + \
            1 / 2 * (1 - sum(ratio_community_members) / spread_members)

        # Induced subgraph only on target community nodes
        num_components = nx.number_connected_components(
            graph.subgraph(self.community_target))
        first_part = 1 - ((num_components - 1) / (len(self.community_target) - 1))
        dec_score = first_part * second_part
        return dec_score

#### Safeness

In [54]:
class Safeness():
    """Computes the safeness of a node in a community and the safeness of a
    community, and uses it to hide a target community by rewiring edges.

    The class used to be declared as ``CommunitySimilarity``, a name that is
    declared again by the similarity-metrics class further down the notebook
    (which silently replaced this one). The old name is kept as an alias
    right after the class body.
    """
    def __init__(self, graph: nx.Graph, community_target: List[int]):
        """
        Parameters
        ----------
        graph : nx.Graph
            Graph to work on; ``community_hiding`` modifies it in place.
        community_target : List[int]
            Nodes of the community to hide.
        """
        self.graph = graph
        self.community_target = community_target

        # For each node u of the community: number of community nodes in the
        # same connected component of u (inside the induced subgraph)
        self.V_u_C = self.num_nodes_in_same_component()
        # Intra-community edges (computed once, on the initial graph)
        self.E_u_C = self.get_intra_comminty_edges()
        # Inter-community edges (computed once, on the initial graph)
        self.E_u_C_bar = self.get_inter_community_edges()

        # Dict of add ratio for each node in the community {node: add_ratio}
        self.node_minimum_add_ratio = self.get_node_minimum_add_ratio(self.community_target)
        # Dict {community node: external node with the best addition gain}
        self.external_node_dict = self.find_external_node_dict(self.community_target)
        # Dict of (edge, eps_del) for each deletable edge {edge: eps_del}
        self.best_del_excl_bridges = self.get_best_del_excl_bridges(self.community_target)

    def community_hiding(self, community_target, edge_budget: int) -> Tuple[nx.Graph, int]:
        """
        Hide the target community using the safeness metric.

        At every step the best edge addition and the best edge deletion are
        compared: the addition is applied when its gain is at least the
        deletion gain, otherwise the deletion is applied if its gain is
        positive. The loop stops when the budget is exhausted or when neither
        gain is positive.

        Parameters
        ----------
        community_target : List[int]
            Community to hide.
        edge_budget : int
            Budget of edges to use; with a budget <= 0 the graph is left
            untouched.

        Returns
        -------
        Tuple[nx.Graph, int]
            The modified graph (same object as ``self.graph``) and the number
            of budget units consumed.
        """
        initial_budget = edge_budget
        while edge_budget > 0:
            # Community node with the smallest fraction of outgoing edges
            n_p = min(self.node_minimum_add_ratio, key=self.node_minimum_add_ratio.get)

            # External endpoint precomputed for n_p; the dict has no entry for
            # nodes without any admissible external node
            n_t = self.external_node_dict.get(n_p)
            if n_t is None:
                eps_add = -1
            else:
                eps_add = self.get_addition_gain((n_p, n_t), community_target)

            # Deletable edge with the maximum precomputed eps_del
            if self.best_del_excl_bridges:
                n_k, n_l = max(self.best_del_excl_bridges, key=self.best_del_excl_bridges.get)
                eps_del = self.get_deletion_gain((n_k, n_l), community_target)
            else:
                n_k = n_l = None
                eps_del = -1

            if n_t is not None and eps_add >= eps_del:
                self.graph.add_edge(n_p, n_t)
                # Refresh the structures that depend on the added edge
                self.node_minimum_add_ratio = self.get_node_minimum_add_ratio(self.community_target)
                self.external_node_dict = self.find_external_node_dict(self.community_target)
            elif eps_del > 0:
                self.graph.remove_edge(n_k, n_l)
                # Refresh the deletion candidates
                self.best_del_excl_bridges = self.get_best_del_excl_bridges(self.community_target)

            edge_budget -= 1
            if eps_add <= 0 and eps_del <= 0:
                break
        steps = initial_budget - edge_budget
        return self.graph, steps

    def get_node_minimum_add_ratio(self, community_target: List[int]) -> dict:
        """
        Computes for each node n inside the target community, the fraction of
        n’s edges that point outside C.

        Parameters
        ----------
        community_target : List[int]
            Target community.

        Returns
        -------
        node_min_add_ratio : dict
            {node: ratio}; the ratio is the number of distinct neighbours
            outside the community divided by the node degree (the count is
            left undivided when the degree is 0).
        """
        members = set(community_target)
        node_min_add_ratio = {}
        for node in community_target:
            outside = sum(
                1 for neighbor in set(self.graph.neighbors(node))
                if neighbor not in members)
            degree = self.graph.degree(node)
            node_min_add_ratio[node] = outside / (degree if degree > 0 else 1)
        return node_min_add_ratio

    def find_external_node_dict(self, community_target: List[int]) -> dict:
        """
        For each node n_p of the community, find the node n_t not in
        community_target, such that the edge (n_p, n_t) does not exist, and
        that maximizes the addition gain.

        Parameters
        ----------
        community_target : List[int]
            Target community.

        Returns
        -------
        external_node_dict : dict
            {n_p: n_t}; nodes without any candidate have no entry. On equal
            gains the first candidate found is kept.
        """
        external_node_dict = dict()
        for n_p in community_target:
            max_addition_gain = -1
            for n_t in self.find_external_node(n_p, community_target):
                addition_gain = self.get_addition_gain((n_p, n_t), community_target)
                if addition_gain > max_addition_gain:
                    max_addition_gain = addition_gain
                    external_node_dict[n_p] = n_t
        return external_node_dict

    def find_external_node(self, n_p: int, community_target: List[int]) -> list:
        """
        Find the nodes n_t not in community_target, such that the edge
        (n_p, n_t) does not exist.

        Parameters
        ----------
        n_p : int
            Node p.
        community_target : List[int]
            Target community.

        Returns
        -------
        n_t_external : list
            List of nodes that are not in community_target and that are not
            neighbors of n_p (n_p itself is never returned).
        """
        members = set(community_target)
        # ``neighbors`` returns a one-shot iterator: it must be materialised,
        # otherwise the first membership test exhausts it and every later node
        # is wrongly reported as a non-neighbour.
        neighbors_p = set(self.graph.neighbors(n_p))
        return [
            n_t for n_t in self.graph.nodes()
            if n_t != n_p and n_t not in members and n_t not in neighbors_p
        ]

    def get_best_del_excl_bridges(self, community_target: List[int]) -> dict:
        """
        It works in two phases:
            1. It excludes bridge edges, i.e. edges whose deletion would
                disconnect their two endpoints.
            2. For each remaining edge between a community node and an
                external node, it computes the deletion gain.

        Parameters
        ----------
        community_target : List[int]
            Community target.

        Returns
        -------
        dict
            Dict of (edge, eps_del) for each candidate edge {edge: eps_del}
        """
        members = set(community_target)
        eps_del = dict()
        for node in community_target:
            for neighbor in self.graph.neighbors(node):
                if neighbor in members:
                    continue
                edge = (node, neighbor)
                # Phase 1: bridges are skipped here. Removing them from a
                # scratch copy, as done previously, never affected the result.
                if self.is_bridge(edge):
                    continue
                # Phase 2
                eps_del[edge] = self.get_deletion_gain(edge, community_target)
        return eps_del

    def get_best_del_excl_bridges_tuple(self, community_target: List[int]) -> Tuple[int, int]:
        """
        Deprecated variant of ``get_best_del_excl_bridges`` that returns only
        the best edge. A ``DeprecationWarning`` is emitted on every call.

        Parameters
        ----------
        community_target : List[int]
            Community target.

        Returns
        -------
        Tuple[int, int]
            Edge with the maximum eps_del, or (None, None) when there is no
            candidate edge.
        """
        # ``@DeprecationWarning`` used as a decorator turned this method into
        # an exception instance; warn at call time instead.
        import warnings
        warnings.warn(
            "get_best_del_excl_bridges_tuple is deprecated, "
            "use get_best_del_excl_bridges",
            DeprecationWarning,
            stacklevel=2,
        )
        eps_del = self.get_best_del_excl_bridges(community_target)
        if len(eps_del) < 1:
            return None, None
        n_k, n_l = max(eps_del, key=eps_del.get)
        return n_k, n_l

    def is_bridge(self, edge: Tuple[int, int]) -> bool:
        """
        Check if the edge (node, neighbor) is a bridge, i.e. if removing it
        leaves its two endpoints without any connecting path.

        Testing the endpoints instead of the connectivity of the whole graph
        gives the right answer also when the graph is already disconnected.

        Parameters
        ----------
        edge : Tuple[int, int]
            Edge to check.

        Returns
        -------
        bool
            True if the edge is a bridge, False otherwise.
        """
        node, neighbor = edge
        graph = self.graph.copy()
        graph.remove_edge(node, neighbor)
        return not nx.has_path(graph, node, neighbor)

    def get_addition_gain(self, edge: Tuple[int, int], community_target: List[int]) -> float:
        """
        Computes the addition gain of adding an edge.

        Parameters
        ----------
        edge : Tuple[int, int]
            Edge to add.
        community_target : List[int]
            Community target.

        Returns
        -------
        float
            Community safeness after the addition minus safeness before it.
        """
        safeness_before = self.compute_community_safeness(
            self.graph, community_target)
        # The edge is added to a copy: self.graph is not modified
        graph = self.graph.copy()
        graph.add_edge(*edge)
        safeness_after = self.compute_community_safeness(
            graph, community_target)
        return safeness_after - safeness_before

    def get_deletion_gain(self, edge: Tuple[int, int], community_target: List[int]) -> float:
        """
        Computes the deletion gain of deleting an edge.

        Parameters
        ----------
        edge : Tuple[int, int]
            Edge to delete.
        community_target : List[int]
            Community target.

        Returns
        -------
        float
            Community safeness after the deletion minus safeness before it.
        """
        safeness_before = self.compute_community_safeness(
            self.graph, community_target)
        # The edge is removed from a copy: self.graph is not modified
        graph = self.graph.copy()
        graph.remove_edge(*edge)
        safeness_after = self.compute_community_safeness(
            graph, community_target)
        return safeness_after - safeness_before

    def compute_community_safeness(self, graph, community_target: List[int]) -> float:
        """
        Computes the community safeness of the community.

        Parameters
        ----------
        graph : nx.Graph
            Graph on which node degrees are read.
        community_target : List[int]
            The community that we want to compute the safeness.

        Returns
        -------
        float
            Mean of the node safeness over the nodes of the community.
        """
        total = sum(
            self.compute_node_safeness(graph, community_target, node)
            for node in community_target)
        return total / len(community_target)

    def compute_node_safeness(
        self,
        graph: nx.Graph,
        community_target: List[int],
        node: int) -> float:
        """
        Computes the node safeness of the node in the community.

        Parameters
        ----------
        graph : nx.Graph
            The graph; only the degree of ``node`` is read from it.
        community_target : List[int]
            The community of the node that we want to compute the safeness.
        node : int
            The node that we want to compute the safeness. It must belong to
            the community the object was built with (``self.V_u_C`` lookup).

        Returns
        -------
        sigma_u_C : float
            The node safeness.
        """
        # Get the degree of u.
        deg_u = graph.degree(node)

        if len(community_target) <= 1:
            argument_1 = 0
        else:
            argument_1 = (self.V_u_C[node] - 1) / (len(community_target) - 1)

        # NOTE(review): self.V_u_C and self.E_u_C are computed once in
        # __init__ on self.graph, and self.E_u_C holds ALL intra-community
        # edges, not only those of `node`; only deg_u reflects `graph`.
        # Confirm this matches the intended safeness definition.
        if deg_u < 1:
            argument_2 = 0
        else:
            argument_2 = len(self.E_u_C) / deg_u

        sigma_u_C = 0.5 * argument_1 + 0.5 * argument_2
        return sigma_u_C

    def num_nodes_in_same_component(self) -> dict:
        """
        Computes, for each node u of the community C, the number of nodes of
        C that are in the same connected component of u in the subgraph
        induced by C.

        Returns
        -------
        V_u_C : dict
            {u: size of the component of u}; 0 for community nodes that do
            not appear in any component of the induced subgraph.
        """
        V_u_C = dict.fromkeys(self.community_target, 0)
        # Create a subgraph induced by the nodes in community C
        subgraph = self.graph.subgraph(self.community_target)
        for component in nx.connected_components(subgraph):
            size = len(component)
            for u in component:
                V_u_C[u] = size
        return V_u_C

    def get_intra_comminty_edges(self) -> List[Tuple[int, int]]:
        """
        Get the intra-community edges of the community.

        Returns
        -------
        intra_community_edges : List[Tuple[int, int]]
            Edges (u, v) with both endpoints in the community; an edge is
            not listed again once its reverse (v, u) has been recorded.
        """
        intra_community_edges = list()
        # Set mirror of the list, for constant-time "already recorded" checks
        recorded = set()
        for u in self.community_target:
            for v in self.community_target:
                if u != v and self.graph.has_edge(u, v) and (v, u) not in recorded:
                    intra_community_edges.append((u, v))
                    recorded.add((u, v))
        return intra_community_edges

    def get_inter_community_edges(self) -> List[Tuple[int, int]]:
        """
        Get the inter-community edges of the community.

        Returns
        -------
        inter_community_edges : List[Tuple[int, int]]
            Edges (u, v) with u in the community and v outside of it.
        """
        members = set(self.community_target)
        return [
            (u, v)
            for u in self.community_target
            for v in self.graph.neighbors(u)
            if v not in members
        ]


# Backward-compatible alias for the previous (misleading) class name. The
# similarity-metrics cell below declares its own ``CommunitySimilarity`` and
# takes over that name, exactly as before; ``Safeness`` stays reachable.
CommunitySimilarity = Safeness

## Similarity Metrics

In [55]:
class CommunitySimilarity():
    """Similarity measures between two communities given as lists of integers"""
    def __init__(self, function_name: str) -> None:
        # Name of the measure returned by select_similarity_function
        self.function_name = function_name

    def select_similarity_function(self) -> Callable:
        """
        Return the similarity function matching ``self.function_name``

        Returns
        -------
        Callable
            One of the static similarity methods of this class

        Raises
        ------
        Exception
            If the name does not match any known similarity function
        """
        # (enum member name, function) pairs, checked in this order; the enum
        # member is only looked up when its turn comes
        candidates = (
            ("JAC", self.jaccard_similarity),
            ("OVE", self.overlap_similarity),
            ("SOR", self.sorensen_similarity),
        )
        for member, function in candidates:
            if self.function_name == getattr(SimilarityFunctionsNames, member).value:
                return function
        raise Exception("Similarity function not found")

    @staticmethod
    def jaccard_similarity(a: List[int], b: List[int]) -> float:
        """
        Jaccard similarity of two lists, A and B:
            J(A,B) = |A ∩ B| / |A U B|

        Parameters
        ----------
        a : List[int]
            First list, must not be empty
        b : List[int]
            Second list, must not be empty

        Returns
        -------
        float
            Value between 0 and 1
        """
        assert len(a) > 0 and len(b) > 0, "Lists must be not empty"
        first, second = set(a), set(b)
        return len(first & second) / len(first | second)

    @staticmethod
    def overlap_similarity(a: List[int], b: List[int]) -> float:
        """
        Overlap coefficient of two lists, A and B:
            O(A,B) = |A ∩ B| / min(|A|, |B|)

        Sizes are taken after removing duplicates.

        Parameters
        ----------
        a : List[int]
            First list, must not be empty
        b : List[int]
            Second list, must not be empty

        Returns
        -------
        float
            Value between 0 and 1
        """
        assert len(a) > 0 and len(b) > 0, "Lists must be not empty"
        first, second = set(a), set(b)
        smallest = min(len(first), len(second))
        return len(first & second) / smallest

    @staticmethod
    def sorensen_similarity(a: List[int], b: List[int]) -> float:
        """
        Sorensen similarity of two lists, A and B:
            S(A,B) = 2 * |A ∩ B| / (|A| + |B|)

        Sizes are taken after removing duplicates.

        Parameters
        ----------
        a : List[int]
            First list, must not be empty
        b : List[int]
            Second list, must not be empty

        Returns
        -------
        float
            Value between 0 and 1
        """
        assert len(a) > 0 and len(b) > 0, "Lists must be not empty"
        first, second = set(a), set(b)
        total_size = len(first) + len(second)
        return 2 * len(first & second) / total_size


class GraphSimilarity():
    """Class to compute the similarity between two graphs"""
    def __init__(self, function_name: str) -> None:
        """
        Initialize the GraphSimilarity class

        Parameters
        ----------
        function_name : str
            Name of the similarity function to use
        """
        self.function_name = function_name

    def select_similarity_function(self) -> Callable:
        """
        Select the similarity function to use

        Returns
        -------
        Callable
            Similarity function to use

        Raises
        ------
        Exception
            If ``self.function_name`` does not match any known function
        """
        if self.function_name == SimilarityFunctionsNames.GED.value:
            return self.graph_edit_distance
        elif self.function_name == SimilarityFunctionsNames.JAC_1.value:
            return self.jaccard_similarity_1
        elif self.function_name == SimilarityFunctionsNames.JAC_2.value:
            return self.jaccard_similarity_2
        else:
            raise Exception("Similarity function not found")

    def graph_edit_distance(self, g: nx.Graph, h: nx.Graph) -> float:
        """
        Compute the graph edit distance between two graphs, then normalize it
        using a null graph:
            GED(G1,G2)/[GED(G1,G0) + GED(G2,G0)]  with G0 = null graph

        Parameters
        ----------
        g : nx.Graph
            First graph
        h : nx.Graph
            Second graph

        Returns
        -------
        graph_distance : float
            Graph edit distance between the two graphs normalized; 0.0 when
            the normalization term is zero
        """
        # Faster approximation of the graph edit distance (first value
        # yielded by the iterative optimizer)
        graph_distance = next(nx.optimize_graph_edit_distance(g, h))
        # Normalize
        g_dist_1 = next(nx.optimize_graph_edit_distance(g, nx.null_graph()))
        g_dist_2 = next(nx.optimize_graph_edit_distance(h, nx.null_graph()))
        normalizer = g_dist_1 + g_dist_2
        if normalizer == 0:
            # Nothing to normalize by: avoid the division by zero
            return 0.0
        return graph_distance / normalizer

    def jaccard_similarity_1(self, g: nx.Graph, h: nx.Graph) -> float:
        """
        Compute the Jaccard Similarity between two graphs
        J(G, H) = (∑_{i,j} |A_{ij}^G - A_{i,j}^H|) / (∑_{i,j} max(A_{i,j}^G, A_{i,j}^H))

        Parameters
        ----------
        g : nx.Graph
            First graph
        h : nx.Graph
            Second graph

        Returns
        -------
        jaccard_sim : float
            Jaccard Similarity between the two graphs, between 0 and 1,
            where 0 means the two graphs are identical and 1 means they are
            completely different

        Raises
        ------
        ValueError
            If the two graphs do not have the same number of nodes
        """
        # Ensure G and H produce adjacency matrices of the same shape
        if g.number_of_nodes() != h.number_of_nodes():
            raise ValueError("Input matrices must have the same shape.")
        # Use the same node order for both matrices when the node sets match,
        # so that entry (i, j) refers to the same pair of nodes in G and H;
        # otherwise each graph keeps its own order.
        g_nodes = list(g.nodes())
        h_nodes = g_nodes if set(g_nodes) == set(h.nodes()) else None
        g_matrix = nx.to_numpy_array(g, nodelist=g_nodes)
        h_matrix = nx.to_numpy_array(h, nodelist=h_nodes)
        # Calculate the numerator (sum of absolute differences)
        numerator = np.sum(np.abs(g_matrix - h_matrix))
        # Calculate the denominator (sum of element-wise maximum values)
        denominator = np.sum(np.maximum(g_matrix, h_matrix))
        if denominator == 0:
            # Both matrices are all zeros: the graphs are identical
            return 0.0
        return numerator / denominator

    def jaccard_similarity_2(self, g: nx.Graph, h: nx.Graph) -> float:
        """
        Compute the Jaccard Similarity between two graphs, second version,
        based on the sets of edges of the two graphs

        Parameters
        ----------
        g : nx.Graph
            First graph
        h : nx.Graph
            Second graph

        Returns
        -------
        float
            1 - |E_g ∩ E_h| / |E_g ∪ E_h| (ratio rounded to 3 decimals):
            0 if the graphs have the same edges, 1 if they share no edge
        """
        def edge_set(graph):
            # In an undirected graph (u, v) and (v, u) are the same edge, so
            # the orientation in which the edge is reported must not matter
            if graph.is_directed():
                return set(graph.edges())
            return {frozenset(edge) for edge in graph.edges()}

        g_edges = edge_set(g)
        h_edges = edge_set(h)
        union_size = len(g_edges | h_edges)
        if union_size == 0:
            # No edges in either graph: identical, and no division by zero
            return 0.0
        j = round(len(g_edges & h_edges) / union_size, 3)
        # Normalize to have 0 if the graphs are identical and 1 if they are
        # completely different
        return 1 - j


## Environment

In [56]:

class GraphEnvironment(object):
    """Enviroment where the agent will act, it will be a graph with a community"""

    def __init__(
        self,
        graph_path: str = HyperParams.GRAPH_NAME.value,
        community_detection_algorithm: str = HyperParams.DETECTION_ALG_NAME.value,
        beta: float = HyperParams.BETA.value,
        tau: float = HyperParams.TAU.value,
        community_similarity_function: str = SimilarityFunctionsNames.SOR.value,
        graph_similarity_function: str = SimilarityFunctionsNames.JAC_1.value,
    ) -> None:
        """Constructor for Graph Environment

        Parameters
        ----------
        graph_path : str, optional
            Path of the graph to load, by default HyperParams.GRAPH_NAME.value.
            If None, a synthetic graph is generated (see set_graph)
        community_detection_algorithm : str
            Name of the community detection algorithm to use
        beta : float, optional
            Hyperparameter for the edge budget, value between 0 and 100, by
            default HyperParams.BETA.value
        tau : float, optional
            Strength of the deception constraint, value between 0 and 1, with 1
            we have a soft constraint, hard constraint otherwise, by default
            HyperParams.TAU.value
        community_similarity_function : str, optional
            Name of the community similarity function to use, by default
            SimilarityFunctionsNames.SOR.value
        graph_similarity_function : str, optional
            Name of the graph similarity function to use, by default
            SimilarityFunctionsNames.JAC_1.value

        Raises
        ------
        AssertionError
            If beta is not in [0, 100] or tau is not in [0, 1]
        """
        # ° ---- HYPERPARAMETERS ---- ° #
        # Validate before loading the graph, so that an invalid value fails
        # immediately instead of after the node embedding has been computed
        assert beta >= 0 and beta <= 100, "Beta must be between 0 and 100"
        assert tau >= 0 and tau <= 1, "T value must be between 0 and 1"
        # Multiplier used to compute the edge budget
        self.beta = beta
        self.tau = tau
        # Weights for the reward and the penalty, they are set from outside
        # the environment (get_reward/get_penalty assert they are not None)
        self.lambda_metric = None  # lambda_metric
        self.alpha_metric = None  # alpha_metric

        random.seed(time.time())
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        # ° ---- NODE FEATURES ---- ° #
        # Declared BEFORE set_graph: set_graph calls set_node_features, which
        # fills these two attributes, so they must not be reset afterwards
        self.embedding_model = None
        self.embedding = None

        # ° ---- GRAPH ---- ° #
        self.env_name = None
        self.graph = None
        self.original_graph = None
        self.old_graph = None
        self.n_connected_components = None
        self.set_graph(graph_path)

        # ° ---- SIMILARITY FUNCTIONS ---- ° #
        self.community_similarity = None
        self.graph_similarity = None
        self.set_similarity_funtions(community_similarity_function, graph_similarity_function)

        # ° ---- COMMUNITY DETECTION ---- ° #
        self.detection_alg = None
        self.detection = None
        self.old_penalty_value = None
        self.original_community_structure = None
        self.old_community_structure = None
        self.new_community_structure = None
        self.set_communities(community_detection_algorithm)

        # ° ---- COMMUNITY DECEPTION ---- ° #
        self.community_target = None
        self.node_target = None
        self.set_targets()

        # ° ---- REWIRING STEP ---- ° #
        self.edge_budget = 0
        self.used_edge_budget = 0
        self.max_steps = 0
        self.stop_episode = False
        self.rewards = 0
        self.old_rewards = 0
        self.possible_actions = None
        self.len_add_actions = None
        self.set_rewiring_budget()

        # ° ---- PRINT ENVIRONMENT INFO ---- ° #
        # Print the environment information
        self.print_env_info()


    ############################################################################
    #                       EPISODE RESET FUNCTIONS                            #
    ############################################################################
    def reset(self, graph_reset=True) -> nx.Graph:
        """
        Bring the environment back to the beginning of an episode

        Parameters
        ----------
        graph_reset : bool, optional
            If True the current graph is replaced by a copy of the original
            graph, otherwise the current graph is kept, by default True

        Returns
        -------
        self.graph : nx.Graph
            Graph state after the reset (a copy of the original graph when
            graph_reset is True)
        """
        if graph_reset:
            # Work on a copy, the original graph must stay untouched
            self.graph = self.original_graph.copy()
        # No previous step exists at the start of an episode
        self.old_graph = None
        self.old_penalty_value = 0
        self.old_community_structure = self.original_community_structure
        # Episode bookkeeping
        self.rewards = self.old_rewards = 0
        self.used_edge_budget = 0
        self.stop_episode = False
        # The valid actions depend on the (possibly restored) graph and on
        # the current target node/community
        self.possible_actions = self.get_possible_actions()
        return self.graph

    def change_target_node(self, node_target: int = None) -> None:
        """
        Change the target node to remove from the community

        Parameters
        ----------
        node_target : int, optional
            Node to remove from the community. If None, a node of the target
            community different from the current target node is chosen at
            random; if the community contains no other node, the choice falls
            back to the whole community. By default None
        """
        if node_target is not None:
            self.node_target = node_target
            return
        # Candidates are built explicitly instead of re-sampling until a
        # different node is drawn: the re-sampling loop never terminates when
        # the community only contains the current target node
        alternatives = [
            node for node in self.community_target if node != self.node_target
        ]
        random.seed(time.time())
        self.node_target = random.choice(alternatives or self.community_target)

    def change_target_community(
            self,
            community: List[int] = None,
            node_target: int = None) -> None:
        """
        Change the target community from which we want to hide the node

        Parameters
        ----------
        community : List[int], optional
            New target community. If None, a community is chosen at random
            among the original communities, preferring those with more than
            one node and different from the current target community
        node_target : int, optional
            Node to remove from the community, forwarded to
            change_target_node (None means a random node of the community)
        """
        if community is None:
            communities = self.original_community_structure.communities
            current = self.community_target
            # Preferred choice: a community with at least two nodes that is
            # not the current one. The candidates are computed up-front
            # because re-sampling until the condition holds loops forever
            # when no such community exists (e.g. all the other communities
            # are singletons).
            candidates = [
                c for c in communities if len(c) > 1 and c != current
            ]
            if not candidates:
                # Fallbacks: any community with at least two nodes, then any
                # community at all
                candidates = [c for c in communities if len(c) > 1] \
                    or list(communities)
            random.seed(time.time())
            self.community_target = random.choice(candidates)
        else:
            self.community_target = community
        # Change the target node to remove from the community
        self.change_target_node(node_target=node_target)

    ############################################################################
    #                      EPISODE STEP FUNCTIONS                              #
    ############################################################################
    def step(self, action: int) -> Tuple[nx.Graph, float, bool, bool]:
        """
        Perform one rewiring step on the environment

        Parameters
        ----------
        action : int
            Node of the graph used as the other endpoint of the rewiring
            action (the first endpoint is always the target node).

        Returns
        -------
        self.graph : nx.Graph
            Graph state after the action
        self.rewards : float
            Reward of the agent, -1 if the action could not be applied
        self.stop_episode : bool
            True when the rewiring budget is exhausted or the goal has been
            reached
        done : bool
            True when get_reward reports that the goal has been reached,
            False otherwise (also when the action could not be applied)
        """
        # Keep a snapshot of the graph before rewiring, the penalty compares
        # the graph before and after the action
        self.old_graph = self.graph.copy()

        consumed = self.apply_action(action)
        if consumed == 0:
            # Invalid action: negative reward, nothing else changes
            self.rewards = -1
            return self.graph, self.rewards, self.stop_episode, False

        # Community structure of the rewired graph
        self.new_community_structure = self.detection.compute_community(
            self.graph)

        # Reward and goal flag
        self.rewards, done = self.get_reward()

        # The episode stops when the goal is reached or when there is no
        # budget left after accounting for this action
        self.used_edge_budget += consumed
        remaining_budget = self.edge_budget - self.used_edge_budget
        if done or remaining_budget < 1:
            self.stop_episode = True

        # The new structure becomes the reference for the next step
        self.old_community_structure = self.new_community_structure
        return self.graph, self.rewards, self.stop_episode, done

    def apply_action(self, action: int) -> int:
        """
        Apply a rewiring action between the target node and the given node:
        the edge is added if it is listed among the possible "ADD" actions,
        removed if it is listed among the possible "REMOVE" actions. An
        applied action is deleted from the set of possible actions.

        Parameters
        ----------
        action : int
            Node of the graph used as the other endpoint of the rewiring
            action (the first endpoint is always the target node).

        Returns
        -------
        budget_consumed : int
            Amount of budget consumed, 1 if the action has been applied, 0 otherwise
        """
        # An action can be stored either as (u, v) or as (v, u)
        forward = (self.node_target, action)
        backward = (action, self.node_target)
        for operation in ("ADD", "REMOVE"):
            available = self.possible_actions[operation]
            for edge in (forward, backward):
                if edge not in available:
                    continue
                if operation == "ADD":
                    self.graph.add_edge(*edge, weight=1)
                else:
                    self.graph.remove_edge(*edge)
                available.remove(edge)
                return 1
        # The action is not valid, nothing has been changed
        return 0

    
    ############################################################################
    #                       SETTERS FUNCTIONS                                  #
    ############################################################################
    def set_graph(self, graph_path: str) -> None:
        """
        Load (or generate) the graph of the environment and compute its node
        features.

        Parameters
        ----------
        graph_path : str
            Path of the graph to import; if None a synthetic graph is
            generated with Utils.generate_lfr_benchmark_graph
        """
        if graph_path is not None:
            loaded_graph = Utils.import_mtx_graph(graph_path)
        else:
            # No path given: fall back to a synthetic graph, the generator
            # also provides the path used to name the environment
            loaded_graph, graph_path = Utils.generate_lfr_benchmark_graph()

        # Environment name: file name without what follows the first dot
        file_name = graph_path.split("/")[-1]
        self.env_name = file_name.split(".")[0]
        self.graph = self.set_node_features(loaded_graph)

        # Copy used to restart the rewiring process at each episode
        self.original_graph = self.graph.copy()
        # Graph state before the last action, there is none yet
        self.old_graph = None
        self.n_connected_components = nx.number_connected_components(self.graph)
    
    def set_node_features(self, graph) -> nx.Graph:
        """
        Set the node features of the graph, using Node2Vec

        The embedding of each node is stored in the node attribute "x" as a
        torch tensor, then every edge without a "weight" attribute is removed.

        Parameters
        ----------
        graph : nx.Graph
            Graph to process, it is modified in place

        Returns
        -------
        graph : nx.Graph
            The same graph object received as input
        """
        print("*"*20, "Environment Information", "*"*20)
        print("* Graph Name:", self.env_name)
        print("*", graph)
        print("* * Compute Node Embedding using Node2Vec for nodes features")
        print("* * ...")
        # Build node features using Node2Vec, the embedding dimension is
        # HyperParams.EMBEDDING_DIM.value
        self.embedding_model = Node2Vec(dimensions=HyperParams.EMBEDDING_DIM.value)
        self.embedding_model.fit(graph)
        print("* * End Embedding Computation")
        self.embedding = self.embedding_model.get_embedding()
        # Add the embedding to the graph; the embedding is indexed with the
        # node itself, so nodes are used as row indices
        for node in graph.nodes():
            graph.nodes[node]["x"] = torch.tensor(self.embedding[node])
        # Delete edges without "weight" attribute. The edges are collected
        # first: removing them while iterating over graph.edges() modifies
        # the underlying dictionaries during the iteration.
        unweighted_edges = [
            (u, v) for u, v, data in graph.edges(data=True)
            if "weight" not in data
        ]
        graph.remove_edges_from(unweighted_edges)
        return graph
        
    def set_similarity_funtions(
        self, 
        community_similarity_function: str, 
        graph_similarity_function: str)-> None:
        """
        Select, by name, the functions used to compare two communities and
        two graphs, and store them in self.community_similarity and
        self.graph_similarity

        Parameters
        ----------
        community_similarity_function : str
            Name of the community similarity function
        graph_similarity_function : str
            Name of the graph similarity function
        """
        community_selector = CommunitySimilarity(community_similarity_function)
        self.community_similarity = community_selector.select_similarity_function()
        graph_selector = GraphSimilarity(graph_similarity_function)
        self.graph_similarity = graph_selector.select_similarity_function()
    
    def set_communities(self, community_detection_algorithm)-> None:
        """
        Create the community detection object and compute the community
        structure of the current graph, i.e. the structure before any
        deception action.

        Parameters
        ----------
        community_detection_algorithm : str
            Name of the community detection algorithm to use
        """
        self.detection_alg = community_detection_algorithm
        self.detection = CommunityDetectionAlgorithm(community_detection_algorithm)
        # No penalty has been computed yet
        self.old_penalty_value = 0
        # Community structure before the deception, as returned by
        # compute_community (the original code notes it is a NodeClustering
        # object)
        original_structure = self.detection.compute_community(self.graph)
        self.original_community_structure = original_structure
        # At the beginning the "previous" structure is the original one
        self.old_community_structure = original_structure

    def set_targets(self) -> None:
        """
        Set the target community as the community with the highest number 
        of nodes, and the target node as a random node in the community

        Raises
        ------
        ValueError
            If no community has been detected, or if the largest community
            has fewer than two nodes
        """
        communities = self.original_community_structure.communities
        if not communities:
            # Explicit error instead of the generic one raised by max()
            raise ValueError("Community detection returned no communities.")
        # As initial community we choose the one with the highest number of
        # nodes
        self.community_target = max(communities, key=len)
        if len(self.community_target) <= 1:
            raise ValueError("Community target must have at least two node.")
        # Choose a node randomly from the community, as initial node to remove
        self.node_target = random.choice(self.community_target)
    
    def set_rewiring_budget(self) -> None:
        """
        Compute the rewiring budget of the graph, reset the episode counters
        and compute the set of valid actions
        """
        budget = self.get_edge_budget()
        self.edge_budget = budget
        # Upper bound on the rewiring steps of an episode, it avoids endless
        # episodes when the goal is never reached
        self.max_steps = budget  # * HyperParams.MAX_STEPS_MUL.value
        # Nothing has been spent yet
        self.used_edge_budget = 0
        # True when the budget is exhausted or the goal is reached
        self.stop_episode = False
        # Reward of the current and of the previous step
        self.rewards = self.old_rewards = 0
        # Valid actions, and number of valid "ADD" actions
        actions = self.get_possible_actions()
        self.possible_actions = actions
        self.len_add_actions = len(actions["ADD"])
        
    ############################################################################
    #                       GETTERS FUNCTIONS                                  #
    ############################################################################

    def get_edge_budget(self) -> int:
        """
        Compute the edge budget of the graph as the ratio between number of
        edges and number of nodes, multiplied by beta and truncated to an
        integer

        Returns
        -------
        int
            Edge budget of the graph
        """
        # TEST: alternative ways to compute the edge budget, kept for
        # reference:
        # 2. Percentage of edges of the whole graph
        #    int(math.ceil((self.graph.number_of_edges() * self.beta / 100)))
        # 3. Percentage of edges of the whole graph divided by the number of
        #    nodes in the community
        #    int(math.ceil((self.graph.number_of_edges() * self.beta / 100) / len(self.community_target)))

        # 1. Ratio edges/nodes of the graph times the parameter beta
        n_edges = self.graph.number_of_edges()
        n_nodes = self.graph.number_of_nodes()
        return int(n_edges / n_nodes * self.beta)

    def get_penalty(self) -> float:
        """
        Compute the penalty to subtract from the reward, combining the
        distance between the community structures and the distance between
        the graphs before and after the last action

        Returns
        -------
        penalty: float
            Penalty to subtract from the reward
        """
        # ° ---- COMMUNITY DISTANCE ---- ° #
        # NMI is 1 for identical community structures and 0 for completely
        # different ones, so 1 - NMI is used as a distance
        nmi = self.new_community_structure.normalized_mutual_information(
            self.old_community_structure).score
        community_distance = 1 - nmi
        # ° ---- GRAPH DISTANCE ---- ° #
        graph_distance = self.graph_similarity(self.graph, self.old_graph)
        # ° ---- PENALTY ---- ° #
        assert self.alpha_metric is not None, "Alpha metric is None, must be set in grid search"
        alpha = self.alpha_metric
        combined = alpha * community_distance + (1 - alpha) * graph_distance
        # Subtract the value stored at the previous step
        penalty = combined - self.old_penalty_value
        # NOTE(review): the value stored for the next step is the difference
        # computed above, not the combined metric itself - confirm this is
        # the intended update
        self.old_penalty_value = penalty
        return penalty

    def get_reward(self) -> Tuple[float, bool]:
        """
        Computes the reward for the agent, it is a 0-1 value function: if the
        goal is reached the reward is 1 minus the weighted penalty, otherwise
        it is 0 minus the weighted penalty.

        As new community target after the action, we consider the (first)
        community that contains the target node. The goal is reached when
        this community only contains the target node, or when its similarity
        with the original target community (both without the target node) is
        not greater than tau.

        Returns
        -------
        reward : float
            Reward of the agent
        done : bool
            Whether the goal has been reached

        Raises
        ------
        AssertionError
            If lambda_metric has not been set, or if the target node does not
            belong to any community of the new community structure
        """
        assert self.lambda_metric is not None, "Lambda metric is None, must be set in grid search"
        # Community of the new structure that contains the target node; the
        # default None guarantees the name is always bound, so the assertion
        # below is what fails when the node is not found
        new_community_target = next(
            (community
             for community in self.new_community_structure.communities
             if self.node_target in community),
            None)
        assert new_community_target is not None, "New community target is None"
        # ° ---------- PENALTY ---------- ° #
        # get_penalty also updates the penalty stored in the environment, so
        # it is called exactly once per reward computation
        weighted_penalty = self.lambda_metric * self.get_penalty()
        # The target node is alone in its community: goal reached
        if len(new_community_target) == 1:
            return 1 - weighted_penalty, True
        # ° ---- COMMUNITY SIMILARITY ---- ° #
        # Compare the communities without the target node, working on copies
        # to avoid modifying the original lists
        new_community_target_copy = new_community_target.copy()
        new_community_target_copy.remove(self.node_target)
        community_target_copy = self.community_target.copy()
        community_target_copy.remove(self.node_target)
        community_similarity = self.community_similarity(
            new_community_target_copy,
            community_target_copy,
        )
        # ° ---------- REWARD ---------- ° #
        # The deception constraint is satisfied when the similarity is not
        # greater than tau
        done = community_similarity <= self.tau
        if done:
            return 1 - weighted_penalty, True
        return 0 - weighted_penalty, False

    def get_possible_actions(self) -> dict:
        """
        Returns all the possible actions that can be applied to the graph
        given a source node (self.node_target). The possible actions are:
            - Add a missing edge between the source node and another node,
              when exactly one of the two belongs to the target community
            - Remove an existing edge between the source node and another
              node, when both belong to the target community

        Returns
        -------
        possible_actions : dict
            Dictionary containing the possible actions that can be applied to
            the graph. The dictionary has two keys: "ADD" and "REMOVE", each
            key has a set of tuples (source node, other node) as value.
        """
        possible_actions = {"ADD": set(), "REMOVE": set()}
        # Set for constant-time membership tests inside the loop
        community = set(self.community_target)
        u = self.node_target
        # The membership of the source node does not change in the loop
        u_inside = u in community
        for v in self.graph.nodes():
            if u == v:
                continue
            v_inside = v in community
            if u_inside and v_inside:
                # We can remove an edge iff both nodes are in the community
                if self.graph.has_edge(u, v):
                    possible_actions["REMOVE"].add((u, v))
            elif u_inside != v_inside:
                # We can add an edge iff exactly one node is in the community
                # and the edge does not exist yet
                if not self.graph.has_edge(u, v):
                    possible_actions["ADD"].add((u, v))
        return possible_actions
    
    ############################################################################
    #                           ENVIRONMENT INFO                               #
    ############################################################################
    def print_env_info(self) -> None:
        """Print the environment information"""
        print("* Community Detection Algorithm:", self.detection_alg)
        print("* Number of communities found:",
              len(self.original_community_structure.communities))
        # print("* Rewiring Budget:", self.edge_budget, "=", self.beta, "*", self.graph.number_of_edges(), "/ 100",)
        # The value is taken from get_edge_budget so that the printed budget
        # is always the one used by the environment (truncating the ratio
        # before multiplying by beta gives a different number)
        print("* Rewiring Budget: (n_edges/n_nodes)*BETA =",
              self.graph.number_of_edges(), "/",
              self.graph.number_of_nodes(), "*", self.beta, "=",
              self.get_edge_budget())
        print("* Weight of the Deception Constraint:", self.tau)
        print("*", "-"*58, "\n")


## Agent

In [57]:

class Agent:
    def __init__(
            self,
            env: GraphEnvironment,
            state_dim: int = HyperParams.EMBEDDING_DIM.value,
            hidden_size_1: int = HyperParams.HIDDEN_SIZE_1.value,
            hidden_size_2: int = HyperParams.HIDDEN_SIZE_2.value,
            lr: List[float] = HyperParams.LR.value,
            gamma: List[float] = HyperParams.GAMMA.value,
            lambda_metrics: List[float] = HyperParams.LAMBDA.value,
            alpha_metrics: List[float] = HyperParams.ALPHA.value,
            eps: float = HyperParams.EPS_CLIP.value,
            best_reward: float = HyperParams.BEST_REWARD.value):
        """
        Initialize the agent.

        The action dimension is not a parameter: it is set to the number of
        nodes of the environment graph.

        Parameters
        ----------
        env : GraphEnvironment
            Environment to train the agent on
        state_dim : int
            Dimensions of the state, i.e. length of the feature vector
        hidden_size_1 : int
            First A2C hidden layer size
        hidden_size_2 : int
            Second A2C hidden layer size
        lr : List[float]
            List of Learning rate, each element of the list is a learning rate
        gamma : List[float]
            List of gamma parameter, each element of the list is a gamma
        lambda_metrics : List[float]
            List of lambda parameter, each element of the list is a lambda used
            to balance the reward and the penalty
        alpha_metrics : List[float]
            List of alpha parameter, each element of the list is a alpha used
            to balance the two penalties
        eps : float
            Value for clipping the loss function, by default
            HyperParams.EPS_CLIP.value
        best_reward : float, optional
            Best reward, by default HyperParams.BEST_REWARD.value
        """
        import copy

        # ° ----- Environment ----- ° #
        self.env = env

        # ° ----- A2C ----- ° #
        self.state_dim = state_dim # self.env.graph.number_of_nodes()
        self.hidden_size_1 = hidden_size_1
        self.hidden_size_2 = hidden_size_2
        self.action_dim = self.env.graph.number_of_nodes()
        self.policy = ActorCritic(
            state_dim=self.state_dim,
            hidden_size_1=self.hidden_size_1,
            hidden_size_2=self.hidden_size_2,
            action_dim=self.action_dim,
            graph=self.env.graph
        )
        # Set device
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        # Move model to device
        self.policy.to(self.device)

        # ° ----- Hyperparameters ----- ° #
        # A2C hyperparameters
        self.lr_list = lr
        self.gamma_list = gamma
        self.eps = eps
        self.best_reward = best_reward
        # Environment hyperparameters
        self.lambda_metrics = lambda_metrics
        self.alpha_metrics = alpha_metrics
        # Hyperparameters to be set during grid search
        self.lr = None
        self.gamma = None
        self.alpha_metric = None
        self.optimizers = dict()

        # ° ----- Training ----- ° #
        # State, nx.Graph
        self.obs = None
        # Cumulative reward of the episode
        self.episode_reward = 0
        # Boolean variable to check if the episode is ended
        self.done = False
        # Boolean variable to check if the goal is reached
        self.goal = False
        # Number of steps in the episode
        self.step = 0
        # Tuple to store the values for each action
        self.SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
        self.saved_actions = []
        self.rewards = []
        # List of rewards for one episode
        self.episode_rewards = []
        # Logging dictionary. It is a deep copy of the template stored in
        # HyperParams: the agent overwrites and appends to its entries, and
        # without the copy every agent would write into the same shared
        # dictionary.
        self.log_dict = copy.deepcopy(HyperParams.LOG_DICT.value)
        # Print agent info
        self.print_agent_info()

        # ° ----- Evaluation ----- ° #
        # List of actions performed during the evaluation
        self.action_list = {"ADD": [], "REMOVE": []}

    ############################################################################
    #                       PRE-TRAINING/TESTING                               #
    ############################################################################
    def reset_hyperparams(
            self,
            lr: float,
            gamma: float,
            lambda_metric: float,
            alpha_metric: float,
            test: bool = False) -> None:
        """
        Set a new combination of hyperparameters and clear the training state
        of the agent (logs, saved actions, rewards, optimizers)
        
        Parameters
        ----------
        lr : float
            Learning rate
        gamma : float
            Discount factor
        lambda_metric : float
            Lambda parameter used to balance the reward and the penalty,
            stored in the environment
        alpha_metric : float
            Alpha parameter used to balance the two penalties, stored in the
            environment
        test : bool, optional
            If True the hyperparameters are not printed, by default False
        """
        # A2C hyperparameters
        self.lr, self.gamma = lr, gamma
        # Environment hyperparameters
        self.env.lambda_metric = lambda_metric
        self.env.alpha_metric = alpha_metric
        # The hyperparameters are printed only outside of testing
        if not test:
            self.print_hyperparams()
        # Empty every log except the number of training episodes
        for key in self.log_dict:
            if key == 'train_episodes':
                continue
            self.log_dict[key] = []
        # Forget the actions and the rewards collected so far
        self.saved_actions = []
        self.rewards = []
        self.episode_rewards = []
        # Back to the initial training state
        self.obs = None
        self.step = 0
        self.episode_reward = 0
        self.done = False
        self.goal = False
        # The best reward restarts from the default of HyperParams
        self.best_reward = HyperParams.BEST_REWARD.value
        self.optimizers = dict()

    def configure_optimizers(self) -> None:
        """
        Create one Adam optimizer for the actor and one for the critic, both
        with the current learning rate, and store them in self.optimizers
        under the keys 'a_optimizer' and 'c_optimizer'. Nothing is returned.
        """
        networks = (
            ('a_optimizer', self.policy.actor),
            ('c_optimizer', self.policy.critic),
        )
        for key, network in networks:
            parameters = list(network.parameters())
            self.optimizers[key] = torch.optim.Adam(parameters, lr=self.lr)

    ############################################################################
    #                            GRID SEARCH                                   #
    ############################################################################
    def grid_search(self) -> None:
        """
        Perform grid search on the hyperparameters: for every combination of
        learning rate, gamma, lambda and alpha the agent is trained and the
        resulting logs are saved as plots
        """
        # Same visiting order of four nested loops: lr is the outermost
        # parameter, alpha the innermost one
        combinations = (
            (lr, gamma, lambda_metric, alpha_metric)
            for lr in self.lr_list
            for gamma in self.gamma_list
            for lambda_metric in self.lambda_metrics
            for alpha_metric in self.alpha_metrics
        )
        # NOTE(review): the policy is not re-created in this method, so
        # presumably each combination starts from the weights left by the
        # previous one - confirm this is intended
        for lr, gamma, lambda_metric, alpha_metric in combinations:
            # Change Hyperparameters
            self.reset_hyperparams(lr, gamma, lambda_metric, alpha_metric)
            # Optimizers must be rebuilt with the current learning rate
            self.configure_optimizers()
            # Training
            log = self.training()
            # Save results in correct folder
            self.save_plots(log, self.get_path())
            # Free memory
            gc.collect()

    ############################################################################
    #                               TRAINING                                   #
    ############################################################################
    def training(self) -> dict:
        """
        Train the agent for ``self.log_dict['train_episodes']`` episodes.

        Every episode resets the environment, calls ``self.rewiring`` until the
        environment reports ``done`` or ``self.env.max_steps`` steps have been
        taken, and then runs one on-policy update via ``self.training_step``.
        A checkpoint is written whenever the episode's average reward per step
        matches or beats the best average seen so far.

        NOTE(review): the target node is said to change every 10 episodes and
        the target community every 100 episodes; if so, that happens inside
        ``self.env.reset()`` and cannot be confirmed from this method.

        Returns
        -------
        log_dict : dict
            Dictionary containing the training logs
        """
        episode = self.log_dict['train_episodes']
        epochs = trange(episode)  # epoch iterator
        self.policy.train()  # set model in train mode
        for i_episode in epochs:
            # Reset the environment and the per-episode bookkeeping
            self.obs = self.env.reset()
            self.episode_reward = 0
            self.done = False
            self.goal = False
            self.episode_rewards = []
            self.step = 0

            # Rewire the graph until the episode ends or the step budget is
            # exhausted
            while not self.done and self.step < self.env.max_steps:
                self.rewiring()

            # perform on-policy backpropagation
            self.a_loss, self.v_loss = self.training_step()

            # Checkpoint the best performing model. The value stored as best
            # is the same quantity that is compared (average reward per step),
            # so successive comparisons stay on one scale.
            avg_reward = self.episode_reward / self.step
            if avg_reward >= self.best_reward:
                self.save_checkpoint()
                self.best_reward = avg_reward

            # Rewards of the steps taken in this episode; slicing makes a
            # copy, so the scaling below leaves self.episode_rewards untouched
            rewards = self.episode_rewards[-self.step:]
            # If the goal is reached, multiply the last reward by 10
            if self.goal:
                rewards[-1] *= 10
            mul_reward = sum(rewards) / len(rewards)

            self.log_dict['train_reward_list'].append(rewards)
            self.log_dict['train_reward_mul'].append(mul_reward)
            self.log_dict['train_reward'].append(self.episode_reward)
            self.log_dict['train_steps'].append(self.step)
            self.log_dict['train_avg_reward'].append(avg_reward)
            self.log_dict['a_loss'].append(self.a_loss)
            self.log_dict['v_loss'].append(self.v_loss)

            # Send current statistics to screen
            epochs.set_description(
                f"* Episode {i_episode+1} "
                f"| Mul Reward: {mul_reward:.2f} "
                f"| Avg Reward: {avg_reward:.2f} "
                f"| Steps: {self.step} "
                f"| Actor Loss: {self.a_loss:.2f} "
                f"| Critic Loss: {self.v_loss:.2f}")
        return self.log_dict

    def rewiring(self, test=False) -> None:
        """
        Perform one rewiring step: pick an action and apply it to the
        environment.

        Parameters
        ----------
        test : bool, optional
            If True, record the chosen edge in ``self.action_list`` when it is
            an effective addition or removal, by default False
        """
        # Ask the policy for the destination node of the rewiring
        chosen_node = self.select_action(self.obs)
        torch.cuda.empty_cache()

        if test:
            edge = (self.env.node_target, chosen_node)
            # (action kind, whether the edge must already exist to be logged):
            # an addition only counts if the edge is absent, a removal only if
            # it is present. Only the first matching kind is considered.
            for kind, must_exist in (("ADD", False), ("REMOVE", True)):
                if edge in self.env.possible_actions[kind]:
                    if bool(self.env.graph.has_edge(*edge)) is must_exist:
                        self.action_list[kind].append(edge)
                    break

        # Apply the action to the environment
        outcome = self.env.step(chosen_node)
        self.obs, step_reward, self.done, self.goal = outcome

        # Update the cumulative episode reward
        self.episode_reward += step_reward
        # Transition memory consumed by the training step
        self.rewards.append(step_reward)
        # Per-episode history used for logging
        self.episode_rewards.append(step_reward)
        self.step += 1

    def select_action(self, state: nx.Graph) -> int:
        """
        Sample an action for the given state from the policy network.

        The policy returns a pair ``(concentration, value)``; the
        concentration vector parameterizes a categorical distribution from
        which the action is drawn. The log-probability of the sampled action
        and the critic value are appended to ``self.saved_actions`` for the
        next training step.

        Parameters
        ----------
        state : nx.Graph
            Current state of the environment

        Returns
        -------
        action: int
            Integer representing a node in the graph, it will be the destination
            node of the rewiring action
        """
        weights, state_value = self.policy(state)
        sampler = torch.distributions.Categorical(weights)
        sampled = sampler.sample()
        transition = self.SavedAction(sampler.log_prob(sampled), state_value)
        self.saved_actions.append(transition)
        return int(sampled.item())

    def training_step(self) -> Tuple[float, float]:
        """
        Perform a single training step of the A2C algorithm: compute the
        discounted returns of the stored rewards, derive the actor and critic
        losses, take one gradient step per optimizer and clear the reward and
        action buffers.

        Returns are always built as ``float32`` so that integer rewards (or an
        integer ``gamma``) cannot produce an integer tensor, for which the
        normalization below is undefined.

        Returns
        -------
        mean_a_loss : float
            Mean actor loss
        mean_v_loss : float
            Mean critic loss
        """
        # Discounted returns, accumulated from the last reward backwards and
        # flipped once at the end (linear time)
        discounted = []
        running_return = 0.0
        for reward in reversed(self.rewards):
            running_return = reward + self.gamma * running_return
            discounted.append(running_return)
        discounted.reverse()
        returns = torch.tensor(discounted, dtype=torch.float32)
        # Normalize returns by subtracting mean and dividing by standard
        # deviation; skipped for a single return, whose std is undefined.
        # NOTE: May cause NaN problem
        if len(discounted) > 1:
            returns = (returns - returns.mean()) / (returns.std() + self.eps)

        policy_losses = []  # actor (policy) loss per transition
        value_losses = []  # critic (value) loss per transition
        for (log_prob, value), target in zip(self.saved_actions, returns):
            # Difference between true value and estimated value from critic;
            # value.item() detaches it, so the actor loss does not
            # backpropagate into the critic
            advantage = target - value.item()
            policy_losses.append(-log_prob * advantage)
            # critic (value) loss using L1 smooth loss
            value_losses.append(F.smooth_l1_loss(
                value, target.reshape(1).to(self.device)))

        policy_stack = torch.stack(policy_losses)
        value_stack = torch.stack(value_losses)

        # take gradient steps, actor first and critic second
        self.optimizers['a_optimizer'].zero_grad()
        a_loss = policy_stack.sum()
        a_loss.backward()
        self.optimizers['a_optimizer'].step()
        self.optimizers['c_optimizer'].zero_grad()
        v_loss = value_stack.sum()
        v_loss.backward()
        self.optimizers['c_optimizer'].step()

        # Mean losses, reported for logging only
        mean_a_loss = policy_stack.detach().mean().item()
        mean_v_loss = value_stack.detach().mean().item()
        # reset rewards and action buffer
        del self.rewards[:]
        del self.saved_actions[:]
        return mean_a_loss, mean_v_loss

    ############################################################################
    #                               TEST                                       #
    ############################################################################
    def test(
            self,
            lr: float,
            gamma: float,
            lambda_metric: float,
            alpha_metric: float,
            model_path: str,
            graph_reset=True) -> nx.Graph:
        """
        Hide a given node from a given community using a trained model.

        Parameters
        ----------
        lr : float
            Learning rate, forwarded to ``self.reset_hyperparams``
        gamma : float
            Discount factor, forwarded to ``self.reset_hyperparams``
        lambda_metric : float
            Lambda weight, forwarded to ``self.reset_hyperparams``
        alpha_metric : float
            Alpha weight, forwarded to ``self.reset_hyperparams``
        model_path : str
            Path of the checkpoint to load
        graph_reset : bool, optional
            Forwarded to ``self.env.reset``, by default True

        Returns
        -------
        nx.Graph
            Last observation returned by the environment
        """
        # Set hyperparameters to select the correct folder
        self.reset_hyperparams(lr, gamma, lambda_metric, alpha_metric, True)
        # Load best performing model
        self.load_checkpoint(path=model_path)
        # Set model in evaluation mode
        self.policy.eval()
        self.obs = self.env.reset(graph_reset)
        # Start from a clean episode state so the loop below (and the step
        # count read by callers afterwards) never depends on what a previous
        # episode left behind
        self.step = 0
        self.done = False
        self.goal = False
        # Rewire the graph until the target node is isolated from the target
        # community. No parameter update happens here, so gradient tracking
        # is disabled to avoid keeping autograd graphs alive in the buffers.
        with torch.no_grad():
            while not self.done and self.step < self.env.max_steps:
                self.rewiring(test=True)
        return self.obs

    ############################################################################
    #                            CHECKPOINTING                                 #
    ############################################################################
    def get_path(self) -> str:
        """
        Build the folder path used for plots, logs and checkpoints.

        The path starts at ``FilePaths.LOG_DIR`` and encodes the environment
        name, the detection algorithm and the current hyperparameters.

        Returns
        -------
        file_path : str
            Path to the correct folder
        """
        env = self.env
        segments = (
            f"{env.env_name}/{env.detection_alg}/",
            f"lr-{self.lr}/gamma-{self.gamma}/",
            f"lambda-{env.lambda_metric}/alpha-{env.alpha_metric}",
        )
        return FilePaths.LOG_DIR.value + "".join(segments)

    def save_plots(self, log: dict, file_path: str) -> None:
        """
        Write the training logs to disk and draw the training plots.

        Parameters
        ----------
        log : dict
            Dict containing the training logs
        file_path : str
            Path to the directory where to save the plots and the logs
        """
        # Make sure the destination exists before anything is written
        Utils.check_dir(file_path)
        self.log(log)
        env = self.env
        Utils.plot_training(log, env.env_name, env.detection_alg, file_path)

    def save_checkpoint(self):
        """
        Save the policy weights and every optimizer state to
        ``<get_path()>/model.pth``.

        The checkpoint is a dict with the policy state under ``'model'`` and
        one entry per key of ``self.optimizers``.
        """
        target_dir = self.get_path()
        # Check if the directory exists, otherwise create it
        Utils.check_dir(target_dir)
        checkpoint = {'model': self.policy.state_dict()}
        checkpoint.update(
            {name: optimizer.state_dict()
             for name, optimizer in self.optimizers.items()})
        torch.save(checkpoint, f'{target_dir}/model.pth')

    def load_checkpoint(self, path=None):
        """
        Restore the policy weights and the optimizer states from a checkpoint.

        Parameters
        ----------
        path : str, optional
            Checkpoint file; when None, ``<get_path()>/model.pth`` is used

        NOTE(review): ``torch.load`` unpickles the file, so only checkpoints
        from a trusted source should be passed here.
        """
        checkpoint_path = (
            f'{self.get_path()}/model.pth' if path is None else path)
        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        self.policy.load_state_dict(checkpoint['model'])
        # Only optimizers that are currently configured are restored
        for name in self.optimizers:
            self.optimizers[name].load_state_dict(checkpoint[name])

    def log(self, log_dict: dict):
        """
        Dump the given data as JSON to
        ``<get_path()>/training_results.json``.

        Parameters
        ----------
        log_dict : dict
            Dictionary containing the data to be logged
        """
        target_dir = self.get_path()
        Utils.check_dir(target_dir)
        destination = f'{target_dir}/training_results.json'
        with open(destination, "w", encoding="utf-8") as handle:
            json.dump(log_dict, handle, indent=4)

    ############################################################################
    #                   AGENT INFO AND PRINTING                                #
    ############################################################################
    def print_agent_info(self):
        """Print the network sizes and the hyperparameter grids."""
        rule = "-" * 18
        # (section title, rows of (label, attribute name read from self))
        sections = (
            (" Model Architecture ", (
                ("* Features vector size: ", "state_dim"),
                ("* A2C Hidden layer 1 size: ", "hidden_size_1"),
                ("* A2C Hidden layer 2 size: ", "hidden_size_2"),
                ("* Actor Action dimension: ", "action_dim"),
            )),
            ("Hyperparameters List", (
                ("* Learning rate list: ", "lr_list"),
                ("* Gamma parameter list: ", "gamma_list"),
                ("* Lambda Metric list: ", "lambda_metrics"),
                ("* Alpha Metric list: ", "alpha_metrics"),
            )),
        )
        for title, rows in sections:
            print("*", rule, title, rule)
            for label, attribute in rows:
                # Attributes are read one at a time, right before printing
                print(label, getattr(self, attribute))
            print("*", "-"*58, "\n")

    def print_hyperparams(self):
        """Print the hyperparameters currently set on the agent and its
        environment."""
        rule = "-" * 18
        print("*", rule, "Model Hyperparameters", rule)
        # Each reader is evaluated lazily, right before its line is printed
        rows = (
            ("* Learning rate: ", lambda: self.lr),
            ("* Gamma parameter: ", lambda: self.gamma),
            ("* Lambda Metric: ", lambda: self.env.lambda_metric),
            ("* Alpha Metric: ", lambda: self.env.alpha_metric),
            ("* Value for clipping the loss function: ", lambda: self.eps),
        )
        for label, read in rows:
            print(label, read())


### A2C

In [58]:

class ActorCritic(nn.Module):
    """
    Actor-critic model made of two independent networks, ``self.actor`` and
    ``self.critic``, that are fed the same graph state.
    """

    def __init__(
        self, 
        state_dim: int, 
        hidden_size_1: int, 
        hidden_size_2: int, 
        action_dim: int,
        graph: nx.Graph):
        """
        Parameters
        ----------
        state_dim : int
            Size of the node feature vectors
        hidden_size_1 : int
            Size of the first hidden layer of both networks
        hidden_size_2 : int
            Size of the second hidden layer of both networks
        action_dim : int
            Action dimension, forwarded to the actor
        graph : nx.Graph
            Accepted for interface compatibility; not used here
        """
        super().__init__()
        # Sizes shared by the two networks
        shared_sizes = dict(
            state_dim=state_dim,
            hidden_size_1=hidden_size_1,
            hidden_size_2=hidden_size_2,
        )
        self.actor = ActorNetwork(action_dim=action_dim, **shared_sizes)
        self.critic = CriticNetwork(**shared_sizes)
        # Device the input data is moved to in forward()
        use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if use_cuda else 'cpu')

    def forward(self, graph: nx.Graph, jitter=1e-20) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Forward pass, computes the action concentration and the state value.

        Parameters
        ----------
        graph : nx.Graph
            Graph state
        jitter : float, optional
            Jitter value, by default 1e-20

        Returns
        -------
        Tuple[torch.Tensor, torch.Tensor]
            Tuple of concentration and value
        """
        # Convert the networkx graph into a torch_geometric data object
        data = from_networkx(graph).to(self.device)

        # Actor: raw scores are made positive with softplus, flattened, and
        # shifted by the jitter for numerical stability
        scores = self.actor(data)
        concentration = F.softplus(scores).reshape(-1) + jitter

        # Critic: value estimate of the same state
        state_value = self.critic(data)
        return concentration, state_value


#### Encoder

In [59]:
class GraphEncoder(nn.Module):
    """
    Graph-level encoder: one GCN layer, mean pooling over the nodes, then a
    linear projection to ``HyperParams.EMBEDDING_DIM`` followed by tanh.
    """

    def __init__(self, in_feature):
        """
        Parameters
        ----------
        in_feature : int
            Size of the input node features
        """
        super().__init__()

        self.in_feature = in_feature
        self.hidden_feature = 64
        self.out_feature = HyperParams.EMBEDDING_DIM.value

        self.conv1 = GCNConv(self.in_feature, self.hidden_feature)
        self.linear1 = nn.Linear(self.hidden_feature, self.out_feature)
        self.tanh = nn.Tanh()
        self.relu = torch.relu

    def forward(self,  Graph):
        """
        Encode a graph data object (read through its ``x``, ``edge_index``
        and ``batch`` attributes) into an embedding.
        """
        features, edges, batch_index = Graph.x, Graph.edge_index, Graph.batch
        node_embeddings = self.relu(self.conv1(features, edges))
        pooled = global_mean_pool(node_embeddings, batch_index)
        return self.tanh(self.linear1(pooled))

#### Actor

In [60]:

class ActorNetwork(nn.Module):
    """
    Actor Network: a GCN layer followed by three linear layers that produce
    one score per node.
    """

    def __init__(
            self,
            state_dim: int,
            hidden_size_1: int,
            hidden_size_2: int,
            action_dim: int):
        """
        Parameters
        ----------
        state_dim : int
            Size of the input node features
        hidden_size_1 : int
            Output size of the GCN layer and of the first linear layer
        hidden_size_2 : int
            Output size of the second linear layer
        action_dim : int
            Not used: the last layer emits a single score per node
        """
        super().__init__()

        self.conv1 = GCNConv(state_dim, hidden_size_1)

        self.lin1 = nn.Linear(hidden_size_1, hidden_size_1)
        self.lin2 = nn.Linear(hidden_size_1, hidden_size_2)
        # One output per node instead of an action_dim-sized vector
        self.lin3 = nn.Linear(hidden_size_2, 1)

        self.relu = nn.ReLU()

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        """
        Compute the per-node scores.

        ``data`` is annotated as a tensor but is accessed through its ``x``
        and ``edge_index`` attributes, i.e. it is a graph data object.
        """
        hidden = F.relu(self.conv1(data.x, data.edge_index))
        for layer in (self.lin1, self.lin2):
            hidden = F.relu(layer(hidden))
        return self.lin3(hidden)

    # Disabled variant based on GraphEncoder, kept for reference:
    #     def forward(self, state: Data):
    #         embedding, _ = self.graph_encoder(state)
    #         embedding += state.x
    #         actions = self.relu(self.lin1(embedding))
    #         actions = self.relu(self.lin2(actions))
    #         actions = self.lin3(actions)
    #         return actions


#### Critic

In [61]:

class CriticNetwork(nn.Module):
    """
    Critic Network: a GCN layer whose node embeddings are summed into a
    single vector, followed by three linear layers that produce one value.
    """

    def __init__(
            self,
            state_dim: int,
            hidden_size_1: int,
            hidden_size_2: int):
        """
        Parameters
        ----------
        state_dim : int
            Size of the input node features
        hidden_size_1 : int
            Output size of the GCN layer and of the first linear layer
        hidden_size_2 : int
            Output size of the second linear layer
        """
        super().__init__()

        self.conv1 = GCNConv(state_dim, hidden_size_1)

        self.lin1 = nn.Linear(hidden_size_1, hidden_size_1)
        self.lin2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.lin3 = nn.Linear(hidden_size_2, 1)

        self.relu = nn.ReLU()

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        """
        Compute the value of the graph state.

        ``data`` is annotated as a tensor but is accessed through its ``x``
        and ``edge_index`` attributes, i.e. it is a graph data object.
        """
        node_embeddings = F.relu(self.conv1(data.x, data.edge_index))
        # Collapse the node dimension into one graph-level vector
        hidden = torch.sum(node_embeddings, dim=0)
        for layer in (self.lin1, self.lin2):
            hidden = self.relu(layer(hidden))
        return self.lin3(hidden)

    # Disabled variant based on GraphEncoder, kept for reference:
    #     def forward(self, state: Data):
    #         embedding, _ = self.graph_encoder(state)
    #         embedding += state.x
    #         embedding = torch.sum(embedding, dim=0)
    #         value = self.relu(self.lin1(embedding))
    #         value = self.relu(self.lin2(value))
    #         value = self.lin3(value)
    #         return value


## Test

### Node Hiding

In [62]:

class NodeHiding():
    """
    Class to evaluate the performance of the agent on the Node Hiding task, and 
    compare it with the baseline algorithms:
        - Random Hiding: choose randomly the edges to remove/add
        - Degree Hiding: choose the edges to remove/add based on the degree 
        - Roam Heuristic: use roam heuristic
    """
    
    def __init__(
            self,
            agent: Agent,
            model_path: str,
            lr: float = HyperParams.LR_EVAL.value,
            gamma: float = HyperParams.GAMMA_EVAL.value,
            lambda_metric: float = HyperParams.LAMBDA_EVAL.value,
            alpha_metric: float = HyperParams.ALPHA_EVAL.value,
            eval_steps: int = HyperParams.STEPS_EVAL.value,) -> None:
        """
        Parameters
        ----------
        agent : Agent
            Trained agent; its environment provides graph, communities and
            targets
        model_path : str
            Path of the checkpoint loaded by the agent at test time
        lr, gamma, lambda_metric, alpha_metric : float
            Hyperparameters forwarded to ``agent.test`` and used in the
            output path
        eval_steps : int
            Number of evaluation episodes
        """
        self.agent = agent
        self.original_graph = agent.env.original_graph.copy()
        self.model_path = model_path
        self.env_name = agent.env.env_name
        self.detection_alg = agent.env.detection_alg
        # Copy the community target to avoid aliasing the environment's own
        # object, exactly as reset_experiment does
        self.community_target = copy.deepcopy(agent.env.community_target)

        # Copy the community structure to avoid modifying the original one
        self.community_structure = copy.deepcopy(
            agent.env.original_community_structure)
        self.node_target = agent.env.node_target

        self.lr = lr
        self.gamma = gamma
        self.lambda_metric = lambda_metric
        self.alpha_metric = alpha_metric
        self.eval_steps = eval_steps
        
        # Set by set_parameters before an experiment is run
        self.beta = None
        self.tau = None
        self.edge_budget = None
        self.max_steps = None
        
        # Order matters: the run_* methods pick their name by index
        self.evaluation_algs = ["Agent", "Random", "Degree", "Roam"]

    def set_parameters(self, beta: int, tau: float) -> None:
        """Set the environment with the new parameters, for new experiments

        Parameters
        ----------
        beta : int
            Multiplicative factor for the number of edges to remove/add
        tau : float
            Constraint on the goal achievement
        """
        self.beta = beta
        self.tau = tau
        
        self.agent.env.beta = beta
        self.agent.env.tau = tau
        self.agent.env.set_rewiring_budget()
        
        # Read back the budget the environment derived from beta
        self.edge_budget = self.agent.env.edge_budget
        self.max_steps = self.agent.env.max_steps
        
        # Initialize the log dictionary
        self.set_log_dict()
        
        self.path_to_save = FilePaths.TEST_DIR.value + \
            f"{self.env_name}/{self.detection_alg}/" + \
            f"tau_{self.tau}/" + \
            "node_hiding/" + \
            f"beta_{self.beta}/" + \
            f"lr_{self.lr}/gamma_{self.gamma}/" + \
            f"lambda_{self.lambda_metric}/alpha_{self.alpha_metric}/"
            
    
    def reset_experiment(self) -> None:
        """
        Reset the environment and the agent at the beginning of each episode,
        and change the target community and node
        """
        self.agent.env.change_target_community()
        
        # Copy the community target to avoid modifying the original one
        self.community_target = copy.deepcopy(self.agent.env.community_target)
        self.node_target = self.agent.env.node_target
    
    ############################################################################
    #                               EVALUATION                                 #
    ############################################################################
    def run_experiment(self):
        """
        Function to run the evaluation of the agent on the Node Hiding task,
        and compare it with the baseline algorithms
        """
        # (label shown in the progress bar, algorithm to run), agent first and
        # baselines afterwards
        algorithms = (
            ("Agent", self.run_agent),
            ("Random", self.run_random),
            ("Degree", self.run_degree),
            ("Roam", self.run_roam),
        )
        # Start evaluation
        steps = trange(self.eval_steps, desc="Testing Episode")
        for step in steps:
            # Change the target community and node at each episode
            self.reset_experiment()
            for label, function in algorithms:
                steps.set_description(
                    f"* * * Testing Episode {step+1} | {label} Rewiring")
                self.run_alg(function)

        Utils.check_dir(self.path_to_save)
        Utils.save_test(
            log=self.log_dict,
            files_path=self.path_to_save,
            log_name="evaluation_node_hiding",
            algs=self.evaluation_algs,
            metrics=["nmi", "goal", "time", "steps"])
    
    def run_alg(self, function: Callable) -> None:
        """
        Wrapper function to run the evaluation of a generic algorithm, time
        it, and store its metrics in the log dictionary

        Parameters
        ----------
        function : Callable
            Algorithm to evaluate; must return the tuple
            (algorithm name, new graph, goal, nmi, steps)
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # affected by system clock adjustments
        start = time.perf_counter()
        alg_name, _, goal, nmi, step = function()
        elapsed = time.perf_counter() - start
        # Save results in the log dictionary
        self.save_metrics(alg_name, goal, nmi, elapsed, step)
        
    ############################################################################
    #                               AGENT                                      #
    ############################################################################
    def run_agent(self) -> Tuple[str, nx.Graph, int, float, int]:
        """
        Evaluate the agent on the Node Hiding task

        Returns
        -------
        Tuple[str, nx.Graph, int, float, int]
            Algorithm name, new graph, goal, nmi, steps
        """
        new_graph = self.agent.test(
            lr=self.lr,
            gamma=self.gamma,
            lambda_metric=self.lambda_metric,
            alpha_metric=self.alpha_metric,
            model_path=self.model_path,
        )
        goal, nmi = self._evaluate_communities(
            self.agent.env.new_community_structure)
        return self.evaluation_algs[0], new_graph, goal, nmi, self.agent.step
    
    ############################################################################
    #                               BASELINES                                  #
    ############################################################################
    def run_random(self) -> Tuple[str, nx.Graph, int, float, int]:
        """
        Evaluate the Random Hiding algorithm on the Node Hiding task

        Returns
        -------
        Tuple[str, nx.Graph, int, float, int]
            Algorithm name, new graph, goal, nmi, steps
        """
        return self._run_env_baseline(RandomHiding, self.evaluation_algs[1])
    
    def run_degree(self) -> Tuple[str, nx.Graph, int, float, int]:
        """
        Evaluate the Degree Hiding algorithm on the Node Hiding task

        Returns
        -------
        Tuple[str, nx.Graph, int, float, int]
            Algorithm name, new graph, goal, nmi, steps
        """
        return self._run_env_baseline(DegreeHiding, self.evaluation_algs[2])
    
    def run_roam(self) -> Tuple[str, nx.Graph, int, float, int]:
        """
        Evaluate the Roam Hiding algorithm on the Node Hiding task

        Returns
        -------
        Tuple[str, nx.Graph, int, float, int]
            Algorithm name, new graph, goal, nmi, steps
        """
        roam_hiding = RoamHiding(
            self.original_graph.copy(),
            self.node_target,
            self.edge_budget,
            self.detection_alg)
        ro_graph, ro_communities = roam_hiding.roam_heuristic(self.edge_budget)
        goal, nmi = self._evaluate_communities(ro_communities)
        # The whole budget is reported as the number of steps
        steps = self.edge_budget
        return self.evaluation_algs[3], ro_graph, goal, nmi, steps

    def _run_env_baseline(
            self,
            hiding_cls,
            alg_name: str) -> Tuple[str, nx.Graph, int, float, int]:
        """Run a baseline built from (env, steps, target_community) and score
        its result."""
        hiding = hiding_cls(
            env=self.agent.env,
            steps=self.edge_budget,
            target_community=self.community_target)
        graph, communities = hiding.hide_target_node_from_community()
        goal, nmi = self._evaluate_communities(communities)
        # hiding.steps is subtracted from the budget to report steps taken
        # NOTE(review): presumably hiding.steps holds the unused budget after
        # the run -- confirm against the baseline classes
        steps = self.edge_budget - hiding.steps
        return alg_name, graph, goal, nmi, steps

    def _evaluate_communities(self, communities) -> Tuple[int, float]:
        """Return (goal, nmi) for a community structure obtained after
        deception."""
        # Compute NMI between the new community structure and the original one
        nmi = self.get_nmi(self.community_structure, communities)
        # Check if the goal of hiding the target node was achieved
        goal = self.check_goal(self.get_new_community(communities))
        return goal, nmi

    
    ############################################################################
    #                               UTILS                                      #
    ############################################################################
    def get_nmi(
        self,
        old_communities: cdlib.NodeClustering,
        new_communities: cdlib.NodeClustering) -> float:
        """
        Compute the Normalized Mutual Information between the old and the new
        community structure

        Parameters
        ----------
        old_communities : cdlib.NodeClustering
            Community structure before deception
        new_communities : cdlib.NodeClustering
            Community structure after deception, or None when no rewiring was
            performed

        Returns
        -------
        float
            Normalized Mutual Information between the old and the new community
        """
        if new_communities is None:
            # The agent did not perform any rewiring, i.e. are the same communities
            return 1.0
        return old_communities.normalized_mutual_information(new_communities).score
    
    def get_new_community(
        self,
        new_community_structure: cdlib.NodeClustering) -> List[int]:
        """
        Search the community target in the new community structure after 
        deception. As new community target after the action, we consider the 
        community that contains the target node (``self.node_target``).

        Parameters
        ----------
        new_community_structure : cdlib.NodeClustering
            New community structure after deception, or None when no rewiring
            was performed

        Returns
        -------
        List[int]
            New community target after deception

        Raises
        ------
        ValueError
            If no community contains the target node
        """
        if new_community_structure is None:
            # The agent did not perform any rewiring, i.e. are the same communities
            return self.community_target
        for community in new_community_structure.communities:
            if self.node_target in community:
                return community
        raise ValueError("Community not found")

    def check_goal(self, new_community: List[int]) -> int:
        """
        Check if the goal of hiding the target node was achieved

        Parameters
        ----------
        new_community : List[int]
            New community of the target node

        Returns
        -------
        int
            1 if the goal was achieved, 0 otherwise
        """
        # A community made of the target node alone always counts as hidden
        if len(new_community) == 1:
            return 1
        # Copy the communities to avoid modifying the original ones, and
        # compare them without the target node itself
        new_community_copy = new_community.copy()
        new_community_copy.remove(self.node_target)
        old_community_copy = self.community_target.copy()
        old_community_copy.remove(self.node_target)
        # Compute the similarity between the new and the old community
        similarity = self.agent.env.community_similarity(
            new_community_copy,
            old_community_copy
        )
        return 1 if similarity <= self.tau else 0
    
    ############################################################################
    #                               LOG                                        #
    ############################################################################
    def set_log_dict(self) -> None:
        """Create an empty log dictionary with one metrics entry per
        algorithm, plus the environment parameters and the agent
        hyperparameters."""
        self.log_dict = dict()

        for alg in self.evaluation_algs:
            self.log_dict[alg] = {
                "goal": [],
                "nmi": [],
                "time": [],
                "steps": [],
            }

        # Add environment parameters to the log dictionary
        self.log_dict["env"] = dict()
        self.log_dict["env"]["dataset"] = self.env_name
        self.log_dict["env"]["detection_alg"] = self.detection_alg
        self.log_dict["env"]["beta"] = self.beta
        self.log_dict["env"]["tau"] = self.tau
        self.log_dict["env"]["edge_budget"] = self.edge_budget
        self.log_dict["env"]["max_steps"] = self.max_steps

        # Add Agent Hyperparameters to the log dictionary
        self.log_dict["Agent"]["lr"] = self.lr
        self.log_dict["Agent"]["gamma"] = self.gamma
        self.log_dict["Agent"]["lambda_metric"] = self.lambda_metric
        self.log_dict["Agent"]["alpha_metric"] = self.alpha_metric

    def save_metrics(
            self,
            alg: str,
            goal: int,
            nmi: float,
            time: float,
            steps: int) -> None:
        """Save the metrics of the algorithm in the log dictionary

        Parameters
        ----------
        alg : str
            Algorithm name, a key of the log dictionary
        goal : int
            1 if the goal was achieved, 0 otherwise
        nmi : float
            Normalized Mutual Information of the run
        time : float
            Elapsed time of the run, in seconds
        steps : int
            Number of steps of the run
        """
        self.log_dict[alg]["goal"].append(goal)
        self.log_dict[alg]["nmi"].append(nmi)
        self.log_dict[alg]["time"].append(time)
        self.log_dict[alg]["steps"].append(steps)

### Community Hiding

In [63]:

class CommunityHiding():
    """
    Class to evaluate the performance of the agent in the community hiding task,
    where the agent has to hide a whole community from a detection algorithm.
    Furthermore, the agent is compared with other baseline algorithms:
        - Safeness Community Deception

    Typical usage: call ``set_parameters`` to configure an experiment, then
    ``run_experiment`` to run ``eval_steps`` episodes and save the results.
    """

    def __init__(
            self,
            agent: Agent,
            model_path: str,
            lr: float = HyperParams.LR_EVAL.value,
            gamma: float = HyperParams.GAMMA_EVAL.value,
            lambda_metric: float = HyperParams.LAMBDA_EVAL.value,
            alpha_metric: float = HyperParams.ALPHA_EVAL.value,
            eval_steps: int = HyperParams.STEPS_EVAL.value,) -> None:
        """
        Parameters
        ----------
        agent : Agent
            Agent to evaluate, already bound to its environment
        model_path : str
            Path of the trained model, forwarded to ``agent.test``
        lr : float, optional
            Learning rate, forwarded to ``agent.test`` and used in the save path
        gamma : float, optional
            Gamma hyperparameter, forwarded to ``agent.test``
        lambda_metric : float, optional
            Lambda hyperparameter, forwarded to ``agent.test``
        alpha_metric : float, optional
            Alpha hyperparameter, forwarded to ``agent.test``
        eval_steps : int, optional
            Number of evaluation episodes run by ``run_experiment``
        """
        self.agent = agent
        # Work on a copy, so the baselines never modify the environment graph
        self.original_graph = agent.env.original_graph.copy()
        self.model_path = model_path
        self.env_name = agent.env.env_name
        self.detection_alg = agent.env.detection_alg
        self.community_target = agent.env.community_target

        # Copy the community structure to avoid modifying the original one
        self.community_structure = copy.deepcopy(
            agent.env.original_community_structure)

        self.lr = lr
        self.gamma = gamma
        self.lambda_metric = lambda_metric
        self.alpha_metric = alpha_metric
        self.eval_steps = eval_steps

        # Experiment parameters, defined by set_parameters
        self.beta = None
        self.tau = None
        self.edge_budget = None
        self.max_steps = None
        self.community_edge_budget = None
        self.node_edge_budget = None

        self.evaluation_algs = ["Agent", "Safeness"]

    def set_parameters(self, beta: int, tau: float) -> None:
        """Set the environment with the new parameters, for new experiments

        Parameters
        ----------
        beta : int
            In this case beta is the percentage of edges to remove or add
        tau : float
            Constraint on the goal achievement
        """
        self.beta = beta
        self.tau = tau

        self.agent.env.tau = tau
        # ! NOTE: It isn't the same beta as the one used in the Node Hiding
        # ! task, so the environment beta is left untouched.

        # Budget for the whole community: BETA percent of the graph edges
        self.community_edge_budget = int(math.ceil(
            self.original_graph.number_of_edges() * (self.beta / 100)))

        # We can't call the set_rewiring_budget function because we don't have
        # the beta value multiplier, and also we need to adapt to the Community
        # Hiding task, where the budget for the agent is set as the BETA
        # percentage of all the edges in the graph divided by the number of
        # nodes in the target community. So we set manually all the values of
        # the set_rewiring_budget function.
        self._set_node_budget()
        self.agent.env.used_edge_budget = 0
        self.agent.env.stop_episode = False
        self.agent.env.reward = 0
        self.agent.env.old_rewards = 0
        self.agent.env.possible_actions = self.agent.env.get_possible_actions()
        self.agent.env.len_add_actions = len(
            self.agent.env.possible_actions["ADD"])

        # Values reported in the "env" section of the log: the budget of the
        # whole community and the per-node step limit computed above (they
        # were never assigned before, so the log always contained None).
        self.edge_budget = self.community_edge_budget
        self.max_steps = self.node_edge_budget

        # Initialize the log dictionary
        self.set_log_dict()

        self.path_to_save = FilePaths.TEST_DIR.value + \
            f"{self.env_name}/{self.detection_alg}/" + \
            f"tau_{self.tau}/" + \
            "community_hiding/" + \
            f"beta_{self.beta}/" + \
            f"lr_{self.lr}/gamma_{self.gamma}/" + \
            f"lambda_{self.lambda_metric}/alpha_{self.alpha_metric}/"

    def _set_node_budget(self) -> None:
        """
        Split the community budget among the nodes of the current target
        community, and set the result as edge budget and maximum number of
        steps of the environment.
        """
        # max(1, ...) avoids a division by zero with an empty target community
        n_nodes = max(1, len(self.community_target))
        self.node_edge_budget = int(math.ceil(
            self.community_edge_budget / n_nodes))
        self.agent.env.edge_budget = self.node_edge_budget
        self.agent.env.max_steps = self.node_edge_budget

    def reset_experiment(self) -> None:
        """
        Reset the experiment at the beginning of each episode: change the
        target community, update the per-node budget accordingly, and
        re-create the objects that depend on the target community.
        """
        self.agent.env.change_target_community()

        # Copy the community target to avoid modifying the original one
        self.community_target = copy.deepcopy(self.agent.env.community_target)

        # The per-node budget depends on the size of the target community,
        # which has just changed, so it is computed again (only possible after
        # set_parameters has defined the community budget).
        if self.community_edge_budget is not None:
            self._set_node_budget()

        # Initialize the Deception Score algorithm
        self.deception_score_obj = DeceptionScore(
            copy.deepcopy(self.community_target))
        # Initialize the Safeness algorithm
        self.safeness_obj = Safeness(
            self.original_graph.copy(),
            copy.deepcopy(self.community_target),
        )

    def run_experiment(self) -> None:
        """
        Run ``eval_steps`` evaluation episodes. In each episode a new target
        community is chosen, then the agent and the baselines are evaluated
        on it. At the end the log dictionary is saved in ``path_to_save``.
        """
        # Start evaluation
        steps = trange(self.eval_steps, desc="Testing Episode")
        for step in steps:

            # Change the target community at each episode
            self.reset_experiment()

            # ° ------ Agent Rewiring ------ ° #
            steps.set_description(
                f"* * * Testing Episode {step+1} | Agent Rewiring")
            self.run_alg(self.run_agent)

            # ° --------- Baselines --------- ° #
            # Safeness
            steps.set_description(
                f"* * * Testing Episode {step+1} | Safeness")
            self.run_alg(self.run_safeness)

        Utils.check_dir(self.path_to_save)
        Utils.save_test(
            log=self.log_dict,
            files_path=self.path_to_save,
            log_name="evaluation_community_hiding",
            algs=self.evaluation_algs,
            metrics=["nmi", "goal", "deception_score", "time", "steps"])

    def run_alg(self, function: Callable) -> None:
        """
        Wrapper function to run the evaluation of a generic algorithm

        Parameters
        ----------
        function : Callable
            Algorithm to evaluate, it takes no arguments and returns the tuple
            (algorithm name, goal, nmi, deception score, steps)
        """
        start = time.time()
        alg_name, goal, nmi, deception_score, step = function()
        elapsed = time.time() - start
        # Save results in the log dictionary
        self.save_metrics(alg_name, goal, nmi, deception_score, elapsed, step)

    ############################################################################
    #                               AGENT                                      #
    ############################################################################
    def run_agent(self) -> Tuple[str, int, float, float, int]:
        """
        Evaluate the agent on the Community Hiding task, running it on the
        nodes of the target community until the community budget is used up
        or all the nodes have been processed.

        Returns
        -------
        Tuple[str, int, float, float, int]
            Algorithm name, goal, nmi, deception score, steps
        """
        tot_steps = 0
        for node in self.community_target:
            self.agent.env.node_target = node
            # The agent possible action are changed in the test function, which
            # calls the reset function of the environment
            self.agent.test(
                lr=self.lr,
                gamma=self.gamma,
                lambda_metric=self.lambda_metric,
                alpha_metric=self.alpha_metric,
                model_path=self.model_path,
            )
            tot_steps += self.agent.step
            # Stop as soon as the budget of the whole community is used up
            if tot_steps >= self.community_edge_budget:
                break

        detected = self.agent.env.new_community_structure
        # If the agent did not perform any rewiring there is no new community
        # structure (see get_nmi), i.e. the communities are the original ones
        if detected is None:
            communities = self.community_structure.communities
        else:
            communities = detected.communities

        # Compute Deception Score between the new community structure and the
        # original one
        deception_score = self.deception_score_obj.get_deception_score(
            self.agent.env.graph.copy(),
            copy.deepcopy(communities),
        )
        # Compute NMI between the new community structure and the original one
        nmi = self.get_nmi(self.community_structure, detected)
        # The goal is evaluated on the final community structure in any case,
        # not only when the loop stops because the budget is used up
        goal = 1 if self.community_target not in communities else 0
        return self.evaluation_algs[0], goal, nmi, deception_score, tot_steps

    ############################################################################
    #                               BASELINES                                  #
    ############################################################################
    def run_safeness(self) -> Tuple[str, int, float, float, int]:
        """
        Evaluate the Safeness algorithm on the Community Hiding task

        Returns
        -------
        Tuple[str, int, float, float, int]
            Algorithm name, goal, nmi, deception score, steps
        """
        new_graph, steps = self.safeness_obj.community_hiding(
            community_target=self.community_target,
            edge_budget=self.community_edge_budget,
        )
        # Compute the new community structure
        new_communities = self.agent.env.detection.compute_community(new_graph)

        # Compute Deception Score between the new community structure and the
        # original one. As for the agent, the score is computed on the graph
        # obtained after the rewiring, not on the original graph.
        deception_score = self.deception_score_obj.get_deception_score(
            new_graph.copy(),
            copy.deepcopy(new_communities.communities),
        )
        # Compute NMI between the new community structure and the original one
        nmi = self.get_nmi(self.community_structure, new_communities)
        goal = 1 if self.community_target not in new_communities.communities else 0
        return self.evaluation_algs[1], goal, nmi, deception_score, steps

    ############################################################################
    #                               UTILS                                      #
    ############################################################################
    def get_nmi(
            self,
            old_communities: cdlib.NodeClustering,
            new_communities: cdlib.NodeClustering) -> float:
        """
        Compute the Normalized Mutual Information between the old and the new
        community structure

        Parameters
        ----------
        old_communities : cdlib.NodeClustering
            Community structure before deception
        new_communities : cdlib.NodeClustering
            Community structure after deception, or None if no rewiring has
            been performed

        Returns
        -------
        float
            Normalized Mutual Information between the old and the new community
        """
        if new_communities is None:
            # The agent did not perform any rewiring, i.e. are the same communities
            return 1
        return old_communities.normalized_mutual_information(new_communities).score

    ############################################################################
    #                               LOG                                        #
    ############################################################################
    def set_log_dict(self) -> None:
        """
        (Re)create the log dictionary, with an empty list for each metric of
        each evaluated algorithm, the environment parameters in the "env"
        entry and the agent hyperparameters in the "Agent" entry.
        """
        self.log_dict = dict()

        for alg in self.evaluation_algs:
            self.log_dict[alg] = {
                "goal": [],
                "nmi": [],
                "time": [],
                "deception_score": [],
                "steps": [],
            }

        # Add environment parameters to the log dictionary
        self.log_dict["env"] = dict()
        self.log_dict["env"]["dataset"] = self.env_name
        self.log_dict["env"]["detection_alg"] = self.detection_alg
        self.log_dict["env"]["beta"] = self.beta
        self.log_dict["env"]["tau"] = self.tau
        self.log_dict["env"]["edge_budget"] = self.edge_budget
        self.log_dict["env"]["max_steps"] = self.max_steps

        # Add Agent Hyperparameters to the log dictionary (setdefault keeps
        # the metric lists created above and never raises a KeyError)
        agent_log = self.log_dict.setdefault("Agent", dict())
        agent_log["lr"] = self.lr
        agent_log["gamma"] = self.gamma
        agent_log["lambda_metric"] = self.lambda_metric
        agent_log["alpha_metric"] = self.alpha_metric

    def save_metrics(
            self,
            alg: str,
            goal: int,
            nmi: float,
            deception_score: float,
            time: float,
            steps: int) -> None:
        """
        Save the metrics of the algorithm in the log dictionary

        Parameters
        ----------
        alg : str
            Key of the algorithm in the log dictionary
        goal : int
            1 if the community has been hidden, 0 otherwise
        nmi : float
            NMI between the original and the new community structure
        deception_score : float
            Deception score of the new community structure
        time : float
            Value returned by run_alg as elapsed time (the name shadows the
            ``time`` module only inside this method)
        steps : int
            Number of steps performed by the algorithm
        """
        self.log_dict[alg]["goal"].append(goal)
        self.log_dict[alg]["nmi"].append(nmi)
        self.log_dict[alg]["deception_score"].append(deception_score)
        self.log_dict[alg]["time"].append(time)
        self.log_dict[alg]["steps"].append(steps)


## Execution

In [64]:
# ° --- Environment Setup --- ° #
# Build the graph environment with its default arguments; it is handed to the
# agent right below.
env = GraphEnvironment()

# ° ------  Agent Setup ----- ° #
# Create the agent bound to the environment above. The `agent` object is the
# one used by the TRAIN and TEST cells that follow.
agent = Agent(env=env)

******************** Environment Information ********************
* Graph Name: lfr_benchmark_node-250
* Graph with 250 nodes and 2103 edges
* * Compute Node Embedding using Node2Vec for nodes features
* * ...
* * End Embedding Computation
* Community Detection Algorithm: infomap
* Number of communities found: 11
* Rewiring Budget: (n_edges/n_nodes)*BETA = 2103 / 250 * 2 = 16
* Weight of the Deception Constraint: 0.5
* ---------------------------------------------------------- 

* ------------------  Model Architecture  ------------------
* Features vector size:  128
* A2C Hidden layer 1 size:  64
* A2C Hidden layer 2 size:  64
* Actor Action dimension:  250
* ---------------------------------------------------------- 

* ------------------ Hyperparameters List ------------------
* Learning rate list:  [1e-07, 0.0001, 0.1]
* Gamma parameter list:  [0.3, 0.9]
* Lambda Metric list:  [0.1, 1]
* Alpha Metric list:  [0.8]
* ---------------------------------------------------------- 



In [None]:
# ° ------ TRAIN ------ ° #
if TRAIN:
    # Training
    agent.grid_search()
    
    !zip -r logs.zip /kaggle/working/logs/
    FileLink(r'logs.zip')

* ------------------ Model Hyperparameters ------------------
* Learning rate:  1e-07
* Gamma parameter:  0.3
* Lambda Metric:  0.1
* Alpha Metric:  0.8
* Value for clipping the loss function:  1.1920928955078125e-07


* Episode 34 | Mul Reward: -0.13| Avg Reward: -0.13 | Steps: 16 | Actor Loss: 33.10 | Critic Loss: 5.50:   3%|▎         | 34/1000 [01:10<32:57,  2.05s/it]

In [None]:
# ° ------ TEST ------ ° #
if TEST:
    # To change the detection algorithm, or the dataset, on which the model
    # will be tested, please refer to the class HyperParams in the file
    # src/utils/utils.py, changing the values of the variables:
    # - GRAPH_NAME, for the dataset
    # - DETECTION_ALG, for the detection algorithm

    # To change the model path, please refer to the class FilePaths in the
    # file src/utils/utils.py
    model_path = FilePaths.TRAINED_MODEL.value

    # Tau defines the strength of the constraint on the goal achievement
    taus = [0.3, 0.5, 0.8]
    # BETAs defines the number of actions to perform
    # Beta for the community hiding task defines the percentage of rewiring 
    # action, add or remove edges
    community_betas = [10, 20, 30]
    # Beta for the node hiding task is a multiplier of mean degree of the
    # the graph
    node_betas = [1, 2, 3]  # [1,3,5]

    # Initialize the test class
    node_hiding = NodeHiding(agent=agent, model_path=model_path)
    community_hiding = CommunityHiding(agent=agent, model_path=model_path)

    for tau in taus:

        print("* Node Hiding with tau = {}".format(tau))
        for beta in node_betas:
            print("* * Beta = {}".format(beta))
            node_hiding.set_parameters(beta=beta, tau=tau)
            node_hiding.run_experiment()

        print("* Community Hiding with tau = {}".format(tau))
        for beta in community_betas:
            print("* * Beta = {}".format(beta))
            community_hiding.set_parameters(beta=beta, tau=tau)
            community_hiding.run_experiment()
        print("* "*50)
    
    !zip -r test.zip /kaggle/working/test/
    FileLink(r'test.zip')