# 第六章

在本次作业中，我们来实现一些简单的攻击方法。具体地，我们将对原邻接矩阵分别进行加边和减边操作。对于测试模型，我们使用`deeprobust.graph.defense.DeepWalk`。

In [1]:
# set up session:
import numpy as np
import scipy.sparse as sp

from deeprobust.graph.data import Dataset
from deeprobust.graph.defense import DeepWalk

device='cuda:0'

In [2]:
data = Dataset(root='./data', name='cora', setting='prognn')

adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

Loading cora dataset...
Selecting 1 largest connected components


In [3]:
# init model:
defender = DeepWalk()
# train:
defender.fit(adj)
# evaluate:
pred, micro_f1, macro_f1= defender.evaluate_node_classification(labels, idx_train, idx_test)

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.
[1m
File "../../../../opt/anaconda/envs/graph/lib/python3.7/site-packages/deeprobust/graph/defense/node_embedding.py", line 332:[0m
[1m@numba.jit(nopython=True, parallel=True)
[1mdef _random_walk(indptr, indices, walk_length, walks_per_node, seed):
[0m[1m^[0m[0m
[0m
  state.func_ir.loc))


Micro F1: 0.7972837022132797
Macro F1: 0.7743413494924823


下面我们来实现一个简单的攻击方式。

In [4]:
class SimpleAttack():
    """
    首先得到一定数量的边（称之为candidates），然后从候选边当中
    根据某种方式（random或者degree）来选择最后需要扰动的边。
        
    参数说明
    ----
    metric: random或者degree。random表示随机选择，degree表示选择degree更大的边。
    attack_type: add或者remove，分别表示加边和减边。
    """
    def __init__(self, metric, attack_type):
        self.metric = metric
        self.attack_type = attack_type
                 
    def attack(self, adj, n_perturbations=1000, n_candidates=10000, seed=0):
        if self.attack_type == 'add':
            # 加边的时候我们设置一下n_candidates来限制选择范围，不然可以选择的边实在是太多了。
            candidates = self.generate_candidates_addition(adj, n_candidates, seed) 
        elif self.attack_type == 'remove':
            candidates = self.generate_candidates_removal(adj)
        else:
            raise NotImplementedError

        if self.metric == "random":
            top_flips = self.random_top_flips(candidates, n_perturbations, seed)
        elif self.metric == "degree":
            top_flips = self.degree_top_flips(adj, candidates, n_perturbations)
        else:
            raise NotImplementedError 
        
        assert len(top_flips) == n_perturbations
        modified_adj = self.flip_candidates(adj, top_flips)
        self.modified_adj = modified_adj

    def generate_candidates_removal(self, adj):
        """产生减边的候选边：从当前的所有边中除开那些会产生孤立节点的边。
        """
        # first, build a minimum-spanning tree to identify all critical edges:
        mst = sp.csgraph.minimum_spanning_tree(adj)
        mst = mst.maximum(mst.T)
        
        # extract candidate edges:
        candidates = list(
            zip(*(adj - mst).nonzero())
        )
        
        return candidates
    
    def generate_candidates_addition(self, adj, n_candidates, seed=None):
        """产生可以被加边的候选边（也就是说，现在不是边)。
        """
        # set seed for numpy.random if provided:
        if seed is not None:
            np.random.seed(seed)

        # generate candidates:
        N = len(adj.indptr) - 1
        
        candidates = set()
        while len(candidates) < n_candidates:
            proposals = np.random.randint(0, N, [n_candidates, 2])
            
            proposals = set(
                list(map(tuple, proposals[adj[proposals[:, 0], proposals[:, 1]].A1 < 1.0]))
            )
            
            candidates = candidates.union(proposals)
        candidates = list(candidates)
        
        return candidates[:n_candidates]
    
    def random_top_flips(self, candidates, n_perturbations, seed=None):
        """从candidates中随机选择n_perturbations个候选边。
        """
        # set seed for numpy.random if provided:
        if seed is not None:
            np.random.seed(seed)
            
        return [
            candidates[i] for i in 
            np.random.permutation(len(candidates))[:n_perturbations]
        ]

    def degree_top_flips(self, adj, candidates, n_perturbations):
        """从candidates中随机选择n_perturbations个degree最大的候选边。
        这里，边的degree我们可以计算为它连接的节点的degree的相加。
        """
        # get node degrees:
        N = len(adj.indptr) - 1
        node_degree = {
            k: v for (k, v) in zip(range(N), np.diff(adj.indptr))
        }
        
        # get candidate edge degree
        edge_degree = [
            node_degree[u] + node_degree[v] for (u, v) in candidates
        ]
        
        return [
            candidates[i] for i in 
            # identify most influential edges through fast selection:
            np.argpartition(edge_degree, -n_perturbations)[-n_perturbations:]
        ]
    
    def flip_candidates(self, adj, candidates):
        """翻转候选边，0变成1，1变成0。

        返回值: sp.csr_matrix, shape [n_nodes, n_nodes]
           翻转后的邻接矩阵。
        """
        # num. of nodes:
        N = len(adj.indptr) - 1
        
        # generate pertubation:
        pertubation = sp.coo_matrix(
            (np.ones(len(candidates)), tuple(zip(*candidates))), 
            shape=(N, N)
        ).tocsr()
        
        # flip selected edges:
        adj_flipped = pertubation - adj
        
        # prune:
        adj_flipped.eliminate_zeros()
        
        # done:
        return adj_flipped

In [5]:
import itertools
comb = itertools.product(["random", "degree"], ["add", "remove"])

for metric, attack_type in comb:
    print(metric, attack_type)
    attacker = SimpleAttack(metric=metric, attack_type=attack_type)
    attacker.attack(adj, n_perturbations=1000)
    modified_adj = attacker.modified_adj
    defender = DeepWalk()
    defender.fit(modified_adj)
    _ = defender.evaluate_node_classification(labels, idx_train, idx_test)

random add
Micro F1: 0.7484909456740443
Macro F1: 0.7179212506057302
random remove
Micro F1: 0.7952716297786722
Macro F1: 0.7689443654931297
degree add
Micro F1: 0.772635814889336
Macro F1: 0.7468532574920008
degree remove
Micro F1: 0.7882293762575453
Macro F1: 0.754619675083964


比较之后我们会发现，加边比减边更具有破坏力。

**我们也可以直接调用DeepRobust来完成这个任务**：

In [6]:
from deeprobust.graph.global_attack import OtherNodeEmbeddingAttack
import itertools
comb = itertools.product(["random", "degree"], ["add", "remove"])

for type_, attack_type in comb:
    model = OtherNodeEmbeddingAttack(type=type_)
    print(type_, attack_type)
    model.attack(adj, attack_type=attack_type, n_candidates=10000)
    modified_adj = model.modified_adj
    defender = DeepWalk()
    defender.fit(modified_adj)
    defender.evaluate_node_classification(labels, idx_train, idx_test)

random add
Micro F1: 0.7112676056338029
Macro F1: 0.6835577036603301
random remove
Micro F1: 0.7469818913480886
Macro F1: 0.7260457807340718
degree add
Micro F1: 0.7137826961770624
Macro F1: 0.6827644139178616
degree remove
Micro F1: 0.7565392354124748
Macro F1: 0.72535397683836
