In [156]:
# This code is open source to USENIX Security submission only (now). 
# After the publication of the corresponding thesis, the code will be free to use, modify, and distribute.
# Copyright (c) [Anonymous authors] [2023], which will be public after the publication.
# Licensed under the MIT License, Version 2.0.
# We kindly ask that this code be used for academic purposes only.
# Please do not abuse this code to launch any attacks. 

# This code is the CrossPoint controller.
# After you collect the bot-config files, you can run this file to reveal hidden links.
# Input: 1. Bot-config files (JSON), which should be in the correct place.
#        2. Optionally, CSV files that represent the result of congestion.
#        3. Your budget.
# Output: 1. The control group.
#         2. The attack bots (using CC, SD, or both).

# CrossPoint attacks workflow: 

# For researchers who want to rebuild our experiments in their own environment:
    # 1. Run bot_config on each bot to generate the attack_flow JSON files.
    # 2. Send these JSON files to the controller.
    # 3. On the controller, run controller_CrossPoint to output the suspicious attack flow set. (This file)
    # 4. On the controller, find the control group and the attack flow set.
    # 5. Run bash_ping on the control group bots and the suspicious attack flow bots.
    # 6. Run controller_CrossPoint to find profitable links.

# (Fast example) We also give a fast example with configured bots and congestion files:
    # 1. Run the main code in the folder example.
    # It might take a few minutes to run (the main part is analyzing the congestion).

# The experiment can be done with simulations or with real experiments.
# The only difference is in step 2: the method you use to transfer the JSON bot config files.

# Using this you can run the bash_ping command on a bot to get the csv ping files.
# You should add your destination address in the @ip_list variable.

import re
import os
import time
import json
import random
import logging
import itertools
import numpy as np
import pandas as pd
import geopy.distance
import networkx as nx
import multiprocessing
from datetime import datetime
import matplotlib.pyplot as plt 
from itertools import combinations
from networkx.exception import NetworkXError
from networkx.readwrite.json_graph import node_link
from networkx.algorithms.operators.unary import reverse


# Debug switch read by functions in this file: assign_ip_addr_randomly() additionally stores
# an "ip_list_debug" attribute on every node, and CrossPoint_Attack_SD() loads the
# "Abilene.gml" example topology when it is set.
DEBUG = True

class atf: # attack_flow_class
    """One attack flow (a bot -> server pair) together with its observed routes.

    Only ``src`` and ``dst`` are mandatory; every other field has a neutral default so
    that a flow can be created first and filled in later (as bot_init_debugs does).
    Two flows compare equal when their ``src`` and ``dst`` match; ordering (used when
    sorting) is by ``similarity_v``.
    """

    def __init__(self,src,dst,weight=1,delay=0,route=None,route_nh=None,route_ip=None,
                 route_eq=None,nh_hd=0,similarity_v=0,similarity_p=0):
        self.src = src # Self IP address.
        self.dst = dst # dst IP address.
        self.weight = weight # Size of attack flows, in default it is 1.
        self.delay = delay # propagation delay.
        # Route defaults are None in the signature (never a shared mutable default);
        # each instance gets its own empty list.
        self.route = [] if route is None else route # physical route of traceroute.
        # This attribute is used in experiment for debug and judge the success.
        self.route_nh = [] if route_nh is None else route_nh # obfuscated route from nethide.
        self.route_ip = [] if route_ip is None else route_ip # physical route of traceroute IP, same as self.route
        # This attribute is used in experiment for debug and judge the success.
        self.route_eq = [] if route_eq is None else route_eq # obfuscated route from equalnet.
        self.nh_hd = nh_hd # The statistical disparities (SD) value.
        self.similarity_v = similarity_v # The statistical disparities value.
        self.similarity_p = similarity_p # physical SD, Used for debug and draw figures.

    def __str__(self):
        return f"({self.src},{self.dst}),route={self.route}"

    def get_sd_ip(self):
        # Used as keys when sort.
        return self.similarity_v

    def get_sd_nh(self):
        # Used as keys when sort.
        return self.nh_hd

    def __lt__(self,other):
        # Default sort order: ascending similarity_v.
        return self.similarity_v < other.similarity_v

    def __eq__(self,other):
        # Duck-typed on purpose: anything exposing src/dst can be compared.
        # Objects without those attributes (e.g. None) are simply "not equal"
        # instead of raising AttributeError.
        try:
            return self.src == other.src and self.dst == other.dst
        except AttributeError:
            return NotImplemented

    def to_json(self):
        """Return the attribute dict serialised as a JSON string."""
        return json.dumps(self.__dict__)

    @classmethod
    def from_json(cls, json_file):
        """Load a flow from a file written by to_json_file().

        Returns None (after printing a message) when the file does not exist.
        """
        try:
            with open(json_file,'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"The json file {json_file} not exist.")
            return None
        # to_json_file() stores a JSON *string* that itself contains the attribute
        # dict (double encoding). Decode it; a plain JSON object is accepted as well.
        if isinstance(data, str):
            data = json.loads(data)
        return cls(**data)

    def to_json_file(self, path):
        """Write the flow to ``path + 'Bot_<src>_<dst>.json'``.

        ``path`` is used as a plain string prefix, so it must end with a path
        separator when it names a directory. The on-disk format is the
        double-encoded JSON string that from_json() expects.
        """
        myname = path+f'Bot_{self.src}_{self.dst}.json'
        with open(myname,'w') as f:
            json.dump(self.to_json(),f)

class edg: # class edge, for convinence evaluation of different atf_set
    """An undirected link between two nodes plus the attack flows that cross it."""

    def __init__(self,src,dst) -> None:
        self.src, self.dst = src, dst
        self.atf_set = []   # flows routed over this link
        self.fd = 0         # flow density
        self.cp_num = 0     # set by update_crosspoint_num()
        self.cp_set = []
        self.real_ip = []

    def update_crosspoint_num(self,link_capacity):
        """Store ``link_capacity`` minus the current flow density in ``cp_num``."""
        self.cp_num = link_capacity - self.fd

    def write_atf_set(self,atf_set):
        """Append every flow of ``atf_set``, refresh ``fd`` and return it."""
        self.atf_set.extend(atf_set)
        self.fd = len(self.atf_set)
        return self.fd

    def __lt__(self,other):
        # Edges sort by flow density.
        return self.fd < other.fd

    def __eq__(self,other):
        # Direction is ignored: (a, b) equals (b, a).
        if other is None:
            return False
        same_direction = self.src == other.src and self.dst == other.dst
        reversed_direction = self.src == other.dst and self.dst == other.src
        return same_direction or reversed_direction

    def __str__(self):
        flow_pairs = [(flow.src,flow.dst) for flow in self.atf_set]
        mystr = f"({self.src},{self.dst}),flow_density = {self.fd},atf_set = "
        return mystr + str(flow_pairs)
    

def raw_ping_to_csv(rawping_file,csv_file_name):
    """Convert a raw ``ping`` log into the CSV layout produced by bash_ping.

    Only reply lines (those containing 'bytes from') are used. From each one the
    bracketed timestamp, the icmp_seq number and the round-trip time are extracted
    and written to ``csv_file_name + '.csv'`` with the columns ``time``, ``seq`` and
    ``delay``.

    Reply lines that lack any of the three fields (for example a log recorded
    without the bracketed timestamp) are skipped instead of aborting the conversion.

    Args:
        rawping_file: path of the raw ping log.
        csv_file_name: output path WITHOUT the '.csv' extension.
    """
    timestamp_re = re.compile(r"\[(.*?)\]")
    seq_re = re.compile(r"icmp_seq=(\d+)")
    rtt_re = re.compile(r"time=(.*?) ms")

    timestamps = []
    icmp_seqs = []
    delays = []
    with open(rawping_file, 'r') as f:
        for row in f:
            if 'bytes from' not in row:
                continue
            fields = (timestamp_re.search(row), seq_re.search(row), rtt_re.search(row))
            if None in fields:
                # Incomplete reply line: nothing usable to record.
                continue
            timestamps.append(fields[0].group(1))
            icmp_seqs.append(fields[1].group(1))
            delays.append(fields[2].group(1))
    # Create a DataFrame
    df = pd.DataFrame({'time': timestamps, 'seq': icmp_seqs, 'delay': delays})
    # Save to a new CSV file
    df.to_csv(csv_file_name+'.csv', index=False)
    return 

class congestion:
    """One congestion episode observed in a ping trace."""

    def __init__(self,start_time,congestion_sample,length,coef_start_time,minrtt) -> None:
        # The start_time must be correct: it is one of the main features that
        # influence the accuracy of CrossPoint attacks.
        # (One of the authors wrote a bug here ... We spent almost one week to figure it out....)
        self.length = length
        self.start_time = start_time
        self.congestion_sample = congestion_sample
        # Correlation sample: the episode padded with 10 minrtt values on each side.
        padding = [minrtt]*10
        self.coef_sample = padding + congestion_sample + padding
        self.coef_start_time = coef_start_time
        
    def update_bucket(self,value):
        """Replace every -1 placeholder in congestion_sample by ``value``."""
        replaced = []
        for sample in self.congestion_sample:
            replaced.append(value if sample == -1 else sample)
        self.congestion_sample = replaced
            
    def update_length(self):
        # Not use in the latest version for CCS submission
        # Reads self.delta_rtt / self.drop, which __init__ does not create.
        assert(len(self.delta_rtt) == len(self.drop))
        self.length = len(self.delta_rtt)
        _unused_window = int(self.length / 10 + 1)
        
    def get_coef_trace(self):
        # Not use in the latest version for CCS submission
        # Reads attributes that __init__ does not create.
        return self.coef_rtt_before + self.delta_rtt + self.coef_rtt_after 

    def update_coef_list(self,tr_list):
        # Not use in the latest version for CCS submission
        # Reads attributes that __init__ does not create.
        assert(self.length!=0)
        before_time = max(self.start_time - self.length * 0.1, 0)
        after_time = min(self.start_time + self.length * 0.2, len(tr_list.tr_list))
        idx_before = tr_list.get_trace_from_time(before_time)
        idx_after = tr_list.get_trace_from_time(after_time)
        copied = 0
        while copied < self.length:
            self.coef_rtt_before.append(tr_list.tr_list[idx_before + copied].rtt)
            self.coef_rtt_after.append(tr_list.tr_list[idx_after + copied].rtt)
            copied += 1
        return 0

    def __str__(self) -> str:
        return f'Congestion {self.start_time} inteval {self.length} rtt {self.congestion_sample} '

class correlated_congestion:
    """A congestion start time with the sample of every control-group member."""

    def __init__(self,start_time,member_congestion):
        self.start_time, self.member_congestion = start_time, member_congestion

    @classmethod
    def from_json(cls, json_str):
        """Build an instance from the JSON string produced by to_json()."""
        fields = json.loads(json_str)
        return cls(**fields)

    def to_json(self):
        """Serialise the instance attributes as a JSON string."""
        return json.dumps(vars(self))


def get_minRTT(trace):
    """Return the smallest value of the trace's "delay" column.

    The minimal RTT of a trace is used as its propagation delay.
    """
    return trace["delay"].min()

def get_maxRTT(trace):
    """Return the largest value of the trace's "delay" column.

    The maximal RTT represents the edge value of the RTT near a packet loss, so it
    can stand in for the RTT of a dropped packet.
    """
    return trace["delay"].max()

def get_local_max(trace,idx):
    """Return the largest "delay" in the positional window [idx-5, idx+5).

    The local max delay indicates the edge value of a packet loss.

    The window start is clamped to 0: for idx < 5 a negative start would be read by
    ``iloc`` as "count from the end", which selects the wrong rows (typically none,
    giving NaN).

    Args:
        trace: DataFrame with a "delay" column.
        idx: position (not label) of the row at the centre of the window.
    """
    window_start = max(idx - 5, 0)
    return trace["delay"].iloc[window_start:idx+5].max()

def get_loss(trace):
    """Count the loss events of a trace.

    A loss event is a row whose "seq" exceeds the previous row's "seq" by more than 1
    (a gap of several packets still counts once).

    Consecutive rows are compared by position, so the result does not depend on the
    DataFrame index (the previous label-based lookup failed on any index that is not
    0..n-1, e.g. after filtering).

    Returns:
        int: number of gaps in the sequence numbers.
    """
    return int((trace["seq"].diff() > 1).sum())
            
def get_abnRTT(trace):
    """Count the samples whose delay exceeds the trace's minimal RTT by more than 30.

    The first sample of the trace is not considered.
    """
    threshold = get_minRTT(trace) + 30
    abnormal = 0
    for position, delay in enumerate(trace["delay"]):
        if position != 0 and delay > threshold:
            abnormal += 1
    return abnormal    

def delay_update(trace,minrtt):
    """Snap delays to ``minrtt`` in place.

    First every delay closer than 10 to ``minrtt`` is set to ``minrtt``; then every
    delay still below ``minrtt`` is raised to it. ``trace`` is modified, nothing is
    returned.
    """
    close_to_floor = (trace["delay"] - minrtt).abs() < 10
    trace.loc[close_to_floor,'delay'] = minrtt
    under_floor = trace["delay"] < minrtt
    trace.loc[under_floor,'delay'] = minrtt

def time_synchronize(trace,propagation_delay_to_target):
    """Shift the "time" column of ``trace`` in place to synchronise it with the target.

    This is an optional synchronisation step: it requires the propagation delay to the
    target, which has to be measured with extra ping messages (their minimal delay).
    We recommend doing it because in evaluations it shows a ~20% performance increase.

    Every time stamp is moved back by half of the trace's minimal RTT plus the
    propagation delay to the target. Both are in ms, hence the factor 0.001
    (assumes the "time" column is in seconds -- TODO confirm).

    The minimal RTT is taken from the trace's own "delay" column; previously the
    helper read a name ``minrtt`` that was never defined in this function.

    Args:
        trace: DataFrame with "time" and "delay" columns; modified in place.
        propagation_delay_to_target: propagation delay to the target, in ms.
    """
    minrtt = trace["delay"].min()
    trace["time"] = trace["time"] - minrtt/2 * 0.001 - propagation_delay_to_target * 0.001

def init_trace(trace):
    """Return a copy of a ping trace in which lost packets are filled in.

    For every gap in "seq" one row per missing sequence number is inserted with
    ``drop = 1``, a time stamp spaced 0.1 after the last received packet, and a
    placeholder delay of -1. Afterwards every row whose delay is -1 receives the
    local maximum delay around it (see get_local_max).

    Compared with the previous version:
      * the "drop" column always exists (0 for received packets) -- it used to be
        missing for loss-free traces and NaN for received packets, which broke the
        drop counting in prepare_coef_sample;
      * all gaps are located once and the frame is rebuilt with a single concat
        instead of one concat per gap;
      * the input frame is not modified and may carry any index.

    Args:
        trace: DataFrame with at least the columns "seq", "delay" and "time".

    Returns:
        A new DataFrame with a 0..n-1 index and a "drop" column.
    """
    trace = trace.reset_index(drop=True)
    if "drop" not in trace.columns:
        trace["drop"] = 0

    seq = trace["seq"].to_numpy()
    # Positions of the first packet received after each gap.
    gap_positions = np.flatnonzero(np.diff(seq) > 1) + 1
    if gap_positions.size:
        pieces = []
        previous = 0
        for pos in gap_positions:
            first_lost = int(seq[pos - 1]) + 1
            lost = int(seq[pos]) - first_lost
            last_seen_time = trace.loc[pos - 1, "time"]
            pieces.append(trace.iloc[previous:pos])
            pieces.append(pd.DataFrame(
                {"seq": range(first_lost, int(seq[pos])),
                 "delay": [-1.0] * lost,
                 "time": [last_seen_time + 0.1 + i * 0.1 for i in range(lost)],
                 "drop": 1}))
            previous = pos
        pieces.append(trace.iloc[previous:])
        trace = pd.concat(pieces).reset_index(drop=True)

    # Filled sequentially on purpose: an already filled row can take part in the
    # window of a later one.
    for i in trace.index[trace["delay"] == -1]:
        trace.loc[i, "delay"] = get_local_max(trace, i)
    return trace

def init_congestion(trace_file_name):
    """Read a ping CSV file and extract its congestion episodes.

    The CSV is loaded with pandas and passed through init_trace(); delays are then
    normalised with delay_update() using the trace's minimal RTT.

    A sample is "congested" when its sequence number jumps by more than 1 relative to
    the previous row, or when its delay exceeds the minimal RTT by more than
    ``min(0.3 * minrtt, 30)``. An episode starts at the first congested sample and
    ends at the first following sample that is not congested; all congested delays
    in between form the episode's sample.

    NOTE(review): an episode that is still open when the trace ends is not appended
    to the result (the inner loop leaves via ``break`` before the append).
    NOTE(review): ``rtt`` is taken from the frame before delay_update() modifies it
    in place; whether it observes the update depends on pandas' copy semantics --
    confirm.

    Args:
        trace_file_name: path of the CSV file (columns "seq", "delay", "time").

    Returns:
        list of congestion objects, in order of occurrence.
    """
    # Read the ping csv data and output the congestion() samples
    congestion_list = []
    trace_raw = pd.read_csv(trace_file_name)
    trace = init_trace(trace_raw)
    #print(trace.iloc[270:300])
    seq = trace["seq"]
    rtt = trace["delay"]
    timestamp = trace["time"]
    minrtt = get_minRTT(trace)
    #maxrtt = get_maxRTT(trace)
    
    #minrtt = 56 # debug
    print(f"minrtt is {minrtt}")
    delay_update(trace,minrtt)
    #print(minrtt)

    # We set the congestion as delay increase more than 30\% of the minimal RTT.
    # In most congestion control thesis, congestion is any condition when delay > minimal RTT.
    # The threshold is capped at 30.
    delay_idt_thre = min([0.3 * minrtt,30])
    #print(delay_idt_thre)
    idx = 1 
    while idx < len(seq):
        # loss indicator to judge whether congestion happens.
        loss_idt = seq[idx] - seq[idx - 1]
        # delay indicator to judge whether congestion happens.
        delay_idt = rtt[idx] - minrtt
        # If lost package or delay is high
        if loss_idt > 1 or delay_idt > delay_idt_thre:
            c_flag = True
            congestion_start_time = timestamp[idx]
            congestion_sample = []
            if loss_idt > 1:
                # One entry per missing packet, valued at the larger of the two
                # delays surrounding the gap.
                #maxrtt = get_local_max(trace,idx)
                maxrtt = max(rtt[idx],rtt[idx-1])
                #maxrtt = rtt[idx]
                sample = [maxrtt] * (loss_idt - 1)
                congestion_sample += sample
            if delay_idt > delay_idt_thre:
                #congestion_sample.append(delay_idt // delay_idt_thre)
                congestion_sample.append(rtt[idx])
            leng_idx = idx
            # A congestion starts, judge whether the following trace belongs to this congestion.
            while c_flag:
                idx += 1
                if idx >= len(seq):
                    # End of trace reached while the episode is still open.
                    break
                loss_idt = seq[idx] - seq[idx - 1]
                delay_idt = rtt[idx] - minrtt
                if loss_idt > 1 or delay_idt > delay_idt_thre:
                    c_flag = True
                    if loss_idt > 1:
                        maxrtt = max(rtt[idx],rtt[idx-1])
                        sample = [maxrtt] * (loss_idt - 1)
                        congestion_sample += sample
                    if delay_idt > delay_idt_thre:
                        #congestion_sample.append(delay_idt // delay_idt_thre)
                        congestion_sample.append(rtt[idx])
                    continue
                else:
                    # First non-congested sample: close the episode and record it.
                    c_flag = False
                    congestion_length = len(congestion_sample)
                    # The correlation window starts 1.0 before the episode.
                    coef_start_time = congestion_start_time - 1.0
                    cgt = congestion(congestion_start_time, \
                                     congestion_sample,congestion_length, \
                                    coef_start_time, minrtt)
                    congestion_list.append(cgt)
        idx += 1
    return congestion_list

def search_time_trace(trace,start_time):
    """Find the row of ``trace`` whose "time" is closest to ``start_time``.

    Given a start_time (from another bot), this locates the matching rtt sample in
    this bot's trace so that the correlation can be judged.

    Returns:
        The index label of the closest row among those less than 0.1 away from
        ``start_time``, or None when no row is that close.
    """
    distance = (trace["time"] - start_time).abs()
    nearby = distance[distance < 0.1]
    if nearby.empty:
        return None
    return nearby.idxmin()

def prepare_coef_sample(trace,trace_start_time,length):
    """Cut the delay sample of ``trace`` that lines up with another bot's congestion.

    Args:
        trace: the whole (init_trace-processed) csv trace. Its delays are normalised
            in place via delay_update().
        trace_start_time: the congestion.coef_start_time to align with.
        length: number of samples to extract.

    Returns:
        A list of ``length`` delays starting at the row closest to
        ``trace_start_time``, or ``[]`` when
          * no row lies within reach of ``trace_start_time`` (see search_time_trace),
          * ``length`` is not positive,
          * the window (with a margin of 10 rows on each side) does not fit in the trace,
          * more than 80% of the extracted samples are dropped packets.
        When every sample equals the minimal RTT the first one is perturbed slightly,
        so that the sample is not constant (a constant sample has no defined
        correlation).

    Fixes relative to the previous version: the drop flag is read for the sampled row
    itself (it used to be read one row late), missing/NaN drop flags count as "not
    dropped", a non-positive ``length`` no longer raises IndexError, and the delays
    are read after they have been normalised.
    """
    idx_base = search_time_trace(trace,trace_start_time)
    if idx_base is None or length <= 0:
        return []
    if idx_base - 10 < 0 or idx_base + length + 10 > len(trace):
        return []
    minrtt = get_minRTT(trace)
    delay_update(trace,minrtt)
    rtt = trace["delay"]
    drop = trace["drop"] if "drop" in trace.columns else None

    coef_sample = []
    drop_count = 0
    for idx in range(idx_base, idx_base + length):
        coef_sample.append(rtt[idx])
        if drop is not None:
            flag = drop[idx]
            if pd.notna(flag):
                drop_count += flag
    if drop_count > len(coef_sample)*0.8:
        return []
    if all(t == minrtt for t in coef_sample):
        coef_sample[0] -= random.random() / 10
    return coef_sample

def c_group_congestions(C_group_congestion_file,target_name) -> list:
    """Build the correlated congestion samples of a control group and save them.

    The first file of ``C_group_congestion_file`` provides the reference congestion
    episodes; every other file is loaded as a trace. For each reference episode the
    matching sample of every other member is extracted, and the episode is kept when
    the largest pairwise correlation distance (1 - correlation coefficient) between
    the samples lies in [0, 0.5).

    The kept episodes are written to ``<target_name>_cc.json``, one JSON-encoded line
    per episode. In CrossPoint attacks this file should be sent to unknown bots for
    judging. This function might be memory starving, depending on how many members
    are in the c-group.

    Returns:
        list of correlated_congestion objects.
    """
    reference_congestions = init_congestion(C_group_congestion_file[0])

    # Load the traces of the remaining members (init_trace may need a long time).
    member_traces = []
    for position in range(1, len(C_group_congestion_file)):
        print("Start inititial traces, ",position)
        member_traces.append(init_trace(pd.read_csv(C_group_congestion_file[position])))
    print("Init congestion and trace finished")

    correlated = []
    for cgt in reference_congestions:
        if not cgt.length > 1:
            # Too short congestion, drop
            continue
        if cgt.coef_sample == []:
            # Bad congestion, might be the "host unreadable" in data.
            continue
        samples = [cgt.coef_sample]
        for member in member_traces:
            member_sample = prepare_coef_sample(member, cgt.start_time - 1.0, cgt.length + 20)
            if member_sample == []:
                # If A observes congestion at 9:00 p.m.
                # But B's csv file lasts from 8:00 p.m. to 8:30 p.m. (end before the congestion)
                # The prepare_coef_sample will return [] to tell the abnormal condition.
                break
            samples.append(member_sample)
        else:
            # Every member delivered a sample: check how well they correlate.
            distance = 1 - np.corrcoef(samples)
            off_diagonal = ~np.eye(distance.shape[0], dtype=bool)
            max_distance = np.max(distance[np.where(off_diagonal)])
            if 0 <= max_distance < 0.5:
                correlated.append(correlated_congestion(cgt.start_time,samples))

    out_name = '_'.join([target_name,'cc']) + '.json'
    with open(out_name,'w') as f:
        for cc in correlated:
            json.dump(cc.to_json(),f)
            f.write('\n')

    return correlated

def get_cgroup_congestion(filename):
    """Load the correlated congestions stored one per line in ``filename``.

    Each line holds a JSON string that itself contains the JSON form of one
    correlated_congestion (the format written by c_group_congestions).
    """
    with open(filename) as infile:
        return [correlated_congestion.from_json(json.loads(line)) for line in infile]

def bot_judge_congestion(bot_congestion_file,cc_congestion_sample):
    """Count how many control-group congestions are also visible in a bot's trace.

    For every correlated congestion the bot's sample at the same time is extracted
    and appended to the members' samples; the congestion counts as evidence when the
    largest pairwise correlation distance (1 - correlation coefficient) lies in
    [0, 0.5). Congestions for which the bot has no usable sample are skipped.

    Args:
        bot_congestion_file: path of the bot's ping CSV file.
        cc_congestion_sample: iterable of correlated_congestion objects.

    Returns:
        int: number of congestions that indicate link sharing.
    """
    trace = init_trace(pd.read_csv(bot_congestion_file))
    evidence = 0
    print("Trace initialize finished. ")
    for cc in cc_congestion_sample:
        window = len(cc.member_congestion[0])
        bot_sample = prepare_coef_sample(trace, cc.start_time - 1.0, window)
        if bot_sample == []:
            continue
        rows = [*cc.member_congestion, bot_sample]
        distance = 1 - np.corrcoef(rows)
        off_diagonal = ~np.eye(distance.shape[0], dtype=bool)
        max_distance = np.max(distance[np.where(off_diagonal)])
        if 0 <= max_distance < 0.5:
            evidence += 1
    print(f"Find {evidence} evidence of link sharing.")
    return evidence

# Trace is a csv file gained from bash_ping shell codes. 
# NOTE: get_minRTT, get_loss, get_abnRTT and init_congestion are defined once, earlier in
# this file, and must not be redefined here. The verbatim copies of the first three were
# dropped from this spot together with an unfinished `init_congestion(trace)` stub that
# silently replaced the real implementation (the stub referenced an undefined name `t`
# and returned nothing), which broke c_group_congestions().

def bot_init_from_json_files(folder_path):
    """Load every bot JSON file found below ``folder_path`` into an attack flow set.

    The directory tree is walked recursively; each file ending in '.json' is turned
    into an atf via atf.from_json(). The resulting list determines the attack set.
    """
    atf_set = []
    for root, _dirs, files in os.walk(folder_path):
        json_paths = [os.path.join(root, name) for name in files if name.endswith('.json')]
        for json_path in json_paths:
            atf_set.append(atf.from_json(json_path))
    return atf_set

def bot_init_debugs(phy_topo):
    """Generate every possible attack flow of a topology (debug / fast example).

    With this helper no bot JSON files need to be configured.

    INPUT: a networkx Graph ``phy_topo`` whose nodes carry the attributes "Internal"
           and "ip".
    OUTPUT: one atf per ordered pair of nodes with ``Internal == 1``. The flow's
            route is the shortest path between the two nodes, expressed as the "ip"
            of every node on it; ``route`` and ``route_ip`` both hold that path and
            ``weight`` is 0. The obfuscated routes are left empty and the
            delay/SD values are 0.

    The atf constructor is called with every field spelled out; it used to be called
    with ``src`` and ``dst`` only, which does not match its signature.
    """
    def _attack_flows(phy_topo):
        # All ordered (src, dst) pairs of internal nodes.
        internal_nodes = [node for node in phy_topo.nodes
                          if phy_topo.nodes[node]["Internal"] == 1]
        return list(itertools.permutations(internal_nodes, 2))

    def atf_set_init(phy_topo):
        atf_set = []
        for src, dst in _attack_flows(phy_topo):
            # When several equally short routes exist, shortest_path returns just one
            # of them, so it is called exactly once per flow.
            node_path = nx.shortest_path(phy_topo,source=src,target=dst)
            ip_path = [phy_topo.nodes[node]["ip"] for node in node_path]
            atf_set.append(atf(src=src, dst=dst, weight=0, delay=0,
                               route=ip_path, route_nh=[], route_ip=list(ip_path),
                               route_eq=[], nh_hd=0, similarity_v=0, similarity_p=0))
        return atf_set

    return atf_set_init(phy_topo)

def get_random_bs_pair(number,atf_set,seed=None):
    """Draw ``number`` random (bot, server) pairs from ``atf_set`` without replacement.

    This function corresponds to the Coremelt+Random attack; ``number`` is the budget.

    Args:
        number: how many flows to draw.
        atf_set: list of candidate flows.
        seed: optional seed for a reproducible draw. With the default ``None`` the
            global random generator is re-seeded from system entropy first, exactly
            as before; with a seed, a private generator is used and the global
            random state is left untouched.

    Returns:
        ``atf_set`` itself when ``number`` exceeds its size, otherwise a new list
        with ``number`` distinct flows.
    """
    if number > len(atf_set):
        return atf_set
    if seed is None:
        random.seed()
        rng = random
    else:
        rng = random.Random(seed)
    chosen_positions = rng.sample(range(0,len(atf_set)),number)
    return [atf_set[i] for i in chosen_positions]

def get_edge_from_list(l):
    """Turn a route (sequence of hops) into the list of edg objects between neighbours."""
    return [edg(l[i], l[i + 1]) for i in range(len(l) - 1)]

def link_map_actual_route(atf_set):
    """Map every link of the flows' ``route_ip`` to the flows crossing it (used for debug).

    Returns a list of distinct edg objects; each carries the flows that traverse it
    and the matching flow density.
    """
    known_edges = []
    for flow in atf_set:
        for hop in get_edge_from_list(flow.route_ip):
            if hop not in known_edges:
                known_edges.append(hop)
            # Register the flow on the stored edge that equals this hop.
            inc_fd_in_edg_list(hop,flow,known_edges)
    return known_edges    


def link_map(atf_set,route_name):
    """Map every link of the selected route to the flows crossing it.

    ``route_name`` selects the route attribute: 'eq' -> route_eq, 'nh' -> route_nh,
    anything else -> route. For 'nh' the hops are used as they are; otherwise each
    hop is reduced to its address without the last dot-separated component.

    Returns a list of distinct edg objects, each holding its flows and flow density.
    """
    use_full_address = route_name == 'nh'

    def _endpoint(ip):
        if use_full_address:
            return ip
        return '.'.join(ip.split('.')[:-1])

    def _edges_of(hops):
        return [edg(_endpoint(hops[i]), _endpoint(hops[i + 1])) for i in range(len(hops) - 1)]

    known_edges = []
    for flow in atf_set:
        if route_name == 'eq':
            hops = flow.route_eq
        elif route_name == 'nh':
            hops = flow.route_nh
        else:
            hops = flow.route
        for hop_edge in _edges_of(hops):
            if hop_edge not in known_edges:
                known_edges.append(hop_edge)
            inc_fd_in_edg_list(hop_edge,flow,known_edges)
    return known_edges

def get_individual_atfs(edg,atf_set):
    """Return the flows of ``atf_set`` that are not in ``edg.atf_set`` (order kept)."""
    remaining = []
    for flow in atf_set:
        if flow in edg.atf_set:
            continue
        remaining.append(flow)
    return remaining

def get_number_of_bots(candi_atfs,number):
    """Return the first ``number`` candidates, or an empty list when ``number`` <= 0."""
    return [] if number <= 0 else candi_atfs[0:number]

def link_map_crossfire(atf_set):
    """Map every link of the flows' ``route_eq`` to the flows crossing it.

    Returns a list of distinct edg objects, each holding its flows and flow density.
    """
    known_edges = []
    for flow in atf_set:
        for hop in get_edge_from_list(flow.route_eq):
            if hop not in known_edges:
                known_edges.append(hop)
            inc_fd_in_edg_list(hop,flow,known_edges)
    return known_edges

# def sort_with_ohd(indv):
#     indv.sort(reverse=True)
#     return 

def _pre(ip):
    ip_p = ip.split('.')[:-1]
    ip_prefix = '.'.join(ip_p)
    return ip_prefix

def get_subnet_edge_from_list(l):
    """Turn a route of addresses into edg objects between neighbouring prefixes.

    Each hop is reduced to its address without the last dot-separated component
    before the edge is built.
    """
    def _prefix(ip):
        return '.'.join(ip.split('.')[:-1])

    return [edg(_prefix(l[i]), _prefix(l[i + 1])) for i in range(len(l) - 1)]

def botnum_success(atf_set,e_tar,route_name):
    """Count the flows whose route contains the edge ``e_tar``.

    For ``route_name == 'eq'`` the flow's ``route_ip`` is inspected, otherwise its
    ``route``.
    """
    hits = 0
    for flow in atf_set:
        hops = flow.route_ip if route_name =='eq' else flow.route
        if e_tar in get_edge_from_list(hops):
            hits += 1
    return hits


def inc_fd_in_edg_list(edg,atf,edg_list):
    """Register ``atf`` on every element of ``edg_list`` that equals ``edg``.

    Each matching element gets the flow appended to its ``atf_set`` and its ``fd``
    incremented. The (mutated) ``edg_list`` is returned.
    """
    for known in edg_list:
        if not known == edg:
            continue
        known.atf_set.append(atf)
        known.fd += 1
    return edg_list

def gen_control_group(atf_set,target_edg):
    """Pick a control group for ``target_edg``.

    Candidates are the flows whose ``route`` crosses the target link. Among all
    non-empty combinations of candidates, the one whose members have the fewest links
    in common is selected; smaller groups are tried first, so the smallest such group
    wins. The search stops as soon as a group shares exactly one link (which can only
    be the target link).

    Links are undirected, consistent with edg equality: (a, b) and (b, a) are the
    same link.

    The previous version could not run: it called an undefined name ``combination``,
    put lists into a ``set`` and started with the empty combination, which always
    "shared" zero links.

    NOTE: the search enumerates combinations and is therefore exponential in the
    number of candidates.

    Args:
        atf_set: iterable of flows exposing a ``route`` sequence.
        target_edg: object exposing ``src`` and ``dst``.

    Returns:
        list of flows forming the control group ([] when no flow crosses the link).
    """
    def _links(route):
        # Hashable, direction-independent key per hop.
        return {frozenset((route[i], route[i + 1])) for i in range(len(route) - 1)}

    target_link = frozenset((target_edg.src, target_edg.dst))
    candidates = []
    for flow in atf_set:
        flow_links = _links(flow.route)
        if target_link in flow_links:
            candidates.append((flow, flow_links))

    best_group = []
    fewest_shared = None
    for size in range(1, len(candidates) + 1):
        for group in combinations(candidates, size):
            shared_links = set.intersection(*(links for _flow, links in group))
            if fewest_shared is None or len(shared_links) < fewest_shared:
                fewest_shared = len(shared_links)
                best_group = [flow for flow, _links_of_flow in group]
                if fewest_shared == 1:
                    # Only the target link is shared: cannot be improved.
                    return best_group
    return best_group

def assign_ip_addr_randomly(phy_topo,addr):
    """Give every node of ``phy_topo`` a random IP address (used for debug).

    ``addr`` is a CSV file with an 'ip_addr' column. Each cell holds a bracketed,
    comma-separated list of quoted addresses that may contain ``nan`` entries, e.g.
    ``['1.2.3.4', nan, '5.6.7.8']``. Every node is paired with a distinct random row
    and receives one random address of that row in its "ip" attribute. When DEBUG is
    set, the full address list of the row is stored as "ip_list_debug".

    Fixes relative to the previous version: rows without a ``nan`` entry no longer
    raise, and the candidate addresses are sorted so that the choice is reproducible
    under a fixed random seed.

    Raises:
        ValueError: when the file has fewer rows than the topology has nodes, or a
            selected row contains no usable address.

    Returns:
        ``phy_topo`` (modified in place).
    """
    #addr = './CAIDA dataset/as_id_node/ipaddr_13576.csv'
    df = pd.read_csv(addr)
    nodes = list(phy_topo.nodes)
    if len(nodes) > len(df):
        raise ValueError(
            f"{addr} provides {len(df)} address rows but the topology has {len(nodes)} nodes")
    row_ids = random.sample(range(0,len(df)),len(nodes))
    for row_id,node in zip(row_ids,nodes):
        raw_entries = set(df.loc[row_id,'ip_addr'][1:-1].split(', '))
        raw_entries.discard('nan')
        # Strip the surrounding quotes; drop entries that end up empty.
        ip_list_formal = [entry[1:-1] for entry in sorted(raw_entries) if entry[1:-1]]
        if not ip_list_formal:
            raise ValueError(f"row {row_id} of {addr} contains no usable IP address")
        phy_topo.nodes[node]["ip"] = random.choice(ip_list_formal)
        if DEBUG:
            phy_topo.nodes[node]["ip_list_debug"] = ip_list_formal
    return phy_topo

def CrossPoint_Attack_SD(folder_path,budget,route_name,link_capacity=15):
    """Run the CrossPoint statistical-disparity (SD) attack and print the outcome.

    Workflow:
      1. Load the attack-flow set with ``bot_init_from_json_files(folder_path)``.
      2. Build and rank the link map (the CrossFire step):
         * ``'eq'``: ``link_map_crossfire`` builds the edges; ``cp_num`` of an
           edge is increased once for every flow whose subnet-level edges
           (``get_subnet_edge_from_list(atf.route_eq)``) contain the edge's
           subnet-level form; edges are sorted by ``fd + cp_num``, descending.
         * ``'nh'``: ``link_map(atf_set, route_name)`` builds the edges, which
           are then sorted in descending order using their own ordering.
      3. For each of the top five edges, take the flows returned by
         ``get_individual_atfs``, sort them by ``get_sd_ip()`` descending and
         select bots with ``get_number_of_bots(candidates, budget - e.fd)``.

    When the module-level ``DEBUG`` flag is truthy, the Abilene topology is
    read from ``Abilene.gml``, nodes get random addresses from
    ``ipaddr_13576.csv``, an empty flow set is replaced by
    ``bot_init_debugs(phy_topo)``, and each target is checked against
    ``link_capacity``; the function returns at the first target that reaches it.

    Args:
        folder_path: folder holding the bot-config JSON files.
        budget: total number of bots the attacker may use against one link.
        route_name: ``'eq'`` or ``'nh'``; selects the route used for the link map.
        link_capacity: number of flows needed to count a link as successfully
            attacked in DEBUG mode. Defaults to 15, the value that was
            previously hard-coded for the Abilene example.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: if ``route_name`` is neither ``'eq'`` nor ``'nh'``.
    """
    # Reject unknown route names before doing any work; otherwise no branch
    # below would build e_list and the function would crash much later.
    if route_name not in ('eq', 'nh'):
        raise ValueError(f"route_name must be 'eq' or 'nh', got {route_name!r}")

    # Init attack flow set.
    print("Start attack..")
    atf_set = bot_init_from_json_files(folder_path)
    print("Init atf set finished.")
    # If you want to run a fast workflow instead of using real probed bot JSON files:
    # You can try the debug mode
    if DEBUG:
        phy_topo = nx.read_gml("Abilene.gml",label='id')
        # Link capacity is a value that influences the success rate of the attack,
        # as we addressed in Figures 8 and 9 of our thesis.
        # For a simple example, keep the default link_capacity (15, Abilene).

        # The following is what we use in experiments:
        # A typical value is 80% of the links are secure (according to Crossfire).
        # link_cp_bics = [9, 14, 15, 25, 26, 26, 28, 30, 33, 34, 36, 37, 38, 39, 40,
        #                     42, 42, 43, 45, 46, 48, 50, 53, 57, 62, 63, 64, 65, 66, 67, 
        #                     75, 78, 82, 97, 98, 102, 103, 114, 116, 124, 129, 137, 170, 
        #                     176, 205, 224, 263, 277]
        # link_cp_uscarrier = [2, 2, 4, 19, 24, 31, 44, 64, 91, 96, 108, 123, 130, 
        #                          148, 166, 177, 180, 184, 216, 218, 219, 241, 254, 255, 
        #                          266, 277, 281, 283, 304, 306, 310, 312, 312, 312, 312, 
        #                          313, 314, 314, 314, 314, 314, 314, 314, 314, 314, 317, 
        #                          317, 317, 318, 325, 329, 342, 343, 355, 358, 387, 392, 
        #                          405, 410, 413, 418, 443, 444, 449, 476, 493, 493, 501, 
        #                          505, 508, 513, 520, 531, 533, 536, 575, 576, 592, 592, 
        #                          615, 615, 617, 620, 624, 624, 624, 624, 624, 640, 641, 
        #                          666, 685, 689, 704, 706, 714, 733, 736, 738, 750, 788, 
        #                          790, 804, 866, 890, 908, 915, 930, 930, 930, 930, 930, 
        #                          930, 945, 984, 997, 1002, 1022, 1050, 1074, 1076, 1096, 
        #                          1188, 1190, 1226, 1232, 1232, 1232, 1232, 1274, 1330, 
        #                          1456, 1458, 1480, 1492, 1530, 1530, 1530, 1544, 1610, 
        #                          1774, 1778, 1820, 1824, 1824, 1869, 1883, 1888, 1951, 
        #                          2191, 2227, 2442, 2570, 2608, 2642, 2651, 2674, 2815, 
        #                          2916, 3031, 3209, 3504, 3808, 4004, 4216, 4290, 4544, 
        #                          4571, 4619, 4675, 4794,4813, 4887, 4975, 5857, 6026, 
        #                          6113, 6179, 6210, 6257, 6434, 6508, 6738, 6832, 7244, 
        #                          9173, 9279, 9393, 11404]
        #  link_cp_viatel = [82, 168, 262, 389, 423, 441, 461, 463, 477, 503, 523, 559, 
        #                       571, 613, 635, 679, 705, 711, 733, 749, 755, 758, 781, 787, 
        #                       791, 795, 802, 815, 852, 854, 855, 858, 873, 879, 893, 896, 
        #                       897, 910, 918, 933, 950, 977, 982, 986, 997, 999, 1002, 1007, 
        #                       1016, 1050, 1067, 1083, 1086, 1094, 1115, 1119, 1121, 1124, 
        #                       1131, 1139, 1145, 1191, 1211, 1215, 1223, 1285, 1299, 1305, 
        #                       1331, 1359, 1367, 1383, 1407, 1413, 1437, 1444, 1451, 1467, 
        #                       1517, 1533, 1536, 1543, 1623, 1627, 1705, 1735, 
        #                       1787, 1843, 2489, 2549, 2607, 2667]
        phy_topo = assign_ip_addr_randomly(phy_topo,'ipaddr_13576.csv')
        # you can change the ipaddr of the CAIDA dataset.
        # The influence should be trivial.
        if atf_set == []:
            print("Something wrong, atf set is missing...")
            # This code will generate bots randomly around the topology.
            atf_set = bot_init_debugs(phy_topo)

    # The following 2 steps are the CrossFire attack.
    # route_name selects which route the link map is built from:
    # if "eq", link map uses route_eq for the attack.
    # if "nh", link map uses route_nh for the attack.
    if route_name == 'eq':
        e_list = link_map_crossfire(atf_set)
        # Each flow's subnet-level edges do not depend on the edge being
        # scored, so build them once instead of once per (edge, flow) pair.
        # NOTE(review): assumes get_subnet_edge_from_list has no side effects.
        subnet_edges_per_flow = [get_subnet_edge_from_list(atf.route_eq)
                                 for atf in atf_set]
        for e in e_list:
            e_new = edg(_pre(e.src),_pre(e.dst))
            for e_tmp in subnet_edges_per_flow:
                if e_new in e_tmp:
                    e.cp_num += 1
        e_list.sort(reverse=True,key = lambda x:x.fd+x.cp_num)
    else:
        # route_name == 'nh' (validated above)
        e_list = link_map(atf_set,route_name)
        e_list.sort(reverse=True)
    potential_tar = e_list[0:5]
    if DEBUG:
        # In DEBUG mode you can compare the probed edges and the real edges.
        # The result is not used further in this function.
        e_list_actual = link_map_actual_route(atf_set)

    # By default, CrossPoint attacks try to check 5 links with side-channel.
    # (even though there are hundreds of links in large networks)
    # Checking more links is easy, and does not carry any risk of being discovered.
    for e in e_list:
        print(e)
    for e in potential_tar:

        # This step gets the remaining attack flows that do not pass the target edge e.
        # They might still pass edge e but be hidden by defenders; we need to figure it out.
        candidate_atfs = get_individual_atfs(e,atf_set)

        # You can try one of the statistical disparities against PTO defenses.
        candidate_atfs.sort(reverse=True,key=lambda x:x.get_sd_ip())
        # candidate_atfs.sort(reverse=True,key=lambda x:x.get_sd_nh())
        ##### Important #####
        # Directly attack with SD; this does not rely on any congestion measurement.
        attack_atfs = get_number_of_bots(candidate_atfs, budget - e.fd)
        print("Attack flows number :", len(attack_atfs), budget, e.fd)
        if DEBUG:
            succ_bot = botnum_success(attack_atfs,e,route_name)
            print(f"edge {e} Success bot number =", succ_bot +e.fd)
            if e.fd + succ_bot >= link_capacity:
                print("In the example, you find sufficient bots against a link!")
                return
            else:
                print("Attack failed, try again with larger budget.")


In [159]:
# Run the attack against EqualNet:
# Change the second argument (the budget) to try to 'attack' the network!
# This example uses a simple topology named Abilene.
# You can run NetHide or EqualNet to create more complex topologies to test our CrossPoint attack!
CrossPoint_Attack_SD('./botconfig-example-equalnet/',15,'eq')
# Note: the basic link capacity is set to 15 in this example,
# so your budget must be at least 15 in this case.
# Note: the CrossPoint-SD attack is not always efficient, as our experiments show (Figures 9-10).

Start attack..
Init atf set finished.
(139.60.110.217,198.74.30.62),flow_density = 14,atf_set = [(10, 0), (10, 4), (10, 2), (2, 8), (2, 10), (6, 10), (0, 8), (10, 6), (6, 8), (8, 6), (4, 9), (8, 0), (8, 4), (8, 2)]
(198.74.30.44,139.60.110.7),flow_density = 11,atf_set = [(2, 9), (4, 10), (8, 5), (10, 5), (8, 3), (10, 3), (0, 10), (10, 1), (8, 1), (6, 9), (4, 8)]
(198.74.30.62,216.106.201.6),flow_density = 9,atf_set = [(10, 0), (10, 4), (10, 2), (2, 8), (3, 8), (2, 10), (1, 6), (1, 10), (4, 9)]
(198.74.30.44,216.106.201.6),flow_density = 9,atf_set = [(6, 2), (8, 3), (6, 4), (9, 2), (0, 10), (2, 6), (8, 1), (0, 6), (6, 0)]
(216.106.201.151,198.74.30.44),flow_density = 8,atf_set = [(2, 9), (4, 10), (3, 10), (10, 3), (10, 1), (1, 8), (3, 6), (4, 8)]
(198.74.30.62,216.106.201.151),flow_density = 8,atf_set = [(9, 4), (6, 3), (4, 6), (0, 8), (8, 0), (6, 1), (8, 4), (8, 2)]
(139.60.110.217,198.74.30.44),flow_density = 5,atf_set = [(9, 6), (9, 2), (3, 10), (1, 8), (5, 8)]
(139.60.110.7,198.74.3

In [158]:
# Run the attack against NetHide:
CrossPoint_Attack_SD('./botconfig-example-nethide/',30,'nh')
# Note: the basic link capacity is set to 15 in this example,
# so your budget must be at least 15 in this case.
# Note: the CrossPoint-SD attack is not always efficient, as our experiments show (Figures 9-10).

Start attack..
Init atf set finished.
(7,10),flow_density = 10,atf_set = [(7, 1), (4, 10), (8, 10), (3, 10), (10, 8), (6, 10), (10, 7), (1, 8), (5, 10), (7, 10)]
(10,1),flow_density = 10,atf_set = [(7, 1), (4, 1), (9, 1), (1, 9), (0, 10), (10, 1), (1, 10), (1, 8), (2, 1), (6, 1)]
(5,4),flow_density = 10,atf_set = [(5, 4), (3, 8), (5, 6), (3, 9), (4, 9), (4, 5), (2, 5), (0, 5), (3, 5), (4, 8)]
(2,9),flow_density = 10,atf_set = [(2, 9), (4, 2), (9, 1), (2, 8), (9, 2), (2, 10), (0, 8), (3, 2), (0, 9), (8, 2)]
(7,8),flow_density = 10,atf_set = [(7, 9), (8, 10), (10, 8), (8, 7), (1, 8), (6, 8), (5, 10), (8, 0), (7, 8), (6, 9)]
(8,9),flow_density = 10,atf_set = [(7, 9), (2, 8), (0, 8), (8, 9), (3, 9), (9, 8), (4, 9), (6, 9), (5, 9), (8, 2)]
(0,2),flow_density = 10,atf_set = [(0, 2), (2, 0), (6, 2), (10, 2), (0, 8), (5, 2), (1, 2), (7, 2), (6, 0), (0, 9)]
(7,6),flow_density = 10,atf_set = [(7, 3), (4, 10), (7, 6), (6, 7), (7, 4), (3, 10), (6, 10), (6, 8), (7, 2), (6, 9)]
(8,5),flow_density = 