In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell is executed, so changes to local packages are picked up.
%reload_ext autoreload
%autoreload 2

In [2]:
import authentic_performance as ap
from scipy.stats import uniform, norm
import networkx as nx
import numpy as np
import pandas as pd

# <>

In [3]:
def generate_nodes(n, strengths):
    """Create a directed graph holding ``n`` attributed nodes and no edges.

    Nodes are labelled ``0 .. n-1``. Node ``i`` receives ``strengths[i]`` as
    its ``"strength"`` attribute (pairing stops at the shorter of ``range(n)``
    and ``strengths``), and every node gets ``"fraud"`` set to ``False``.

    Returns the newly built ``nx.DiGraph``.
    """
    node_ids = range(n)

    graph = nx.DiGraph()
    graph.add_nodes_from(node_ids)

    # First pass: attach the strength of each node.
    strength_attrs = {}
    for node, value in zip(node_ids, strengths):
        strength_attrs[node] = {"strength": value}
    nx.set_node_attributes(graph, strength_attrs)

    # Second pass: nobody is a fraudster until explicitly flagged.
    honest_attrs = {}
    for node in node_ids:
        honest_attrs[node] = {"fraud": False}
    nx.set_node_attributes(graph, honest_attrs)

    return graph

def specify_fraudster(G, index=0, strength=None):
    """Flag node ``index`` of ``G`` as a fraudster, modifying ``G`` in place.

    When ``strength`` is anything other than ``None`` it also replaces the
    node's ``"strength"`` attribute. Returns ``None``.
    """
    # Strength (if overridden) goes first so the attribute order matches
    # setting it before the fraud flag.
    updates = {} if strength is None else {"strength": strength}
    updates["fraud"] = True
    nx.set_node_attributes(G, {index: updates})

def generate_edges(n, p, strengths):
    """Draw random edges and orient each one from the weaker to the stronger node.

    An undirected ``nx.erdos_renyi_graph(n, p)`` is sampled; for every edge the
    endpoint with the strictly larger entry in ``strengths`` becomes the target.
    On a tie the second endpoint of the sampled edge is the target.

    Returns a 2-column array of ``[source, target]`` rows.
    """
    random_graph = nx.erdos_renyi_graph(n, p)
    pairs = pd.DataFrame(random_graph.edges, columns=["v1", "v2"])

    # Look up the strength of both endpoints by node label.
    for endpoint, strength_col in (("v1", "v1_strength"), ("v2", "v2_strength")):
        lookup = pd.Series(strengths, name=strength_col)
        pairs = pairs.merge(lookup, left_on=endpoint, right_index=True)

    v1_is_stronger = pairs["v1_strength"] > pairs["v2_strength"]
    pairs["target"] = np.where(v1_is_stronger, pairs["v1"], pairs["v2"])
    pairs["source"] = np.where(v1_is_stronger, pairs["v2"], pairs["v1"])
    return pairs[["source", "target"]].values

def generate_fraudstersEdges(G, n, strengths, fraud_probability=0.5):
    """Rewire the links of every fraudster node of ``G`` in place.

    For each node whose ``"fraud"`` attribute is truthy, the node is removed
    (which drops all of its edges) and re-added with the same ``"strength"``
    and ``fraud=True``. Its former degree ``d`` is then re-created as:

    * about ``round(d * fraud_probability)`` edges ``weaker -> fraudster``,
      tagged ``fraudulous=True``, whose sources are drawn without replacement
      among nodes whose entry in ``strengths`` is below the fraudster's
      strength;
    * the remaining edges, tagged ``fraudulous=False``, to other nodes drawn
      without replacement and oriented from the weaker endpoint to the
      stronger one (on a tie the fraudster is the target).

    Parameters
    ----------
    G : graph
        Graph whose nodes carry ``"strength"`` and ``"fraud"`` attributes.
    n : int
        Number of nodes; node labels are assumed to be ``0 .. n-1``.
    strengths : array-like
        Strength per node label. The fraudster's own strength is read from
        its node attribute, which may differ from ``strengths[f]``.
    fraud_probability : float, default 0.5
        Share of the fraudster's links that become easy wins.

    Notes
    -----
    The fraudster is never drawn as its own neighbour (no self-loops). Link
    counts are clamped to the number of available candidates instead of
    letting ``np.random.choice`` raise; easy wins that cannot be placed are
    re-created as regular links where possible.
    """
    strengths = np.asarray(strengths)  # allow plain lists as well as arrays
    node_ids = np.arange(n)
    fraudsters = [node for node, attrs in G.nodes(data=True) if attrs["fraud"]]

    for f in fraudsters:
        f_strength = G.nodes[f]["strength"]
        f_degree = G.degree[f]

        # Dropping and re-adding the node is the simplest way to clear its edges.
        G.remove_node(f)
        G.add_node(f, strength=f_strength, fraud=True)

        # strengths[f] may be stale when the node attribute was overridden,
        # so the fraudster has to be excluded from its own candidates.
        weaker = np.flatnonzero(strengths < f_strength)
        weaker = weaker[weaker != f]
        wanted_fraudulous = round(f_degree * fraud_probability)
        n_fraudulous_links = max(0, min(wanted_fraudulous, f_degree, weaker.size))
        easy_win_neighbours = np.random.choice(weaker, n_fraudulous_links, replace=False)

        taken = np.append(easy_win_neighbours, f)
        others = np.flatnonzero(~np.isin(node_ids, taken))
        n_regular_links = min(f_degree - n_fraudulous_links, others.size)
        regular_neighbours = np.random.choice(others, n_regular_links, replace=False)

        fraudulous_links = [(source, f) for source in easy_win_neighbours.tolist()]
        # Regular links point towards the stronger endpoint.
        regular_links = [
            (f, neighbour) if strengths[neighbour] > f_strength else (neighbour, f)
            for neighbour in regular_neighbours.tolist()
        ]
        G.add_edges_from(fraudulous_links, fraudulous=True)
        G.add_edges_from(regular_links, fraudulous=False)
    
def generate_network(n, p, distribution=uniform, fraudster_index=0, fraudster_strength=None,
                     fraud_probability=0.5):
    """Build a random directed network containing one flagged fraudster.

    Steps: draw ``n`` strengths from ``distribution``, create the attributed
    nodes, add random edges oriented towards the stronger endpoint, flag the
    fraudster and finally rewire the fraudster's links.

    Parameters
    ----------
    n : int
        Number of nodes.
    p : float
        Edge probability forwarded to ``generate_edges``.
    distribution : object with ``rvs(size=...)``, default ``scipy.stats.uniform``
        Distribution the node strengths are sampled from.
    fraudster_index : int, default 0
        Label of the node flagged as fraudster.
    fraudster_strength : float or None, default None
        Optional strength override for the fraudster.
    fraud_probability : float, default 0.5
        Forwarded to ``generate_fraudstersEdges``. It was previously fixed at
        0.5 inside this function, so callers could not change it.

    Returns
    -------
    The generated graph.
    """
    strengths = distribution.rvs(size=n)
    G = generate_nodes(n, strengths)
    G.add_edges_from(generate_edges(n, p, strengths))
    specify_fraudster(G, index=fraudster_index, strength=fraudster_strength)
    generate_fraudstersEdges(G, n, strengths, fraud_probability=fraud_probability)
    return G

In [4]:
# Parameters of the synthetic network.
n = 100
distribution = uniform
p = 10/n  # edge probability handed to generate_network; chosen so that n*p = 10
# NOTE(review): this value is not passed to generate_network in the call below,
# so it has no effect on the generated graph — confirm whether it should be.
fraud_probability=0.1

# The fraudster (generate_network's default index 0) gets distribution.expect()
# as its strength, i.e. the distribution's expected value per the SciPy docs.
G = generate_network(n, p, distribution, fraudster_strength=distribution.expect())
# Write the graph as GEXF to a relative path; presumably the gephi_viz/
# directory must already exist — verify before running on a fresh checkout.
nx.write_gexf(G, "gephi_viz/synthetic.gexf")