# Conception data for Experiment 4

In [7]:
# importation of the packages

import numpy as np
from graph_tool.all import *
import random
import graph_tool.topology as gt
import graph_tool.clustering as gc
import graph_tool.centrality as gcent
import graph_tool.generation as gg
import graph_tool.stats as gs

import matplotlib.pyplot as plt
import pickle
import matplotlib.colors as colors
import matplotlib
import copy
from matplotlib.lines import Line2D 
import matplotlib.pyplot as plt

from scipy.stats.stats import pearsonr
import pandas as pd

%matplotlib inline
import seaborn as sns
from scipy.stats import gaussian_kde
from collections import Counter
import math
import scipy.stats
import collections

import matplotlib.ticker as mtick
from matplotlib.ticker import FormatStrFormatter
from scipy.stats import norm 
from sklearn.neighbors import KernelDensity 
from sklearn.utils.fixes import parse_version 

from sklearn.svm import SVC # "Support vector classifier"
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score

from collections import Counter
#from tabulate import tabulate
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.svm import LinearSVC

from multiprocessing import Pool
from  matplotlib.colors import LinearSegmentedColormap

from scipy.stats.stats import pearsonr 
from collections import defaultdict
import itertools

import networkx as nx
import os
import re
from scipy.stats import lognorm

from scipy.optimize import curve_fit
import scipy.stats as stats

## A - Useful functions

### 1-Functions to draw a random activity, beta and phi from the statistics of the #GiletsJaunes propagation

In [8]:
def get_activity(degree):
    
    """
    Draw a random non-negative activity for a node of the given degree.

    The mean and the standard deviation of the activity are affine functions
    of log(degree). Their four coefficients (a_mean, b_mean, a_std, b_std) are
    read from 'param_degree_activity.pickle' in the current working directory.
    Values are drawn from the corresponding normal distribution until a
    non-negative one is obtained.

    Parameters:
    - degree: degree of the node (must be strictly positive)

    Return:
    - x: the sampled activity (x >= 0)

    Raises:
    - ValueError: if degree is not strictly positive
    - RuntimeError: if no non-negative value is obtained after many attempts
    """
    
    if degree <= 0:
        raise ValueError('degree must be strictly positive, got ' + repr(degree))
    
    # NOTE(review): pickle.load executes arbitrary code; only use trusted parameter files.
    with open('param_degree_activity.pickle', 'rb') as handle:
        a_mean, b_mean, a_std, b_std = pickle.load(handle)
    
    log_degree = math.log(degree)
    mean_activity = log_degree*a_mean + b_mean
    std_activity = log_degree*a_std + b_std
    
    # Rejection sampling written as a loop: the parameter file is read only
    # once, and a low acceptance probability can no longer exhaust the
    # interpreter's recursion limit. The loop is bounded so that degenerate
    # parameters (e.g. NaN, or a negative mean with zero spread) still fail
    # with an error instead of hanging.
    max_attempts = 100000
    for _ in range(max_attempts):
        x = np.random.normal(loc=mean_activity, scale=std_activity, size=None)
        if x >= 0:
            return(x)
    
    raise RuntimeError('no non-negative activity drawn after ' + str(max_attempts) + ' attempts')

def get_beta(degree, percentage, name_dataset = 'GiletsJaunes'):
    
    """
    Draw a random beta value for a node of the given degree.

    The mean and the standard deviation of beta are power laws of the degree
    (exp(b) * degree**a). Their coefficients and the list of observed betas
    are read from 'param_beta_<name_dataset>.pickle' in the current working
    directory. A log-normal distribution with that mean and standard deviation
    is evaluated on a regular grid between 0 and the requested percentile of
    the observed betas, and one grid point is drawn with a probability
    proportional to the density.

    Parameters:
    - degree: degree of the node
    - percentage: percentile (0-100) of the observed betas used as upper bound
    - name_dataset: dataset whose parameter file is read

    Return:
    - number: the sampled beta, between 0 and the chosen percentile
    """
    
    # NOTE(review): pickle.load executes arbitrary code; only use trusted parameter files.
    with open('param_beta_'+name_dataset+'.pickle', 'rb') as handle:
        a_mean, b_mean, a_std, b_std, list_beta = pickle.load(handle)
        
    mean_beta = math.exp(b_mean)*degree**a_mean
    std_beta = math.exp(b_std)*degree**a_std
    lower, upper = 0, np.percentile(list_beta, percentage)
    
    # parameters of the underlying normal distribution, obtained from the
    # mean and the standard deviation of the log-normal one
    mu_norm = np.log(mean_beta**2 / math.sqrt(mean_beta**2 + std_beta**2))
    std_norm = math.sqrt(np.log(1 + std_beta**2 / mean_beta**2))

    shape = std_norm
    scale = math.exp(mu_norm)
    loc = 0
    
    # discretised density on [lower, upper]; the truncation is implicit
    # because only grid points inside the interval can be drawn
    x = np.linspace(lower,upper,500000)
    pdf_fitted = lognorm.pdf(x, shape, loc, scale)
    
    number = random.choices(x, weights=pdf_fitted, k=1)[0]
 
    return(number)

def get_phi(percentage, name_dataset = 'GiletsJaunes'):
    
    """
    Draw a random phi (threshold) value.

    The parameters (p1, p2) of an exponential density p1 * exp(-p2 * x) and
    the list of observed proportions are read from
    'param_phi_<name_dataset>.pickle'. The density is evaluated on 1000
    regularly spaced points between 0 and the requested percentile of the
    observed proportions, and one point is drawn with a probability
    proportional to the density.

    Parameters:
    - percentage: percentile (0-100) of the observed proportions used as upper bound
    - name_dataset: dataset whose parameter file is read

    Return:
    - the sampled phi, between 0 and the chosen percentile
    """
    
    pickle_path = 'param_phi_'+name_dataset+'.pickle'
    with open(pickle_path, 'rb') as handle:
        p1, p2, list_prop = pickle.load(handle)
    
    upper_bound = np.percentile(list_prop, percentage)
    grid = np.linspace(0, upper_bound, num=1000)
    
    # exponential density evaluated point by point on the grid
    density = []
    for point in grid:
        density.append(p1 * np.exp(-p2 * point))
    
    drawn = random.choices(grid, weights=density)
    return(drawn[0])


### 2-Function which creates a network

In [35]:
def make_random_network(network,N,mean_degree):
    
    """
    Build (or load) an undirected graph-tool graph of the requested kind.
    
    Parameters:
    - network: type of network, one of
        'BA'                    price network (gamma = 1.5)
        'SBM'                   stochastic block model with 20 groups of equal size
        'ER'                    random graph with binomial degrees
        'WS'                    circular graph (k = 2) with 5% of E = mean_degree*N/2 edges rewired at random
        'kregular'              random graph where every node has degree mean_degree
        'bimodal_distribution'  random graph whose degrees come from a mixture of two binomials
        'lognormal'             random graph with log-normal degrees
        'cm_<name>'             configuration model built from the pickled degree sequence '<name>'
        'twitter'               graph loaded from 'g_twitter_mention.pickle' (not synthetic)
        'sample_following_fast' sample of N nodes of the following network, using a pickled neighbour dictionary
        'sample_following'      same sampling, neighbours looked up in the edge array (slow)
    - N: number of nodes (ignored by 'cm_<name>' and 'twitter')
    - mean_degree: average degree (ignored by 'cm_<name>', 'twitter' and the two samplers)
    
    Return:
    - g: the network
    
    Raises:
    - ValueError: if network is not one of the names above
    
    """
    
    if network == 'BA':
        E = mean_degree*N/2
        m = round(E/(N-1))
        g = gg.price_network(N, m=m, c=None, gamma=1.5, directed=False, seed_graph=None)

    elif network == 'SBM':
        nber_group = 20
        b = [[k]*int(N/nber_group) for k in range(nber_group)]
        b = list(itertools.chain.from_iterable(b))
        p_between_groups = 0.001
        E = mean_degree*N/2
        nber_intergroup = nber_group*(nber_group-1)/2
        nber_edges_between_groups = p_between_groups*E*nber_intergroup
        nber_edges_in_groups = E - nber_edges_between_groups
        nber_edges_one_group = nber_edges_in_groups / nber_group
        # matrix of edge counts between groups; the diagonal holds twice the number of edges inside a group
        ers = np.ones((nber_group, nber_group))*p_between_groups*E + np.eye(nber_group)*(nber_edges_one_group*2 - p_between_groups*E)
        g = gg.generate_sbm(b, ers)
        gs.remove_parallel_edges(g)
        gs.remove_self_loops(g)

    elif network == 'ER':
        E = mean_degree*N/2
        p = E*2/N/(N-1)
        g = random_graph(N, lambda: np.random.binomial(N, p, size=None), directed=False)
        
    elif network == 'WS':
        # p proportion of edges to rewire
        p = 0.05
        g = gg.circular_graph(N, k=2)
        E = mean_degree*N/2
        list_edges = list(g.edges())
        list_nodes = list(g.vertices())

        # list of edges to rewire
        deleted_edges = random.sample(list_edges,int(p*E))
        
        # rewiring: each removed edge is replaced by an edge between two random distinct nodes
        for e in deleted_edges:
            g.remove_edge(e)
            new_edge_nodes = random.sample(list_nodes,2)
            g.add_edge(new_edge_nodes[0], new_edge_nodes[1]) 
        
    elif network == 'kregular':
        g = random_graph(N, lambda: mean_degree, directed=False)
        
    elif network == 'bimodal_distribution' :
        mu = mean_degree
        delta = 0.5
        sigma = 0.5
        # mean degrees of the two modes
        w1 = mu + sigma*math.sqrt((1-delta)/delta)
        w2 = mu - sigma*math.sqrt(delta/(1-delta))
        
        E1 = w1*N/2
        p1 = E1*2/N/(N-1)

        E2 = w2*N/2
        p2 = E2*2/N/(N-1)
        
        def deg_sample():
            # each mode is chosen with probability 1/2
            if random.uniform(0, 1) > 0.5:
                return np.random.binomial(N, p1, size=None)
            else:
                return np.random.binomial(N, p2, size=None)
        
        g = random_graph(N, deg_sample, directed = False)
        
    elif (network == 'lognormal'):

        # NOTE(review): mean_degree is used as the mean of the underlying normal
        # distribution and the sampler returns floats - confirm this is intended.
        g = random_graph(N, lambda: np.random.lognormal(mean=mean_degree, sigma=1.0, size=None), directed=False)
            
    elif (network[:2] == 'cm'): # configuration model, name expected as 'cm_<name>'
        
        # NOTE(review): absolute, machine-specific path.
        with open('/mnt/sdb1/elsa/ML_6/twitter_egonet/degreedistribution_'+network.split('_')[1]+'_oneweek.pickle', 'rb') as handle:
            degree_distribution_oneweek = pickle.load(handle)
            
        # the sum of the degrees must be even to build a configuration model
        if ((np.sum(degree_distribution_oneweek) % 2) != 0):
            degree_distribution_oneweek[-1] = degree_distribution_oneweek[-1] + 1
            
        G_oneweek = nx.configuration_model([int(k) for k in degree_distribution_oneweek])
        G_oneweek.remove_edges_from(nx.selfloop_edges(G_oneweek))
        # the set removes the parallel edges
        list_edges = list(set(list(G_oneweek.edges())))
        g = Graph(directed=False)
        for edge in list_edges:
            node1=edge[0]
            node2=edge[1]
            g.add_edge(node1, node2)
            
    elif (network == 'twitter'):
        
        with open('g_twitter_mention.pickle', 'rb') as handle:
            g = pickle.load(handle)
            
    elif (network == 'sample_following_fast'):
        
        x_file=os.getcwd().split('/');
        x_file =x_file[0]+'/'+x_file[1]+'/'+'sdc1'+'/'+'CHD_data'+'/'+'Twitter_wholenet'+'/'+'followernetreduced_mutmentnet_2ndLCC_010614-310318.edg'
        with open(x_file) as file:
            list_edges = file.readlines()  
        # each line holds one edge; the first two comma-separated fields are the node ids
        for k in range(len(list_edges)):
            list_edges[k] = [int(i) for i in re.split(',|\n',list_edges[k])[:2]]
        with open('degree_distribution_following_network.pickle', 'rb') as handle:
            degree_distribution = pickle.load(handle)
        with open('dict_set_neighbors_following_network.pickle', 'rb') as handle:
            dict_set_neighbors = pickle.load(handle)
        array_edges = np.asarray(list_edges)
        list_nodes = np.unique(array_edges)
        # get a random sample from the original one
        N_sample = N
        # pick up a random node
        current_node = np.random.choice(list_nodes)
        list_selected_nodes = [current_node]
        # set mirroring list_selected_nodes, so that membership tests do not scan the whole list
        set_selected_nodes = {current_node}
        # NOTE(review): never terminates if fewer than N nodes can be reached from the start node.
        while (len(list_selected_nodes)<N_sample):
            # get its neighbors which are not selected yet
            list_neighbors = [k for k in dict_set_neighbors[current_node] if k not in set_selected_nodes]
            while len(list_neighbors)==0:
                # dead end: restart from a random node which is already selected
                current_node = np.random.choice(list_selected_nodes)
                list_neighbors = [k for k in dict_set_neighbors[current_node] if k not in set_selected_nodes]
            # a candidate is accepted with probability 1/degree
            picked_node = np.random.choice(list_neighbors)
            while (1/degree_distribution[picked_node]<random.random()):
                picked_node = np.random.choice(list_neighbors)
            list_selected_nodes.append(picked_node)
            set_selected_nodes.add(picked_node)
            current_node = picked_node
        # keep the edges between selected nodes and relabel the nodes from 0 to N-1
        list_selected_edges = [k for k in array_edges if (k[0] in set_selected_nodes) and (k[1] in set_selected_nodes)]
        dict_node_index = {list_selected_nodes[k]:k for k in range(len(list_selected_nodes))}
        new_list_selected_edges = [(dict_node_index[a], dict_node_index[b]) for a,b in list_selected_edges]
        g = Graph(directed=False)
        g.add_edge_list(new_list_selected_edges)
        
    elif (network == 'sample_following'):
        x_file=os.getcwd().split('/');
        x_file =x_file[0]+'/'+x_file[1]+'/'+'sdc1'+'/'+'CHD_data'+'/'+'Twitter_wholenet'+'/'+'followernetreduced_mutmentnet_2ndLCC_010614-310318.edg'
        with open(x_file) as file:
            list_edges = file.readlines()
        for k in range(len(list_edges)):
            list_edges[k] = [int(i) for i in re.split(',|\n',list_edges[k])[:2]]
        array_edges = np.asarray(list_edges)
        list_nodes = np.unique(array_edges)
        with open('degree_distribution_following_network.pickle', 'rb') as handle:
            degree_distribution = pickle.load(handle)
        # pick up a random node
        current_node = np.random.choice(list_nodes)
        list_selected_nodes = [current_node]
        # set mirroring list_selected_nodes, so that membership tests do not scan the whole list
        set_selected_nodes = {current_node}
        while (len(list_selected_nodes)<N):
            # progress output
            print(len(list_selected_nodes))
            # get its neighbors
            loc_node = np.where(array_edges == current_node)
            array_edges_samples = array_edges[loc_node, :][0]
            list_neighbors = [k[0] if k[0]!=current_node else k[1] for k in array_edges_samples]
            list_neighbors = [k for k in list_neighbors if k not in set_selected_nodes]
            while len(list_neighbors)==0:
                current_node = np.random.choice(list_selected_nodes)
                loc_node = np.where(array_edges == current_node)
                array_edges_samples = array_edges[loc_node, :][0]
                list_neighbors = [k[0] if k[0]!=current_node else k[1] for k in array_edges_samples]
                list_neighbors = [k for k in list_neighbors if k not in set_selected_nodes]
            picked_node = np.random.choice(list_neighbors)
            while (1/degree_distribution[picked_node]<random.random()):
                picked_node = np.random.choice(list_neighbors)
            list_selected_nodes.append(picked_node)
            set_selected_nodes.add(picked_node)
            current_node = picked_node
        list_selected_edges = [k for k in array_edges if (k[0] in set_selected_nodes) and (k[1] in set_selected_nodes)]
        dict_node_index = {list_selected_nodes[k]:k for k in range(len(list_selected_nodes))}
        new_list_selected_edges = [(dict_node_index[a], dict_node_index[b]) for a,b in list_selected_edges]
        g = Graph(directed=False)
        g.add_edge_list(new_list_selected_edges) 

            
    else:
        # fail explicitly: falling through would hit `return(g)` with g undefined
        raise ValueError('It is not a name of network: ' + repr(network))
        
    return(g)

### 3-Function to compute the propagation

In [36]:
class Mixed_Propagation_fast:
    
    """
    This class makes a synthetic propagation on a network where every node
    follows either a simple or a threshold contagion.

    Parameters:
    - g: network 
    - percentage: percentage of the filtering (upper percentile used when drawing beta and phi)
    - name_dataset: dataset used for the activity, beta and phi, here 'GiletsJaunes'
    - rate: probability of getting infected through spontaneous adoption

    State (all dictionaries are keyed by the node index):
    - dict_status / dict_status_set: status of the nodes. 'U' is the initial
      status, 'A' is reached when dict_time_awareness is set and 'D' when
      dict_time_infection is set.
    - dict_type_contagion: 0 = awareness with probability beta at each
      exposure, 1 = awareness once the proportion of 'D' neighbours reaches
      the threshold, 2 = initial seed.
    - dict_is_seed: 1 = initial seed, 2 = spontaneous adopter, 0 otherwise.
    - dict_list_stim_nei_time: stimuli received, as tuples
      (sender, time, is_hashtag, is_first_infection).
    - last_time_post_nohash: last time a 'U' node was active without adopting (-1 if never).

    """
    
    def __init__(self,g,percentage, name_dataset = 'GiletsJaunes', rate = 0.005):

        self.g = g
        self.rate = rate

        # definiton of the quantities
        self.N = self.g.num_vertices()
        self.set_nodes = set(self.g.vertex_index)
        self.dict_degree = {self.g.vertex_index[v]:v.out_degree() for v in self.g.vertices()}
        self.dict_set_neighbors = {self.g.vertex_index[v]:set([self.g.vertex_index[n] for n in v.out_neighbors()]) for v in self.g.vertices()}

        # random parameters of the nodes
        self.dict_activities = {n:get_activity(self.dict_degree[n]) for n in self.set_nodes}
        self.dict_threshold = {n: get_phi(percentage, name_dataset) for n in self.set_nodes}
        self.dict_beta = {n: get_beta(self.dict_degree[n], percentage, name_dataset) for n in self.set_nodes}

        self.dict_type_contagion = {n:np.random.choice([0,1], size = 1)[0] for n in self.set_nodes}
        self.dict_status = {n:'U' for n in self.set_nodes}
        self.dict_status_set = {'U': self.set_nodes.copy(),
                          'A': set([]),
                          'D': set([])}
        self.dict_nber_active_before_infection = {n:0 for n in self.set_nodes}
        self.dict_time_infection = {n:np.nan for n in self.set_nodes}
        self.dict_time_awareness = {n:np.nan for n in self.set_nodes}
        self.dict_is_seed = {n:0 for n in self.set_nodes}
        self.dict_list_stim_nei_time = {n:[] for n in self.set_nodes} 
        self.dict_rank = {n:np.nan for n in self.set_nodes}
        self.r = 0 # rank given to the next node which becomes 'D'
        
        self.last_time_post_nohash = {n:-1 for n in self.set_nodes}

        # the seed is drawn with a probability proportional to its activity
        seed = random.choices(list(self.dict_activities.keys()), weights=list(self.dict_activities.values()), k=1)[0]

        # contamination of the seed
        self.dict_status[seed] = 'D'
        self.dict_status_set['U'].remove(seed)
        self.dict_status_set['D'].add(seed)
        self.dict_time_infection[seed] = 0
        self.dict_time_awareness[seed] = 0
        self.dict_is_seed[seed] = 1
        self.dict_rank[seed] = self.r
        self.r+=1
        self.dict_type_contagion[seed] = 2
        self.spread(seed, 0, 1, 1)
        
    def run_fast(self):
        
        """
        Run the propagation until at least 90% of the nodes are 'D'.
        At each time step one node, drawn with a probability proportional to
        its activity, becomes active.
        """
        
        t = 1 # initialisation of the time
        target = self.N*0.9

        # The activities never change during the run: the population and the
        # cumulative weights are built once instead of at every time step.
        # random.choices draws exactly the same nodes with cum_weights as
        # with the corresponding weights.
        population = list(self.dict_activities.keys())
        cum_weights = list(itertools.accumulate(self.dict_activities.values()))

        while (len(self.dict_status_set['D']) < target) :
                
            # select an active node through its activity
            activate_node = random.choices(population, cum_weights=cum_weights, k=1)[0]
            
            if self.dict_status[activate_node] == 'U':

                if (random.random() <= self.rate):
                    # spontaneous adoption
                    self.detect_spontaneous(activate_node, t)
                    self.spread(activate_node, t, 1, 1)
                else:
                    self.dict_nber_active_before_infection[activate_node] += 1 # new posts not with the #
                    neighbors = self.dict_set_neighbors[activate_node]
                    neighbors_A = neighbors.intersection(self.dict_status_set['A'])
                    neighbors_U = neighbors.intersection(self.dict_status_set['U'])
                    # the neighbors record a stimulus without hashtag
                    for nei in neighbors_A.union(neighbors_U):
                        self.dict_list_stim_nei_time[nei].append((activate_node, t, 0, 0))
                    self.last_time_post_nohash[activate_node] = t
                    
            elif self.dict_status[activate_node] == 'A':
                # an aware node becomes 'D' the first time it is active
                self.detect_A(activate_node, t)
                self.spread(activate_node, t, 1, 1)
                
            else:
                # a 'D' node posts again: hashtag stimulus which is not a first infection
                self.spread(activate_node, t, 1, 0) 
                
            t+=1

    def detect_spontaneous(self, node, time):
        
        """
        Move a 'U' node to 'D' through spontaneous adoption (is_seed = 2);
        its awareness and infection times are both set to time.
        """
        
        self.dict_status[node] = 'D'
        self.dict_status_set['U'].remove(node)
        self.dict_status_set['D'].add(node)
        self.dict_time_infection[node] = time
        self.dict_time_awareness[node] = time
        self.dict_is_seed[node] = 2
        self.dict_rank[node] = self.r
        self.r+=1
        
    def detect_A(self, node, time):
        
        """
        Move an 'A' node to 'D' (is_seed = 0); its awareness time is kept.
        """
        
        self.dict_status[node] = 'D'
        self.dict_status_set['A'].remove(node)
        self.dict_status_set['D'].add(node)
        self.dict_time_infection[node] = time
        self.dict_is_seed[node] = 0
        self.dict_rank[node] = self.r
        self.r+=1
        
    def spread(self, node, time, is_hashtag, is_first_infection): # is first infection = 0 or 1
        
        """
        Send a stimulus from node to its 'A' and 'U' neighbors and make the
        'U' neighbors aware when their contagion rule is satisfied.

        Parameters:
        - node: node which posts
        - time: current time step
        - is_hashtag: 1 if the post contains the hashtag, 0 otherwise
        - is_first_infection: 1 if it is the post by which node becomes 'D', 0 otherwise
        """
        
        neighbors = self.dict_set_neighbors[node]
        
        for nei in neighbors.intersection(self.dict_status_set['A']): # for the neighbors which are 'A'
            self.dict_list_stim_nei_time[nei].append((node, time, is_hashtag, is_first_infection))
            
        # intersection() returns a new set, so 'U' can be modified inside the loop
        for nei in neighbors.intersection(self.dict_status_set['U']): # for the neighbors which are 'U'

            self.dict_list_stim_nei_time[nei].append((node, time, is_hashtag, is_first_infection))
            
            if (self.dict_type_contagion[nei] == 0):

                # simple contagion: awareness with probability beta
                become_aware = (self.dict_beta[nei] >= random.random())
                    
            else:

                # threshold contagion: the proportion is only needed (and only computed) here
                prop_infected_neighbors_nei = len(self.dict_set_neighbors[nei].intersection(self.dict_status_set['D'])) / self.dict_degree[nei]
                become_aware = (prop_infected_neighbors_nei>=self.dict_threshold[nei])
                
            if become_aware:

                self.dict_status[nei] = 'A'
                self.dict_status_set['U'].remove(nei)
                self.dict_status_set['A'].add(nei)
                self.dict_time_awareness[nei] = time
               

### 4-Function to make the data base

In [37]:
def conception_data_base(name_dataset, network, mean_degree, N_network, N, rate):
    
    """
    Build a data frame of node features from synthetic propagations run on
    N_network networks (one propagation per network, one row per node).

    NOTE(review): this function looks like a leftover of an earlier version of
    the notebook. It relies on names which are not defined in the visible
    code (the class `Mixed_Propagation`, a global `percentage`, the module
    name `graph_tool`) and on attributes (`M.list_stimuli_node`,
    `M.threshold.a`, `M.T`, ...) which `Mixed_Propagation_fast` does not
    provide - confirm whether it is still used before relying on it.

    Parameters:
    - name_dataset: dataset used for the activity, beta and phi, here 'GiletsJaunes' (not used in the body)
    - network : type of network between 'BA', 'SBM', 'ER', 'WS', 'kregular', 'bimodal_distribution', 'lognormal', 'cm' and 'twitter'
    - mean_degree: mean degree of the network
    - N_network: number of different network in the sample
    - N: number of nodes
    - rate: probability of getting infected through spontaneous adoption (not used in the body)

    Return:
    - df_set: data frame with the columns listed in name_columns

    """
   
    name_columns = ['degree',
                    'clustering_coefficient',
                    'nber_infected_neighbors',
                    'prop_infected_neighbors',
                    'sum_stimuli',
                    'nber_stimuli_by_nei',
                    'std_stimuli',
                    'max_stimuli_one_nei',
                    'nber_stim_last_infected_nei',
                    'nber_active_before_infection',
                    'is_seed',
                    'beta',
                    'phi',
                    'type_contagion',
                    'trajectory',
                    'index_node',
                    'rank',
                    'time_infection',
                    'time_awareness',
                    'list_nei_time']

    df_set = pd.DataFrame(columns = name_columns) 

    for it in range(N_network): # for a certain number of networks

        g = make_random_network(network,N,mean_degree) # creation of the network
        # NOTE(review): A is never used, and `graph_tool` is not bound by the visible imports.
        A = graph_tool.spectral.adjacency(g)
            
        for i in range(1):

            # propagations in the network
            g_work = g.copy()
            # the parameter N is overwritten by the real number of nodes of the graph
            N = len(list(g_work.vertices())) # number of nodes
            
            # load the distributions - every 100 degrees
            # (dict_distribution is not used afterwards)
            with open('distri_activities_by_degree.pickle', 'rb') as handle:
                dict_distribution = pickle.load(handle)

            # NOTE(review): neither `Mixed_Propagation` nor `percentage` is defined in the visible code.
            M=Mixed_Propagation(g, percentage)
            
            M.run_fast()

            # features on the structure of the ego-network
            degree = np.array(g.get_out_degrees(g.get_vertices()))
            clustering_coefficient = np.array(list(gc.local_clustering(g, weight=None, prop=None, undirected=True)))
            
            # for each node, the senders of its stimuli and the times at which they were received
            list_stimuli_node = list(M.list_stimuli_node)
            list_stimuli_time = list(M.list_stimuli_time)
            list_stimuli = [[(node_i, time_i) for node_i, time_i in zip(node, time)] for node, time in zip(list_stimuli_node, list_stimuli_time)]
            
            #############################################################
            
            # number of distinct senders
            nber_infected_neighbors = [len(list(set(list(k)))) for k in list_stimuli_node]
            
            prop_infected_neighbors = [nber/deg for nber, deg in zip(nber_infected_neighbors, degree)]
            
            # total number of stimuli
            sum_stimuli = [len(k) for k in list_stimuli_node]

            nber_stimuli_by_nei = [stim/deg for stim, deg in zip(sum_stimuli, degree)]
            
            # number of stimuli sent by each sender
            stimuli_by_nei = [list(Counter(k).values()) for k in list_stimuli_node]
            
            std_stimuli = [np.std(k) if len(k)!=0 else 0 for k in stimuli_by_nei]
            
            max_stimuli_one_nei = [max(k) if len(k)!=0 else 0 for k in stimuli_by_nei]
            
            # sender of the last stimulus (nan when the node received none)
            index_last_infected_nei = [k[-1] if len(k)!=0 else np.nan for k in list_stimuli_node]

            # number of stimuli sent by that sender
            nber_stim_last_infected_nei = [Counter(k)[index] for k, index in zip(list_stimuli_node, index_last_infected_nei)]
            
            nber_active_before_infection = M.nber_active_before_infection.a
                
            # is_seed
            is_seed = M.is_seed.a
            
            # phi
            phi_nodes = M.threshold.a
            
            # beta
            beta_nodes = M.beta.a
            
            # type contagion
            type_contagion = M.type_contagion.a
                
            # node index
            index_node = list(g.vertex_index)
                
            # rank
            rank = M.rank.a
            
            # time infection
            time_infection = M.time_infection.a
            
            # time awareness
            time_awareness = M.time_awareness.a
            
            # one row per node, in the order of name_columns
            array_one_propagation = np.array([[degree[k], clustering_coefficient[k], nber_infected_neighbors[k], prop_infected_neighbors[k], sum_stimuli[k], nber_stimuli_by_nei[k], std_stimuli[k], max_stimuli_one_nei[k], nber_stim_last_infected_nei[k], nber_active_before_infection[k], is_seed[k], beta_nodes[k], phi_nodes[k], type_contagion[k], M.T, index_node[k], rank[k], time_infection[k], time_awareness[k], list_stimuli[k]] for k in range(N)]) #, adj_list_node[k]
                
            # NOTE(review): DataFrame.append was removed in pandas 2.0 - confirm the pandas version in use.
            df_set = df_set.append(pd.DataFrame(array_one_propagation,columns = name_columns),ignore_index = True)
                
    return(df_set)

### 5-Function which makes the large data base

In [40]:
def get_data_AD(x):

    """
    Run one synthetic propagation and save the features of the nodes.

    A network is created, a Mixed_Propagation_fast is run on it, and one row
    of features is computed for each node. The nodes with a missing value
    (the rank and the times stay at NaN for the nodes which never became 'D')
    are dropped. The data frame is pickled in the directory 'df_experiment4'
    and returned.

    Parameters:
    - x: tuple (name_dataset, network, mean_degree, N, rate, it, percentage)
        - name_dataset: dataset used for the activity, beta and phi
        - network: type of network (see make_random_network)
        - mean_degree: mean degree of the network
        - N: number of nodes
        - rate: probability of getting infected through spontaneous adoption
        - it: index of the iteration, only used in the name of the file
        - percentage: percentage of the filtering

    Return:
    - df: the data frame of the features
    """

    name_dataset, network, mean_degree, N, rate, it, percentage = x

    g = make_random_network(network,N,mean_degree) # creation of the network

    # name_dataset and rate are forwarded explicitly, otherwise the values
    # given in x would be silently replaced by the defaults of the class
    M = Mixed_Propagation_fast(g, percentage, name_dataset=name_dataset, rate=rate)
    M.run_fast()

    name_columns = ['degree',
                    'nber_infected_neighbors',
                    'prop_infected_neighbors',
                    'sum_stimuli',
                    'nber_stimuli_by_nei',
                    'std_stimuli',
                    'max_stimuli_one_nei',
                    'nber_stim_last_infected_nei',
                    'nber_active_before_infection',
                    'is_seed',
                    'beta',
                    'phi',
                    'type_contagion',
                    'index_node',
                    'rank',
                    'time_infection',
                    'time_awareness',
                    'time_last_post_nohash',
                    'list_nei_time']

    # the rows are collected in a list and the data frame is built once:
    # adding the rows one by one to a data frame is quadratic in the number of nodes
    rows = []

    for node in range(M.N):

        # stimuli received by the node: (sender, time, is_hashtag, is_first_infection)
        list_nei_time = M.dict_list_stim_nei_time[node]
        degree = M.dict_degree[node]

        # senders of the stimuli with the hashtag, in order of reception
        hashtag_senders = [stim[0] for stim in list_nei_time if stim[2] == 1]
        nber_stimuli_per_sender = Counter(hashtag_senders)
        list_nber_stimuli = list(nber_stimuli_per_sender.values())

        # NOTE(review): the "last infected neighbor" is taken as the sender of
        # the most recent hashtag stimulus, which is not necessarily the
        # neighbor infected most recently - confirm this is the intended feature.
        if len(hashtag_senders)!=0:
            nber_stim_last_infected_nei = nber_stimuli_per_sender[hashtag_senders[-1]]
        else:
            nber_stim_last_infected_nei = 0

        rows.append([degree,   # degree
                     len(nber_stimuli_per_sender),   # nber_infected_neighbors
                     len(nber_stimuli_per_sender) / degree,    # prop_infected_neighbors
                     len(hashtag_senders), # sum_stimuli
                     len(hashtag_senders) / degree,    # nber_stimuli_by_nei
                     np.std(list_nber_stimuli) if len(list_nber_stimuli)!=0 else 0,   # std_stimuli
                     max(list_nber_stimuli) if len(list_nber_stimuli)!=0 else 0,    # max_stimuli_one_nei
                     nber_stim_last_infected_nei,     # nber_stim_last_infected_nei
                     M.dict_nber_active_before_infection[node],    # nber_active_before_infection
                     M.dict_is_seed[node],  # is_seed
                     M.dict_beta[node],  # beta
                     M.dict_threshold[node],  # phi
                     M.dict_type_contagion[node], # type_contagion
                     node,  # index_node
                     M.dict_rank[node],  # rank
                     M.dict_time_infection[node],  # time_infection
                     M.dict_time_awareness[node],  # time_awareness
                     M.last_time_post_nohash[node],   # time_last_post_nohash
                     list_nei_time])    # list_nei_time

    df = pd.DataFrame(rows, columns = name_columns)
    df = df.dropna().reset_index(drop = True)

    # the output directory is created if needed, so that the result of a
    # long simulation is not lost at the very end
    os.makedirs('df_experiment4', exist_ok = True)
    with open('df_experiment4/df_'+name_dataset+'_ad_premixed_'+network+'_N_'+str(N)+'_it_'+str(it)+'_percentage_'+str(percentage)+'.pickle', 'wb') as handle:
        pickle.dump(df, handle)
        
    return(df)

## B - Test of the code

In [42]:
# Quick check on a very small sample (10 nodes) of the following network
name_dataset = 'GiletsJaunes'
network = 'sample_following_fast' #sample_following
mean_degree, N = 4, 10
rate = 0.005
it, percentage = 0, 80

get_data_AD((name_dataset, network, mean_degree, N, rate, it, percentage))

1
2
3
4
5
6
7
8
9
in M <Graph object, undirected, with 10 vertices and 9 edges, at 0x7f8e44f1a340>
GiletsJaunes
GiletsJaunes
GiletsJaunes
GiletsJaunes
GiletsJaunes
GiletsJaunes
GiletsJaunes
GiletsJaunes
GiletsJaunes
GiletsJaunes
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nber infected 1
nbe

Unnamed: 0,degree,nber_infected_neighbors,prop_infected_neighbors,sum_stimuli,nber_stimuli_by_nei,std_stimuli,max_stimuli_one_nei,nber_stim_last_infected_nei,nber_active_before_infection,is_seed,beta,phi,type_contagion,index_node,rank,time_infection,time_awareness,time_last_post_nohash,list_nei_time
0,1,1,1.0,6,6.0,0.0,6,6,24,0,0.014082,0.062218,0,0,5.0,408.0,400,385,"[(1, 5, 0, 0), (1, 10, 0, 0), (1, 14, 0, 0), (..."
1,2,0,0.0,0,0.0,0.0,0,0,76,2,0.012027,0.058058,1,1,3.0,376.0,376,374,"[(0, 1, 0, 0), (2, 2, 0, 0), (2, 7, 0, 0), (2,..."
2,2,1,0.5,1,0.5,0.0,1,1,64,0,0.016091,0.109425,1,2,4.0,377.0,376,369,"[(1, 5, 0, 0), (1, 10, 0, 0), (1, 14, 0, 0), (..."
3,2,2,1.0,92,46.0,11.0,57,57,49,0,0.016048,0.049377,0,3,6.0,711.0,695,684,"[(2, 2, 0, 0), (4, 3, 0, 0), (2, 7, 0, 0), (2,..."
4,2,1,0.5,3,1.5,0.0,3,3,8,0,0.015176,0.112138,1,4,2.0,199.0,187,167,"[(5, 13, 0, 0), (5, 19, 0, 0), (5, 22, 0, 0), ..."
5,2,0,0.0,0,0.0,0.0,0,0,25,2,0.014472,0.027311,1,5,1.0,187.0,187,184,"[(4, 3, 0, 0), (4, 17, 0, 0), (6, 81, 0, 0), (..."
6,2,1,0.5,7,3.5,0.0,7,7,24,0,0.01336,0.035812,1,7,8.0,1081.0,999,990,"[(8, 30, 0, 0), (8, 31, 0, 0), (8, 36, 0, 0), ..."
7,2,1,0.5,169,84.5,0.0,169,169,83,0,0.012513,0.019715,0,8,7.0,999.0,995,979,"[(9, 0, 1, 1), (9, 4, 1, 0), (9, 6, 1, 0), (9,..."
8,1,0,0.0,0,0.0,0.0,0,0,0,1,0.016924,0.030386,2,9,0.0,0.0,0,-1,[]


## C - Make the whole data set

In [None]:
# Parameters shared by all the simulations of the data set
name_dataset = 'GiletsJaunes'
network = 'sample_following_fast' #sample_following
mean_degree, N = 4, 100000
rate = 0.005

# one input per (iteration, percentage): 10 iterations x 4 percentages
list_input = []

for it, percentage in itertools.product(range(0,10), [40, 60, 80, 100]):
    list_input.append((name_dataset, network, mean_degree, N, rate, it, percentage))

In [None]:
# Each worker re-seeds the global NumPy generator when it starts: worker
# processes created by fork inherit a copy of the generator state of the
# parent, so without this the np.random draws of the workers would be
# identical. The `with` block releases the workers at the end, and the
# result is stored in a variable so that the 40 data frames are not
# displayed as the output of the cell (they are also saved by get_data_AD).
with Pool(processes = 10, initializer = np.random.seed) as pool:
    list_df = pool.map(get_data_AD, list_input)