In [46]:
import networkx as nx
import math
import numpy as np
import matplotlib as mat
import matplotlib.pyplot as plt 
import random
%matplotlib inline

In [47]:
# plot settings
almost_black = '#262626'
plt.rcParams['text.usetex'] = False
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Arial'
plt.rcParams['axes.edgecolor'] = almost_black
plt.rcParams['text.color'] = almost_black
plt.rcParams['axes.linewidth'] = 0.5
plt.rcParams['axes.labelsize'] = 12

Implement an information cascade model, in which the probability of node i activating node j is proportional to the weight of the edge from i to j.

In [79]:
def information_cascade(G,t_tot,init):
    
    t = 0
    
    max_weight = max([e[2]['weight'] for e in G.edges(data=True)])
    
    activation_times = {}
    for i in init:
        activation_times[i]=0
    
    while t<t_tot:
    
        curr_infectious = [n for n in activation_times if activation_times[n]==t]

        for n in curr_infectious:
            for m in G.neighbors(n):
                if m not in activation_times.keys():
#                     p = G[n][m]['weight']
#                     if p>random.uniform(0,1)*max_weight:
                    activation_times[m] = t+1
                        
        t+=1

    return activation_times
    

# Selecting the Initial activation nodes

In [13]:
import pandas as pd

In [14]:
nodes = pd.read_csv("nodes.csv")

In [15]:
nodes.head()

Unnamed: 0,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality
0,1417929933800751104,,,1.0,,,,,0,0,0,0.0
1,1052290795942731777,,,1.0,,,,,0,0,0,0.0
2,1247907027650838528,,,1.0,,,,,0,0,0,0.0
3,1103476860187369472,,,1.0,,,,,0,0,0,0.0
4,998590102573023232,,,1.0,,,,,0,0,0,0.0


In [16]:
retweeting_activists_mps = nodes.dropna(subset=["political affiliation"])

In [17]:
retweeting_activists_mps.head()

Unnamed: 0,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality
21,86384661,,,1.0,Yasir_Naqvi,Yasir,Naqvi,Liberal,73,0,73,0.230067
25,2344419362,,,1.0,SalmaZahid15,Salma,Zahid,Liberal,48,3,51,0.169595
61,170377354,,,1.0,AnthonyRota,Anthony,Rota,Liberal,4,0,4,0.012606
79,2942312619,,,1.0,YRobillardPLC,Yves,Robillard,Liberal,0,0,0,0.0
120,3242606862,,,1.0,MMcLeodNWT,Michael,McLeod,Liberal,13,1,14,0.040971


In [18]:
retweeting_activists_mps.describe()

Unnamed: 0,Id,Label,timeset,communityid,indegree,outdegree,degree,eigencentrality
count,282.0,0.0,0.0,277.0,282.0,282.0,282.0,282.0
mean,1.640037e+17,,,2.833935,25.007092,0.570922,25.578014,0.129402
std,4.055762e+17,,,3.159046,29.98493,1.206568,30.40713,0.176837
min,3358671.0,,,1.0,0.0,0.0,0.0,0.0
25%,156740100.0,,,1.0,4.0,0.0,4.0,0.012606
50%,414996600.0,,,2.0,13.5,0.0,14.0,0.056729
75%,2851923000.0,,,3.0,35.0,1.0,35.0,0.169197
max,1.427259e+18,,,21.0,151.0,8.0,151.0,1.0


In [19]:
retweeting_activists_mps = retweeting_activists_mps[retweeting_activists_mps["outdegree"] > 0]

In [20]:
retweeting_activists_mps.head()

Unnamed: 0,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality
25,2344419362,,,1.0,SalmaZahid15,Salma,Zahid,Liberal,48,3,51,0.169595
120,3242606862,,,1.0,MMcLeodNWT,Michael,McLeod,Liberal,13,1,14,0.040971
152,360677740,,,1.0,lisahepfner2021,Lisa,Hepfner,Liberal,50,1,51,0.15758
364,25813888,,,1.0,,,,Activist Organization,11,1,12,0.034668
519,2322580746,,,1.0,jimcarr_wpg,Jim,Carr,Liberal,4,1,5,0.012606


In [21]:
retweeting_activists_mps.describe()

Unnamed: 0,Id,Label,timeset,communityid,indegree,outdegree,degree,eigencentrality
count,80.0,0.0,0.0,80.0,80.0,80.0,80.0,80.0
mean,1.964261e+17,,,1.85,40.55,2.0125,42.5625,0.231257
std,4.529133e+17,,,1.159441,34.901634,1.496779,35.249506,0.219005
min,16014400.0,,,1.0,1.0,1.0,2.0,0.003152
25%,239227400.0,,,1.0,12.75,1.0,14.0,0.064608
50%,494142600.0,,,1.0,31.0,1.0,33.0,0.165861
75%,2914601000.0,,,3.0,56.0,3.0,58.25,0.334226
max,1.427259e+18,,,5.0,145.0,8.0,148.0,1.0


In [22]:
len(retweeting_activists_mps["outdegree"])

80

## Selecting the targets from the edges

In [26]:
edges = pd.read_csv("../Null Model Comparison/largest_component_networkx_format.csv", names=["Source", "Target", "Weight"])

In [27]:
edges.head()

Unnamed: 0,Source,Target,Weight
0,1385933370090209280,158095776,"{""weight"":1}"
1,1416535505454338050,158095776,"{""weight"":8}"
2,1416535505454338050,1604931252,"{""weight"":1}"
3,544695802,158095776,"{""weight"":1}"
4,716257068538327040,158095776,"{""weight"":1}"


In [30]:
relevant_paths = pd.merge(edges, retweeting_activists_mps, left_on='Source', right_on='Id')

In [31]:
relevant_paths.head()

Unnamed: 0,Source,Target,Weight,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality
0,150270263,18681111,"{""weight"":1}",150270263,,,1.0,,,,Activist Organization,7,1,8,0.037561
1,29545977,59686058,"{""weight"":1}",29545977,,,2.0,,,,Activist Organization,2,1,3,0.006303
2,25813888,150270263,"{""weight"":1}",25813888,,,1.0,,,,Activist Organization,11,1,12,0.034668
3,577727470,119925381,"{""weight"":1}",577727470,,,2.0,,,,Activist Organization,10,1,11,0.031516
4,294660973,294660973,"{""weight"":1}",294660973,,,5.0,,,,Activist Organization,3,1,4,0.014769


In [35]:
activation_nodes = relevant_paths['Target'].unique()

In [36]:
activation_nodes

array([           18681111,            59686058,           150270263,
                 119925381,           294660973,            14079041,
                  17969963,          2715275551,           377588094,
       1342125115383939073,           273262205,            24990450,
                3025416359, 1425866189780160514,          2530008414,
                2800741820,            16014404, 1063494232126689280,
                 408072407,           234550882,          3402128080,
                 803381983,           261772246,           417389780,
       1170770038208565248,          1707636642, 1143229947932229632,
                2254171724,          2555308646,           739149720,
        989311745100566529,           268832287,  883774859452579840,
                  15810950,           129395750,  791282631006621696,
                 256552850, 1086084557009575936,            36133644,
                  34606493,           564207331,           414218319,
        943174774154

In [37]:
len(activation_nodes)

91

The edges in this network are going to be flipped so we can let information be travel down to those the person was retweeted by

In [38]:
edges.head()

Unnamed: 0,Source,Target,Weight
0,1385933370090209280,158095776,"{""weight"":1}"
1,1416535505454338050,158095776,"{""weight"":8}"
2,1416535505454338050,1604931252,"{""weight"":1}"
3,544695802,158095776,"{""weight"":1}"
4,716257068538327040,158095776,"{""weight"":1}"


In [40]:
edges = edges[['Target', 'Source', 'Weight']]

In [41]:
edges 


Unnamed: 0,Target,Source,Weight
0,158095776,1385933370090209280,"{""weight"":1}"
1,158095776,1416535505454338050,"{""weight"":8}"
2,1604931252,1416535505454338050,"{""weight"":1}"
3,158095776,544695802,"{""weight"":1}"
4,158095776,716257068538327040,"{""weight"":1}"
...,...,...,...
13996,196717787,77596220,"{""weight"":1}"
13997,196717787,891058415195303939,"{""weight"":1}"
13998,196717787,1095768409449459712,"{""weight"":1}"
13999,196717787,881426857,"{""weight"":1}"


In [42]:
edges = edges.set_index("Target")

In [44]:
edges.to_csv("Source_retweeted_by_target.csv")

# Running an information cascade 

In [49]:
G = nx.read_edgelist('Source_retweeted_by_target.csv', comments='#',
                     create_using=nx.DiGraph(), 
                     delimiter=','"", 
                     nodetype=int, 
                     encoding='utf-8')

Running an initial cascade, starting with the nodes where the path lengths are going to be more than 1 
- G: the Graph with the edges reversed to allow for information to flow down to the retweeters
- Time: let this initally be 4 to see what we get with little spread
- activation nodes: These are the nodes where another MP or activist organization has retweeted them

In [76]:
t5 = information_cascade(G,5,activation_nodes)

In [77]:
t5

{18681111: 0,
 59686058: 0,
 150270263: 0,
 119925381: 0,
 294660973: 0,
 14079041: 0,
 17969963: 0,
 2715275551: 0,
 377588094: 0,
 1342125115383939073: 0,
 273262205: 0,
 24990450: 0,
 3025416359: 0,
 1425866189780160514: 0,
 2530008414: 0,
 2800741820: 0,
 16014404: 0,
 1063494232126689280: 0,
 408072407: 0,
 234550882: 0,
 3402128080: 0,
 803381983: 0,
 261772246: 0,
 417389780: 0,
 1170770038208565248: 0,
 1707636642: 0,
 1143229947932229632: 0,
 2254171724: 0,
 2555308646: 0,
 739149720: 0,
 989311745100566529: 0,
 268832287: 0,
 883774859452579840: 0,
 15810950: 0,
 129395750: 0,
 791282631006621696: 0,
 256552850: 0,
 1086084557009575936: 0,
 36133644: 0,
 34606493: 0,
 564207331: 0,
 414218319: 0,
 943174774154498048: 0,
 412708728: 0,
 2852899113: 0,
 240786249: 0,
 1318671397560979456: 0,
 2937436849: 0,
 283226685: 0,
 45848808: 0,
 720579941184757760: 0,
 22849568: 0,
 1899063048: 0,
 579377522: 0,
 1646334073: 0,
 2242940071: 0,
 3257047456: 0,
 20199202: 0,
 2891740872: 

In [57]:
t6 = information_cascade(G,6,activation_nodes)

In [78]:
len(t5)

241

In [58]:
t6

{18681111: 0,
 59686058: 0,
 150270263: 0,
 119925381: 0,
 294660973: 0,
 14079041: 0,
 17969963: 0,
 2715275551: 0,
 377588094: 0,
 1342125115383939073: 0,
 273262205: 0,
 24990450: 0,
 3025416359: 0,
 1425866189780160514: 0,
 2530008414: 0,
 2800741820: 0,
 16014404: 0,
 1063494232126689280: 0,
 408072407: 0,
 234550882: 0,
 3402128080: 0,
 803381983: 0,
 261772246: 0,
 417389780: 0,
 1170770038208565248: 0,
 1707636642: 0,
 1143229947932229632: 0,
 2254171724: 0,
 2555308646: 0,
 739149720: 0,
 989311745100566529: 0,
 268832287: 0,
 883774859452579840: 0,
 15810950: 0,
 129395750: 0,
 791282631006621696: 0,
 256552850: 0,
 1086084557009575936: 0,
 36133644: 0,
 34606493: 0,
 564207331: 0,
 414218319: 0,
 943174774154498048: 0,
 412708728: 0,
 2852899113: 0,
 240786249: 0,
 1318671397560979456: 0,
 2937436849: 0,
 283226685: 0,
 45848808: 0,
 720579941184757760: 0,
 22849568: 0,
 1899063048: 0,
 579377522: 0,
 1646334073: 0,
 2242940071: 0,
 3257047456: 0,
 20199202: 0,
 2891740872: 

In [64]:
t7 = information_cascade(G,7,activation_nodes)

In [65]:
t7

{18681111: 0,
 59686058: 0,
 150270263: 0,
 119925381: 0,
 294660973: 0,
 14079041: 0,
 17969963: 0,
 2715275551: 0,
 377588094: 0,
 1342125115383939073: 0,
 273262205: 0,
 24990450: 0,
 3025416359: 0,
 1425866189780160514: 0,
 2530008414: 0,
 2800741820: 0,
 16014404: 0,
 1063494232126689280: 0,
 408072407: 0,
 234550882: 0,
 3402128080: 0,
 803381983: 0,
 261772246: 0,
 417389780: 0,
 1170770038208565248: 0,
 1707636642: 0,
 1143229947932229632: 0,
 2254171724: 0,
 2555308646: 0,
 739149720: 0,
 989311745100566529: 0,
 268832287: 0,
 883774859452579840: 0,
 15810950: 0,
 129395750: 0,
 791282631006621696: 0,
 256552850: 0,
 1086084557009575936: 0,
 36133644: 0,
 34606493: 0,
 564207331: 0,
 414218319: 0,
 943174774154498048: 0,
 412708728: 0,
 2852899113: 0,
 240786249: 0,
 1318671397560979456: 0,
 2937436849: 0,
 283226685: 0,
 45848808: 0,
 720579941184757760: 0,
 22849568: 0,
 1899063048: 0,
 579377522: 0,
 1646334073: 0,
 2242940071: 0,
 3257047456: 0,
 20199202: 0,
 2891740872: 

In [68]:
t8 = information_cascade(G,8,activation_nodes)

In [69]:
t8

{18681111: 0,
 59686058: 0,
 150270263: 0,
 119925381: 0,
 294660973: 0,
 14079041: 0,
 17969963: 0,
 2715275551: 0,
 377588094: 0,
 1342125115383939073: 0,
 273262205: 0,
 24990450: 0,
 3025416359: 0,
 1425866189780160514: 0,
 2530008414: 0,
 2800741820: 0,
 16014404: 0,
 1063494232126689280: 0,
 408072407: 0,
 234550882: 0,
 3402128080: 0,
 803381983: 0,
 261772246: 0,
 417389780: 0,
 1170770038208565248: 0,
 1707636642: 0,
 1143229947932229632: 0,
 2254171724: 0,
 2555308646: 0,
 739149720: 0,
 989311745100566529: 0,
 268832287: 0,
 883774859452579840: 0,
 15810950: 0,
 129395750: 0,
 791282631006621696: 0,
 256552850: 0,
 1086084557009575936: 0,
 36133644: 0,
 34606493: 0,
 564207331: 0,
 414218319: 0,
 943174774154498048: 0,
 412708728: 0,
 2852899113: 0,
 240786249: 0,
 1318671397560979456: 0,
 2937436849: 0,
 283226685: 0,
 45848808: 0,
 720579941184757760: 0,
 22849568: 0,
 1899063048: 0,
 579377522: 0,
 1646334073: 0,
 2242940071: 0,
 3257047456: 0,
 20199202: 0,
 2891740872: 

In [70]:
len(t8)

221

In [72]:
len(t6)

233

In [73]:
t6.to_df(axis=1, columns=["id", "activation_time"])

AttributeError: 'dict' object has no attribute 'to_df'

In [80]:
activation_times = pd.DataFrame.from_dict(t6, orient='index',
                       columns=[ "activation_time"])

In [81]:
activation_times.to_csv("activation_times.csv")