In [1]:
import matplotlib
#from graph_tool.all import *
import numpy as np
import pickle
import matplotlib.pyplot as plt
from IPython.display import Image

In [2]:
# Dataset selection. Both candidates are kept side by side so switching is a
# one-line change: the LAST assignment below is the one that takes effect.

# datasets/facebook_combined.txt is from https://snap.stanford.edu/data/ego-Facebook.html
DATASET_PATH = "datasets/facebook_combined.txt"; DIRECTED_EDGES = False

# datasets/twitter_combined.txt is from https://snap.stanford.edu/data/ego-Twitter.html
DATASET_PATH = "datasets/twitter_combined.txt"; DIRECTED_EDGES = True

# Read every whitespace-separated integer of the edge list into one flat array.
# Iterating over the file object streams it line by line instead of first
# materialising all lines with readlines().
with open(DATASET_PATH, "r") as f:
    edges = np.fromiter((int(n) for x in f for n in x.split()), dtype='int')

# remap vertices to their indices
# np.unique returns the sorted distinct ids together with, for every entry of
# `edges`, the position of its id in that sorted array. This is the same mapping
# a {vertex: index} dict would give, computed in one vectorised pass.
unique_vertices, edges = np.unique(edges, return_inverse=True)
num_vertices = len(unique_vertices)
edges = edges.reshape((-1, 2))  # consecutive pairs of ids form one edge
del unique_vertices

# Build graph
# NOTE: Twitter dataset is directed, Facebook dataset is undirected
g = Graph(directed=DIRECTED_EDGES)
g.add_vertex(num_vertices)
g.add_edge_list(edges)

# Per-vertex properties used by the simulation and the drawing code:
#   colour          - vector of doubles, filled in right before a frame is drawn
#   state           - integer state code of the vertex
#   is_new_believer - flag that is reset at the start of every time step
g.vp["colour"] = g.new_vertex_property("vector<double>")
g.vp["state"] = g.new_vertex_property("int16_t")
g.vp["is_new_believer"] = g.new_vertex_property("bool")

# Histogram of the out-degree of every vertex, saved to disk and shown inline.
fig, ax = plt.subplots(figsize=(15, 12))
out_degrees = g.get_out_degrees(g.get_vertices())
ax.hist(out_degrees, bins=100)
ax.set_xlabel('Degree')
ax.set_ylabel('Frequency')
ax.set_title('Degree Distribution')
fig.savefig('sim_output/degree_distribution.png')
plt.show()

# Basic size statistics of the graph.
p = g.num_vertices()
q = g.num_edges()

# Density = edges present / edges possible (self-loops excluded).
# A directed graph has p * (p - 1) possible edges (ordered pairs); an undirected
# graph has half as many (unordered pairs). Using the undirected denominator for
# a directed graph would double the reported density.
max_possible_edges = p * (p - 1) if g.is_directed() else p * (p - 1) / 2
# Fewer than two vertices means no edge is possible; report 0 instead of dividing by zero.
density = q / max_possible_edges if max_possible_edges > 0 else 0.0

print('There are {0} vertices'.format(p))
print('There are {0} edges'.format(q))
print('Graph density is {0}'.format(density))

FileNotFoundError: [Errno 2] No such file or directory: 'datasets/twitter_combined.txt'

In [3]:
# Global clustering of g. The call's result is unpacked into two values which,
# going by the names used here, are the coefficient and its standard deviation.
clustering_coefficient, clustering_coefficient_stddev = global_clustering(g)
print(f"Global clustering coefficient: {clustering_coefficient} (stddev: {clustering_coefficient_stddev})")

NameError: name 'global_clustering' is not defined

In [4]:
# Compute a vertex layout for g once; the drawing cells below pass it as `pos`
# so that the closeness and blockmodel figures share the same vertex positions.
pos = graph_tool.draw.sfdp_layout(g)

NameError: name 'graph_tool' is not defined

In [5]:
# Draw the graph with every vertex coloured, sized and z-ordered by the value
# that closeness(g) assigns to it, then display the rendered PNG.
c = closeness(g)
closeness_png = "sim_output/SNAP_closeness.png"
closeness_draw_options = dict(
    pos=pos,
    vertex_fill_color=c,
    vertex_size=prop_to_size(c, mi=1, ma=4),
    vcmap=matplotlib.cm.gist_heat,
    edge_pen_width=0.3,
    vorder=c,
    output_size=(5000, 5000),
    bg_color=[1, 1, 1, 0.8],
    output=closeness_png,
)
graph_draw(g, **closeness_draw_options)
Image(filename=closeness_png)

NameError: name 'closeness' is not defined

In [6]:
# Fit a block model to g, render it with the shared layout and display the PNG.
state = minimize_blockmodel_dl(g)
blockmodel_png = "sim_output/SNAP_blockmodel.png"
state.draw(
    pos=pos,
    bg_color=[0, 0, 0, 1],
    output_size=(5000, 5000),
    output=blockmodel_png,
)
Image(filename=blockmodel_png)

NameError: name 'minimize_blockmodel_dl' is not defined

## Monte Carlo Simulation

In [7]:
from datetime import datetime
from collections import namedtuple
from enum import Enum

# Seed NumPy's global random generator; the simulation code below draws all of
# its random numbers through np.random, so a fixed seed makes runs repeatable.
np.random.seed(420)

class UserState(Enum):
    """State of a user (vertex) with respect to the rumour.

    The integer values are what the simulation stores in the graph's ``state``
    vertex property.
    """
    IGNORANT = 0  # does not know the rumour (initial state; also reached by forgetting)
    SPREADER = 1  # believes the rumour and shares it with neighbours
    SUPERSPREADER = 2  # bot-like spreader that never forgets or disbelieves
    STIFLER = 3  # heard the rumour but does not believe it (or stopped believing)

# Colours used when rendering simulation frames. Each is a 4-component list,
# presumably RGBA with components in [0, 1] (confirm against graph_draw).
BACKGROUND_COLOR = [1, 1, 1, 1]
# Colour of a vertex, keyed by its current UserState.
USER_STATE_COLORS = {
    UserState.IGNORANT: [0.2, 0.8, 0.2, 0.4],
    UserState.SPREADER: [1, 0, 0, 0.7],
    UserState.SUPERSPREADER: [1, 0, 1, 1],
    UserState.STIFLER: [0, 0, 1, 0.4],
}
# Halo colour for vertices flagged as new believers in the frame being drawn.
NEW_BELIEVER_HALO_COLOR = [1, 0.8, 0, 0.9]
# Colour applied to every edge.
EDGE_COLOR = [0, 0, 0, 0.05]

# Output size passed to graph_draw for every rendered simulation frame.
IMAGE_SIZE = (2000, 2000)

# Summary of one simulation run:
#   first_spreader_idx    - vertex index of the initial spreader
#   first_spreader_degree - number of in-neighbours of that vertex
#   time_step             - last time step reached by the run
#   rumor_size            - per-time-step fraction of vertices that are not IGNORANT
SimulationResult = namedtuple(
    "SimulationResult",
    "first_spreader_idx first_spreader_degree time_step rumor_size",
)

def simulate_rumour_spread(graph, model_parameters):
    """Step the rumour model on ``graph``, yielding after every time step.

    The generator mutates the ``state`` and ``is_new_believer`` vertex properties
    of ``graph`` in place. It first yields the initial situation (time step 0) and
    then keeps stepping for as long as at least one vertex is a plain SPREADER.

    Parameters
    ----------
    graph
        Graph whose ``vp.state`` holds ``UserState`` values and whose
        ``vp.is_new_believer`` is a per-vertex flag.
    model_parameters
        Mapping with the keys ``ignorant_believe_probability``,
        ``spreader_disagreement_probability``,
        ``spreader_disillusionment_probability``,
        ``spreader_forget_probability`` and ``stifler_forget_probability``.

    Yields
    ------
    (time_step, state_array, is_new_believer_array)
        The arrays are whatever ``.a`` returns for the two property maps; they are
        presumably live views that later steps overwrite, so copy them if a
        snapshot is needed (TODO confirm).
    """
    ignorant = UserState.IGNORANT.value
    spreading = UserState.SPREADER.value
    superspreading = UserState.SUPERSPREADER.value
    stifling = UserState.STIFLER.value

    def chance_of_any(per_contact_probability, contacts):
        # Interactions are assumed independent, so with X ~ Binomial(n, p):
        # P(X > 0) = 1 - P(X = 0) = 1 - (1 - p)^n
        return 1 - (1 - per_contact_probability) ** contacts

    state_map = graph.vp.state
    new_believer_map = graph.vp.is_new_believer

    time_step = 0
    yield time_step, state_map.a, new_believer_map.a
    # use `while True:` instead if "stifler_forget_probability" > 0
    while np.any(state_map.a == spreading):
        time_step += 1
        new_believer_map.a = False

        # Snapshot who is spreading / stifling at the start of the step; users
        # converted during this step only start acting in the next one.
        states_at_start = state_map.a
        active_spreaders = np.flatnonzero((states_at_start == spreading) | (states_at_start == superspreading))
        current_stiflers = np.flatnonzero(states_at_start == stifling)
        np.random.shuffle(active_spreaders)
        np.random.shuffle(current_stiflers)

        # visit every spreader, in random order
        for spreader in active_spreaders:
            # NOTE(review): if the graph has parallel edges, a neighbour may be
            # listed more than once here and is then counted (and drawn for)
            # more than once. Confirm whether that is intended.
            friends = graph.get_in_neighbours(spreader)
            friend_states = state_map.a[friends]
            ignorant_friends = friends[friend_states == ignorant]
            spreader_friends = friends[(friend_states == spreading) | (friend_states == superspreading)]
            stifler_friends = friends[friend_states == stifling]

            # sharing the rumour with ignorant friends: those whose draw falls
            # below the believe probability become spreaders, the rest stiflers
            believe_probability = model_parameters['ignorant_believe_probability']
            draws = np.random.random(len(ignorant_friends))
            new_believers = ignorant_friends[draws < believe_probability]
            new_disbelievers = ignorant_friends[draws >= believe_probability]
            state_map.a[new_believers] = spreading
            state_map.a[new_disbelievers] = stifling
            new_believer_map.a[new_believers] = True

            # bots never forget or disbelieve, they just spread rumour
            if state_map.a[spreader] == superspreading:
                continue

            # any spreader friend with a different take on the rumour makes us
            # doubt it and become a stifler
            n_spreader_friends = len(spreader_friends)
            if n_spreader_friends > 0 and np.random.random() < chance_of_any(
                model_parameters['spreader_disagreement_probability'], n_spreader_friends
            ):
                state_map.a[spreader] = stifling

            # each stifler friend has a small chance of convincing us to doubt
            # the rumour and become a stifler
            n_stifler_friends = len(stifler_friends)
            if n_stifler_friends > 0 and np.random.random() < chance_of_any(
                model_parameters['spreader_disillusionment_probability'], n_stifler_friends
            ):
                state_map.a[spreader] = stifling

            # there's a random chance that we'll just forget about or lose
            # interest in the rumour (this overrides a stifler outcome above)
            if np.random.random() < model_parameters['spreader_forget_probability']:
                state_map.a[spreader] = ignorant

        # visit every stifler, in random order
        for stifler in current_stiflers:
            # there's a random chance that we'll just forget about the rumour entirely
            if np.random.random() < model_parameters['stifler_forget_probability']:
                state_map.a[stifler] = ignorant

        yield time_step, state_map.a, new_believer_map.a


def run_simulations(graph, model_parameters, num_sims, draw=False, pos=None, max_time_steps=200):
    """Run ``num_sims`` independent rumour simulations on ``graph``.

    Every run resets all vertices to IGNORANT, turns one uniformly random vertex
    into the first SPREADER and then steps ``simulate_rumour_spread`` until the
    generator finishes or the time step exceeds ``max_time_steps``.

    Parameters
    ----------
    graph
        Graph to simulate on. Its ``state`` and ``is_new_believer`` vertex
        properties are overwritten; ``colour`` is overwritten when drawing.
    model_parameters
        Probability mapping forwarded unchanged to ``simulate_rumour_spread``.
    num_sims : int
        Number of runs.
    draw : bool, optional
        When true, every time step of every run is rendered to
        ``sim_output/sir_{run}_{time_step:03}.png``.
    pos : optional
        Vertex layout used for drawing. When drawing and ``pos`` is None, a layout
        is computed with ``graph_tool.draw.sfdp_layout``.
    max_time_steps : int, optional
        A run is cut off as soon as its time step exceeds this value. The default
        of 200 is the limit that used to be hard-coded.

    Returns
    -------
    list of SimulationResult
        One entry per run: first spreader index, its number of in-neighbours, the
        last time step reached, and the per-step fraction of vertices that are
        not IGNORANT.
    """
    sims = []
    if draw:
        if pos is None:
            pos = graph_tool.draw.sfdp_layout(graph)
        # The topology does not change during a simulation, so the out-degree
        # based vertex sizes are computed once instead of once per frame.
        vertex_sizes = prop_to_size(graph.degree_property_map("out"), mi=1, ma=40)

    for i in range(num_sims):
        if i % 25 == 0:
            print(f'{datetime.now()} RUNNING SIMULATION {i}')

        # initially, no users in the network know about the rumour
        graph.vp.state.a = UserState.IGNORANT.value

        # Alternative experiment (disabled): seed the rumour with a single
        # superspreader, i.e. a bot that keeps pumping out the rumour nonstop:
        # superspreader = np.random.randint(0, graph.num_vertices())
        # graph.vp.state.a[superspreader] = UserState.SUPERSPREADER.value

        # introduce the first spreader
        firstspreader = np.random.randint(0, graph.num_vertices())
        graph.vp.state.a[firstspreader] = UserState.SPREADER.value

        heard_rumour_fraction = []
        max_time_step = 0
        # Simulate on the `graph` argument (not on a notebook-level global), so
        # the function works for whichever graph the caller passes in.
        for time_step, state, _ in simulate_rumour_spread(graph, model_parameters):
            heard_rumour_fraction.append(np.mean(state != UserState.IGNORANT.value))
            max_time_step = time_step
            if time_step > max_time_steps:
                break

            if draw:
                print(f'{datetime.now()} DRAWING RESULT OF SIMULATION {i} AT TIME {time_step}')
                for v in graph.vertices():
                    graph.vp.colour[v] = USER_STATE_COLORS[UserState(graph.vp.state[v])]
                graph_draw(
                    graph, pos=pos,
                    vertex_fill_color=graph.vp.colour, vertex_color=graph.vp.colour, vertex_size=vertex_sizes,
                    output_size=IMAGE_SIZE, bg_color=BACKGROUND_COLOR,
                    edge_pen_width=0.5, edge_color=EDGE_COLOR,
                    vertex_halo=graph.vp.is_new_believer, vertex_halo_color=NEW_BELIEVER_HALO_COLOR,
                    output=f'sim_output/sir_{i}_{time_step:03}.png'
                )

        sims.append(
            SimulationResult(firstspreader, len(graph.get_in_neighbours(firstspreader)), max_time_step, heard_rumour_fraction)
        )
    return sims


# Probabilities that drive the rumour model (read by simulate_rumour_spread).
model_parameters = dict(
    # when someone hears the rumour for the first time, 20% of the time they will believe it
    ignorant_believe_probability=0.2,

    # when a spreader interacts with another spreader, 5% of the time the other spreader has a
    # different take on the rumour and the original spreader becomes disillusioned
    spreader_disagreement_probability=0.05,

    # when a spreader interacts with a stifler, 10% of the time the spreader becomes
    # disillusioned by the stifler
    spreader_disillusionment_probability=0.1,

    # at each timestep, a spreader has a 30% chance of forgetting or losing interest in the rumour
    spreader_forget_probability=0.3,

    # per-timestep chance that a stifler forgets the rumour entirely; currently disabled (0),
    # the alternative setting of 5% is kept below for reference
    # stifler_forget_probability=0.05,
    stifler_forget_probability=0,
)

In [8]:
# Run the main batch of 500 simulations without drawing; the START/DONE
# timestamps bracket the run so its duration can be read off the output.
print(datetime.now(), "START")
sims = run_simulations(g, model_parameters, num_sims=500)
print(datetime.now(), "DONE")

# Pickle sim data to a file
# (the Analysis section reloads this file instead of relying on `sims` in memory)
with open("sim_output/sims_test.dat", "wb") as f:
    pickle.dump(sims, f)

2022-04-23 18:51:43.777682 START


NameError: name 'g' is not defined

In [9]:
# Render two demonstration runs as PNG frames. The layout computed earlier in the
# notebook is passed in, so the expensive sfdp layout is not recomputed and the
# frames share vertex positions with the closeness and blockmodel figures.
# NOTE: this rebinds `sims` to the two drawn runs.
print(datetime.now(), "START")
sims = run_simulations(g, model_parameters, num_sims=2, draw=True, pos=pos)
print(datetime.now(), "DONE")

2022-04-23 18:51:43.954141 START


NameError: name 'g' is not defined

## Analysis

In [10]:
# Load simulation results file
# NOTE: pickle.load executes code embedded in the file; only load files that
# were produced by this notebook.
with open("sim_output/sims_test.dat", "rb") as f:
    sims = pickle.load(f)

print(len(sims), "simulations loaded")

# Longest run, used below to size the x axis of the time-series plots.
longest_sim = max(sims, key=lambda sim: sim.time_step)
max_time_step = longest_sim.time_step
print("max time steps", max_time_step)

FileNotFoundError: [Errno 2] No such file or directory: 'sim_output/sims_test.dat'

In [11]:
# One translucent line per simulation: fraction of vertices that are not
# IGNORANT at each time step.
fig, ax = plt.subplots(figsize=(15, 12))

for sim in sims:
    ax.plot(sim.rumor_size, alpha=0.1, color='b')

ax.set_ylim(0, 1)
ax.set_xticks(range(max_time_step + 2))
ax.set_title(f'Rumor size over time, {len(sims)} simulations')
ax.set_xlabel('timestep ($t$)')
ax.set_ylabel('total rumor size')
fig.savefig('sim_output/rumor_size_over_time.png')
plt.show()

NameError: name 'sims' is not defined

<Figure size 1080x864 with 0 Axes>

In [12]:
# Same time-series plot, restricted to runs whose peak rumour size exceeds 0.4.
fig, ax = plt.subplots(figsize=(15, 12))

large_sims = [sim for sim in sims if max(sim.rumor_size) > 0.4]
for sim in large_sims:
    ax.plot(sim.rumor_size, alpha=0.1, color='b')

ax.set_ylim(0, 1)
ax.set_xticks(range(max_time_step + 2))
ax.set_title(f'Rumor size over time, all {len(large_sims)} (of {len(sims)}) simulations that reach 40%+ of the population')
ax.set_xlabel('timestep ($t$)')
ax.set_ylabel('total rumor size')
fig.savefig('sim_output/rumor_size_over_time_large_sims_only.png')

plt.show()

NameError: name 'sims' is not defined

<Figure size 1080x864 with 0 Axes>

In [13]:
# Histogram of each run's rumour size averaged over its time steps.
fig, ax = plt.subplots(figsize=(15, 12))

average_rumour_sizes = [sum(sim.rumor_size) / len(sim.rumor_size) for sim in sims]
ax.hist(average_rumour_sizes, bins=30)

ax.set_title('Distribution of average rumour sizes')
ax.set_xlabel('average rumour size')
ax.set_ylabel('frequency')
fig.savefig('sim_output/rumour_size_distribution.png')
plt.show()

NameError: name 'sims' is not defined

<Figure size 1080x864 with 0 Axes>

In [14]:
# Scatter: degree of the first spreader against the number of time steps in
# which the rumour size was above 0.5.
fig, ax = plt.subplots(figsize=(15, 12))

first_spreader_degrees = [sim.first_spreader_degree for sim in sims]
large_rumour_days = [sum(1 for s in sim.rumor_size if s > 0.5) for sim in sims]
ax.scatter(first_spreader_degrees, large_rumour_days, alpha=0.3)

ax.set_title('Degree of first spreader vs. number of days rumour size is 50%+')
ax.set_xlabel('degree of first spreader')
ax.set_ylabel('days rumour size is 50%+')
fig.savefig('sim_output/first_spreader_degree_vs_large_rumour_days.png')
plt.show()

NameError: name 'sims' is not defined

<Figure size 1080x864 with 0 Axes>

In [15]:
# Scatter: degree of the first spreader against the run's average rumour size.
fig, ax = plt.subplots(figsize=(15, 12))

first_spreader_degrees = [sim.first_spreader_degree for sim in sims]
average_rumour_sizes = [sum(sim.rumor_size) / len(sim.rumor_size) for sim in sims]
ax.scatter(first_spreader_degrees, average_rumour_sizes, alpha=0.3)

ax.set_title('Degree of first spreader vs. average rumour size')
ax.set_xlabel('degree of first spreader')
ax.set_ylabel('average rumour size')
fig.savefig('sim_output/first_spreader_degree_vs_rumour_size.png')
plt.show()

NameError: name 'sims' is not defined

<Figure size 1080x864 with 0 Axes>