## Load libraries and relevant data

In [1]:
import networkx as nx
import pickle
import itertools
import numpy as np
import pandas as pd


from tqdm import trange, tqdm

In [2]:
# Community ids to analyse; these are matched against the 'modularity_class'
# column of com_data by get_com_nodes.
clusters = [13,48,6,49,104]
# Human-readable names, index-parallel to `clusters` (labels[n] names clusters[n]).
labels = ["Democrats","Republicans","Unorthodox","Public Health","Antivaxxers"]

In [3]:
# Node-level table; the 'Id' and 'modularity_class' columns are the ones read below.
com_data = pd.read_csv("PT-pruned-louvain.gexf.csv")
# One row per (retweeted community, retweeting community) pair with
# all / before / after interaction counts.
data = pd.read_csv('community_engagement2.csv')

In [4]:
def get_com_nodes(com):
    """Return the node Ids that belong to one community.

    Parameters
    ----------
    com
        Value compared against the 'modularity_class' column of `com_data`.

    Returns
    -------
    list
        The 'Id' value of every matching row, in row order.
    """
    in_community = com_data['modularity_class'] == com
    member_ids = com_data[in_community]['Id']
    # tqdm only wraps the iteration to display a progress bar.
    return [node_id for node_id in tqdm(member_ids)]

In [5]:
# Sanity check: number of nodes in community 13 (the first entry of `clusters`).
len(get_com_nodes(13))

100%|██████████| 91792/91792 [00:00<00:00, 2788870.36it/s]


91792

In [6]:
# Inspect the engagement table loaded from community_engagement2.csv.
data

Unnamed: 0,Retweeted Community Id,Retweeted Label,Retweeting Community Id,Retweeting Label,All Interaction,Before Interaction,After Interaction,Rate_Increase
0,13,Democrats,13,Democrats,826825,187173,639652,3.417437
1,13,Democrats,48,Republicans,22178,5401,16777,3.106277
2,13,Democrats,6,Unorthodox,32809,6443,26366,4.092193
3,13,Democrats,49,Public Health,51264,10914,40350,3.697086
4,13,Democrats,104,Antivaxxers,9257,2111,7146,3.385126
5,48,Republicans,13,Democrats,10982,1989,8993,4.521368
6,48,Republicans,48,Republicans,1446344,147717,1298627,8.791317
7,48,Republicans,6,Unorthodox,8874,874,8000,9.153318
8,48,Republicans,49,Public Health,3456,528,2928,5.545455
9,48,Republicans,104,Antivaxxers,158507,14981,143526,9.580535


In [7]:
# Scratch check of the matrix layout: fill a single cell by hand.
df = pd.DataFrame(columns = labels, index = labels)
# Row 4 of the rows where labels[0] is the retweeted community, column position 4
# of `data` (the same position get_interactions documents as "All interaction").
eng = data[data['Retweeted Label']==labels[0]].iloc[4, 4]
# One positional write instead of chained indexing (df[col][row] = ...), which
# assigns into a temporary and does nothing under pandas Copy-on-Write.
df.iloc[4, 0] = eng
df

Unnamed: 0,Democrats,Republicans,Unorthodox,Public Health,Antivaxxers
Democrats,,,,,
Republicans,,,,,
Unorthodox,,,,,
Public Health,,,,,
Antivaxxers,9257.0,,,,


In [23]:
# Column positions in `data` -- 4: all interactions, 5: before, 6: after
# Rows of the result are the retweeting community
# Columns of the result are the retweeted community
def get_interactions(k):
    """Build a labels x labels matrix of interaction counts.

    Parameters
    ----------
    k : int
        Positional column index into `data` (4: all, 5: before, 6: after).

    Returns
    -------
    pandas.DataFrame
        Indexed by retweeting label (rows) and retweeted label (columns).

    Notes
    -----
    The value for row position ``i`` is taken from the ``i``-th row of `data`
    whose 'Retweeted Label' matches the column, so the rows of `data` for each
    retweeted label must appear in the same order as `labels`.
    """
    df = pd.DataFrame(columns=labels, index=labels)
    for j, retweeted in enumerate(labels):  # j is retweeted, i is retweeting
        # Filter once per retweeted community rather than once per cell.
        retweeted_rows = data[data['Retweeted Label'] == retweeted]
        for i in range(len(labels)):
            # Single positional read/write: avoids chained assignment
            # (df[col][i] = ...), which is a silent no-op under Copy-on-Write,
            # and the deprecated positional fallback of Series[int].
            df.iat[i, j] = retweeted_rows.iloc[i, k]
    return df

In [28]:
# Column position 5 is the "before" count and 6 the "after" count
# (see the comment on get_interactions).
df1 = get_interactions(5)
df2 = get_interactions(6)
# index=True keeps the retweeting labels as the first CSV column.
df1.to_csv("before_engagement.csv", index=True)
df2.to_csv("after_engagement.csv", index=True)

In [25]:
# Spot check: "before" count with Democrats as the retweeted column and
# Antivaxxers as the retweeting row.
get_interactions(5)['Democrats']['Antivaxxers']

2111

## Generate Community Networks

In [11]:
def create_empty():
    """Create a directed graph with one node per community and no edges.

    Each node is keyed by its cluster id and carries two attributes:
    'label' (the entry of `labels` at the same position as the cluster in
    `clusters`) and 'size' (the number of Ids returned by `get_com_nodes`).

    Returns
    -------
    networkx.DiGraph
    """
    graph = nx.DiGraph()
    for position, cluster_id in enumerate(clusters):
        # Register the node with a placeholder size, then fill in its attributes.
        graph.add_node(cluster_id, size=0)
        node_attrs = graph.nodes[cluster_id]
        node_attrs['label'] = labels[position]
        node_attrs['size'] = len(get_com_nodes(cluster_id))
    return graph

In [16]:
def add_edges(G,i): # 4: All interaction, 5: before, 6:after
    """Add a weighted edge between every ordered pair of communities.

    Parameters
    ----------
    G : networkx graph
        Graph to modify in place; its nodes are expected to be the ids in `clusters`.
    i : int
        Column position forwarded to `get_interactions` (4: all, 5: before, 6: after).

    Returns
    -------
    The same graph object `G`, with an edge retweeting -> retweeted for every
    pair (self-loops included) whose 'weight' is the matching matrix entry.
    """
    # Build the interaction matrix once instead of once per pair.
    interactions = get_interactions(i)
    for retweeting_id, retweeting_label in zip(clusters, labels):
        for retweeted_id, retweeted_label in zip(clusters, labels):
            # Matrix rows are retweeting labels, columns are retweeted labels.
            G.add_edge(
                retweeting_id,
                retweeted_id,
                weight=interactions.loc[retweeting_label, retweeted_label],
            )
    return G

In [12]:
# Base graph: one node per community (with label and size), no edges yet.
G=create_empty()

100%|██████████| 91792/91792 [00:00<00:00, 2704721.30it/s]
100%|██████████| 70247/70247 [00:00<00:00, 2855731.80it/s]
100%|██████████| 60027/60027 [00:00<00:00, 2735843.68it/s]
100%|██████████| 49701/49701 [00:00<00:00, 2618923.88it/s]
100%|██████████| 29018/29018 [00:00<00:00, 2636819.48it/s]


In [17]:
# Weight every edge with column position 4 (all interactions) and export.
# add_edges modifies G in place and returns the same object.
G = add_edges(G,4)
nx.write_gexf(G,'All_Interactions.gexf')

In [18]:
# Weight every edge with column position 5 (before) and export.
# add_edges modifies G in place and returns the same object.
G = add_edges(G,5)
nx.write_gexf(G,'Before_Interactions.gexf')

In [19]:
# Weight every edge with column position 6 (after) and export.
# add_edges modifies G in place and returns the same object.
G = add_edges(G,6)
nx.write_gexf(G,'After_Interactions.gexf')