<a id='top'></a>
# Extract Data for Final Plots of the Paper
This notebook creates the CSV files needed to generate the plots presented in the paper.

Data files available:

1. [Followers](#followers)
1. [Follow-Back](#follow_back)
1. [Bot score of connections](#botscore)
1. [Ego Networks](#ego)
1. [Exposure to echo chamber](#echo_chamber)
1. [Exposure to low credibility content](#misinformation)
1. [Political Valence and Algorithmic Bias](#political_valence)

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(1, '../exps/')
import posgres_helper as db_helper
sys.path.insert(1, '../metric/')
import time_series_scores as ts_helper
import plot_helper as plt_helper
from collections import OrderedDict
import pandas as pd
import numpy as np
from datetime import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import dates
import glob
import networkx as nx
import json
import hashlib
%matplotlib inline


In [2]:
# Open the database connection that the query cells below pass to
# db_helper.getDataframeFromQuery.
conn = db_helper.connect_db()

### Personalized data - NEED INPUT

In [3]:
# Glob pattern locating the raw ego-network GEXF files read in the
# "Ego Networks" section.
GLOB_TO_EGO_NET = "../data/to_delete/ego_network_graph/*.gexf"

# Drifter screen name -> anonymized label used in every exported file.
# Replace each "<DRIFTER_SCREENAME_n>" placeholder with the real screen name.
BOTS_RENAME = OrderedDict([
    ("<DRIFTER_SCREENAME_1>", "bot1"),
    ("<DRIFTER_SCREENAME_2>", "bot2"),
    ("<DRIFTER_SCREENAME_3>", "bot3"),
    ("<DRIFTER_SCREENAME_4>", "bot4"),
    ("<DRIFTER_SCREENAME_5>", "bot5"),
    ("<DRIFTER_SCREENAME_6>", "bot6"),
    ("<DRIFTER_SCREENAME_7>", "bot7"),
    ("<DRIFTER_SCREENAME_8>", "bot8"),
    ("<DRIFTER_SCREENAME_9>", "bot9"),
    ("<DRIFTER_SCREENAME_10>", "bot10"),
    ("<DRIFTER_SCREENAME_11>", "bot11"),
    ("<DRIFTER_SCREENAME_12>", "bot12"),
    ("<DRIFTER_SCREENAME_13>", "bot13"),
    ("<DRIFTER_SCREENAME_14>", "bot14"),
    ("<DRIFTER_SCREENAME_15>", "bot15"),
])

# Seed account -> the three drifters initialised from it (same placeholders as above).
INIT_SEED_MAP = dict(
    thenation=['<DRIFTER_SCREENAME_1>', '<DRIFTER_SCREENAME_2>', '<DRIFTER_SCREENAME_3>'],
    washingtonpost=['<DRIFTER_SCREENAME_4>', '<DRIFTER_SCREENAME_5>', '<DRIFTER_SCREENAME_6>'],
    USATODAY=['<DRIFTER_SCREENAME_7>', '<DRIFTER_SCREENAME_8>', '<DRIFTER_SCREENAME_9>'],
    WSJ=['<DRIFTER_SCREENAME_10>', '<DRIFTER_SCREENAME_11>', '<DRIFTER_SCREENAME_12>'],
    BreitbartNews=['<DRIFTER_SCREENAME_13>', '<DRIFTER_SCREENAME_14>', '<DRIFTER_SCREENAME_15>'],
)

[back to top](#top) <a id='followers'></a>
## Followers           


In [4]:
# Per drifter and per calendar day, count the distinct connection ids recorded
# in `connections` with conn_type = true (the value mapped to "follower" in the
# Follow-back section) and no_connctions = false.
# NOTE(review): `no_connctions` is the column name as spelled in the query;
# presumably it flags snapshots with no connections -- confirm against the schema.
followers_query = """
select b.screen_name, c.date, 
COUNT(c.t_usr_id_conn) from (
   select distinct date_trunc('day', time) as date,
                   t_usr_id_ego,
                   conn_type, no_connctions,
                   t_usr_id_conn from connections
) as c
inner join bot b on b.twitter_user_id= c.t_usr_id_ego
where c.conn_type is true and c.no_connctions is false 
group by c.t_usr_id_ego, b.screen_name, date
order by c.t_usr_id_ego, date;
"""

In [5]:
# Run the followers query and reshape it to one row per day, one column per drifter.
df = db_helper.getDataframeFromQuery(conn, followers_query).pivot(
    index="date",
    columns="screen_name",
    values="count",
)
# anonymize bots: swap each real screen name for its mask; unknown names pass through
df.columns = [BOTS_RENAME.get(screen_name, screen_name) for screen_name in df.columns]

In [6]:
# Summarise the pivoted follower counts, then export them for the followers plot.
df.info()
# Disabled alternative: export only the rows dated before 2019-12-02.
# df[df.index<dt(2019,12,2)].to_csv("followers_data.csv")
df.to_csv("followers_data.csv")
df.head(3)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 168 entries, 2019-07-13 to 2020-01-14
Data columns (total 15 columns):
bot11    163 non-null float64
bot14    163 non-null float64
bot12    163 non-null float64
bot13    163 non-null float64
bot10    163 non-null float64
bot7     164 non-null float64
bot2     165 non-null float64
bot5     163 non-null float64
bot6     149 non-null float64
bot15    162 non-null float64
bot1     161 non-null float64
bot4     163 non-null float64
bot3     163 non-null float64
bot8     163 non-null float64
bot9     164 non-null float64
dtypes: float64(15)
memory usage: 21.0 KB


Unnamed: 0_level_0,bot11,bot14,bot12,bot13,bot10,bot7,bot2,bot5,bot6,bot15,bot1,bot4,bot3,bot8,bot9
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-07-13,1.0,,1.0,3.0,,1.0,,,,1.0,,2.0,1.0,1.0,
2019-07-14,,1.0,,,1.0,,1.0,,,,,,,,1.0
2019-07-15,,,,,,,2.0,,,,,,,,


[back to top](#top) <a id='follow_back'></a>
## Follow-back           


In [None]:
# Connections that carry a tweet update time and were recorded before the
# cut-off date, tagged with the drifter (ego) screen name.
connections_sql = """
    select 
        b.screen_name as t_usr_id_ego, t_usr_id_conn, conn_type, time::TIMESTAMP::DATE 
    from connections as c
    inner join bot b on b.twitter_user_id= c.t_usr_id_ego
    where 
        conn_tweet_update_time is not null 
        and time < DATE('{}')
    order by time;
""".format("2019/12/2")
connections_df = db_helper.getDataframeFromQuery(conn, connections_sql)
connections_df["time"] = pd.to_datetime(connections_df["time"])
# anonymize the ego column; names without a mask are kept unchanged
connections_df["t_usr_id_ego"] = connections_df["t_usr_id_ego"].apply(
    lambda screen_name: BOTS_RENAME.get(screen_name, screen_name)
)
# one row per (ego, relation type, day) holding the array of distinct connection ids
connections_df = (
    connections_df
    .groupby(["t_usr_id_ego", "conn_type", "time"])["t_usr_id_conn"]
    .unique()
    .reset_index()
)

In [None]:
# Turn the per-day connection arrays into a normalised overlap matrix between
# every (relation type, ego) pair.
# NOTE: this cell rebinds `connections_df` several times, so it is not
# idempotent -- re-run the previous cell before re-running this one.

# conn_type is a boolean flag: truthy -> "follower", falsy -> "friend".
connections_df.conn_type = connections_df.conn_type.apply(
    lambda x: "follower" if x else "friend"
)
# For each (ego, relation) merge the daily arrays into one array of distinct ids,
# expand it to one id per row, and name the resulting column "user_id".
connections_df = connections_df.groupby(
    ["t_usr_id_ego","conn_type"]
).t_usr_id_conn.apply(
    lambda conn_ids: pd.Series(np.concatenate(conn_ids.values)).unique()
).apply(pd.Series).stack().rename("user_id").to_frame()#.transpose()
# Drop the positional level introduced by stack(); index is now (ego, relation).
connections_df = connections_df.droplevel(2)
# Indicator column: 1 where the (ego, relation) pair has this user_id.
connections_df["has_conn"]=1
# Rows become user ids, columns become (has_conn, relation, ego); missing pairs -> 0.
connections_df = connections_df.set_index("user_id",append=True).unstack(level=[1,0]).fillna(0)

# Count the accounts shared by every pair of columns (X^T . X) and divide each
# column by that column's total number of accounts; then strip the constant
# "has_conn" level from both axes.
connections_df = connections_df.T.dot(connections_df).div(connections_df.sum()).droplevel(0).droplevel(0,axis=1)
connections_df.head()

In [None]:
# Build the follow-back table: for each label, a per-seed mean and std of the
# normalised overlap computed in the previous cell, exported to CSV.
# NOTE(review): this uses plt_helper.INIT_SEED_MAP, not the INIT_SEED_MAP defined
# in this notebook -- presumably it lists the masked bot labels; confirm.
res=[]
for label,rel in zip(
    ["follow_back","friend_follow"],
    [("follower","friend"),
    ("friend","follower")]
):
    # rel=("friend","follower")
    # `rel` is passed to .loc as (row key, column key): rows of the first relation
    # type, columns of the second.
    relation_overlap={}
    for k,v in plt_helper.INIT_SEED_MAP.items():
    #         print(k,v)
        # Restrict rows to the bots of seed k, blank out values equal to 1, and take
        # the NaN-aware mean of each column (one value per ego column).
        relation_overlap[plt_helper.INIT_SEED_RENAME[k]] = connections_df.loc[rel].loc[v].replace(1,np.nan).apply(np.nanmean)

    # Second pass: aggregate the per-ego values over the bots of each seed.
    relation_overlap2={}
    relation_overlap3={}
    for k,v in plt_helper.INIT_SEED_MAP.items():
        relation_overlap2[plt_helper.INIT_SEED_RENAME[k]] = pd.DataFrame(relation_overlap).loc[v].apply(np.nanmean)
        relation_overlap3[plt_helper.INIT_SEED_RENAME[k]] = pd.DataFrame(relation_overlap).loc[v].apply(np.nanstd)

    # Keep only the within-seed entry (seed k against seed k) of each aggregate.
    mean=pd.Series()
    std = pd.Series()
    for k,v in relation_overlap2.items():
        mean.loc[k] = v[k]
        std.loc[k] = relation_overlap3[k][k]

    # Two columns ("mean", "std") nested under the current label.
    tmp2 = pd.DataFrame([mean,std]).transpose().rename(columns={0:"mean",1:"std"})
    tmp2.columns = pd.MultiIndex.from_product([[label],tmp2.columns])
    res.append(tmp2)

# Side-by-side table for both labels, written to disk and displayed.
res = pd.concat(res,axis=1)
res.to_csv("follow_back_data.csv")
res

[back to top](#top) 
<a id='botscore'></a>
## Bot Score           
To compute the bot score summary, we need to read a CSV file containing a list of bot scores for each friend and follower of the seeds and drifter bots.

In this exercise, our input file is located in `data/connections_botscores.csv`

In [7]:
# Bot scores of the friends and followers of every account. Columns form a
# two-level header: (account, relationship).
connections_overall_botscores = pd.read_csv(
    "connections_botscores.csv",
    index_col=0,
    header=[0, 1],
    skipinitialspace=True
)
connections_overall_botscores.info()

# Display labels of the seeds, in the order used for the summary columns.
seed_labels = list(plt_helper.INIT_SEED_RENAME.values())
# Seed label -> plot colour (independent of the relationship, so built once).
colors_for_seeds = {
    plt_helper.INIT_SEED_RENAME.get(seed): plt_helper.ACCOUNT_COLORS.get(seed)
    for seed in plt_helper.INIT_SEED_MAP
}
# Put the relationship on the outer column level so it can be selected directly.
scores_by_relationship = connections_overall_botscores.swaplevel(axis=1)

conn_botscore = []
for relationship in ["friend","follower"]:
    # Fresh accumulator per relationship. The previous version reused one
    # variable that had been rebound from a dict to a DataFrame, which only
    # worked because every seed had the same number of bots.
    scores_per_seed = {}
    for seed in plt_helper.INIT_SEED_MAP.keys():
        # Pool the scores of all bots of this seed into one flat array.
        pooled = np.concatenate(
            scores_by_relationship[relationship][plt_helper.INIT_SEED_MAP[seed]].values
        )
        # Wrapping in a Series lets seeds with different bot counts align
        # (shorter columns are padded with NaN, which mean/std/sem skip).
        scores_per_seed[plt_helper.INIT_SEED_RENAME.get(seed)] = pd.Series(pooled)

    overall_botscore = pd.DataFrame(scores_per_seed)[seed_labels]
    tmp = overall_botscore.apply(["mean","std","sem"]).transpose()
    tmp["relationship"] = relationship
    # Re-index the summary by (relationship, seed label).
    conn_botscore.append(tmp.reset_index().groupby(["relationship","index"]).agg("mean"))

connections_overall_botscores.head(3)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1279 entries, 0 to 1278
Data columns (total 40 columns):
(BreitbartNews, friend)       119 non-null float64
(BreitbartNews, follower)     396 non-null float64
(bot11, friend)               287 non-null float64
(bot11, follower)             115 non-null float64
(bot14, friend)               308 non-null float64
(bot14, follower)             155 non-null float64
(bot12, friend)               263 non-null float64
(bot12, follower)             117 non-null float64
(bot13, friend)               265 non-null float64
(bot13, follower)             155 non-null float64
(bot10, friend)               250 non-null float64
(bot10, follower)             65 non-null float64
(USATODAY, friend)            325 non-null float64
(USATODAY, follower)          413 non-null float64
(WSJ, friend)                 366 non-null float64
(WSJ, follower)               445 non-null float64
(bot7, friend)                237 non-null float64
(bot7, follower)           

t_usr_id_ego,BreitbartNews,BreitbartNews,bot11,bot11,bot14,bot14,bot12,bot12,bot13,bot13,...,bot4,bot4,bot3,bot3,bot8,bot8,washingtonpost,washingtonpost,bot9,bot9
conn_type,friend,follower,friend,follower,friend,follower,friend,follower,friend,follower,...,friend,follower,friend,follower,friend,follower,friend,follower,friend,follower
0,0.049184,,0.067835,0.813872,0.421517,0.70642,0.193749,0.067835,0.506192,0.590513,...,0.025472,,,0.569732,0.025472,,0.027678,,,
1,0.027678,,0.193749,0.125889,0.03266,0.421517,0.085924,0.236884,0.030069,0.108277,...,0.079455,0.116795,,0.484865,0.023437,,,,0.025472,
2,0.045344,,0.023437,0.236884,0.421517,0.193749,0.590513,0.569732,0.341219,0.221806,...,0.067835,0.341219,0.057807,,0.669827,0.380552,,,0.035466,


In [8]:
# Stack the per-relationship summaries (friend, then follower), export them for
# the bot-score plot, and display the table.
botscore_summary = pd.concat(conn_botscore)
botscore_summary.to_csv("bot_score_summary.csv")
botscore_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,sem
relationship,index,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
friend,Center,0.169657,0.249306,0.009116
friend,Center-left,0.203355,0.254144,0.009362
friend,Center-right,0.256564,0.26223,0.009271
friend,Left,0.268681,0.271701,0.009817
friend,Right,0.315996,0.261187,0.00882
follower,Center,0.505729,0.319875,0.030923
follower,Center-left,0.463571,0.285241,0.022206
follower,Center-right,0.436814,0.263265,0.015276
follower,Left,0.463661,0.265377,0.017059
follower,Right,0.43664,0.238697,0.011717


[back to top](#top) 
<a id='ego'></a>
## Creating Anonymized Ego Networks
To create the anonymized ego networks with their respective link and hashtag scores, we need the real (non-hashed) ego networks as input. The method below will:

1. access the database to compute the link and hashtag scores.
1. anonymize the nodes.
1. save the anonymized ego networks.

In [9]:
def add_edges_to_bot(g, bot_id="bot"):
    """Connect ``bot_id`` to every other node of ``g`` (in place) and return ``g``.

    The node list is snapshotted before any edge is added, so a ``bot_id`` that
    is not yet in the graph is created by the first ``add_edge`` call without
    disturbing the iteration.
    """
    other_nodes = [node for node in g.nodes() if node != bot_id]
    for node in other_nodes:
        g.add_edge(bot_id, node)
    return g

def hash_user_id(twitter_user_id):
    """Map a Twitter user id to a pseudonymous integer in ``[0, 10**8)``.

    The id is converted to text first, so integer ids (e.g. as returned by a
    database driver) hash to the same value as their string form instead of
    raising ``AttributeError``.

    NOTE(review): the result is an unsalted SHA-1 reduced modulo 10**8. Distinct
    ids can collide (colliding nodes would be merged when a graph is
    relabelled), and anyone holding candidate ids can recompute the mapping.
    Kept unchanged so previously exported files stay comparable.
    """
    digest = hashlib.sha1(str(twitter_user_id).encode('utf-8')).hexdigest()
    return int(digest, 16) % (10 ** 8)

def clean_node_attributes(graph):
    """Return a copy of ``graph`` whose nodes carry only non-missing attributes.

    The copy is rebuilt from the edge list of ``graph`` (edge attributes are not
    carried over). Every node is then added with the attributes that have a
    non-NaN value for that node, so the writers never see placeholder NaNs.
    """
    node_data = dict(graph.nodes(data=True))
    # One row per node, one column per attribute; absent attributes become NaN.
    attr_table = pd.DataFrame(
        list(node_data.values()),
        index=list(node_data.keys()),
    ).dropna(axis="columns", how="all")

    clean_graph = nx.from_edgelist(graph.edges)
    for node_id, attrs in attr_table.iterrows():
        clean_graph.add_node(node_id, **attrs.dropna().to_dict())
    return clean_graph

def anonymize_graph(graph, bot_labels=None):
    """Anonymize ``graph`` in place: hash node ids and scrub node labels.

    Node ids are replaced by ``hash_user_id(id)``. A node keeps its ``label``
    only when that label is exactly one of the drifter masks; every other node
    (including nodes without a label) gets its hashed id as label.

    The previous check was ``"bot" in label``, a substring test that also
    preserved the real screen name of any account whose name merely contains
    "bot", defeating the anonymization.

    Parameters
    ----------
    graph : networkx graph, modified in place.
    bot_labels : iterable of str, optional
        Labels allowed to survive. Defaults to the masks in ``BOTS_RENAME``.
    """
    if bot_labels is None:
        bot_labels = BOTS_RENAME.values()
    allowed_labels = set(bot_labels)

    nx.relabel_nodes(graph, hash_user_id, copy=False)
    nx.set_node_attributes(
        graph,
        values={
            node_id: (attrs["label"] if attrs.get("label") in allowed_labels else node_id)
            for node_id, attrs in graph.nodes(data=True)
        },
        name="label"
    )


In [10]:
# Profile table of the drifters: real screen name, Twitter id, seed account.
bots = db_helper.getDataframeFromQuery(
    db_helper.connect_db(),
    "select screen_name, twitter_user_id, seed_screen_name from bot;",
)
# Replace the seed screen name by its display label (None when the seed is unknown).
bots["seed_screen_name"] = bots["seed_screen_name"].apply(plt_helper.INIT_SEED_RENAME.get)
# Anonymized label of each drifter (None when the screen name has no mask).
bots["mask_name"] = bots["screen_name"].apply(BOTS_RENAME.get)

In [11]:
# Merge every drifter's ego network into one graph (`total_graph`).
total_graph = nx.Graph()
for filename in glob.glob(GLOB_TO_EGO_NET):
    # File names are expected to look like "<screen_name>_noBotNoHash.gexf".
    bot_name = filename.split("/")[-1].replace("_noBotNoHash.gexf","")
    # NOTE(review): `seed` is not used further in this cell.
    seed = plt_helper.INIT_SEED_RENAME.get(plt_helper.BOT_SEED_MAP.get(BOTS_RENAME.get(bot_name)))
    graph = nx.read_gexf(filename)
    
    ## adding bot to the network
    # .iloc[0] raises IndexError when the screen name is missing from `bots`.
    bot_profile = bots[bots.screen_name==bot_name].iloc[0]
    # Flag every node read from the file with an attribute named after the bot's mask.
    nx.set_node_attributes(graph,values=True,name=bot_profile.mask_name)
    # The bot node is added after the flagging above, so it does not carry the flag itself.
    graph.add_node(
        bot_profile.twitter_user_id, 
        seed=bot_profile.seed_screen_name, 
        label=bot_profile.mask_name,
#         **{bot_profile.mask_name:True}
    )
    # Link the bot to every other node of its ego network.
    add_edges_to_bot(graph, bot_id=bot_profile.twitter_user_id)
    
    # Union of nodes and edges with the graphs merged so far.
    total_graph = nx.compose(total_graph, graph)
total_graph.number_of_nodes()

1476

In [12]:
# Per account of the merged ego network: mean URL score, mean hashtag score and
# total low-credibility score over that account's tweets.
# NOTE(review): the IN (...) list is the repr of a Python tuple of node ids. It
# is only valid SQL for two or more ids (a one-element tuple renders as
# "('x',)"), and the ids are interpolated rather than passed as parameters.
homo_df = db_helper.getDataframeFromQuery(
    db_helper.connect_db(),
    """
select 
    user_id,
    avg(url_score) as url_score,
    avg(hashtag_score) as hashtag_score,
    sum(low_cred_score) as low_cred_score
from 
    tweet 
where 
    user_id in {}
group by
    user_id
;""".format(tuple(total_graph.nodes))
)

homo_df = homo_df.set_index("user_id").apply(pd.to_numeric)
# adjust hashtag score to center at zero
# Vectorised subtraction: missing scores stay NaN, and a score of exactly 0.0
# is shifted like any other value. The previous `x - c if x else x` skipped
# zeros because 0.0 is falsy.
homo_df["hashtag_score"] = homo_df["hashtag_score"] - plt_helper.USATODAY_HASHTAG_SCORE
homo_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1383 entries, 1000599852936396800 to 99806132
Data columns (total 3 columns):
url_score         919 non-null float64
hashtag_score     1069 non-null float64
low_cred_score    1175 non-null float64
dtypes: float64(3)
memory usage: 43.2+ KB


In [13]:
# Attach the per-account scores to the matching nodes of the merged graph.
# add_node on an existing node only updates its attributes.
for k,v in homo_df.to_dict(orient="index").items():
    total_graph.add_node(k, **v)

# Rebuild homo_df from the node attributes BEFORE anonymizing, so it stays
# indexed by the original node ids; the per-bot export cell below looks nodes up in it.
homo_df = pd.DataFrame.from_dict(dict(total_graph.nodes(data=True)), orient='index')

# Hash node ids / scrub labels in place, then write the graph without NaN attributes.
# NOTE(review): the ./ego_networks/ directory presumably has to exist already.
anonymize_graph(total_graph)
nx.write_graphml(clean_node_attributes(total_graph),"./ego_networks/homogeneity_network.graphml")

In [14]:
# Export one anonymized, score-annotated ego network per drifter.
# The loading steps repeat those used to build `total_graph`.
for filename in glob.glob(GLOB_TO_EGO_NET):
    # File names are expected to look like "<screen_name>_noBotNoHash.gexf".
    bot_name = filename.split("/")[-1].replace("_noBotNoHash.gexf","")
    # NOTE(review): `seed` is not used further in this cell.
    seed = plt_helper.INIT_SEED_RENAME.get(plt_helper.BOT_SEED_MAP.get(BOTS_RENAME.get(bot_name)))
    graph = nx.read_gexf(filename)
    
    ## adding bot to the network
    bot_profile = bots[bots.screen_name==bot_name].iloc[0]
    # Flag every node read from the file with an attribute named after the bot's mask.
    nx.set_node_attributes(graph,values=True,name=bot_profile.mask_name)
    graph.add_node(
        bot_profile.twitter_user_id, 
        seed=bot_profile.seed_screen_name, 
        label=bot_profile.mask_name,
#         **{bot_profile.mask_name:True}
    )
    add_edges_to_bot(graph, bot_id=bot_profile.twitter_user_id)
    
    # adding scores to nodes
    # homo_df is the frame rebuilt from total_graph's node attributes, indexed by
    # the original (pre-hash) node ids; every column of the row is copied onto the node.
    for node in graph.nodes:
        if node in homo_df.index:
            graph.add_node(node, **homo_df.loc[node].to_dict())
        
    # rename node ids and labels to preserve anonymity
    anonymize_graph(graph)

    # Drop NaN attributes and write one GEXF file named after the bot's mask.
    nx.write_gexf(clean_node_attributes(graph),"./ego_networks/ego_network_with_scores-{}.gexf".format(bot_profile.mask_name))

[back to top](#top) 
<a id='echo_chamber'></a>
## Echo Chambers


In [15]:
# Structural metrics of every anonymized ego network, next to the average of the
# same metrics over randomised graphs that keep the node and edge counts.

# The null model is stochastic: seed it so echo_chamber_data.csv is reproducible.
ECHO_CHAMBER_SEED = 42
N_RANDOM_TRIALS = 30
rng = np.random.RandomState(ECHO_CHAMBER_SEED)

columns = ["trans","density","avg_clus","edges","nodes"]

results = []
# sorted(): glob order is filesystem dependent; a fixed order makes the seeded
# random stream line up with the same file on every run.
for filename in sorted(glob.glob("./ego_networks/ego_network_with_scores-*.gexf")):
    bot_name = filename.split("/")[-1].replace(".gexf","").replace("ego_network_with_scores-","")
    seed = plt_helper.INIT_SEED_RENAME.get(plt_helper.BOT_SEED_MAP.get(bot_name))
    graph = nx.read_gexf(filename)

    bot_res = pd.Series(
        (nx.transitivity(graph),nx.density(graph), nx.average_clustering(graph),len(graph.edges),len(graph.nodes)),
        index = columns
    )
    bot_res["bot"] = bot_name
    bot_res["seed"] = seed

    # creating new graph shuffling the edges
    n,d = zip(*list(graph.degree))

    trial=[]
    for i in range(N_RANDOM_TRIALS):
        # Random simple graph from the observed degree sequence. Collapsing
        # parallel edges and removing self-loops can leave it short of edges.
        graph2 = nx.configuration_model(
            d, create_using=nx.Graph, seed=int(rng.randint(0, 2**31 - 1))
        )
        graph2.remove_edges_from(list(nx.selfloop_edges(graph2)))
        # Top up with random non-existing edges until the edge count matches.
        # `<` (rather than "difference is non-zero") cannot loop forever if
        # the random graph ever had more edges than the original.
        while len(graph2.edges) < len(graph.edges):
            source = pd.Series(list(graph2.nodes)).sample(1, random_state=rng).values[0]
            neighbors = list(nx.neighbors(graph2,source)) + [source]
            candidates = np.setdiff1d(list(graph2.nodes), neighbors)
            if len(candidates) == 0:
                # source is already linked to every other node; draw another one
                continue
            target = pd.Series(candidates).sample(1, random_state=rng).values[0]
            graph2.add_edge(source, target)
        trial.append(
            (nx.transitivity(graph2),nx.density(graph2), nx.average_clustering(graph2),len(graph2.edges),len(graph2.nodes)  )
        )
    # Average of each metric over the random trials, suffixed with "_rand".
    bot_rand_res = pd.Series(
        pd.DataFrame(trial).apply("mean").values,
        index = [f"{c}_rand" for c in columns]
    )

    results.append(pd.concat([bot_res, bot_rand_res]))

In [16]:
# One row per drifter, plus transitivity normalised by the random baseline and
# by the density of the observed network.
final_res = pd.DataFrame(results).assign(
    trans_rand_norm=lambda frame: frame["trans"] / frame["trans_rand"],
    trans_density_norm=lambda frame: frame["trans"] / frame["density"],
)
final_res = final_res[final_res.columns.sort_values()]

# Mean and standard error per seed; metrics as rows, seeds as columns.
final_res_grp = final_res.groupby("seed").agg(["mean","sem"]).transpose()
# Order the seed columns as in plt_helper.ACCOUNT_COLORS, keeping only those present.
ordered_seeds = [c for c in plt_helper.ACCOUNT_COLORS.keys() if c in final_res_grp.columns]
final_res_grp = final_res_grp[ordered_seeds]
final_res_grp

Unnamed: 0,seed,Left,Center-left,Center,Center-right,Right
avg_clus,mean,0.563835,0.548106,0.563015,0.573352,0.577113
avg_clus,sem,0.020577,0.0413,0.008027,0.008723,0.013674
avg_clus_rand,mean,0.213801,0.204955,0.174262,0.273524,0.282133
avg_clus_rand,sem,0.014089,0.01987,0.002916,0.015414,0.012559
density,mean,0.084752,0.073993,0.050825,0.141056,0.149241
density,sem,0.012279,0.023568,0.00314,0.015186,0.011465
density_rand,mean,0.084752,0.073993,0.050825,0.141056,0.149241
density_rand,sem,0.012279,0.023568,0.00314,0.015186,0.011465
edges,mean,428.0,373.666667,256.666667,712.333333,753.666667
edges,sem,62.010752,119.020073,15.857,76.690576,57.897419


In [17]:
# Export the per-seed echo-chamber metrics (mean and sem) for the paper's plot.
final_res_grp.to_csv("echo_chamber_data.csv")

[back to top](#top) <a id='misinformation'></a>
## Low Credibility           


In [18]:
# Observation window: checked_at >= start_date and checked_at < end_date.
start_date = "2019-04-01"
end_date = "2019-12-02"
# SQL template (the two placeholders are the window bounds): distinct
# (bot, seed, tweet, low-credibility score, check date) rows for tweets that
# appeared in a drifter's home timeline.
test="""
SELECT
    DISTINCT b.screen_name, 
    b.seed_screen_name as seed,
    tw.tweet_id,
    tw.low_cred_score,
    DATE(checked_at) checked_at
FROM
    home_timeline ht, home_timeline_tweets ht_tw, tweet tw, bot b
WHERE
    checked_at >= DATE '{}'
    AND checked_at < DATE '{}'
    AND ht.id = ht_tw.htl_id
    AND ht_tw.tw_id = tw.tweet_id
    AND ht.bot_id = b.bot_id
;
"""
low_cred_tw = db_helper.getDataframeFromQuery(conn, test.format(start_date, end_date))
low_cred_tw.checked_at = pd.to_datetime(low_cred_tw.checked_at)
low_cred_tw.low_cred_score = low_cred_tw.low_cred_score.astype("double")
low_cred_tw.info()

# Step 1: per drifter, the sum / mean / count of the scores.
# NOTE(review): agg is applied to every non-key column (tweet_id and checked_at
# too); the displayed result keeps only low_cred_score, which presumably relies
# on pandas dropping columns it cannot aggregate -- confirm on the pandas
# version in use.
low_cred_summary = low_cred_tw.groupby(["screen_name","seed"]).agg(["sum","mean","count"]).reset_index()
# Replace the seed screen name by its display label.
low_cred_summary["seed"] = low_cred_summary.seed.apply(
    plt_helper.INIT_SEED_RENAME.get
)
# Step 2: per seed, the sum / mean / sem across its drifters of each step-1 statistic.
low_cred_summary = low_cred_summary.groupby("seed").agg(["sum","mean","sem"])
low_cred_summary

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 317425 entries, 0 to 317424
Data columns (total 5 columns):
screen_name       317425 non-null object
seed              317425 non-null object
tweet_id          317425 non-null object
low_cred_score    62935 non-null float64
checked_at        317425 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 12.1+ MB


  new_axis = axis.drop(labels, errors=errors)


Unnamed: 0_level_0,low_cred_score,low_cred_score,low_cred_score,low_cred_score,low_cred_score,low_cred_score,low_cred_score,low_cred_score,low_cred_score
Unnamed: 0_level_1,sum,sum,sum,mean,mean,mean,count,count,count
Unnamed: 0_level_2,sum,mean,sem,sum,mean,sem,sum,mean,sem
seed,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
Center,128.0,42.666667,18.800118,0.026115,0.008705,0.003626,13716,4572.0,1036.289696
Center-left,162.0,54.0,43.139309,0.029846,0.009949,0.007425,13625,4541.666667,813.457094
Center-right,987.0,329.0,78.117433,0.292726,0.097575,0.006896,10266,3422.0,924.730772
Left,114.0,38.0,24.54248,0.029712,0.009904,0.008004,17461,5820.333333,1245.954297
Right,1129.0,376.333333,66.237787,0.434573,0.144858,0.0041,7867,2622.333333,511.244995


In [19]:
# Export the per-seed low-credibility summary for the paper's plot.
low_cred_summary.to_csv("low_credibility_summary_data.csv")

[back to top](#top) <a id='political_valence'></a>
## Political Valence and Algorithmic Bias


In [20]:
# Generate the political-valence time series through ts_helper, passing a fresh
# database connection, the seed -> drifters map and the screen name -> mask map
# defined in this notebook.
# NOTE(review): the printed log lists one (score type, bot, series name) line per
# series; presumably the helper writes the corresponding files -- see
# time_series_scores.generate_all_time_series for the outputs.
ts_helper.generate_all_time_series(
    db_conn=db_helper.connect_db(), 
    INIT_SEED_MAP=INIT_SEED_MAP, 
    bots_mask=BOTS_RENAME
)

url bot1 url_thenation_sliced_home_tl
url bot2 url_thenation_sliced_home_tl
url bot3 url_thenation_sliced_home_tl
url bot1 url_thenation_sliced_usr_tl
url bot2 url_thenation_sliced_usr_tl
url bot3 url_thenation_sliced_usr_tl
url bot1 url_thenation_sliced_friend_usr_tl
url bot2 url_thenation_sliced_friend_usr_tl
url bot3 url_thenation_sliced_friend_usr_tl
hashtag bot1 hashtag_thenation_sliced_home_tl
hashtag bot2 hashtag_thenation_sliced_home_tl
hashtag bot3 hashtag_thenation_sliced_home_tl
hashtag bot1 hashtag_thenation_sliced_usr_tl
hashtag bot2 hashtag_thenation_sliced_usr_tl
hashtag bot3 hashtag_thenation_sliced_usr_tl
hashtag bot1 hashtag_thenation_sliced_friend_usr_tl
hashtag bot2 hashtag_thenation_sliced_friend_usr_tl
hashtag bot3 hashtag_thenation_sliced_friend_usr_tl
url bot4 url_washingtonpost_sliced_home_tl
url bot5 url_washingtonpost_sliced_home_tl
url bot6 url_washingtonpost_sliced_home_tl
url bot4 url_washingtonpost_sliced_usr_tl
url bot5 url_washingtonpost_sliced_usr_tl
u