Network Features

In [1]:
import sys
sys.path.append('../scripts')
from utils import *

ROOT PATH: ../


In [2]:
class CascadeAnalyzer(object):
    feature_df = pd.DataFrame()  # output

    def __init__(self):
        self.meta_df = pd.DataFrame()  # labels / key: root_tweet_id
        self.cascades_dict = {}  # key: root_tweet_id, value: Cascade()
        self.retrieve_cascade_labels()
        self.load_cascades()

    def retrieve_cascade_labels(self):
        column_names = ['label', 'tweet_id']
        self.meta_df = pd.read_csv(DATA_PATH + "label.txt", sep=':', names=column_names, converters={'tweet_id': str})
        print("-------------------------------------")
        print(self.meta_df.info())
        print("-------------------------------------" * 2)
        print(self.meta_df.shape, self.meta_df['label'].value_counts().to_dict())
        print("-------------------------------------" * 2)
        print(self.meta_df.head())
        print("-------------------------------------\n")

    def load_cascades(self):
        # TODO: handle pickle data
        # iterate tweet trees
        for index, file in enumerate(os.listdir(DATA_PATH + 'tree_u')):
            if not file.endswith('.txt'):
                print("Unexpected Input File:", file)
                continue
            root_tweet_id = file.replace('.txt', '')  # file_id
            cascade_path = os.path.join(DATA_PATH + 'tree_u', file)
            label = self.meta_df.loc[self.meta_df['tweet_id'] == root_tweet_id, 'label'].item()  # label
            self.cascades_dict[root_tweet_id] = Cascade(root_tweet_id, cascade_path, label)
            print(self.cascades_dict[root_tweet_id])

    # Main Outer loop
    def iterate_cascades(self):
        for index, row in self.meta_df.iterrows():
            tweet_id = row['tweet_id']
            cascade = self.cascades_dict[tweet_id]
            print('#', index, row['tweet_id'], row['label'])
            cascade.calc_structural_features()

    def cascade_to_csv(self):  # CascadeAnalyzer
        ensure_directory(OUT_PATH)
        out_file_name = OUT_PATH + 'structural_analysis_' + time.strftime("%Y%m%d_%H%M%S") + ".csv"
        out_file = open(out_file_name, 'w', encoding='utf-8', newline='')
        self.feature_df.to_csv(out_file, sep=',', index=False)

In [3]:
class Cascade:

    # --------------------------
    #      Initiate Cascade
    # --------------------------
    def __init__(self, root_tweet_id, cascade_path, label=None):
        self.file_id = root_tweet_id  # For label.txt
        self.root_tweet_id = root_tweet_id  # Tweet ID with ROOT Keyword (May updated)
        self.root_user_id = 0
        self.cascade_path = cascade_path
        self.label = label

        # ------------
        # Load Cascade
        # ------------
        self.trace_count = None
        self.src_users = set()
        self.dst_users = set()
        self.retweet_users = set()
        self.reply_users = set()
        self.retweet_count = 0
        self.reply_count = 0
        self.network = nx.DiGraph()
        self.network_features = {}
        self.load_cascade()

        # -----------------
        # Calculate Cascade
        # -----------------
        self.src_user_count = None
        self.dst_user_count = None
        self.avg_depth = 0
        self.max_depth = 0

    def load_cascade(self):
        with open(self.cascade_path, 'r') as file:
            # ---- -----------------
            # Set Root: User, Tweet
            # ---------------------
            for index, line in enumerate(file):
                elem_list = [x.strip() for x in re.split(r"[\'\,\->\[\]]", line.strip()) if x.strip()]
                if elem_list[0] == 'ROOT' and elem_list[1] == 'ROOT':
                    self.root_user_id = elem_list[3]
                    if index != 0:
                        print('ROOT TWEET {} by {} @ line # {}'.format(elem_list[4], self.root_user_id, index))
                    break
            if self.root_tweet_id != elem_list[4]:  # Assert file_id == root_tweet_id
                print('\t file_id:{1} -> root_tweet_id:{2} ({0}) '.format(self.label, self.root_tweet_id, elem_list[4]))
                self.root_tweet_id = elem_list[4]
            # ------------
            # Load Cascade
            # ------------
            for index, line in enumerate(file):  # Trace
                elem_list = re.split(r"[\'\,\->\[\]]", line.strip())
                elem_list = [x.strip() for x in elem_list if x.strip()]  # Remove empty elements
                # Error data handling
                if float(elem_list[2]) >= float(elem_list[5]):
                    continue
                src_user_id, src_tweet_id, src_tweet_time, dst_user_id, dst_tweet_id, dst_tweet_time = elem_list
                self.src_users.add(src_user_id)
                self.dst_users.add(dst_user_id)
                # Different types of Tweets - https://help.twitter.com/en/using-twitter/types-of-tweets
                if src_tweet_id == dst_tweet_id:
                    self.retweet_count += 1
                    self.retweet_users.add(dst_user_id)
                else:
                    self.reply_count += 1
                    self.reply_users.add(dst_user_id)
                # NetworkX Graph
                self.network.add_weighted_edges_from(
                    [(src_user_id, dst_user_id, float(dst_tweet_time) - float(src_tweet_time))])
        # Store computed cascade information
        self.trace_count = index

    # =============================
    #      Structural Analysis
    # =============================
    def calc_structural_features(self):
        G = self.network
        # root_user_id = self.root_user_id
        self.src_user_count = len(self.src_users)
        self.dst_user_count = len(self.dst_users)
        hops = []
        max_hop_count = 10
        for i in range(max_hop_count):
            hops.append(len(nx.single_source_shortest_path_length(G, self.root_user_id, cutoff=i)))

        # print(self.retweet_count, self.response_count)
        # print("leaf:", nx.dag_to_branching(G))
        # print('\t root_to_all_depth_length: ', len(nx.single_source_shortest_path_length(G, self.root_user_id)))
        # print('\t user_count:', len(G.nodes()))  # root + dst_user_count
        print('\t depth: ', nx.dag_longest_path(G))  # weight - temporal feature
        print('\t src_user_count: ', self.src_user_count)
        print('\t dst_user_count: ', self.dst_user_count)
        print('\t root_to_all_depth_sum: ', sum(nx.single_source_shortest_path_length(G, self.root_user_id).values()))
        print('\t root_to_all_depth_max: ', max(nx.single_source_shortest_path_length(G, self.root_user_id).values()))
        print('\t one_hop_neighbors:', len(list(G.neighbors(self.root_user_id))))
        print('\t', "user count by hop(s): ", hops[1] - hops[0], hops[2] - hops[1], hops[3] - hops[2],
                                              hops[4] - hops[3], hops[5] - hops[4], hops[6] - hops[5],
                                              hops[7] - hops[6], hops[8] - hops[7], hops[9] - hops[8])

        # df.loc[df['tweet_id'] == root_tweet_id, 'src_user_count'] = len(src_users)
        shortest_path_dict = nx.single_source_shortest_path_length(G, self.root_user_id)
        self.avg_depth = sum(shortest_path_dict.values()) / len(shortest_path_dict)
        self.max_depth = max(shortest_path_dict.values())

        for i in range(max_hop_count - 1):
            self.network_features[str(i) + "_hop_neighbor_count"] = hops[i + 1] - hops[i]



        # features to data frame
        CascadeAnalyzer.feature_df = CascadeAnalyzer.feature_df.append({
            'tweet_id': self.root_tweet_id, 'label': self.label,
            'structural_trace_count': self.trace_count,
            'structural_retweet_count': self.retweet_count,
            'structural_reply_count': self.reply_count,
            'structural_src_user_count': self.src_user_count,
            'structural_dst_user_count': self.dst_user_count,
            'structural_retweet_reply_percent': self.retweet_count / (self.retweet_count + self.reply_count),
            'structural_src_dst_user_percent': self.src_user_count / (self.src_user_count + self.dst_user_count),  # <--
            'structural_retweet_users_count': len(self.retweet_users),
            'structural_reply_users_count': len(self.reply_users),
            'structural_root_to_all_depth_sum': sum(nx.single_source_shortest_path_length(G, self.root_user_id).values()),
            'structural_root_to_all_depth_max': max(nx.single_source_shortest_path_length(G, self.root_user_id).values()),
            'structural_1_hop_neighbor_count': self.network_features['1_hop_neighbor_count'],
            'structural_2_hop_neighbor_count': self.network_features['2_hop_neighbor_count'],
            'structural_3_hop_neighbor_count': self.network_features['3_hop_neighbor_count'],
            'structural_4_hop_neighbor_count': self.network_features['4_hop_neighbor_count'],
            'structural_5_hop_neighbor_count': self.network_features['5_hop_neighbor_count'],
            'structural_6_hop_neighbor_count': self.network_features['6_hop_neighbor_count'],
            'structural_7_hop_neighbor_count': self.network_features['7_hop_neighbor_count'],
            'structural_8_hop_neighbor_count': self.network_features['8_hop_neighbor_count'],
            'structural_avg_depth': self.avg_depth,
            'structural_max_depth': self.max_depth,  # duplicate
            # 'structural_max_depth': self.max_depth,  # duplicate
            'structural_network_density': nx.density(G),  # duplicate
        }, ignore_index=True)


In [4]:
analyzer = CascadeAnalyzer()
analyzer.iterate_cascades()
analyzer.cascade_to_csv()

-------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 818 entries, 0 to 817
Data columns (total 2 columns):
label       818 non-null object
tweet_id    818 non-null object
dtypes: object(2)
memory usage: 12.9+ KB
None
--------------------------------------------------------------------------
(818, 2) {'non-rumor': 205, 'false': 205, 'true': 207, 'unverified': 201}
--------------------------------------------------------------------------
        label            tweet_id
0       false  656955120626880512
1        true  615689290706595840
2       false  613404935003217920
3  unverified  731166399389962242
4  unverified  714598641827246081
-------------------------------------

<__main__.Cascade object at 0x7f57afca96a0>
<__main__.Cascade object at 0x7f57afc758d0>
<__main__.Cascade object at 0x7f57afc75860>
<__main__.Cascade object at 0x7f57afc75400>
<__main__.Cascade object at 0x7f57afc75588>
<__main__.Cascade object at 0x7f57afb3f358>
<__main__.Cascade

<__main__.Cascade object at 0x7f57adf34b38>
<__main__.Cascade object at 0x7f57adf34b00>
<__main__.Cascade object at 0x7f57adee4c18>
<__main__.Cascade object at 0x7f57adf21908>
<__main__.Cascade object at 0x7f57ade392b0>
<__main__.Cascade object at 0x7f57ade6c2e8>
<__main__.Cascade object at 0x7f57ade6c128>
<__main__.Cascade object at 0x7f57ade6c080>
<__main__.Cascade object at 0x7f57ade6c400>
<__main__.Cascade object at 0x7f57ade39048>
<__main__.Cascade object at 0x7f57ade6c550>
<__main__.Cascade object at 0x7f57ade6c3c8>
<__main__.Cascade object at 0x7f57ade6c668>
<__main__.Cascade object at 0x7f57ade6c160>
<__main__.Cascade object at 0x7f57adcc3080>
<__main__.Cascade object at 0x7f57adcc3320>
<__main__.Cascade object at 0x7f57adcc3588>
<__main__.Cascade object at 0x7f57adcc3358>
<__main__.Cascade object at 0x7f57adc55278>
<__main__.Cascade object at 0x7f57adc55208>
ROOT TWEET 692742353736568833 by 5402612 @ line # 94
	 file_id:692753210692476928 -> root_tweet_id:692742353736568833 (n

<__main__.Cascade object at 0x7f57ac4dcb38>
<__main__.Cascade object at 0x7f57ad899080>
<__main__.Cascade object at 0x7f57ac46ca58>
<__main__.Cascade object at 0x7f57ac3d71d0>
<__main__.Cascade object at 0x7f57ac2a7f98>
<__main__.Cascade object at 0x7f57ac3d7198>
<__main__.Cascade object at 0x7f57ac2a7da0>
<__main__.Cascade object at 0x7f57ac2b2400>
<__main__.Cascade object at 0x7f57ac3d7208>
<__main__.Cascade object at 0x7f57ac2b2320>
<__main__.Cascade object at 0x7f57ac1a7438>
<__main__.Cascade object at 0x7f57ac1a7240>
<__main__.Cascade object at 0x7f57ac1a7278>
<__main__.Cascade object at 0x7f57ac100c18>
<__main__.Cascade object at 0x7f57ac100d30>
<__main__.Cascade object at 0x7f57ac0bca58>
<__main__.Cascade object at 0x7f57ac0bc898>
<__main__.Cascade object at 0x7f57ac0bca20>
<__main__.Cascade object at 0x7f57ac0bc320>
<__main__.Cascade object at 0x7f57ac0bc9e8>
<__main__.Cascade object at 0x7f57ac01e630>
<__main__.Cascade object at 0x7f57abfca3c8>
<__main__.Cascade object at 0x7f

<__main__.Cascade object at 0x7f57aa1d1ba8>
<__main__.Cascade object at 0x7f57aa109080>
<__main__.Cascade object at 0x7f57aa109438>
ROOT TWEET 692664097125601281 by 3646911 @ line # 2
	 file_id:692665281362202624 -> root_tweet_id:692664097125601281 (non-rumor) 
<__main__.Cascade object at 0x7f57aa109358>
<__main__.Cascade object at 0x7f57aa1096a0>
<__main__.Cascade object at 0x7f57aa0c4940>
<__main__.Cascade object at 0x7f57aa08cc88>
<__main__.Cascade object at 0x7f57aa0c4780>
<__main__.Cascade object at 0x7f57aa08cd68>
<__main__.Cascade object at 0x7f57aa08ceb8>
<__main__.Cascade object at 0x7f57aa08cf28>
<__main__.Cascade object at 0x7f57a9ff9080>
<__main__.Cascade object at 0x7f57a9ff9160>
<__main__.Cascade object at 0x7f57a9fd4240>
<__main__.Cascade object at 0x7f57a9ff1f98>
<__main__.Cascade object at 0x7f57a9f52eb8>
<__main__.Cascade object at 0x7f57a9f015c0>
<__main__.Cascade object at 0x7f57a9f01630>
<__main__.Cascade object at 0x7f57a9f015f8>
<__main__.Cascade object at 0x7f57

<__main__.Cascade object at 0x7f57a7c43438>
<__main__.Cascade object at 0x7f57a7c43400>
<__main__.Cascade object at 0x7f57a7c435f8>
<__main__.Cascade object at 0x7f57a7dd65f8>
<__main__.Cascade object at 0x7f57a7c43748>
<__main__.Cascade object at 0x7f57a7b409e8>
<__main__.Cascade object at 0x7f57a7b40a58>
<__main__.Cascade object at 0x7f57a7b40940>
<__main__.Cascade object at 0x7f57a7b406a0>
<__main__.Cascade object at 0x7f57a7992550>
<__main__.Cascade object at 0x7f57a79924e0>
<__main__.Cascade object at 0x7f57a793d8d0>
<__main__.Cascade object at 0x7f57a793da58>
<__main__.Cascade object at 0x7f57a793d9e8>
<__main__.Cascade object at 0x7f57a78aa2e8>
<__main__.Cascade object at 0x7f57a7813f60>
<__main__.Cascade object at 0x7f57a78aa4a8>
<__main__.Cascade object at 0x7f57a78aa048>
<__main__.Cascade object at 0x7f57a781d2b0>
<__main__.Cascade object at 0x7f57a7b40ba8>
<__main__.Cascade object at 0x7f57a781d208>
<__main__.Cascade object at 0x7f57a781d160>
<__main__.Cascade object at 0x7f

	 depth:  ['999453985', '327072700']
	 src_user_count:  49
	 dst_user_count:  378
	 root_to_all_depth_sum:  492
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 276
	 user count by hop(s):  276 90 12 0 0 0 0 0 0
# 26 553588178687655936 true
	 depth:  ['14511951', '478700279']
	 src_user_count:  8
	 dst_user_count:  209
	 root_to_all_depth_sum:  216
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 202
	 user count by hop(s):  202 7 0 0 0 0 0 0 0
# 27 594687353937100801 false
	 depth:  ['2470325805', '415776636']
	 src_user_count:  67
	 dst_user_count:  474
	 root_to_all_depth_sum:  564
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 401
	 user count by hop(s):  401 59 11 3 0 0 0 0 0
# 28 613016993692798977 false
	 depth:  ['1291770157', '26302066']
	 src_user_count:  16
	 dst_user_count:  135
	 root_to_all_depth_sum:  157
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 116
	 user count by hop(s):  116 16 3 0 0 0 0 0 0
# 29 663385747177775105 false
	 depth:  ['2425231', '4924717512']
	

	 depth:  ['292432955', '220251523']
	 src_user_count:  28
	 dst_user_count:  189
	 root_to_all_depth_sum:  244
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 145
	 user count by hop(s):  145 34 9 1 0 0 0 0 0
# 62 675043569367982081 false
	 depth:  ['14662354', '2616371950', '704360721', '4473939262']
	 src_user_count:  195
	 dst_user_count:  677
	 root_to_all_depth_sum:  1134
	 root_to_all_depth_max:  7
	 one_hop_neighbors: 357
	 user count by hop(s):  357 164 66 31 12 10 1 0 0
# 63 544314234541469696 true
	 depth:  ['972651', '298762115']
	 src_user_count:  28
	 dst_user_count:  260
	 root_to_all_depth_sum:  303
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 226
	 user count by hop(s):  226 27 5 2 0 0 0 0 0
# 64 651809229842608128 unverified
	 depth:  ['44862126', '186548278']
	 src_user_count:  7
	 dst_user_count:  161
	 root_to_all_depth_sum:  168
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 154
	 user count by hop(s):  154 7 0 0 0 0 0 0 0
# 65 693140340367187969 non-rumor


	 depth:  ['5402612', '293486411']
	 src_user_count:  106
	 dst_user_count:  683
	 root_to_all_depth_sum:  567
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 474
	 user count by hop(s):  474 39 5 0 0 0 0 0 0
# 95 675193315306905600 false
	 depth:  ['64643056', '488060497']
	 src_user_count:  10
	 dst_user_count:  142
	 root_to_all_depth_sum:  158
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 126
	 user count by hop(s):  126 16 0 0 0 0 0 0 0
# 96 674364257799004160 false
	 depth:  ['126903716', '402365662']
	 src_user_count:  321
	 dst_user_count:  2109
	 root_to_all_depth_sum:  2778
	 root_to_all_depth_max:  6
	 one_hop_neighbors: 1615
	 user count by hop(s):  1615 366 90 23 9 4 0 0 0
# 97 716461257025581056 unverified
	 depth:  ['3337804853', '724605800065327106']
	 src_user_count:  59
	 dst_user_count:  573
	 root_to_all_depth_sum:  717
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 442
	 user count by hop(s):  442 119 11 1 0 0 0 0 0
# 98 662430295254175744 false
	 depth:  ['2

# 131 722885778448121857 unverified
	 depth:  ['14294848', '1107161984', '299188947']
	 src_user_count:  19
	 dst_user_count:  121
	 root_to_all_depth_sum:  162
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 91
	 user count by hop(s):  91 22 5 3 0 0 0 0 0
# 132 614599619310407680 true
	 depth:  ['1367531', '71363215']
	 src_user_count:  39
	 dst_user_count:  391
	 root_to_all_depth_sum:  435
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 347
	 user count by hop(s):  347 44 0 0 0 0 0 0 0
# 133 706933939953344514 unverified
	 depth:  ['2302239423', '2347863411']
	 src_user_count:  23
	 dst_user_count:  97
	 root_to_all_depth_sum:  134
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 70
	 user count by hop(s):  70 20 5 1 1 0 0 0 0
# 134 691285663522648065 non-rumor
	 depth:  ['5402612', '82542014']
	 src_user_count:  53
	 dst_user_count:  857
	 root_to_all_depth_sum:  984
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 740
	 user count by hop(s):  740 107 10 0 0 0 0 0 0
# 135 6612296

# 165 651321040119963648 false
	 depth:  ['3005335703', '3229269037']
	 src_user_count:  16
	 dst_user_count:  210
	 root_to_all_depth_sum:  227
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 195
	 user count by hop(s):  195 13 2 0 0 0 0 0 0
# 166 662151653462790144 false
	 depth:  ['1178700896', '3890882615', '3240724545']
	 src_user_count:  44
	 dst_user_count:  359
	 root_to_all_depth_sum:  451
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 286
	 user count by hop(s):  286 55 17 1 0 0 0 0 0
# 167 693844030589902848 non-rumor
	 depth:  ['2367911', '3380822891']
	 src_user_count:  14
	 dst_user_count:  284
	 root_to_all_depth_sum:  305
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 264
	 user count by hop(s):  264 19 1 0 0 0 0 0 0
# 168 676067381299576832 false
	 depth:  ['50769180', '1703904450']
	 src_user_count:  19
	 dst_user_count:  327
	 root_to_all_depth_sum:  353
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 302
	 user count by hop(s):  302 24 1 0 0 0 0 0 0
# 169 5444

# 203 761999790892806144 non-rumor
	 depth:  ['30313925', '2725169594']
	 src_user_count:  21
	 dst_user_count:  674
	 root_to_all_depth_sum:  704
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 646
	 user count by hop(s):  646 26 2 0 0 0 0 0 0
# 204 553587013409325058 true
	 depth:  ['380648579', '220251523']
	 src_user_count:  502
	 dst_user_count:  2754
	 root_to_all_depth_sum:  4486
	 root_to_all_depth_max:  7
	 one_hop_neighbors: 1616
	 user count by hop(s):  1616 695 325 93 19 4 2 0 0
# 205 544513524438155264 true
	 depth:  ['788524', '1855307683']
	 src_user_count:  12
	 dst_user_count:  102
	 root_to_all_depth_sum:  210
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 42
	 user count by hop(s):  42 31 11 17 1 0 0 0 0
# 206 672539897899577344 false
	 depth:  ['24486612', '553547524']
	 src_user_count:  52
	 dst_user_count:  218
	 root_to_all_depth_sum:  338
	 root_to_all_depth_max:  7
	 one_hop_neighbors: 139
	 user count by hop(s):  139 54 16 5 2 1 1 0 0
# 207 6936914562220523

# 243 674080899055546368 false
	 depth:  ['292816822', '260284001']
	 src_user_count:  28
	 dst_user_count:  120
	 root_to_all_depth_sum:  169
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 82
	 user count by hop(s):  82 29 7 2 0 0 0 0 0
# 244 626770498328895488 false
	 depth:  ['572278319', '577643627']
	 src_user_count:  26
	 dst_user_count:  209
	 root_to_all_depth_sum:  274
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 167
	 user count by hop(s):  167 28 7 5 2 0 0 0 0
# 245 657007736467525632 false
	 depth:  ['2425231', '2558629047']
	 src_user_count:  26
	 dst_user_count:  229
	 root_to_all_depth_sum:  277
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 198
	 user count by hop(s):  198 20 5 6 0 0 0 0 0
# 246 614626710248534016 true
	 depth:  ['2884771', '2768402047']
	 src_user_count:  10
	 dst_user_count:  200
	 root_to_all_depth_sum:  210
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 190
	 user count by hop(s):  190 10 0 0 0 0 0 0 0
# 247 751536167183613952 unverified
	

# 281 629503919098429440 false
	 depth:  ['1547119022', '1673504821', '716777482']
	 src_user_count:  52
	 dst_user_count:  104
	 root_to_all_depth_sum:  533
	 root_to_all_depth_max:  16
	 one_hop_neighbors: 24
	 user count by hop(s):  24 18 8 6 5 7 9 6 3
# 282 692623941131722752 non-rumor
	 depth:  ['428333', '3784344802']
	 src_user_count:  27
	 dst_user_count:  698
	 root_to_all_depth_sum:  801
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 602
	 user count by hop(s):  602 90 5 1 0 0 0 0 0
# 283 626546123713474560 false
	 depth:  ['5695632', '2814926714']
	 src_user_count:  43
	 dst_user_count:  316
	 root_to_all_depth_sum:  387
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 267
	 user count by hop(s):  267 35 7 6 1 0 0 0 0
# 284 672906198434209792 false
	 depth:  ['28785486', '156727600']
	 src_user_count:  16
	 dst_user_count:  182
	 root_to_all_depth_sum:  208
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 157
	 user count by hop(s):  157 24 1 0 0 0 0 0 0
# 285 673615263040

# 319 691632238035886081 non-rumor
	 depth:  ['5402612', '1598737800']
	 src_user_count:  19
	 dst_user_count:  316
	 root_to_all_depth_sum:  360
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 277
	 user count by hop(s):  277 34 5 0 0 0 0 0 0
# 320 524952883343925249 true
	 depth:  ['5402612', '630995837', '2846861857']
	 src_user_count:  85
	 dst_user_count:  976
	 root_to_all_depth_sum:  1259
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 735
	 user count by hop(s):  735 204 32 5 0 0 0 0 0
# 321 688021039322894336 non-rumor
	 depth:  ['5402612', '2919708995']
	 src_user_count:  12
	 dst_user_count:  358
	 root_to_all_depth_sum:  408
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 310
	 user count by hop(s):  310 46 2 0 0 0 0 0 0
# 322 707308274270539777 unverified
	 depth:  ['232901331', '3115924437', '3126563358']
	 src_user_count:  10
	 dst_user_count:  133
	 root_to_all_depth_sum:  164
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 104
	 user count by hop(s):  104 27 2 0 0 

	 depth:  ['14075928', '185832292']
	 src_user_count:  99
	 dst_user_count:  1193
	 root_to_all_depth_sum:  1342
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 1072
	 user count by hop(s):  1072 99 16 6 0 0 0 0 0
# 359 692410832307818497 non-rumor
	 depth:  ['9300262', '594886554', '24122825']
	 src_user_count:  49
	 dst_user_count:  406
	 root_to_all_depth_sum:  561
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 274
	 user count by hop(s):  274 110 21 1 0 0 0 0 0
# 360 662381914842603520 false
	 depth:  ['166329578', '2430341433']
	 src_user_count:  16
	 dst_user_count:  117
	 root_to_all_depth_sum:  151
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 86
	 user count by hop(s):  86 28 3 0 0 0 0 0 0
# 361 615494435074363392 true
	 depth:  ['1018225862', '2857530284']
	 src_user_count:  179
	 dst_user_count:  1361
	 root_to_all_depth_sum:  1619
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 1146
	 user count by hop(s):  1146 176 35 4 0 0 0 0 0
# 362 763520953619918848 unverified


	 depth:  ['51241574', '286208037']
	 src_user_count:  15
	 dst_user_count:  241
	 root_to_all_depth_sum:  256
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 226
	 user count by hop(s):  226 15 0 0 0 0 0 0 0
# 394 693606934050684928 non-rumor
	 depth:  ['2883841', '130886677']
	 src_user_count:  32
	 dst_user_count:  318
	 root_to_all_depth_sum:  403
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 265
	 user count by hop(s):  265 29 18 4 2 0 0 0 0
# 395 666810213274689537 false
	 depth:  ['7702542', '1177822908']
	 src_user_count:  8
	 dst_user_count:  139
	 root_to_all_depth_sum:  144
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 134
	 user count by hop(s):  134 5 0 0 0 0 0 0 0
# 396 692702295281262593 non-rumor
	 depth:  ['5402612', '293486411']
	 src_user_count:  35
	 dst_user_count:  518
	 root_to_all_depth_sum:  445
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 391
	 user count by hop(s):  391 24 2 0 0 0 0 0 0
# 397 692498490249891842 non-rumor
	 depth:  ['30313925', '474

	 depth:  ['1863198290', '1910543420']
	 src_user_count:  2
	 dst_user_count:  382
	 root_to_all_depth_sum:  383
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 381
	 user count by hop(s):  381 1 0 0 0 0 0 0 0
# 427 693811101146963968 non-rumor
	 depth:  ['9300262', '915384589']
	 src_user_count:  62
	 dst_user_count:  631
	 root_to_all_depth_sum:  709
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 562
	 user count by hop(s):  562 60 9 0 0 0 0 0 0
# 428 666497286663503872 false
	 depth:  ['15754281', '3230825498']
	 src_user_count:  25
	 dst_user_count:  326
	 root_to_all_depth_sum:  373
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 287
	 user count by hop(s):  287 31 8 0 0 0 0 0 0
# 429 642432477185867776 false
	 depth:  ['562338854', '3237206662']
	 src_user_count:  7
	 dst_user_count:  165
	 root_to_all_depth_sum:  173
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 157
	 user count by hop(s):  157 8 0 0 0 0 0 0 0
# 430 682999206290829312 non-rumor
	 depth:  ['717313', '68924

	 depth:  ['2557521', '4843836155']
	 src_user_count:  44
	 dst_user_count:  762
	 root_to_all_depth_sum:  815
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 714
	 user count by hop(s):  714 43 5 0 0 0 0 0 0
# 467 692796451987034113 non-rumor
	 depth:  ['14075928', '61289304']
	 src_user_count:  24
	 dst_user_count:  322
	 root_to_all_depth_sum:  362
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 288
	 user count by hop(s):  288 28 6 0 0 0 0 0 0
# 468 703234354579898368 unverified
	 depth:  ['572278319', '2917056896']
	 src_user_count:  31
	 dst_user_count:  146
	 root_to_all_depth_sum:  228
	 root_to_all_depth_max:  6
	 one_hop_neighbors: 105
	 user count by hop(s):  105 19 11 5 4 2 0 0 0
# 469 693466451081060353 non-rumor
	 depth:  ['759251', '37449964']
	 src_user_count:  68
	 dst_user_count:  757
	 root_to_all_depth_sum:  879
	 root_to_all_depth_max:  6
	 one_hop_neighbors: 670
	 user count by hop(s):  670 64 15 5 2 1 0 0 0
# 470 655815788675399680 unverified
	 depth:  ['267721

# 504 691608761128067072 non-rumor
	 depth:  ['14934818', '2949701406']
	 src_user_count:  4
	 dst_user_count:  446
	 root_to_all_depth_sum:  751
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 141
	 user count by hop(s):  141 305 0 0 0 0 0 0 0
# 505 614638036593299456 true
	 depth:  ['2884771', '850633633', '220251523']
	 src_user_count:  16
	 dst_user_count:  272
	 root_to_all_depth_sum:  297
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 253
	 user count by hop(s):  253 15 2 2 0 0 0 0 0
# 506 553587672137334785 true
	 depth:  ['87416722', '2354245921']
	 src_user_count:  263
	 dst_user_count:  1361
	 root_to_all_depth_sum:  2127
	 root_to_all_depth_max:  6
	 one_hop_neighbors: 849
	 user count by hop(s):  849 321 141 40 7 3 0 0 0
# 507 552811386259386370 true
	 depth:  ['3108351', '2935927745']
	 src_user_count:  19
	 dst_user_count:  312
	 root_to_all_depth_sum:  336
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 289
	 user count by hop(s):  289 22 1 0 0 0 0 0 0
# 508 67282840

# 542 614624331717545984 true
	 depth:  ['97474887', '215650354', '163690604']
	 src_user_count:  32
	 dst_user_count:  370
	 root_to_all_depth_sum:  748
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 168
	 user count by hop(s):  168 46 137 18 1 0 0 0 0
# 543 615840865815298048 true
	 depth:  ['3022045469', '496047483']
	 src_user_count:  23
	 dst_user_count:  366
	 root_to_all_depth_sum:  389
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 343
	 user count by hop(s):  343 23 0 0 0 0 0 0 0
# 544 645356735545364480 false
	 depth:  ['3098760325', '715310419540312064']
	 src_user_count:  11
	 dst_user_count:  90
	 root_to_all_depth_sum:  109
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 76
	 user count by hop(s):  76 9 5 0 0 0 0 0 0
# 545 693651239486263296 non-rumor
	 depth:  ['428333', '2820515815']
	 src_user_count:  44
	 dst_user_count:  533
	 root_to_all_depth_sum:  608
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 471
	 user count by hop(s):  471 50 11 1 0 0 0 0 0
# 546 544

# 578 697992796565741569 unverified
	 depth:  ['213291472', '2207451133', '15593850']
	 src_user_count:  100
	 dst_user_count:  864
	 root_to_all_depth_sum:  1746
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 341
	 user count by hop(s):  341 310 75 130 8 0 0 0 0
# 579 714577521992343552 unverified
	 depth:  ['198746638', '2818565532']
	 src_user_count:  11
	 dst_user_count:  118
	 root_to_all_depth_sum:  278
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 33
	 user count by hop(s):  33 15 65 5 0 0 0 0 0
# 580 760120409429643266 unverified
	 depth:  ['44945327', '60722573']
	 src_user_count:  18
	 dst_user_count:  428
	 root_to_all_depth_sum:  464
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 392
	 user count by hop(s):  392 36 0 0 0 0 0 0 0
# 581 692368829368918017 non-rumor
	 depth:  ['5402612', '293486411']
	 src_user_count:  16
	 dst_user_count:  391
	 root_to_all_depth_sum:  428
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 357
	 user count by hop(s):  357 31 3 0 0 0 0 0 

	 depth:  ['2883841', '3299857557']
	 src_user_count:  98
	 dst_user_count:  1006
	 root_to_all_depth_sum:  1175
	 root_to_all_depth_max:  5
	 one_hop_neighbors: 873
	 user count by hop(s):  873 104 23 5 1 0 0 0 0
# 617 544520273718812672 true
	 depth:  ['100629223', '2816397492']
	 src_user_count:  13
	 dst_user_count:  118
	 root_to_all_depth_sum:  142
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 96
	 user count by hop(s):  96 20 2 0 0 0 0 0 0
# 618 748558349139058688 unverified
	 depth:  ['272114651', '632372819']
	 src_user_count:  22
	 dst_user_count:  177
	 root_to_all_depth_sum:  217
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 137
	 user count by hop(s):  137 40 0 0 0 0 0 0 0
# 619 544350712365207552 true
	 depth:  ['9235982', '487636923', '2248477764']
	 src_user_count:  19
	 dst_user_count:  132
	 root_to_all_depth_sum:  191
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 88
	 user count by hop(s):  88 29 15 0 0 0 0 0 0
# 620 688741019979001856 non-rumor
	 depth:  ['

# 654 614594259900080128 true
	 depth:  ['1917731', '630818269']
	 src_user_count:  21
	 dst_user_count:  168
	 root_to_all_depth_sum:  198
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 140
	 user count by hop(s):  140 26 2 0 0 0 0 0 0
# 655 614614133410033664 true
	 depth:  ['14173315', '1308116874', '1307998092']
	 src_user_count:  73
	 dst_user_count:  643
	 root_to_all_depth_sum:  777
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 523
	 user count by hop(s):  523 107 12 1 0 0 0 0 0
# 656 544278335455776769 true
	 depth:  ['380648579', '29018320', '551342421']
	 src_user_count:  28
	 dst_user_count:  212
	 root_to_all_depth_sum:  266
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 165
	 user count by hop(s):  165 41 5 1 0 0 0 0 0
# 657 665309822208729088 non-rumor
	 depth:  ['15108702', '835747213']
	 src_user_count:  36
	 dst_user_count:  259
	 root_to_all_depth_sum:  321
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 205
	 user count by hop(s):  205 46 8 0 0 0 0 0 0
# 658 

	 depth:  ['5402612', '187475837', '3022334712']
	 src_user_count:  72
	 dst_user_count:  1340
	 root_to_all_depth_sum:  1538
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 1155
	 user count by hop(s):  1155 175 7 3 0 0 0 0 0
# 689 723504069814444033 unverified
	 depth:  ['111556701', '392593149']
	 src_user_count:  19
	 dst_user_count:  213
	 root_to_all_depth_sum:  296
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 144
	 user count by hop(s):  144 57 10 2 0 0 0 0 0
# 690 553586860334010368 true
	 depth:  ['1299769218', '435777178', '2471375798']
	 src_user_count:  44
	 dst_user_count:  268
	 root_to_all_depth_sum:  374
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 178
	 user count by hop(s):  178 76 12 2 0 0 0 0 0
# 691 640118021101604864 unverified
	 depth:  ['20562637', '159320818']
	 src_user_count:  8
	 dst_user_count:  121
	 root_to_all_depth_sum:  139
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 108
	 user count by hop(s):  108 9 3 1 0 0 0 0 0
# 692 68875106150344294

# 724 614616994499788800 true
	 depth:  ['288215711', '866804630']
	 src_user_count:  27
	 dst_user_count:  251
	 root_to_all_depth_sum:  286
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 223
	 user count by hop(s):  223 22 5 1 0 0 0 0 0
# 725 693185853867294721 non-rumor
	 depth:  ['759251', '54400569', '735097672089149441']
	 src_user_count:  199
	 dst_user_count:  1145
	 root_to_all_depth_sum:  1709
	 root_to_all_depth_max:  7
	 one_hop_neighbors: 837
	 user count by hop(s):  837 190 53 22 19 18 6 0 0
# 726 553506608203169792 true
	 depth:  ['5402612', '535765349']
	 src_user_count:  32
	 dst_user_count:  561
	 root_to_all_depth_sum:  623
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 513
	 user count by hop(s):  513 37 8 3 0 0 0 0 0
# 727 751856580874960897 unverified
	 depth:  ['198286584', '573842268', '20479813', '3691110204']
	 src_user_count:  11
	 dst_user_count:  165
	 root_to_all_depth_sum:  334
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 56
	 user count by hop(s)

	 root_to_all_depth_max:  3
	 one_hop_neighbors: 2197
	 user count by hop(s):  2197 215 6 0 0 0 0 0 0
# 760 686666933949837312 non-rumor
	 depth:  ['788524', '79030269']
	 src_user_count:  97
	 dst_user_count:  667
	 root_to_all_depth_sum:  1241
	 root_to_all_depth_max:  7
	 one_hop_neighbors: 325
	 user count by hop(s):  325 197 90 36 8 2 8 0 0
# 761 767710042816602112 non-rumor
	 depth:  ['1367531', '4675424281']
	 src_user_count:  40
	 dst_user_count:  953
	 root_to_all_depth_sum:  1005
	 root_to_all_depth_max:  3
	 one_hop_neighbors: 906
	 user count by hop(s):  906 42 5 0 0 0 0 0 0
# 762 714811995573325828 unverified
	 depth:  ['20748745', '2306191482']
	 src_user_count:  9
	 dst_user_count:  125
	 root_to_all_depth_sum:  133
	 root_to_all_depth_max:  2
	 one_hop_neighbors: 117
	 user count by hop(s):  117 8 0 0 0 0 0 0 0
# 763 728101712762834944 unverified
	 depth:  ['2267865715', '724544443517263872']
	 src_user_count:  10
	 dst_user_count:  143
	 root_to_all_depth_sum:  151
	 r

	 depth:  ['16664681', '265705962', '1163873018']
	 src_user_count:  181
	 dst_user_count:  1045
	 root_to_all_depth_sum:  1787
	 root_to_all_depth_max:  6
	 one_hop_neighbors: 581
	 user count by hop(s):  581 309 54 82 16 3 0 0 0
# 799 552806757672964097 true
	 depth:  ['15754281', '14728168', '2479977919']
	 src_user_count:  29
	 dst_user_count:  258
	 root_to_all_depth_sum:  333
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 196
	 user count by hop(s):  196 50 11 1 0 0 0 0 0
# 800 693761289601060864 non-rumor
	 depth:  ['5402612', '132155424']
	 src_user_count:  49
	 dst_user_count:  934
	 root_to_all_depth_sum:  1264
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 624
	 user count by hop(s):  624 294 12 4 0 0 0 0 0
# 801 544350567183556608 true
	 depth:  ['25210650', '774393817', '318588826']
	 src_user_count:  14
	 dst_user_count:  118
	 root_to_all_depth_sum:  177
	 root_to_all_depth_max:  4
	 one_hop_neighbors: 64
	 user count by hop(s):  64 51 1 2 0 0 0 0 0
# 802 75882553548

In [5]:
analyzer.cascades_dict

{'498430783699554305': <__main__.Cascade at 0x7f57af77ad68>,
 '500378223977721856': <__main__.Cascade at 0x7f57adcc3080>,
 '524922729485848576': <__main__.Cascade at 0x7f57acb37860>,
 '524923462398513152': <__main__.Cascade at 0x7f57ad856ac8>,
 '524923676484177920': <__main__.Cascade at 0x7f57aa1d1d30>,
 '524925050739490816': <__main__.Cascade at 0x7f57a804f198>,
 '524925215235911680': <__main__.Cascade at 0x7f57ac839668>,
 '524925987239120897': <__main__.Cascade at 0x7f57aac5cd68>,
 '524926235030589440': <__main__.Cascade at 0x7f57a870e358>,
 '524931324763992064': <__main__.Cascade at 0x7f57ad36bd30>,
 '524932935137628160': <__main__.Cascade at 0x7f57aa3c9470>,
 '524935485370929152': <__main__.Cascade at 0x7f57a9deaa58>,
 '524936872666353664': <__main__.Cascade at 0x7f57ab1e0c88>,
 '524941132237910016': <__main__.Cascade at 0x7f57a7fa3f98>,
 '524942470472548352': <__main__.Cascade at 0x7f57ac0bc9e8>,
 '524943490887991296': <__main__.Cascade at 0x7f57ad46a7f0>,
 '524944399890124801': <

<module 'networkx' from '/home/jihochoi/.local/lib/python3.5/site-packages/networkx/__init__.py'>

In [15]:
G = analyzer.cascades_dict['498430783699554305'].network
G2 = analyzer.cascades_dict['673615400655970304'].network

In [16]:
np.mean(list(nx.betweenness_centrality(G).values()))  # average betweenness centrality

8.023025552008068e-06

In [17]:
np.mean(list(nx.betweenness_centrality(G2).values()))

2.771960397654025e-07

In [21]:
analyzer.cascades_dict['498430783699554305'].root_user_id

'24165761'

In [23]:
sum(nx.single_source_shortest_path_length(G, analyzer.cascades_dict['498430783699554305'].root_user_id).values()) / analyzer.cascades_dict['498430783699554305'].dst_user_count

1.4360902255639099

In [25]:
sum(nx.single_source_shortest_path_length(G2, analyzer.cascades_dict['673615400655970304'].root_user_id).values()) / analyzer.cascades_dict['673615400655970304'].dst_user_count

1.1038338658146964