In [1]:
import pandas as pd
import json
import csv
from  os import path

from datetime import datetime

import networkx as nx
import numpy as np

In [2]:
# Root of the dataset directory (relative path); the cells below also read
# per-user follower files from "<DATASET_BASE>/user_followers/".
DATASET_BASE = "../fakenewsnet_dataset"
# Directory with one sub-folder per news item; the cells below read
# "<NEWS_PATH>/<news_id>/tweets/<source_tweet_id>.json" and
# "<NEWS_PATH>/<news_id>/retweets/<source_tweet_id>.json" from it.
NEWS_PATH = f"{DATASET_BASE}/politifact/fake"

In [3]:
# ID tables for the train / validation ("cv") / test splits.
# Later cells read the `news_id` and `source_tweet_id` columns of every row
# and carry the `cascade_size` column through to the exported frames.
train_id_df = pd.read_csv("../exported/train_ids.csv", header = 0)
cv_id_df = pd.read_csv("../exported/cv_ids.csv", header = 0)
test_id_df = pd.read_csv("../exported/test_ids.csv", header = 0)

In [4]:
def month_str_to_num(month_string):
    """Translate a three-letter English month abbreviation into its number.

    Parameters
    ----------
    month_string : str
        Abbreviation such as "Jan" or "Dec". The match is case-sensitive.

    Returns
    -------
    int
        1 through 12 for a recognised abbreviation, -1 for anything else.
    """
    abbreviations = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    # Position in the tuple (1-based) is the calendar month number.
    number_by_name = {name: number for number, name in enumerate(abbreviations, start=1)}
    return number_by_name.get(month_string, -1)

def get_post_datetime(datatime_str):
    """Parse a tweet timestamp string into a naive ``datetime``.

    The string is split on single spaces and read positionally, e.g.
    "Fri Dec 08 17:08:28 +0000 2017" -> datetime(2017, 12, 8, 17, 8, 28).
    The weekday (field 0) and the UTC offset (field 4) are not used.

    Parameters
    ----------
    datatime_str : str
        Timestamp in the layout shown above.

    Returns
    -------
    datetime.datetime
        Timezone-naive datetime built from the year, month, day and clock fields.

    Raises
    ------
    AssertionError
        If the month abbreviation is not recognised by ``month_str_to_num``.
    """
    fields = datatime_str.split(" ")

    year = int(fields[5])
    month = month_str_to_num(fields[1])
    assert month >= 1
    day = int(fields[2])

    # "HH:MM:SS" -> three integers
    clock = fields[3].split(":")
    hour, minute, second = int(clock[0]), int(clock[1]), int(clock[2])

    return datetime(year, month, day, hour, minute, second)

def time_diff_in_minute(t1, t2):
    """Return the elapsed time from ``t1`` to ``t2`` in minutes.

    Uses ``timedelta.total_seconds()`` so that whole days are included.
    ``timedelta.seconds`` only holds the within-day remainder (0..86399),
    which made an event one day and ten minutes later look like a ten-minute
    delay.

    Parameters
    ----------
    t1, t2 : datetime.datetime
        Start and end instants (both naive or both aware).

    Returns
    -------
    float
        ``t2 - t1`` in minutes; negative when ``t2`` precedes ``t1``.
    """
    return (t2 - t1).total_seconds() / 60

### Training set

In [5]:
# Build, for every cascade in the training split, three retweet graphs
# (retweets within 20 / 40 / 60 minutes of the source tweet) plus summary
# statistics of the 60-minute graph.
# NOTE(review): the test and validation cells below repeat this logic;
# consider factoring it into one function that takes the id frame.

# Per-cascade outputs, one entry per row of train_id_df (in row order).
network_h1_nodes = [] # list of json strings
network_h1_edges = [] # list of json strings
network_h2_nodes = [] # list of json strings
network_h2_edges = [] # list of json strings
network_h3_nodes = [] # list of json strings
network_h3_edges = [] # list of json strings
max_time_diffs = []
min_time_diffs = []
avg_time_diffs = []
max_degrees = []
min_degrees = []
avg_degrees = []
node_numbers = []
edge_numbers = []

# Retweet-delay cut-offs in minutes for the h1, h2 and h3 graphs.
window_minutes = (20, 40, 60)

for idx, row in train_id_df.iterrows():

    # progress indicator: one dot per 1000 rows
    if idx % 1000 == 0:
        print(".", end="")

    network_h1 = nx.Graph()
    network_h2 = nx.Graph()
    network_h3 = nx.Graph()
    networks = (network_h1, network_h2, network_h3)

    # --- source tweet: its author is the root node of all three graphs ---
    with open(f"{NEWS_PATH}/{row['news_id']}/tweets/{row['source_tweet_id']}.json", "r") as f:
        t = json.load(f)
    u = t['user']
    cascade_datetime = get_post_datetime(t['created_at'])

    # feature layout: seven user-profile fields followed by the delay in
    # minutes relative to the source tweet (0 for the source itself)
    feature_vector = [u['followers_count'], u['listed_count'], u['favourites_count'], u['statuses_count'],
                      u['protected'], u['geo_enabled'], u['verified'], 0]
    for network in networks:
        network.add_node(u['id'], feature=feature_vector)

    time_diffs = [0]

    # --- retweets: add each retweeter to every graph whose window it fits ---
    with open(f"{NEWS_PATH}/{row['news_id']}/retweets/{row['source_tweet_id']}.json", "r") as ff:
        rts = json.load(ff)['retweets']

    for rt in rts:
        time_diff = time_diff_in_minute(cascade_datetime, get_post_datetime(rt['created_at']))
        if time_diff > window_minutes[-1]:
            # later than the widest window: belongs to no graph and is
            # excluded from the delay statistics
            continue

        u = rt['user']
        feature_vector = [u['followers_count'], u['listed_count'], u['favourites_count'], u['statuses_count'],
                          u['protected'], u['geo_enabled'], u['verified'], time_diff]
        for network, limit in zip(networks, window_minutes):
            if time_diff <= limit:
                network.add_node(u['id'], feature=feature_vector)

        time_diffs.append(time_diff)

    # --- edges: connect a user to each of their followers present in the graph ---
    # Every h1/h2 node is also an h3 node, so each follower file is read once
    # per cascade and reduced to the followers that appear in h3.
    h3_members = set(network_h3.nodes)
    followers_in_cascade = {}  # user id -> filtered follower list, None if no file
    for network in networks:
        nodes = list(network.nodes)
        members = set(nodes)  # O(1) membership instead of scanning a list
        for n in nodes:
            if n not in followers_in_cascade:
                followers_file = f"{DATASET_BASE}/user_followers/{n}.json"
                if path.exists(followers_file):
                    with open(followers_file, "r") as social_f:
                        followers = json.load(social_f)['followers']
                    followers_in_cascade[n] = [x for x in followers if x in h3_members]
                else:
                    followers_in_cascade[n] = None

            if followers_in_cascade[n] is None:
                print("not exists, skip")
                continue

            for follower in followers_in_cascade[n]:
                if follower in members and follower != n:
                    network.add_edge(n, follower)

    # --- serialise graphs ---
    # nodes -> JSON object {user id: feature vector} with double quotes
    # replaced by single quotes; edges -> JSON list of [u, v] pairs
    for network, nodes_out, edges_out in (
        (network_h1, network_h1_nodes, network_h1_edges),
        (network_h2, network_h2_nodes, network_h2_edges),
        (network_h3, network_h3_nodes, network_h3_edges),
    ):
        features_by_node = {n: data['feature'] for n, data in network.nodes(data=True)}
        nodes_out.append(json.dumps(features_by_node).replace('"', "'"))
        edges_out.append(json.dumps([list(e) for e in network.edges]))

    # --- summary statistics (delays over kept retweets, degrees over h3) ---
    time_diffs = np.array(time_diffs)
    max_time_diffs.append(time_diffs.max())
    min_time_diffs.append(time_diffs.min())
    avg_time_diffs.append(time_diffs.mean())

    degrees = np.array([d for _, d in network_h3.degree()])
    max_degrees.append(degrees.max())
    min_degrees.append(degrees.min())
    avg_degrees.append(degrees.mean())

    node_numbers.append(network_h3.number_of_nodes())
    edge_numbers.append(network_h3.number_of_edges())

# Attach the per-cascade results to a copy of the id table.
train_df = train_id_df.assign(
    max_deg=max_degrees,
    min_deg=min_degrees,
    avg_deg=avg_degrees,
    max_timediff=max_time_diffs,
    min_timediff=min_time_diffs,
    avg_timediff=avg_time_diffs,
    node_number=node_numbers,
    edge_number=edge_numbers,
    h1_nodes=network_h1_nodes,
    h1_edges=network_h1_edges,
    h2_nodes=network_h2_nodes,
    h2_edges=network_h2_edges,
    h3_nodes=network_h3_nodes,
    h3_edges=network_h3_edges,
)

..............................................................................................

In [6]:
# Keep only the columns that are exported, in a fixed order: identifiers,
# graph statistics, cascade size, then the serialised graphs.
train_columns = [
    'news_id',
    'source_tweet_id',
    'max_deg',
    'avg_deg',
    'min_deg',
    'max_timediff',
    'min_timediff',
    'avg_timediff',
    'node_number',
    'edge_number',
    'cascade_size',
    'h1_nodes',
    'h1_edges',
    'h2_nodes',
    'h2_edges',
    'h3_nodes',
    'h3_edges',
]
train_df = train_df[train_columns]
train_df

Unnamed: 0,news_id,source_tweet_id,max_deg,avg_deg,min_deg,max_timediff,min_timediff,avg_timediff,node_number,edge_number,cascade_size,h1_nodes,h1_edges,h2_nodes,h2_edges,h3_nodes,h3_edges
0,politifact14667,1013440971034517504,0,0.0,0,0.0,0.0,0.0,1,0,2,"{'1000783269464543235': [7, 0, 1121, 1266, fal...",[],"{'1000783269464543235': [7, 0, 1121, 1266, fal...",[],"{'1000783269464543235': [7, 0, 1121, 1266, fal...",[]
1,politifact14664,920361276244508672,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'837786553': [567, 7, 31377, 37374, false, fa...",[],"{'837786553': [567, 7, 31377, 37374, false, fa...",[],"{'837786553': [567, 7, 31377, 37374, false, fa...",[]
2,politifact14548,1021032222873333760,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'76992333': [2929, 236, 6644, 74696, false, f...",[],"{'76992333': [2929, 236, 6644, 74696, false, f...",[],"{'76992333': [2929, 236, 6644, 74696, false, f...",[]
3,politifact14548,1021447823487717377,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'758751618175074304': [1776, 48, 196396, 1952...",[],"{'758751618175074304': [1776, 48, 196396, 1952...",[],"{'758751618175074304': [1776, 48, 196396, 1952...",[]
4,politifact13921,685818721856831488,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'282923803': [218, 7, 0, 12995, false, true, ...",[],"{'282923803': [218, 7, 0, 12995, false, true, ...",[],"{'282923803': [218, 7, 0, 12995, false, true, ...",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93627,politifact14856,940954228050325504,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'4162014502': [175, 1, 20569, 13135, false, f...",[],"{'4162014502': [175, 1, 20569, 13135, false, f...",[],"{'4162014502': [175, 1, 20569, 13135, false, f...",[]
93628,politifact15287,929339306463940610,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'287647703': [5483, 173, 1149, 20147, false, ...",[],"{'287647703': [5483, 173, 1149, 20147, false, ...",[],"{'287647703': [5483, 173, 1149, 20147, false, ...",[]
93629,politifact14548,1019470337392734209,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'1363478880': [763, 12, 10506, 32427, false, ...",[],"{'1363478880': [763, 12, 10506, 32427, false, ...",[],"{'1363478880': [763, 12, 10506, 32427, false, ...",[]
93630,politifact13949,996273827490414592,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'76583346': [156, 10, 1361, 6573, false, fals...",[],"{'76583346': [156, 10, 1361, 6573, false, fals...",[],"{'76583346': [156, 10, 1361, 6573, false, fals...",[]


In [7]:
# Write the training-split table; every non-numeric field is wrapped in
# double quotes and the index is not written.
train_df.to_csv(
    "../exported/cascade_network_1h_train.csv",
    index=False,
    header=True,
    quotechar='"',
    quoting=csv.QUOTE_NONNUMERIC,
)

### Test set

In [8]:
# Build, for every cascade in the test split, three retweet graphs
# (retweets within 20 / 40 / 60 minutes of the source tweet) plus summary
# statistics of the 60-minute graph.
# NOTE(review): this repeats the logic of the training and validation cells;
# consider factoring it into one function that takes the id frame.

# Per-cascade outputs, one entry per row of test_id_df (in row order).
network_h1_nodes = [] # list of json strings
network_h1_edges = [] # list of json strings
network_h2_nodes = [] # list of json strings
network_h2_edges = [] # list of json strings
network_h3_nodes = [] # list of json strings
network_h3_edges = [] # list of json strings
max_time_diffs = []
min_time_diffs = []
avg_time_diffs = []
max_degrees = []
min_degrees = []
avg_degrees = []
node_numbers = []
edge_numbers = []

# Retweet-delay cut-offs in minutes for the h1, h2 and h3 graphs.
window_minutes = (20, 40, 60)

for idx, row in test_id_df.iterrows():

    # progress indicator: one dot per 1000 rows
    if idx % 1000 == 0:
        print(".", end="")

    network_h1 = nx.Graph()
    network_h2 = nx.Graph()
    network_h3 = nx.Graph()
    networks = (network_h1, network_h2, network_h3)

    # --- source tweet: its author is the root node of all three graphs ---
    with open(f"{NEWS_PATH}/{row['news_id']}/tweets/{row['source_tweet_id']}.json", "r") as f:
        t = json.load(f)
    u = t['user']
    cascade_datetime = get_post_datetime(t['created_at'])

    # feature layout: seven user-profile fields followed by the delay in
    # minutes relative to the source tweet (0 for the source itself)
    feature_vector = [u['followers_count'], u['listed_count'], u['favourites_count'], u['statuses_count'],
                      u['protected'], u['geo_enabled'], u['verified'], 0]
    for network in networks:
        network.add_node(u['id'], feature=feature_vector)

    time_diffs = [0]

    # --- retweets: add each retweeter to every graph whose window it fits ---
    with open(f"{NEWS_PATH}/{row['news_id']}/retweets/{row['source_tweet_id']}.json", "r") as ff:
        rts = json.load(ff)['retweets']

    for rt in rts:
        time_diff = time_diff_in_minute(cascade_datetime, get_post_datetime(rt['created_at']))
        if time_diff > window_minutes[-1]:
            # later than the widest window: belongs to no graph and is
            # excluded from the delay statistics
            continue

        u = rt['user']
        feature_vector = [u['followers_count'], u['listed_count'], u['favourites_count'], u['statuses_count'],
                          u['protected'], u['geo_enabled'], u['verified'], time_diff]
        for network, limit in zip(networks, window_minutes):
            if time_diff <= limit:
                network.add_node(u['id'], feature=feature_vector)

        time_diffs.append(time_diff)

    # --- edges: connect a user to each of their followers present in the graph ---
    # Every h1/h2 node is also an h3 node, so each follower file is read once
    # per cascade and reduced to the followers that appear in h3.
    h3_members = set(network_h3.nodes)
    followers_in_cascade = {}  # user id -> filtered follower list, None if no file
    for network in networks:
        nodes = list(network.nodes)
        members = set(nodes)  # O(1) membership instead of scanning a list
        for n in nodes:
            if n not in followers_in_cascade:
                followers_file = f"{DATASET_BASE}/user_followers/{n}.json"
                if path.exists(followers_file):
                    with open(followers_file, "r") as social_f:
                        followers = json.load(social_f)['followers']
                    followers_in_cascade[n] = [x for x in followers if x in h3_members]
                else:
                    followers_in_cascade[n] = None

            if followers_in_cascade[n] is None:
                print("not exists, skip")
                continue

            for follower in followers_in_cascade[n]:
                if follower in members and follower != n:
                    network.add_edge(n, follower)

    # --- serialise graphs ---
    # nodes -> JSON object {user id: feature vector} with double quotes
    # replaced by single quotes; edges -> JSON list of [u, v] pairs
    for network, nodes_out, edges_out in (
        (network_h1, network_h1_nodes, network_h1_edges),
        (network_h2, network_h2_nodes, network_h2_edges),
        (network_h3, network_h3_nodes, network_h3_edges),
    ):
        features_by_node = {n: data['feature'] for n, data in network.nodes(data=True)}
        nodes_out.append(json.dumps(features_by_node).replace('"', "'"))
        edges_out.append(json.dumps([list(e) for e in network.edges]))

    # --- summary statistics (delays over kept retweets, degrees over h3) ---
    time_diffs = np.array(time_diffs)
    max_time_diffs.append(time_diffs.max())
    min_time_diffs.append(time_diffs.min())
    avg_time_diffs.append(time_diffs.mean())

    degrees = np.array([d for _, d in network_h3.degree()])
    max_degrees.append(degrees.max())
    min_degrees.append(degrees.min())
    avg_degrees.append(degrees.mean())

    node_numbers.append(network_h3.number_of_nodes())
    edge_numbers.append(network_h3.number_of_edges())

# Attach the per-cascade results to a copy of the id table.
test_df = test_id_df.assign(
    max_deg=max_degrees,
    min_deg=min_degrees,
    avg_deg=avg_degrees,
    max_timediff=max_time_diffs,
    min_timediff=min_time_diffs,
    avg_timediff=avg_time_diffs,
    node_number=node_numbers,
    edge_number=edge_numbers,
    h1_nodes=network_h1_nodes,
    h1_edges=network_h1_edges,
    h2_nodes=network_h2_nodes,
    h2_edges=network_h2_edges,
    h3_nodes=network_h3_nodes,
    h3_edges=network_h3_edges,
)

............

In [9]:
# Keep only the columns that are exported, in a fixed order: identifiers,
# graph statistics, cascade size, then the serialised graphs.
test_columns = [
    'news_id',
    'source_tweet_id',
    'max_deg',
    'avg_deg',
    'min_deg',
    'max_timediff',
    'min_timediff',
    'avg_timediff',
    'node_number',
    'edge_number',
    'cascade_size',
    'h1_nodes',
    'h1_edges',
    'h2_nodes',
    'h2_edges',
    'h3_nodes',
    'h3_edges',
]
test_df = test_df[test_columns]
test_df

Unnamed: 0,news_id,source_tweet_id,max_deg,avg_deg,min_deg,max_timediff,min_timediff,avg_timediff,node_number,edge_number,cascade_size,h1_nodes,h1_edges,h2_nodes,h2_edges,h3_nodes,h3_edges
0,politifact13921,685229269136617472,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'69552048': [177, 2, 1979, 8210, false, false...",[],"{'69552048': [177, 2, 1979, 8210, false, false...",[],"{'69552048': [177, 2, 1979, 8210, false, false...",[]
1,politifact14548,1019223856613978113,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'749646362': [148, 1, 0, 3330, false, false, ...",[],"{'749646362': [148, 1, 0, 3330, false, false, ...",[],"{'749646362': [148, 1, 0, 3330, false, false, ...",[]
2,politifact15370,987013993645527040,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'892337239753084928': [8, 1, 0, 24959, false,...",[],"{'892337239753084928': [8, 1, 0, 24959, false,...",[],"{'892337239753084928': [8, 1, 0, 24959, false,...",[]
3,politifact14286,879581268450529280,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'154974214': [3434, 21, 20186, 37947, false, ...",[],"{'154974214': [3434, 21, 20186, 37947, false, ...",[],"{'154974214': [3434, 21, 20186, 37947, false, ...",[]
4,politifact13773,494350617737822208,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'375005403': [211, 27, 512, 10590, false, tru...",[],"{'375005403': [211, 27, 512, 10590, false, tru...",[],"{'375005403': [211, 27, 512, 10590, false, tru...",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11699,politifact14548,1021015199719948293,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'865046794866106369': [6, 0, 1250, 9048, fals...",[],"{'865046794866106369': [6, 0, 1250, 9048, fals...",[],"{'865046794866106369': [6, 0, 1250, 9048, fals...",[]
11700,politifact15539,993194259737075712,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'2947475305': [237, 9, 13539, 12954, false, f...",[],"{'2947475305': [237, 9, 13539, 12954, false, f...",[],"{'2947475305': [237, 9, 13539, 12954, false, f...",[]
11701,politifact14548,1007344139929677824,0,0.000000,0,0.00,0.0,0.000000,1,0,1,"{'4697374993': [1191, 11, 24726, 11235, false,...",[],"{'4697374993': [1191, 11, 24726, 11235, false,...",[],"{'4697374993': [1191, 11, 24726, 11235, false,...",[]
11702,politifact14667,1012449631941414914,1,0.666667,0,16.75,0.0,9.338889,3,1,3,"{'842566427794137088': [268, 0, 9560, 2186, fa...","[[385735929, 26382985]]","{'842566427794137088': [268, 0, 9560, 2186, fa...","[[385735929, 26382985]]","{'842566427794137088': [268, 0, 9560, 2186, fa...","[[385735929, 26382985]]"


In [10]:
# Write the test-split table; every non-numeric field is wrapped in
# double quotes and the index is not written.
test_df.to_csv(
    "../exported/cascade_network_1h_test.csv",
    index=False,
    header=True,
    quotechar='"',
    quoting=csv.QUOTE_NONNUMERIC,
)

### Validation set

In [11]:
# Build, for every cascade in the validation split, three retweet graphs
# (retweets within 20 / 40 / 60 minutes of the source tweet) plus summary
# statistics of the 60-minute graph.
# NOTE(review): this repeats the logic of the training and test cells;
# consider factoring it into one function that takes the id frame.

# Per-cascade outputs, one entry per row of cv_id_df (in row order).
network_h1_nodes = [] # list of json strings
network_h1_edges = [] # list of json strings
network_h2_nodes = [] # list of json strings
network_h2_edges = [] # list of json strings
network_h3_nodes = [] # list of json strings
network_h3_edges = [] # list of json strings
max_time_diffs = []
min_time_diffs = []
avg_time_diffs = []
max_degrees = []
min_degrees = []
avg_degrees = []
node_numbers = []
edge_numbers = []

# Retweet-delay cut-offs in minutes for the h1, h2 and h3 graphs.
window_minutes = (20, 40, 60)

for idx, row in cv_id_df.iterrows():

    # progress indicator: one dot per 1000 rows
    if idx % 1000 == 0:
        print(".", end="")

    network_h1 = nx.Graph()
    network_h2 = nx.Graph()
    network_h3 = nx.Graph()
    networks = (network_h1, network_h2, network_h3)

    # --- source tweet: its author is the root node of all three graphs ---
    with open(f"{NEWS_PATH}/{row['news_id']}/tweets/{row['source_tweet_id']}.json", "r") as f:
        t = json.load(f)
    u = t['user']
    cascade_datetime = get_post_datetime(t['created_at'])

    # feature layout: seven user-profile fields followed by the delay in
    # minutes relative to the source tweet (0 for the source itself)
    feature_vector = [u['followers_count'], u['listed_count'], u['favourites_count'], u['statuses_count'],
                      u['protected'], u['geo_enabled'], u['verified'], 0]
    for network in networks:
        network.add_node(u['id'], feature=feature_vector)

    time_diffs = [0]

    # --- retweets: add each retweeter to every graph whose window it fits ---
    with open(f"{NEWS_PATH}/{row['news_id']}/retweets/{row['source_tweet_id']}.json", "r") as ff:
        rts = json.load(ff)['retweets']

    for rt in rts:
        time_diff = time_diff_in_minute(cascade_datetime, get_post_datetime(rt['created_at']))
        if time_diff > window_minutes[-1]:
            # later than the widest window: belongs to no graph and is
            # excluded from the delay statistics
            continue

        u = rt['user']
        feature_vector = [u['followers_count'], u['listed_count'], u['favourites_count'], u['statuses_count'],
                          u['protected'], u['geo_enabled'], u['verified'], time_diff]
        for network, limit in zip(networks, window_minutes):
            if time_diff <= limit:
                network.add_node(u['id'], feature=feature_vector)

        time_diffs.append(time_diff)

    # --- edges: connect a user to each of their followers present in the graph ---
    # Every h1/h2 node is also an h3 node, so each follower file is read once
    # per cascade and reduced to the followers that appear in h3.
    h3_members = set(network_h3.nodes)
    followers_in_cascade = {}  # user id -> filtered follower list, None if no file
    for network in networks:
        nodes = list(network.nodes)
        members = set(nodes)  # O(1) membership instead of scanning a list
        for n in nodes:
            if n not in followers_in_cascade:
                followers_file = f"{DATASET_BASE}/user_followers/{n}.json"
                if path.exists(followers_file):
                    with open(followers_file, "r") as social_f:
                        followers = json.load(social_f)['followers']
                    followers_in_cascade[n] = [x for x in followers if x in h3_members]
                else:
                    followers_in_cascade[n] = None

            if followers_in_cascade[n] is None:
                print("not exists, skip")
                continue

            for follower in followers_in_cascade[n]:
                if follower in members and follower != n:
                    network.add_edge(n, follower)

    # --- serialise graphs ---
    # nodes -> JSON object {user id: feature vector} with double quotes
    # replaced by single quotes; edges -> JSON list of [u, v] pairs
    for network, nodes_out, edges_out in (
        (network_h1, network_h1_nodes, network_h1_edges),
        (network_h2, network_h2_nodes, network_h2_edges),
        (network_h3, network_h3_nodes, network_h3_edges),
    ):
        features_by_node = {n: data['feature'] for n, data in network.nodes(data=True)}
        nodes_out.append(json.dumps(features_by_node).replace('"', "'"))
        edges_out.append(json.dumps([list(e) for e in network.edges]))

    # --- summary statistics (delays over kept retweets, degrees over h3) ---
    time_diffs = np.array(time_diffs)
    max_time_diffs.append(time_diffs.max())
    min_time_diffs.append(time_diffs.min())
    avg_time_diffs.append(time_diffs.mean())

    degrees = np.array([d for _, d in network_h3.degree()])
    max_degrees.append(degrees.max())
    min_degrees.append(degrees.min())
    avg_degrees.append(degrees.mean())

    node_numbers.append(network_h3.number_of_nodes())
    edge_numbers.append(network_h3.number_of_edges())

# Attach the per-cascade results to a copy of the id table.
cv_df = cv_id_df.assign(
    max_deg=max_degrees,
    min_deg=min_degrees,
    avg_deg=avg_degrees,
    max_timediff=max_time_diffs,
    min_timediff=min_time_diffs,
    avg_timediff=avg_time_diffs,
    node_number=node_numbers,
    edge_number=edge_numbers,
    h1_nodes=network_h1_nodes,
    h1_edges=network_h1_edges,
    h2_nodes=network_h2_nodes,
    h2_edges=network_h2_edges,
    h3_nodes=network_h3_nodes,
    h3_edges=network_h3_edges,
)

............

In [12]:
# Keep only the columns that are exported, in a fixed order: identifiers,
# graph statistics, cascade size, then the serialised graphs.
cv_columns = [
    'news_id',
    'source_tweet_id',
    'max_deg',
    'avg_deg',
    'min_deg',
    'max_timediff',
    'min_timediff',
    'avg_timediff',
    'node_number',
    'edge_number',
    'cascade_size',
    'h1_nodes',
    'h1_edges',
    'h2_nodes',
    'h2_edges',
    'h3_nodes',
    'h3_edges',
]
cv_df = cv_df[cv_columns]
cv_df

Unnamed: 0,news_id,source_tweet_id,max_deg,avg_deg,min_deg,max_timediff,min_timediff,avg_timediff,node_number,edge_number,cascade_size,h1_nodes,h1_edges,h2_nodes,h2_edges,h3_nodes,h3_edges
0,politifact14043,834714940992737280,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'2364510636': [197, 4, 36, 16131, false, true...",[],"{'2364510636': [197, 4, 36, 16131, false, true...",[],"{'2364510636': [197, 4, 36, 16131, false, true...",[]
1,politifact14548,1056988855251783680,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'2428580940': [790, 16, 967, 5685, false, tru...",[],"{'2428580940': [790, 16, 967, 5685, false, tru...",[],"{'2428580940': [790, 16, 967, 5685, false, tru...",[]
2,politifact14213,900122582153822209,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'1513358730': [179, 8, 1139, 20532, false, tr...",[],"{'1513358730': [179, 8, 1139, 20532, false, tr...",[],"{'1513358730': [179, 8, 1139, 20532, false, tr...",[]
3,politifact15129,941488985532260352,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'24626122': [4210, 45, 5352, 99945, false, fa...",[],"{'24626122': [4210, 45, 5352, 99945, false, fa...",[],"{'24626122': [4210, 45, 5352, 99945, false, fa...",[]
4,politifact15573,1067956794583072768,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'789917878552322049': [2246, 4, 257496, 29312...",[],"{'789917878552322049': [2246, 4, 257496, 29312...",[],"{'789917878552322049': [2246, 4, 257496, 29312...",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11699,politifact15604,804527146685394944,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'2458246014': [28, 0, 5, 10762, false, false,...",[],"{'2458246014': [28, 0, 5, 10762, false, false,...",[],"{'2458246014': [28, 0, 5, 10762, false, false,...",[]
11700,politifact14548,991007918068649992,0,0.0,0,0.0,0.0,0.0,1,0,2,"{'708878708': [4596, 44, 39217, 34111, false, ...",[],"{'708878708': [4596, 44, 39217, 34111, false, ...",[],"{'708878708': [4596, 44, 39217, 34111, false, ...",[]
11701,politifact14213,900216889032495104,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'597725883': [170, 11, 83, 4671, false, true,...",[],"{'597725883': [170, 11, 83, 4671, false, true,...",[],"{'597725883': [170, 11, 83, 4671, false, true,...",[]
11702,politifact15262,983676873166282752,0,0.0,0,0.0,0.0,0.0,1,0,1,"{'21451833': [2032, 19, 1882, 2697, false, tru...",[],"{'21451833': [2032, 19, 1882, 2697, false, tru...",[],"{'21451833': [2032, 19, 1882, 2697, false, tru...",[]


In [13]:
# Write the validation-split table; every non-numeric field is wrapped in
# double quotes and the index is not written.
cv_df.to_csv(
    "../exported/cascade_network_1h_cv.csv",
    index=False,
    header=True,
    quotechar='"',
    quoting=csv.QUOTE_NONNUMERIC,
)