In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

Since we want to focus on one particular event, we don't need to work on the complete dataset. We download the tweets from the cluster to be able to work on them remotely.

In [5]:
# Download the dataset from Hadoop

import glob
from os.path import basename, splitext
import requests as rx
from urllib.request import urlretrieve
from time import sleep

swiss_tweet_url = 'http://iccluster060.iccluster.epfl.ch:50070/webhdfs/v1/datasets/swiss-tweet'
operation_list = '?op=LISTSTATUS'
operation_open = '?op=OPEN'

req = rx.get(swiss_tweet_url + operation_list)
if req.status_code != 200:
    raise Exception("Failed to load list of files")

remote_swiss_tweet_files = set(map(lambda f: f['pathSuffix'], req.json()['FileStatuses']['FileStatus']))
local_swiss_tweet_files = set(map(lambda f: basename(f), glob.glob('data/harvest3r_twitter_data_*.json')))

missing_swiss_tweet_files = remote_swiss_tweet_files - local_swiss_tweet_files
missing_index, missing_count = 0, len(missing_swiss_tweet_files)

if missing_count == 0:
    print("Your dataset is complete, nothing to download!")

for swiss_tweet_file in missing_swiss_tweet_files:
    missing_index += 1
    print("Downloading {} ({} of {} files)".format(swiss_tweet_file, missing_index, missing_count))
    
    frm = swiss_tweet_url + '/' + swiss_tweet_file + operation_open
    to = 'data/' + swiss_tweet_file
    
    urlretrieve(frm, to)
    sleep(1)

ConnectionError: HTTPConnectionPool(host='iccluster060.iccluster.epfl.ch', port=50070): Max retries exceeded with url: /webhdfs/v1/datasets/swiss-tweet?op=LISTSTATUS (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f546359d0f0>: Failed to establish a new connection: [Errno -2] Name or service not known',))

In [24]:
i = 0
datasets = glob.glob('data/harvest3r_twitter_data_*.json')
for datapath in datasets:
    i += 1
    print("Working on file {} of {}: {}".format(i, len(datasets), datapath))
    tweets = pd.DataFrame( pd.read_json(datapath)._source.tolist() )
    tagged_tweets = tweets.dropna(subset=['tags'])
    print("Amount of tagged tweets found: {}".format(tagged_tweets.size))
    taggedpath = 'data/tagged_' + splitext(basename(datapath))[0] + '.json'
    print("Tagged tweets written to: {}".format(taggedpath))
    tagged_tweets.to_json(taggedpath)

Working on file 1 of 1: data/harvest3r_twitter_data_24-02_0.json
Amount of tagged tweets found: 462970
Tagged tweets written to: data/tagged_harvest3r_twitter_data_24-02_0.json


In [29]:
i = 0
datasets = glob.glob('data/tagged_harvest3r_twitter_data_*.json')
for datapath in datasets:
    i += 1
    print("Working on file {} of {}: {}".format(i, len(datasets), datapath))
    tweets = pd.read_json(datapath)
    tweets['tags_str'] = tweets.tags.apply(lambda t: ':'.join(t))
    chvotes = tweets.select(lambda t: tweets.loc[t].tags_str.lower().find('chvote') != -1)
    print("Amount of #chvote found: {} / {}".format(chvotes.size, tweets.size))
    chvotepath = datapath.replace('tagged', 'chvote')
    print("#chvote written to: {}".format(chvotepath))
    chvotes.to_json(chvotepath)

Working on file 1 of 302: data/tagged_harvest3r_twitter_data_14-08_0.json
Amount of #chvote found: 445 / 1179428
#chvote written to: data/chvote_harvest3r_twitter_data_14-08_0.json
Working on file 2 of 302: data/tagged_harvest3r_twitter_data_23-02_0.json
Amount of #chvote found: 1020 / 459952
#chvote written to: data/chvote_harvest3r_twitter_data_23-02_0.json
Working on file 3 of 302: data/tagged_harvest3r_twitter_data_07-09_0.json
Amount of #chvote found: 2232 / 1709340
#chvote written to: data/chvote_harvest3r_twitter_data_07-09_0.json
Working on file 4 of 302: data/tagged_harvest3r_twitter_data_18-08_0.json
Amount of #chvote found: 930 / 1549287
#chvote written to: data/chvote_harvest3r_twitter_data_18-08_0.json
Working on file 5 of 302: data/tagged_harvest3r_twitter_data_15-10_0.json
Amount of #chvote found: 282 / 1126590
#chvote written to: data/chvote_harvest3r_twitter_data_15-10_0.json
Working on file 6 of 302: data/tagged_harvest3r_twitter_data_01-09_0.json
Amount of #chvote fo

Amount of #chvote found: 744 / 1542870
#chvote written to: data/chvote_harvest3r_twitter_data_17-08_0.json
Working on file 47 of 302: data/tagged_harvest3r_twitter_data_20-02_0.json
Amount of #chvote found: 204 / 378964
#chvote written to: data/chvote_harvest3r_twitter_data_20-02_0.json
Working on file 48 of 302: data/tagged_harvest3r_twitter_data_30-06_0.json
Amount of #chvote found: 0 / 373248
#chvote written to: data/chvote_harvest3r_twitter_data_30-06_0.json
Working on file 49 of 302: data/tagged_harvest3r_twitter_data_06-08_0.json
Amount of #chvote found: 0 / 1004454
#chvote written to: data/chvote_harvest3r_twitter_data_06-08_0.json
Working on file 50 of 302: data/tagged_harvest3r_twitter_data_20-05_0.json
Amount of #chvote found: 2400 / 601200
#chvote written to: data/chvote_harvest3r_twitter_data_20-05_0.json
Working on file 51 of 302: data/tagged_harvest3r_twitter_data_05-06_0.json
Amount of #chvote found: 23545 / 561000
#chvote written to: data/chvote_harvest3r_twitter_data_0

Amount of #chvote found: 255 / 534055
#chvote written to: data/chvote_harvest3r_twitter_data_25-06_0.json
Working on file 93 of 302: data/tagged_harvest3r_twitter_data_16-06_0.json
Amount of #chvote found: 162 / 613575
#chvote written to: data/chvote_harvest3r_twitter_data_16-06_0.json
Working on file 94 of 302: data/tagged_harvest3r_twitter_data_29-01_0.json
Amount of #chvote found: 744 / 495380
#chvote written to: data/chvote_harvest3r_twitter_data_29-01_0.json
Working on file 95 of 302: data/tagged_harvest3r_twitter_data_18-07_0.json
Amount of #chvote found: 0 / 868907
#chvote written to: data/chvote_harvest3r_twitter_data_18-07_0.json
Working on file 96 of 302: data/tagged_harvest3r_twitter_data_05-05_0.json
Amount of #chvote found: 949 / 433547
#chvote written to: data/chvote_harvest3r_twitter_data_05-05_0.json
Working on file 97 of 302: data/tagged_harvest3r_twitter_data_21-02_0.json
Amount of #chvote found: 544 / 347684
#chvote written to: data/chvote_harvest3r_twitter_data_21-0

Amount of #chvote found: 1360 / 470356
#chvote written to: data/chvote_harvest3r_twitter_data_29-02_0.json
Working on file 139 of 302: data/tagged_harvest3r_twitter_data_04-10_0.json
Amount of #chvote found: 744 / 1513854
#chvote written to: data/chvote_harvest3r_twitter_data_04-10_0.json
Working on file 140 of 302: data/tagged_harvest3r_twitter_data_07-10_0.json
Amount of #chvote found: 846 / 1597342
#chvote written to: data/chvote_harvest3r_twitter_data_07-10_0.json
Working on file 141 of 302: data/tagged_harvest3r_twitter_data_05-07_0.json
Amount of #chvote found: 0 / 358263
#chvote written to: data/chvote_harvest3r_twitter_data_05-07_0.json
Working on file 142 of 302: data/tagged_harvest3r_twitter_data_21-05_0.json
Amount of #chvote found: 480 / 384400
#chvote written to: data/chvote_harvest3r_twitter_data_21-05_0.json
Working on file 143 of 302: data/tagged_harvest3r_twitter_data_25-08_0.json
Amount of #chvote found: 465 / 1584441
#chvote written to: data/chvote_harvest3r_twitter_

Amount of #chvote found: 372 / 1153851
#chvote written to: data/chvote_harvest3r_twitter_data_21-08_0.json
Working on file 184 of 302: data/tagged_harvest3r_twitter_data_19-02_0.json
Amount of #chvote found: 952 / 493272
#chvote written to: data/chvote_harvest3r_twitter_data_19-02_0.json
Working on file 185 of 302: data/tagged_harvest3r_twitter_data_17-03_0.json
Amount of #chvote found: 71 / 516028
#chvote written to: data/chvote_harvest3r_twitter_data_17-03_0.json
Working on file 186 of 302: data/tagged_harvest3r_twitter_data_27-02_0.json
Amount of #chvote found: 1088 / 326400
#chvote written to: data/chvote_harvest3r_twitter_data_27-02_0.json
Working on file 187 of 302: data/tagged_harvest3r_twitter_data_18-04_0.json
Amount of #chvote found: 71 / 388512
#chvote written to: data/chvote_harvest3r_twitter_data_18-04_0.json
Working on file 188 of 302: data/tagged_harvest3r_twitter_data_27-01_0.json
Amount of #chvote found: 310 / 530844
#chvote written to: data/chvote_harvest3r_twitter_da

Amount of #chvote found: 170 / 483480
#chvote written to: data/chvote_harvest3r_twitter_data_12-06_0.json
Working on file 229 of 302: data/tagged_harvest3r_twitter_data_15-02_0.json
Amount of #chvote found: 938 / 422033
#chvote written to: data/chvote_harvest3r_twitter_data_15-02_0.json
Working on file 230 of 302: data/tagged_harvest3r_twitter_data_01-08_0.json
Amount of #chvote found: 0 / 843542
#chvote written to: data/chvote_harvest3r_twitter_data_01-08_0.json
Working on file 231 of 302: data/tagged_harvest3r_twitter_data_29-05_0.json
Amount of #chvote found: 510 / 400690
#chvote written to: data/chvote_harvest3r_twitter_data_29-05_0.json
Working on file 232 of 302: data/tagged_harvest3r_twitter_data_23-04_0.json
Amount of #chvote found: 216 / 290880
#chvote written to: data/chvote_harvest3r_twitter_data_23-04_0.json
Working on file 233 of 302: data/tagged_harvest3r_twitter_data_19-09_0.json
Amount of #chvote found: 1488 / 1530501
#chvote written to: data/chvote_harvest3r_twitter_da

Amount of #chvote found: 567 / 470925
#chvote written to: data/chvote_harvest3r_twitter_data_03-02_0.json
Working on file 274 of 302: data/tagged_harvest3r_twitter_data_11-01_0.json
Amount of #chvote found: 63 / 175518
#chvote written to: data/chvote_harvest3r_twitter_data_11-01_0.json
Working on file 275 of 302: data/tagged_harvest3r_twitter_data_05-08_0.json
Amount of #chvote found: 0 / 1230158
#chvote written to: data/chvote_harvest3r_twitter_data_05-08_0.json
Working on file 276 of 302: data/tagged_harvest3r_twitter_data_12-01_0.json
Amount of #chvote found: 504 / 467901
#chvote written to: data/chvote_harvest3r_twitter_data_12-01_0.json
Working on file 277 of 302: data/tagged_harvest3r_twitter_data_27-07_0.json
Amount of #chvote found: 93 / 1003470
#chvote written to: data/chvote_harvest3r_twitter_data_27-07_0.json
Working on file 278 of 302: data/tagged_harvest3r_twitter_data_28-04_0.json
Amount of #chvote found: 511 / 554435
#chvote written to: data/chvote_harvest3r_twitter_data

In [52]:
i = 0
chvote_tweets = pd.DataFrame()
datasets = glob.glob('data/chvote_harvest3r_twitter_data_*.json')
for datapath in datasets:
    i += 1
    print("Working on file {} of {}: {}".format(i, len(datasets), datapath))
    tweets = pd.read_json(datapath)
    print("Amount of tweets with #chvote", )
    chvote_tweets = chvote_tweets.append(tweets, ignore_index=True)

Working on file 1 of 302: data/chvote_harvest3r_twitter_data_09-08_0.json
Amount of tweets with #chvote
Working on file 2 of 302: data/chvote_harvest3r_twitter_data_13-06_0.json
Amount of tweets with #chvote
Working on file 3 of 302: data/chvote_harvest3r_twitter_data_30-05_0.json
Amount of tweets with #chvote
Working on file 4 of 302: data/chvote_harvest3r_twitter_data_23-04_0.json
Amount of tweets with #chvote
Working on file 5 of 302: data/chvote_harvest3r_twitter_data_15-03_0.json
Amount of tweets with #chvote
Working on file 6 of 302: data/chvote_harvest3r_twitter_data_05-08_0.json
Amount of tweets with #chvote
Working on file 7 of 302: data/chvote_harvest3r_twitter_data_01-07_0.json
Amount of tweets with #chvote
Working on file 8 of 302: data/chvote_harvest3r_twitter_data_02-06_0.json
Amount of tweets with #chvote
Working on file 9 of 302: data/chvote_harvest3r_twitter_data_21-04_0.json
Amount of tweets with #chvote
Working on file 10 of 302: data/chvote_harvest3r_twitter_data_31

Amount of tweets with #chvote
Working on file 81 of 302: data/chvote_harvest3r_twitter_data_19-05_0.json
Amount of tweets with #chvote
Working on file 82 of 302: data/chvote_harvest3r_twitter_data_24-03_0.json
Amount of tweets with #chvote
Working on file 83 of 302: data/chvote_harvest3r_twitter_data_11-05_0.json
Amount of tweets with #chvote
Working on file 84 of 302: data/chvote_harvest3r_twitter_data_20-09_0.json
Amount of tweets with #chvote
Working on file 85 of 302: data/chvote_harvest3r_twitter_data_21-07_0.json
Amount of tweets with #chvote
Working on file 86 of 302: data/chvote_harvest3r_twitter_data_13-10_0.json
Amount of tweets with #chvote
Working on file 87 of 302: data/chvote_harvest3r_twitter_data_16-03_0.json
Amount of tweets with #chvote
Working on file 88 of 302: data/chvote_harvest3r_twitter_data_08-04_0.json
Amount of tweets with #chvote
Working on file 89 of 302: data/chvote_harvest3r_twitter_data_21-10_0.json
Amount of tweets with #chvote
Working on file 90 of 302

Working on file 159 of 302: data/chvote_harvest3r_twitter_data_30-01_0.json
Amount of tweets with #chvote
Working on file 160 of 302: data/chvote_harvest3r_twitter_data_17-07_0.json
Amount of tweets with #chvote
Working on file 161 of 302: data/chvote_harvest3r_twitter_data_27-02_0.json
Amount of tweets with #chvote
Working on file 162 of 302: data/chvote_harvest3r_twitter_data_20-03_0.json
Amount of tweets with #chvote
Working on file 163 of 302: data/chvote_harvest3r_twitter_data_04-03_0.json
Amount of tweets with #chvote
Working on file 164 of 302: data/chvote_harvest3r_twitter_data_04-08_0.json
Amount of tweets with #chvote
Working on file 165 of 302: data/chvote_harvest3r_twitter_data_13-09_0.json
Amount of tweets with #chvote
Working on file 166 of 302: data/chvote_harvest3r_twitter_data_12-01_0.json
Amount of tweets with #chvote
Working on file 167 of 302: data/chvote_harvest3r_twitter_data_02-04_0.json
Amount of tweets with #chvote
Working on file 168 of 302: data/chvote_harves

Amount of tweets with #chvote
Working on file 239 of 302: data/chvote_harvest3r_twitter_data_07-02_0.json
Amount of tweets with #chvote
Working on file 240 of 302: data/chvote_harvest3r_twitter_data_11-07_0.json
Amount of tweets with #chvote
Working on file 241 of 302: data/chvote_harvest3r_twitter_data_19-03_0.json
Amount of tweets with #chvote
Working on file 242 of 302: data/chvote_harvest3r_twitter_data_26-04_0.json
Amount of tweets with #chvote
Working on file 243 of 302: data/chvote_harvest3r_twitter_data_22-05_0.json
Amount of tweets with #chvote
Working on file 244 of 302: data/chvote_harvest3r_twitter_data_25-07_0.json
Amount of tweets with #chvote
Working on file 245 of 302: data/chvote_harvest3r_twitter_data_12-09_0.json
Amount of tweets with #chvote
Working on file 246 of 302: data/chvote_harvest3r_twitter_data_16-10_0.json
Amount of tweets with #chvote
Working on file 247 of 302: data/chvote_harvest3r_twitter_data_14-08_0.json
Amount of tweets with #chvote
Working on file 

In [54]:
chvote_tags = list()
for tags in chvote_tweets.tags:
    chvote_tags += [t.lower() for t in tags]

chvote_tags = pd.Series(chvote_tags)
chvote_tags.value_counts().head(10)

chvote_tweets.to_json('data/tweets-chvote.json')

In [44]:
i = 0
tags = ['abst16', 'avsplus', 'lrens', 'rbi']
tweets_by_tag = {}
for t in tags:
    tweets_by_tag[t] = pd.DataFrame()

datasets = glob.glob('data/tagged_harvest3r_twitter_data_*.json')
for datapath in datasets:
    i += 1
    print("Working on file {} of {}: {}".format(i, len(datasets), datapath))
    tweets = pd.read_json(datapath)
    tweets['tags_str'] = tweets.tags.apply(lambda t: ':'.join(t))
    for tag in tags:
        tweets_found = tweets.select(lambda t: tweets.loc[t].tags_str.lower().find(tag) != -1)
        tweets_by_tag[tag] = tweets_by_tag[tag].append(tweets_found, ignore_index=True)
        print("Amount of #{} found: {} / {}".format(tag, tweets_found.size, tweets.size))
    #chvotepath = datapath.replace('tagged', 'chvote')
    #print("#chvote written to: {}".format(chvotepath))
    #chvotes.to_json(chvotepath)

Working on file 1 of 302: data/tagged_harvest3r_twitter_data_14-08_0.json
Amount of #abst16 found: 0 / 1179428
Amount of #avsplus found: 0 / 1179428
Amount of #lrens found: 89 / 1179428
Amount of #rbi found: 1424 / 1179428
Working on file 2 of 302: data/tagged_harvest3r_twitter_data_23-02_0.json
Amount of #abst16 found: 884 / 459952
Amount of #avsplus found: 0 / 459952
Amount of #lrens found: 0 / 459952
Amount of #rbi found: 340 / 459952
Working on file 3 of 302: data/tagged_harvest3r_twitter_data_07-09_0.json
Amount of #abst16 found: 930 / 1709340
Amount of #avsplus found: 93 / 1709340
Amount of #lrens found: 2790 / 1709340
Amount of #rbi found: 1581 / 1709340
Working on file 4 of 302: data/tagged_harvest3r_twitter_data_18-08_0.json
Amount of #abst16 found: 279 / 1549287
Amount of #avsplus found: 372 / 1549287
Amount of #lrens found: 279 / 1549287
Amount of #rbi found: 2697 / 1549287
Working on file 5 of 302: data/tagged_harvest3r_twitter_data_15-10_0.json
Amount of #abst16 found: 188

Amount of #abst16 found: 63 / 131355
Amount of #avsplus found: 0 / 131355
Amount of #lrens found: 0 / 131355
Amount of #rbi found: 63 / 131355
Working on file 39 of 302: data/tagged_harvest3r_twitter_data_18-05_0.json
Amount of #abst16 found: 640 / 747920
Amount of #avsplus found: 0 / 747920
Amount of #lrens found: 0 / 747920
Amount of #rbi found: 800 / 747920
Working on file 40 of 302: data/tagged_harvest3r_twitter_data_08-08_0.json
Amount of #abst16 found: 0 / 1410561
Amount of #avsplus found: 0 / 1410561
Amount of #lrens found: 0 / 1410561
Amount of #rbi found: 1424 / 1410561
Working on file 41 of 302: data/tagged_harvest3r_twitter_data_27-10_0.json
Amount of #abst16 found: 1140 / 2652780
Amount of #avsplus found: 0 / 2652780
Amount of #lrens found: 0 / 2652780
Amount of #rbi found: 4845 / 2652780
Working on file 42 of 302: data/tagged_harvest3r_twitter_data_14-07_0.json
Amount of #abst16 found: 0 / 442288
Amount of #avsplus found: 0 / 442288
Amount of #lrens found: 0 / 442288
Amoun

Amount of #abst16 found: 868 / 462396
Amount of #avsplus found: 0 / 462396
Amount of #lrens found: 0 / 462396
Amount of #rbi found: 496 / 462396
Working on file 76 of 302: data/tagged_harvest3r_twitter_data_28-09_0.json
Amount of #abst16 found: 186 / 1638381
Amount of #avsplus found: 93 / 1638381
Amount of #lrens found: 93 / 1638381
Amount of #rbi found: 1302 / 1638381
Working on file 77 of 302: data/tagged_harvest3r_twitter_data_28-05_0.json
Amount of #abst16 found: 595 / 421770
Amount of #avsplus found: 0 / 421770
Amount of #lrens found: 0 / 421770
Amount of #rbi found: 765 / 421770
Working on file 78 of 302: data/tagged_harvest3r_twitter_data_31-08_0.json
Amount of #abst16 found: 1302 / 1622943
Amount of #avsplus found: 1488 / 1622943
Amount of #lrens found: 186 / 1622943
Amount of #rbi found: 837 / 1622943
Working on file 79 of 302: data/tagged_harvest3r_twitter_data_07-03_0.json
Amount of #abst16 found: 68 / 447508
Amount of #avsplus found: 0 / 447508
Amount of #lrens found: 0 / 4

Amount of #abst16 found: 0 / 513576
Amount of #avsplus found: 0 / 513576
Amount of #lrens found: 0 / 513576
Amount of #rbi found: 288 / 513576
Working on file 113 of 302: data/tagged_harvest3r_twitter_data_04-09_0.json
Amount of #abst16 found: 1116 / 1131810
Amount of #avsplus found: 0 / 1131810
Amount of #lrens found: 93 / 1131810
Amount of #rbi found: 2883 / 1131810
Working on file 114 of 302: data/tagged_harvest3r_twitter_data_18-02_0.json
Amount of #abst16 found: 938 / 465918
Amount of #avsplus found: 0 / 465918
Amount of #lrens found: 0 / 465918
Amount of #rbi found: 536 / 465918
Working on file 115 of 302: data/tagged_harvest3r_twitter_data_09-02_0.json
Amount of #abst16 found: 576 / 483712
Amount of #avsplus found: 0 / 483712
Amount of #lrens found: 0 / 483712
Amount of #rbi found: 448 / 483712
Working on file 116 of 302: data/tagged_harvest3r_twitter_data_10-02_0.json
Amount of #abst16 found: 704 / 387968
Amount of #avsplus found: 0 / 387968
Amount of #lrens found: 0 / 387968
A

Amount of #abst16 found: 285 / 1614145
Amount of #avsplus found: 0 / 1614145
Amount of #lrens found: 95 / 1614145
Amount of #rbi found: 2280 / 1614145
Working on file 150 of 302: data/tagged_harvest3r_twitter_data_31-03_0.json
Amount of #abst16 found: 0 / 452128
Amount of #avsplus found: 0 / 452128
Amount of #lrens found: 0 / 452128
Amount of #rbi found: 568 / 452128
Working on file 151 of 302: data/tagged_harvest3r_twitter_data_10-08_0.json
Amount of #abst16 found: 0 / 1534805
Amount of #avsplus found: 0 / 1534805
Amount of #lrens found: 0 / 1534805
Amount of #rbi found: 1424 / 1534805
Working on file 152 of 302: data/tagged_harvest3r_twitter_data_06-01_0.json
Amount of #abst16 found: 63 / 328923
Amount of #avsplus found: 0 / 328923
Amount of #lrens found: 0 / 328923
Amount of #rbi found: 630 / 328923
Working on file 153 of 302: data/tagged_harvest3r_twitter_data_20-08_0.json
Amount of #abst16 found: 930 / 1216440
Amount of #avsplus found: 465 / 1216440
Amount of #lrens found: 186 / 1

Amount of #abst16 found: 1428 / 326400
Amount of #avsplus found: 0 / 326400
Amount of #lrens found: 0 / 326400
Amount of #rbi found: 884 / 326400
Working on file 187 of 302: data/tagged_harvest3r_twitter_data_18-04_0.json
Amount of #abst16 found: 71 / 388512
Amount of #avsplus found: 0 / 388512
Amount of #lrens found: 0 / 388512
Amount of #rbi found: 426 / 388512
Working on file 188 of 302: data/tagged_harvest3r_twitter_data_27-01_0.json
Amount of #abst16 found: 558 / 530844
Amount of #avsplus found: 0 / 530844
Amount of #lrens found: 0 / 530844
Amount of #rbi found: 186 / 530844
Working on file 189 of 302: data/tagged_harvest3r_twitter_data_07-06_0.json
Amount of #abst16 found: 405 / 577125
Amount of #avsplus found: 0 / 577125
Amount of #lrens found: 0 / 577125
Amount of #rbi found: 810 / 577125
Working on file 190 of 302: data/tagged_harvest3r_twitter_data_31-05_0.json
Amount of #abst16 found: 1020 / 582250
Amount of #avsplus found: 0 / 582250
Amount of #lrens found: 85 / 582250
Amou

Amount of #abst16 found: 0 / 455949
Amount of #avsplus found: 0 / 455949
Amount of #lrens found: 0 / 455949
Amount of #rbi found: 162 / 455949
Working on file 224 of 302: data/tagged_harvest3r_twitter_data_23-01_0.json
Amount of #abst16 found: 124 / 211110
Amount of #avsplus found: 0 / 211110
Amount of #lrens found: 0 / 211110
Amount of #rbi found: 434 / 211110
Working on file 225 of 302: data/tagged_harvest3r_twitter_data_27-09_0.json
Amount of #abst16 found: 558 / 1648146
Amount of #avsplus found: 93 / 1648146
Amount of #lrens found: 279 / 1648146
Amount of #rbi found: 1581 / 1648146
Working on file 226 of 302: data/tagged_harvest3r_twitter_data_06-03_0.json
Amount of #abst16 found: 68 / 330004
Amount of #avsplus found: 0 / 330004
Amount of #lrens found: 0 / 330004
Amount of #rbi found: 680 / 330004
Working on file 227 of 302: data/tagged_harvest3r_twitter_data_29-09_0.json
Amount of #abst16 found: 0 / 1656981
Amount of #avsplus found: 0 / 1656981
Amount of #lrens found: 279 / 165698

Amount of #abst16 found: 760 / 2400935
Amount of #avsplus found: 0 / 2400935
Amount of #lrens found: 0 / 2400935
Amount of #rbi found: 2850 / 2400935
Working on file 261 of 302: data/tagged_harvest3r_twitter_data_12-07_0.json
Amount of #abst16 found: 0 / 408496
Amount of #avsplus found: 0 / 408496
Amount of #lrens found: 0 / 408496
Amount of #rbi found: 440 / 408496
Working on file 262 of 302: data/tagged_harvest3r_twitter_data_02-08_0.json
Amount of #abst16 found: 0 / 739857
Amount of #avsplus found: 0 / 739857
Amount of #lrens found: 0 / 739857
Amount of #rbi found: 623 / 739857
Working on file 263 of 302: data/tagged_harvest3r_twitter_data_03-06_0.json
Amount of #abst16 found: 510 / 647530
Amount of #avsplus found: 0 / 647530
Amount of #lrens found: 0 / 647530
Amount of #rbi found: 1190 / 647530
Working on file 264 of 302: data/tagged_harvest3r_twitter_data_04-02_0.json
Amount of #abst16 found: 448 / 146944
Amount of #avsplus found: 0 / 146944
Amount of #lrens found: 0 / 146944
Amou

Amount of #abst16 found: 0 / 440792
Amount of #avsplus found: 0 / 440792
Amount of #lrens found: 0 / 440792
Amount of #rbi found: 440 / 440792
Working on file 298 of 302: data/tagged_harvest3r_twitter_data_12-04_0.json
Amount of #abst16 found: 284 / 526536
Amount of #avsplus found: 0 / 526536
Amount of #lrens found: 0 / 526536
Amount of #rbi found: 1065 / 526536
Working on file 299 of 302: data/tagged_harvest3r_twitter_data_24-07_0.json
Amount of #abst16 found: 0 / 1041972
Amount of #avsplus found: 0 / 1041972
Amount of #lrens found: 0 / 1041972
Amount of #rbi found: 2325 / 1041972
Working on file 300 of 302: data/tagged_harvest3r_twitter_data_29-03_0.json
Amount of #abst16 found: 142 / 478398
Amount of #avsplus found: 0 / 478398
Amount of #lrens found: 142 / 478398
Amount of #rbi found: 568 / 478398
Working on file 301 of 302: data/tagged_harvest3r_twitter_data_02-04_0.json
Amount of #abst16 found: 426 / 350243
Amount of #avsplus found: 0 / 350243
Amount of #lrens found: 0 / 350243
Am

In [51]:
for tag in tweets_by_tag:
    tagpath = "data/tweets-{}.json".format(tag)
    print("Writting {} tweets to {}".format(tweets_by_tag[tag].size, tagpath))
    tweets_by_tag[tag].to_json(tagpath)

Writting 352735 tweets to data/tweets-abst16.json
Writting 33915 tweets to data/tweets-avsplus.json
Writting 49020 tweets to data/tweets-lrens.json
Writting 379715 tweets to data/tweets-rbi.json
