In [1]:
from timeit import default_timer as timer
import itertools
import os
import sys
import uuid
from glob import glob
from pathlib import Path
import json
import tweepy
import numpy as np
import pandas as pd
import multiprocessing as mp
import psutil
import socket
from functools import partial
import pyarrow.parquet as pq

# Params

In [2]:
# Batch size: a worker writes its accumulated timelines to disk each time it has
# successfully downloaded this many users.
cutoff = 1000
print('Save Data After Downloading',cutoff,'Timelines')

Save Data After Downloading 1000 Timelines


In [3]:
def get_env_var(varname,default):
    """Read an integer setting from the environment, falling back to ``default``.

    Parameters
    ----------
    varname : str
        Name of the environment variable to read.
    default : int
        Value returned when the variable is unset or cannot be parsed as an integer.

    Returns
    -------
    int
        The parsed value, or ``default``.
    """
    raw = os.environ.get(varname)

    if raw is None:
        print(varname,':', default,'(Default)')
        return default

    try:
        var = int(raw)
    except ValueError:
        # Slurm may report per-node CPU counts in a compound form such as "16(x2)",
        # which int() rejects; use the default rather than aborting the notebook.
        print(varname,':', default,'(Default; could not parse', repr(raw) + ')')
        return default

    print(varname,':', var)
    return var

# Choose Number of Nodes To Distribute Credentials: e.g. jobarray=0-4, cpu_per_task=20, credentials = 90 (<100)
# Each value comes from the environment when set, otherwise from the default passed here
# (so the notebook also runs outside a Slurm job as a single task).
# SLURM_JOB_ID and SLURM_ARRAY_TASK_ID are embedded in the output file names.
SLURM_JOB_ID            = get_env_var('SLURM_JOB_ID',0)
# Index of this array task; selects which slice of the credential files it uses.
SLURM_ARRAY_TASK_ID     = get_env_var('SLURM_ARRAY_TASK_ID',0)
# Number of array tasks the credential files are split across.
SLURM_ARRAY_TASK_COUNT  = get_env_var('SLURM_ARRAY_TASK_COUNT',1)
# Upper bound on the number of credentials kept by this task.
SLURM_JOB_CPUS_PER_NODE = get_env_var('SLURM_JOB_CPUS_PER_NODE',mp.cpu_count())

SLURM_JOB_ID : 0 (Default)
SLURM_ARRAY_TASK_ID : 0 (Default)
SLURM_ARRAY_TASK_COUNT : 1 (Default)
SLURM_JOB_CPUS_PER_NODE : 16 (Default)


In [4]:
# Root directory for the notebook's inputs and outputs (relative to the notebook).
path_to_data = '../data'

# Build every sub-path component-wise with os.path.join instead of concatenating
# separators by hand, so all paths are constructed the same way.
path_to_users_ids = os.path.join(path_to_data,'users')
path_to_users_old = os.path.join(path_to_data,'timelines','API','all_users_ids')
path_to_users_recent = os.path.join(path_to_data,'timelines','API','most_recent_id')

# Directory holding the key*/auth* credential files.
path_to_keys = '../keys'

# Output directory for downloaded timelines and the 'success' log; created if missing.
path_to_timelines = os.path.join(path_to_data,'timelines','API','IDF-updates')
os.makedirs(path_to_timelines, exist_ok=True)

print(path_to_users_recent)
print(path_to_keys)
print(path_to_timelines)

../data/timelines/API/most_recent_id
../keys
../data/timelines/API/IDF-updates


# Credentials

In [5]:
def get_key_files(SLURM_ARRAY_TASK_ID,SLURM_ARRAY_TASK_COUNT,SLURM_JOB_CPUS_PER_NODE):
    """Pick the credential files that this array task should use.

    All ``key*`` files found in ``path_to_keys`` are shuffled with a fixed seed,
    split into ``SLURM_ARRAY_TASK_COUNT`` contiguous groups, and the group at
    index ``SLURM_ARRAY_TASK_ID`` is returned, truncated to at most
    ``SLURM_JOB_CPUS_PER_NODE`` entries.

    Parameters
    ----------
    SLURM_ARRAY_TASK_ID : int
        Zero-based index of the group to return.
    SLURM_ARRAY_TASK_COUNT : int
        Number of groups the key files are split into.
    SLURM_JOB_CPUS_PER_NODE : int
        Maximum number of key files kept for this task.

    Returns
    -------
    (key_files, auth_file)
        ``key_files`` is the array of key-file paths for this task; ``auth_file``
        is the (shuffled) array of every ``auth*`` path found.

    Notes
    -----
    Reseeds NumPy's global random state with 0 as a side effect.
    """
    # Randomize set of key files using constant seed.
    # glob() returns paths in filesystem order, which may differ between machines;
    # sorting first is what makes the seeded shuffle (and therefore the split
    # between array tasks) identical everywhere.
    np.random.seed(0)
    all_key_files = list(np.random.permutation(sorted(glob(os.path.join(path_to_keys,'key*')))))
    auth_file = np.random.permutation(sorted(glob(os.path.join(path_to_keys,'auth*'))))

    # Split file list by node
    key_files = np.array_split(all_key_files,SLURM_ARRAY_TASK_COUNT)[SLURM_ARRAY_TASK_ID]

    # Keep at most one credential per available CPU
    if len(key_files) <= SLURM_JOB_CPUS_PER_NODE:
        print('# Credentials Allocated To Node:', len(key_files))
    else:
        print('Check environment variables:')
        print('# Credentials (',len(key_files),') > # CPU (', SLURM_JOB_CPUS_PER_NODE,')')
        print('Only keeping', SLURM_JOB_CPUS_PER_NODE, 'credentials')
        key_files = key_files[:SLURM_JOB_CPUS_PER_NODE]

    return key_files, auth_file

# Select this task's credential files and list them (one path per line) as a sanity check.
key_files, auth_file = get_key_files(SLURM_ARRAY_TASK_ID,SLURM_ARRAY_TASK_COUNT,SLURM_JOB_CPUS_PER_NODE)
print('\n'.join(key_files))
print('\n'.join(auth_file))

# Credentials Allocated To Node: 15
../keys/key_cyril.json
../keys/key_noemie.json
../keys/key_othmane.json
../keys/key_sam.json
../keys/key_liubov.json
../keys/key_naila.json
../keys/key_jihanne.json
../keys/key_chakresh.json
../keys/key_charlotte.json
../keys/key_youssr.json
../keys/key_felix.json
../keys/key_marc.json
../keys/key_clemence.json
../keys/key_naila2.json
../keys/key_zoe.json
../keys/auth_naila.json


In [6]:
def get_auth(key_file):
    """Build and verify a tweepy client for one access-token file.

    The application credentials (``consumer_key`` / ``consumer_secret``) are read
    from an ``auth*`` JSON file in ``path_to_keys``; the user credentials
    (``access_token`` / ``access_token_secret``) are read from ``key_file``.

    Parameters
    ----------
    key_file : str
        Path to a JSON file holding the access token pair.

    Returns
    -------
    (auth, api)
        The ``tweepy.OAuthHandler`` and the ``tweepy.API`` built from it.

    Raises
    ------
    FileNotFoundError
        If no ``auth*`` file exists in ``path_to_keys``.
    SystemExit
        If the credentials cannot be verified (raised through ``sys.exit('Exit')``).
    """
    # Import Auth keys.
    # glob() order is arbitrary, so sort to make the choice deterministic when
    # several auth files are present (the last one in sorted order is used).
    auth_files = sorted(glob(os.path.join(path_to_keys,'auth*')))
    if not auth_files:
        raise FileNotFoundError('No auth* file found in ' + str(path_to_keys))
    with open(auth_files[-1]) as f:
        auth_key = json.load(f)

    # Import token pairs
    with open(key_file) as f:
        key = json.load(f)

    # OAuth process, using the keys and tokens
    auth = tweepy.OAuthHandler(auth_key['consumer_key'], auth_key['consumer_secret'])
    auth.set_access_token(key['access_token'], key['access_token_secret'])

    # Creation of the actual interface, using authentication
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    # Only API errors are treated as an authentication failure; anything else
    # (e.g. KeyboardInterrupt or a programming error) propagates with its traceback.
    try:
        verified = api.verify_credentials()
    except tweepy.error.TweepError as e:
        print(key_file,": error during authentication", e)
        sys.exit('Exit')

    # NOTE(review): some tweepy 3.x releases appear to return False instead of
    # raising when credentials are rejected - treat a falsy result as a failure too.
    if not verified:
        print(key_file,": error during authentication")
        sys.exit('Exit')

    print(key_file,": Authentication checked")

    return auth, api

In [7]:
# Verify every credential assigned to this task before starting any download;
# get_auth exits the process on the first credential that fails verification.
for file in key_files:
    get_auth(file)

../keys/key_cyril.json : Authentication checked
../keys/key_noemie.json : Authentication checked
../keys/key_othmane.json : Authentication checked
../keys/key_sam.json : Authentication checked
../keys/key_liubov.json : Authentication checked
../keys/key_naila.json : Authentication checked
../keys/key_jihanne.json : Authentication checked
../keys/key_chakresh.json : Authentication checked
../keys/key_charlotte.json : Authentication checked
../keys/key_youssr.json : Authentication checked
../keys/key_felix.json : Authentication checked
../keys/key_marc.json : Authentication checked
../keys/key_clemence.json : Authentication checked
../keys/key_naila2.json : Authentication checked
../keys/key_zoe.json : Authentication checked


# Users list

In [8]:
start = timer()
print('Select Users...')

# Read every parquet shard found (recursively) under each users directory
users_old = pq.ParquetDataset(list(Path(path_to_users_old).glob("**/*.parquet"))).read().to_pandas()
users_recent = pq.ParquetDataset(list(Path(path_to_users_recent).glob("**/*.parquet"))).read().to_pandas()

# Keep one row per user - the one with the latest created_at - then shuffle
# the rows with a fixed seed.
users = (
    pd.concat([users_old, users_recent])
    .sort_values(by=['user_id','created_at'], ascending=True)
    .drop_duplicates(subset='user_id', keep='last')
    .sample(frac=1, random_state=0)
)

print('# Users :', len(users))

end = timer()
print('Computing Time:', round(end - start), 'sec')

Select Users...
# Users : 30651
Computing Time: 1 sec


In [9]:
# Restart the timer for the filtering step below.
start = timer()
print('Remove users whose timeline were successfully downloaded...')

def get_success():
    """Return the set of user ids already recorded in the 'success' log.

    The log lives at ``path_to_timelines/success`` and holds one tab-separated
    record per line; the first field of each record is the user id. An empty set
    is returned when the log does not exist yet.
    """
    success_path = os.path.join(path_to_timelines, 'success')

    if os.path.exists(success_path):
        with open(success_path, 'r', encoding='utf-8') as handle:
            return {record.strip('\n').split('\t')[0] for record in handle}

    return set()

success=get_success()
print('# downloaded timelines:', len(success))

# Drop users whose id is already in the success log. `~` is the element-wise
# negation operator for boolean masks; unary `-` is not meant for booleans.
users=users[~users.user_id.isin(success)].copy()

end = timer()
print('Computing Time:', round(end - start), 'sec')

Remove users whose timeline were successfully downloaded...
# downloaded timelines: 0
Computing Time: 0 sec


## Get timelines

In [10]:
def get_timeline(user_id,tweet_id,api):
    """Download a user's statuses newer than ``tweet_id`` and flatten them into a DataFrame.

    Parameters
    ----------
    user_id
        Id of the user whose timeline is requested (passed as ``user_id``).
    tweet_id
        Only statuses more recent than this id are requested (passed as ``since_id``).
    api
        Client object exposing ``user_timeline``, e.g. the ``tweepy.API`` from ``get_auth``.

    Returns
    -------
    (timeline, error)
        ``timeline`` is a DataFrame with columns ``id_str``, ``user_id``,
        ``user_name``, ``full_text``, ``created_at``, ``lang``, ``country_code``,
        ``city`` and ``bounding_box`` (the last three are missing for statuses
        without a place). ``error`` is ``None`` on success, otherwise the text of
        the ``TweepError``; statuses collected before the error are still returned.
    """
    statuses = []
    error = None

    # Collect All Statuses in Timeline
    try:
        cursor = tweepy.Cursor(
            api.user_timeline,
            user_id=user_id,
            since_id=tweet_id,
            # 200 is the documented per-request maximum for this endpoint;
            # the cursor keeps paging until the timeline is exhausted.
            count=200,
            tweet_mode="extended",
            include_rts=True).items()

        for status in cursor:
            statuses.append(status._json)

    except tweepy.error.TweepError as e:
        error = str(e)

    timeline = pd.DataFrame(statuses, columns = ['id_str','user','full_text','created_at','lang','place'])

    # Flatten the nested user object
    timeline['user_id'] = timeline['user'].apply(lambda x: x.get('id_str'))
    timeline['user_name'] = timeline['user'].apply(lambda x: x.get('name'))

    # Flatten the nested place object; rows without a place are left out here and
    # end up as missing values once the results are aligned back on the index.
    places = timeline.loc[~timeline['place'].isna(), 'place']
    timeline['country_code'] = places.apply(lambda x: x.get('country_code'))
    timeline['bounding_box'] = places.apply(lambda x: x.get('bounding_box'))
    timeline['city'] = places.apply(lambda x: x.get('full_name'))

    return timeline[['id_str','user_id','user_name','full_text','created_at','lang','country_code',
                     'city','bounding_box']], error


#timeline = get_timeline('12','1266367509055209473',get_auth(key_files[0])[1])

In [11]:
# Smoke test on a single user. Note: `timeline` holds the full return value of
# get_timeline, i.e. the (DataFrame, error) tuple, not just the DataFrame.
timeline = get_timeline('2606411695','1267575252898459653',get_auth(key_files[0])[1])

../keys/key_cyril.json : Authentication checked


In [12]:
# Display the (DataFrame, error) tuple returned by the smoke test.
timeline

(                 id_str     user_id                        user_name  \
 0   1288050186502242304  2606411695  Nicolas Marguerie Photographies   
 1   1283462230814711809  2606411695  Nicolas Marguerie Photographies   
 2   1283461388053209093  2606411695  Nicolas Marguerie Photographies   
 3   1280923643049119744  2606411695  Nicolas Marguerie Photographies   
 4   1280866923631304705  2606411695  Nicolas Marguerie Photographies   
 5   1280802757029625856  2606411695  Nicolas Marguerie Photographies   
 6   1280800442243563520  2606411695  Nicolas Marguerie Photographies   
 7   1280633383375290368  2606411695  Nicolas Marguerie Photographies   
 8   1278613949395668992  2606411695  Nicolas Marguerie Photographies   
 9   1278354085046075393  2606411695  Nicolas Marguerie Photographies   
 10  1278079343944765441  2606411695  Nicolas Marguerie Photographies   
 11  1278026876947255296  2606411695  Nicolas Marguerie Photographies   
 12  1277649623386140673  2606411695  Nicolas Margu

In [13]:
# Display the users still to be downloaded (one row per user_id, shuffled).
users

Unnamed: 0,id_str,created_at,full_text,lang,user_id,user_name,country_code,city,coordinates,bounding_box_type
43305,1271069157346459649,2020-06-11 13:17:57,RT @liamgallagher: Pints of lager c’mon Boris ...,en,283204345,Torabisu Sukotto,,,,
44557,1267184323251273728,2020-05-31 20:01:00,@LaurRin_ @CAgiusILD @DenoyelleW @WrestleMania...,und,563365714,dick riviere,FR,"Paris, France","[[[2.2241006, 48.8155214], [2.4699053, 48.8155...",Polygon
8409,1273016822665818112,2020-06-16 22:17:16,Mercredi de bonheur \n#BLACK_LIVES_MATTER http...,fr,883826000261459968,mack ten,FR,"Dammarie-les-Lys, France","[[[2.5848515, 48.4946347], [2.6551137, 48.4946...",Polygon
44893,1270604929992925184,2020-06-10 06:33:16,RT @spopow9: ALORS COMME ÇA QUAND C’EST UN REB...,fr,736811823429746688,shikamaru,,,,
33494,1266145402530693121,2020-05-28 23:12:42,"RT @Harvard: Today, Harvard awarded a total of...",en,202501528,Harvard Club France,,,,
...,...,...,...,...,...,...,...,...,...,...
6725,1285870131030167552,2020-07-22 09:31:44,RT @berarddemalavas: Mise à jour du FAQ de @Tr...,fr,240194563,Cgt Interforum Editis Informer-Alerter-Mobiliser,,,,
7740,1278791674362200066,2020-07-02 20:44:28,🤍 \n\n(🔜 #Calogero @calogerofficiel),und,401703279,cyrille sauzéat,,,,
6203,1285188824084160514,2020-07-20 12:24:27,RT @05_Arslanbekov: Hahahha ca m’tue à la fin ...,fr,1606669932,Aldina,,,,
6352,1285518247484366849,2020-07-21 10:13:28,RT @FloLecointre: Accueil #presse @VisitCoteda...,fr,1872787320,Nathalie Dalmasso,,,,


In [14]:
def download_timelines(index_key):
    """Download and save the timelines of the users assigned to one credential.

    ``users`` is split into ``len(key_files)`` contiguous blocks; this call
    handles the block at ``index_key`` using the credential ``key_files[index_key]``.
    Timelines are written to disk in batches of ``cutoff`` users, and any
    remaining partial batch is written once the block is exhausted.

    Parameters
    ----------
    index_key : int
        Index of the credential (and of the matching block of users).

    Returns
    -------
    int
        Always 0.
    """
    # Create Access For Block of Users
    api = get_auth(key_files[index_key])[1]

    # Select Block of Users.
    # Split row positions rather than the DataFrame itself: np.array_split on a
    # DataFrame goes through DataFrame.swapaxes, which pandas has deprecated.
    positions = np.array_split(np.arange(len(users)),len(key_files))[index_key]
    users_block = users.iloc[positions][['user_id','id_str']].values.tolist()

    # Timelines and user ids accumulated since the last save
    frames = []
    downloaded_ids = []

    for (user_id,tweet_id) in users_block:

        # Try Downloading Timeline
        timeline, error = get_timeline(user_id,tweet_id,api)

        if error is not None:
            print(user_id,index_key,error)
            continue

        # Collect frames in a list and concatenate once per save, instead of
        # re-copying a growing DataFrame on every iteration.
        frames.append(timeline)
        downloaded_ids.append(user_id)

        # Save after <cutoff> timelines
        if len(downloaded_ids) == cutoff:
            _save_timelines(frames, downloaded_ids, index_key)
            frames = []
            downloaded_ids = []

    # Save whatever is left when reaching the last user. Doing this after the loop
    # (rather than on the last user's success) keeps the final partial batch even
    # when the last user of the block fails to download.
    if downloaded_ids:
        _save_timelines(frames, downloaded_ids, index_key)

    return 0


def _save_timelines(frames, downloaded_ids, index_key):
    """Write one batch of timelines to a uniquely named bz2 JSON file and log its users."""
    filename = 'timelines-'+ str(SLURM_JOB_ID)+'-'+ str(SLURM_ARRAY_TASK_ID)+'-'+ str(index_key)+'-'+ \
    str(len(downloaded_ids))+'-'+ str(uuid.uuid4())+'.json.bz2'

    print('Process', index_key, 'saving', len(downloaded_ids), 'timelines with output file:',
    os.path.join(path_to_timelines,filename))

    # Save as list of dict discarding index
    pd.concat(frames,sort=False).to_json(
    os.path.join(path_to_timelines,filename),
    orient='records',
    force_ascii=False,
    date_format=None,
    double_precision=15,
    compression='bz2')

    # Save User Id and File In Which Its Timeline Was Saved
    with open(os.path.join(path_to_timelines,'success'), 'a', encoding='utf-8') as file:
        for downloaded_id in downloaded_ids:
            file.write(downloaded_id+'\t'+filename+'\n')

In [15]:
print('Extract Timelines...\n')
# One worker per credential: the default pool size is the machine's CPU count,
# which ignores both the number of credentials and the CPUs actually allocated
# to this job. max(1, ...) keeps Pool() valid when no credential was assigned.
with mp.Pool(processes=max(1, len(key_files))) as pool:
    pool.map(download_timelines, range(len(key_files)))

Extract Timelines...

../keys/key_cyril.json../keys/key_jihanne.json ../keys/key_naila.json../keys/key_felix.json ../keys/key_naila2.json ../keys/key_charlotte.json../keys/key_clemence.json ../keys/key_noemie.json ../keys/key_chakresh.json../keys/key_sam.json../keys/key_marc.json../keys/key_liubov.json../keys/key_youssr.json: Authentication checked  : Authentication checked : Authentication checked   : Authentication checked
: Authentication checked
 : Authentication checked: Authentication checked: Authentication checked: Authentication checked
: Authentication checked
../keys/key_othmane.json
: Authentication checked


../keys/key_zoe.json  
 : Authentication checked

: Authentication checked

: Authentication checked: Authentication checked

1655477510 13 283204345 Twitter error response: status code = 4010 
Twitter error response: status code = 401
41245718 14 Twitter error response: status code = 401
936211104442675200 11 Twitter error response: status code = 401
1160526756 10 Twi

Rate limit reached. Sleeping for: 595


Twitter error response: status code = 401


Rate limit reached. Sleeping for: 595


2901606771 9 Twitter error response: status code = 401
1057765927557779456 10 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 586


4221425063 0 Twitter error response: status code = 401
1104486701827874816 13 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 579


3013741770 11 Twitter error response: status code = 401
1460048581 11 Twitter error response: status code = 401
351576537 11 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 574
Rate limit reached. Sleeping for: 573
Rate limit reached. Sleeping for: 572
Rate limit reached. Sleeping for: 573
Rate limit reached. Sleeping for: 571
Rate limit reached. Sleeping for: 570
Rate limit reached. Sleeping for: 569
Rate limit reached. Sleeping for: 570
Rate limit reached. Sleeping for: 569
Rate limit reached. Sleeping for: 547
Rate limit reached. Sleeping for: 547


1237471237145755651 6 Twitter error response: status code = 401
3194780793 13 Twitter error response: status code = 401
100982618 1 Twitter error response: status code = 404
1272143452105572355 8 Twitter error response: status code = 404
2323752151 1 Twitter error response: status code = 404
23763540 11 Twitter error response: status code = 401
900059501121896448 11 Twitter error response: status code = 401
1012954787527254016 12 Twitter error response: status code = 401
4060935423 13 Twitter error response: status code = 401
1248424313495183360 8 Twitter error response: status code = 401
1200577423726829569 8 Twitter error response: status code = 401
3119430146 10 Twitter error response: status code = 401
437794899 11 Twitter error response: status code = 401
4822212105 1 Twitter error response: status code = 401
1198328387535867908 10 Twitter error response: status code = 401
940633419322667008 8 Twitter error response: status code = 404
1263406283748761600 6 Twitter error response: 

Rate limit reached. Sleeping for: 594


1008171770132787200 7 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 592


1045643334 5 Twitter error response: status code = 404
894112386 5 Twitter error response: status code = 401
36441822 2 Twitter error response: status code = 404
4311840815 9 Twitter error response: status code = 401
909649840291946496 5 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 563


1248762093186289665 5 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 578
Rate limit reached. Sleeping for: 576
Rate limit reached. Sleeping for: 574


1251403032396533763 10 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 573
Rate limit reached. Sleeping for: 573


1236762059070672896 7 Twitter error response: status code = 401
1240050715743092742 13 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 564
Rate limit reached. Sleeping for: 564


2902740718 12 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 561


1246975653305626627 5 Twitter error response: status code = 404
1246827226772819970 5 Twitter error response: status code = 404


Rate limit reached. Sleeping for: 554
Rate limit reached. Sleeping for: 552
Rate limit reached. Sleeping for: 550
Rate limit reached. Sleeping for: 551


1166984932092272640 0 Twitter error response: status code = 401
1248275146680475649 12 Twitter error response: status code = 401
1149049613195915265 10 Twitter error response: status code = 401
1212071758850711552 12 Twitter error response: status code = 404
1112732344627183617 7 Twitter error response: status code = 401
2573687288 7 Twitter error response: status code = 404
2527041571 10 Twitter error response: status code = 401
2626063733 3 Twitter error response: status code = 401
840361855 10 Twitter error response: status code = 401
1069179475907231744 8 Twitter error response: status code = 401
1181650425403072515 12 Twitter error response: status code = 401
522592822 0 Twitter error response: status code = 401
2846746601 8 Twitter error response: status code = 401
1254900350370426882 2 Twitter error response: status code = 401
29437029 9 Twitter error response: status code = 401
1193912498191249408 6 Twitter error response: status code = 401
223122926 0 Twitter error response: s

1009596380 13 Twitter error response: status code = 401
2609141204 11 Twitter error response: status code = 401
419514199 7 Twitter error response: status code = 401
186846903 12 Twitter error response: status code = 404
1193918875978665984 10 Twitter error response: status code = 401
594661639 4 Twitter error response: status code = 401
1037309265952878593 0 Twitter error response: status code = 401
773505551431036928 10 Twitter error response: status code = 401
Process 8 saving 1000 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-8-1000-12b42030-fcfe-4902-af52-b1c45d2187b7.json.bz2
546989279 11 Twitter error response: status code = 401
736495843826618368 6 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 596


323569200 6 Twitter error response: status code = 401
3048186688 1 Twitter error response: status code = 404
1233916421735878656 13 Twitter error response: status code = 401
853738822223499265 7 Twitter error response: status code = 401
152187748 5 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 605


241334345 3 Twitter error response: status code = 401
1105836355 3 Twitter error response: status code = 401
60581096 12 Twitter error response: status code = 401
484076037 14 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 597
Rate limit reached. Sleeping for: 598


1138479951861223425 8 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 594
Rate limit reached. Sleeping for: 594
Rate limit reached. Sleeping for: 591
Rate limit reached. Sleeping for: 591
Rate limit reached. Sleeping for: 590
Rate limit reached. Sleeping for: 589
Rate limit reached. Sleeping for: 589
Rate limit reached. Sleeping for: 587
Rate limit reached. Sleeping for: 586


1260598115901595655 4 Twitter error response: status code = 401
1150670131 9 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 580
Rate limit reached. Sleeping for: 573


472182239 0 Twitter error response: status code = 401
396913758 12 Twitter error response: status code = 401
Process 3 saving 1000 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-3-1000-b9ea9775-a635-4e7e-b50f-d7332f4fef7a.json.bz2
Process 11 saving 1000 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-11-1000-bf00ffd4-384e-48a9-9f3c-e81a011d02b5.json.bz2
2971602471 4 Twitter error response: status code = 401
Process 10 saving 1000 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-10-1000-4a508363-72d9-4ade-a215-ec534e24922a.json.bz2
2359480949 7 Twitter error response: status code = 401
4004282741 13 Twitter error response: status code = 401
1121087268750528515 6 Twitter error response: status code = 404
1193552311777411072 2 Twitter error response: status code = 401
Process 2 saving 1000 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-2-1000-a42e0daf-b2e5-4f10-b84c-5a507e7e068

1081292229845680136 13 Twitter error response: status code = 401
401963396 13 Twitter error response: status code = 401
1067580255173570560 1 Twitter error response: status code = 404
1189918510476079106 14 Twitter error response: status code = 404
1011035544846036992 6 Twitter error response: status code = 401
1268298300832309251 14 Twitter error response: status code = 401
131279398 0 Twitter error response: status code = 401
1268854735290994688 14 Twitter error response: status code = 401
1126446962440536064 7 Twitter error response: status code = 401
1258338138449227776 6 Twitter error response: status code = 401
1235271000016683008 1 Twitter error response: status code = 404
823148219714650112 3 Twitter error response: status code = 401
1270370873518764033 0 Twitter error response: status code = 401
275024282 13 Twitter error response: status code = 401
66681594 2 Twitter error response: status code = 401
52139273 13 Twitter error response: status code = 401
809423262421659649 4 T

Rate limit reached. Sleeping for: 635
Rate limit reached. Sleeping for: 634


2840495013 13 Twitter error response: status code = 401
3964622182 13 Twitter error response: status code = 401
1005554802 2 Twitter error response: status code = 401
1006172549556908033 8 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 625


731587830753972224 6 Twitter error response: status code = 401
229533414 5 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 623
Rate limit reached. Sleeping for: 622
Rate limit reached. Sleeping for: 621


1214659119405314049 5 Twitter error response: status code = 404
572435780 12 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 599


41618935 7 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 617


1216869875660009472 4 Twitter error response: status code = 401
1026879828 3 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 615


1209192029445611522 5 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 612


1172234366472396807 9 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 610


714197502979923968 5 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 606
Rate limit reached. Sleeping for: 606
Rate limit reached. Sleeping for: 601


1129872328484163585 4 Twitter error response: status code = 404


Rate limit reached. Sleeping for: 591


1114996953786540032 5 Twitter error response: status code = 404
998196644461019138 2 Twitter error response: status code = 401
2962637189 12 Twitter error response: status code = 401
913105190215196673 1 Twitter error response: status code = 401
1058155359569874944 3 Twitter error response: status code = 401
3381430900 5 Twitter error response: status code = 401
1690427119 9 Twitter error response: status code = 401
705105914731360256 13 Twitter error response: status code = 404
1264922893986693120 3 Twitter error response: status code = 401
264407436 0 Twitter error response: status code = 404
1211585050812149761 1 Twitter error response: status code = 401
1242839618740989955 3 Twitter error response: status code = 401
1082508280197509120 12 Twitter error response: status code = 404
240426570 11 Twitter error response: status code = 401
1207354990811058176 2 Twitter error response: status code = 401
364182634 10 Twitter error response: status code = 401
1216050586493358081 13 Twitter 

Rate limit reached. Sleeping for: 639


840237627769049090 11 Twitter error response: status code = 401
1180208140182274049 1 Twitter error response: status code = 404
3131026925 12 Twitter error response: status code = 401
599530464 2 Twitter error response: status code = 401
1255531341963288579 9 Twitter error response: status code = 401
1154555500698185729 7 Twitter error response: status code = 401
2464720091 1 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 638
Rate limit reached. Sleeping for: 638
Rate limit reached. Sleeping for: 637


1236596643602083840 3 Twitter error response: status code = 404


Rate limit reached. Sleeping for: 632


1175044767606018048 3 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 631
Rate limit reached. Sleeping for: 629


1238981012153159680 2 Twitter error response: status code = 401


Rate limit reached. Sleeping for: 623
Rate limit reached. Sleeping for: 621
Rate limit reached. Sleeping for: 617
Rate limit reached. Sleeping for: 617
Rate limit reached. Sleeping for: 616
Rate limit reached. Sleeping for: 617
Rate limit reached. Sleeping for: 607
Rate limit reached. Sleeping for: 603


464763642 10 Twitter error response: status code = 401
960095245278109696 6 Twitter error response: status code = 401
1272136182999252992 6 Twitter error response: status code = 404
1249482629851648001 1 Twitter error response: status code = 401
1245457024093167617 6 Twitter error response: status code = 404
1242039574697836544 12 Twitter error response: status code = 401
1269887786607403009 12 Twitter error response: status code = 401
1416148094 11 Twitter error response: status code = 401
3579555262 7 Twitter error response: status code = 401
1197596642788216833 12 Twitter error response: status code = 401
4243263257 12 Twitter error response: status code = 401
776167533674172416 13 Twitter error response: status code = 401
134962843 2 Twitter error response: status code = 401
121114618 9 Twitter error response: status code = 401
1220121876468510720 4 Twitter error response: status code = 401
1247220976821075974 8 Twitter error response: status code = 401
1223415484315553793 2 Twitte

Process 13 saving 990 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-13-990-921a0650-fff5-4375-b633-246c879fcd6c.json.bz2
Process 1 saving 1000 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-1-1000-cb33a656-7e7b-4aca-ba39-8b61db05486b.json.bz2
Process 7 saving 991 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-7-991-332092ad-b8a1-4af1-9abe-c7a26cef0aef.json.bz2
3749396362 4 Twitter error response: status code = 401
Process 1 saving 3 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-1-3-b5004ddc-0b3d-45a8-91b7-08a39eacf1d4.json.bz2
Process 4 saving 1000 timelines with output file: ../data/timelines/API/IDF-updates/timelines-0-0-4-1000-bb5d3bc4-756b-4c87-9693-95fe8aa48651.json.bz2
1076624514031800321 5 Twitter error response: status code = 401
1219400642160209927 5 Twitter error response: status code = 401
Process 4 saving 4 timelines with output file: ../data/timelines/API/I

Rate limit reached. Sleeping for: 563


1247544995860615169 5 Twitter error response: status code = 401
1273939445956444163 5 Twitter error response: status code = 401
14640579 5 Twitter error response: status code = 404
