In [1]:
# https://docs.tweepy.org/en/v3.5.0/getting_started.html

In [1]:
# Import keys from a file not committed to GitHub to keep my credentials secret
from setup_api import consumerKey, consumerSecret, accessToken, accessTokenSecret

import tweepy
import datetime
import pandas as pd

In [2]:
# Build the OAuth handler from the consumer key/secret imported from setup_api,
# then attach the access token/secret imported from the same module.
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)

# Shared API client used by the helper functions defined in later cells.
# NOTE(review): the two wait_on_rate_limit* flags presumably make tweepy pause
# (and report it) when a rate limit is hit instead of raising - confirm against
# the installed tweepy version; a later cell's output still shows RateLimitError.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [16]:
# https://stackoverflow.com/questions/49731259/tweepy-get-tweets-between-two-dates

# Function to get all tweet IDs from a given date range for a SINGLE USER
# Tweet ids are "snowflakes": the bits above the low 22 hold the number of
# milliseconds elapsed since this Twitter-specific epoch (2010-11-04 UTC).
# That lets a date be turned into an id usable as a since_id / max_id bound.
_TWITTER_EPOCH_MS = 1288834974657
_TWEET_DATE_FORMATS = ('%Y-%m-%d', '%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S')


def _as_tweet_id(bound):
    '''
    Normalise a range bound to something usable as since_id / max_id.

    bound - an int tweet id (returned unchanged), a string of digits
            (converted to int), a date string in one of _TWEET_DATE_FORMATS,
            or a datetime.date / datetime.datetime. Naive dates and times are
            treated as UTC. Any other type is returned unchanged.

    Returns the smallest tweet id that could have been created at that moment.
    Raises ValueError for a string that is neither digits nor a known date format.
    '''
    if isinstance(bound, str):
        text = bound.strip()
        if text.isdigit():
            return int(text)
        for fmt in _TWEET_DATE_FORMATS:
            try:
                bound = datetime.datetime.strptime(text, fmt)
            except ValueError:
                continue
            break
        else:
            raise ValueError(f'Cannot interpret {bound!r} as a tweet id or date')

    # datetime.datetime is a subclass of datetime.date, so test it first.
    if isinstance(bound, datetime.datetime):
        moment = bound
    elif isinstance(bound, datetime.date):
        moment = datetime.datetime(bound.year, bound.month, bound.day)
    else:
        return bound

    if moment.tzinfo is not None:
        moment = moment.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    # Integer timedelta division avoids float rounding on millisecond values.
    elapsed_ms = (moment - datetime.datetime(1970, 1, 1)) // datetime.timedelta(milliseconds=1)
    return max(elapsed_ms - _TWITTER_EPOCH_MS, 0) << 22


def tweets_in_dates(user, first_id, last_id):
    '''
    Collect [user, created_at, tweet_id] rows from one user's timeline.

    user - a twitter user
    first_id - lower bound: tweet id of a tweet published at the start
               date + time we want, or the start date itself
               (e.g. '2019-10-17'); passed to the API as since_id
    last_id - upper bound: tweet id of a tweet published at the end
              date + time we want, or the end date itself; passed to the
              API as max_id. A bare end date means midnight at the start of
              that day, so tweets from that day are not included.

    Returns a list of [user, tweet.created_at, tweet.id] lists in the order
    the cursor yields them.

    NOTE(review): Twitter documents user_timeline as reaching back only over
    a user's ~3200 most recent tweets - confirm the range is fully covered
    for very active accounts.
    '''
    # Why ids rather than dates:
    # https://stackoverflow.com/questions/49731259/tweepy-get-tweets-between-two-dates
    since_id = _as_tweet_id(first_id)
    max_id = _as_tweet_id(last_id)

    # A bare api.user_timeline call only returns a single page, so page
    # through the timeline with a Cursor:
    # https://stackoverflow.com/questions/48362198/twitter-user-timeline-not-returning-enough-tweets
    # count=200 asks for the largest page the endpoint serves, which keeps the
    # number of rate-limited requests as low as possible.
    timeline = tweepy.Cursor(
        api.user_timeline,
        user,
        since_id=since_id,
        max_id=max_id,
        count=200,
    ).items()

    tweets = []
    for tweet in timeline:
        tweets.append([user, tweet.created_at, tweet.id])

        # Only print once for every 100 records
        if len(tweets) % 100 == 0:
            print(f'Tweet {len(tweets)} from {tweet.created_at}')

    return tweets

In [8]:

# Function to get all tweet IDs from a given date range for a list of users

def get_users_tweet_id_in_range(users, first_id, last_id):
    '''
    Collect the tweets of several users into a single DataFrame.

    users - a list (or any iterable) of twitter users
    first_id - tweet id of a tweet published at the start date + time we want
    last_id - tweet id of a tweet published at the end date + time we want

    first_id and last_id are handed to tweets_in_dates unchanged for every user.

    Returns a DataFrame with columns User, TweetCreated and TweetId, one row
    per tweet, users in the order given. An empty `users` gives an empty
    DataFrame with those columns.
    '''
    # Materialise once so generators work and the total is known up front.
    users = list(users)
    total = len(users)

    res = []

    # enumerate gives the true position even if a handle is listed twice
    # (users.index(user) would always report the first occurrence).
    for position, user in enumerate(users, start=1):
        print('-'*30)
        print(f"User: {user} Number: {position} of {total}")
        print('-'*30)
        print()
        # extend keeps every row in one flat list of [user, created, id];
        # append would nest one list per user.
        res.extend(tweets_in_dates(user, first_id, last_id))

    # Convert results from list of lists to DataFrame
    tweets_df = pd.DataFrame.from_records(res, columns=['User', 'TweetCreated', 'TweetId'])

    return tweets_df


In [12]:
# Twitter handles of the UK news accounts to collect.
uk_news_users = ['BBC', 'BBCWales', 'BBCEngland', 'BBCScotland', 'ITVWales']

# Can't look back further than the past 7 days.
# Work around this by bounding the request with the ids of tweets
# published on the dates we want:
# https://stackoverflow.com/questions/26205102/making-very-specific-time-requests-to-the-second-on-twitter-api-using-python

# Reference tweets whose ids are used as the bounds below:
# tweet from 2019-10-17: https://twitter.com/harry_styles/status/1185187691857678337?lang=en
# tweet from 2019-11-16: https://twitter.com/sean_o100/status/1195811463706873857
# tweet from 2021-05-21: https://twitter.com/saund97/status/1395883494652383232

start_id = 1185187691857678337  # 2019-10-17
end_id = 1195811463706873857    # 2019-11-16
# Alternative upper bound: end_id = 1395883494652383232 (2021-05-21)

# One row per tweet for every UK account, between the two reference ids.
results = get_users_tweet_id_in_range(uk_news_users, start_id, end_id)

# Last expression of the cell: display the resulting DataFrame.
results

------------------------------
User: BBC Number: 1 of 5
------------------------------

Returned 1
Tweet from 2019-11-16 19:05:27
Returned 2
Tweet from 2019-11-16 16:59:06
Returned 3
Tweet from 2019-11-16 16:35:28
Returned 4
Tweet from 2019-11-16 15:59:02
Returned 5
Tweet from 2019-11-16 13:58:03
Returned 6
Tweet from 2019-11-16 13:30:00
Returned 7
Tweet from 2019-11-16 11:58:03
Returned 8
Tweet from 2019-11-16 11:01:00
Returned 9
Tweet from 2019-11-16 01:33:09
Returned 10
Tweet from 2019-11-16 01:32:53
Returned 11
Tweet from 2019-11-16 01:24:06
Returned 12
Tweet from 2019-11-16 00:41:00
Returned 13
Tweet from 2019-11-16 00:31:09
Returned 14
Tweet from 2019-11-15 23:39:54
Returned 15
Tweet from 2019-11-15 23:15:30
Returned 16
Tweet from 2019-11-15 23:09:09
Returned 17
Tweet from 2019-11-15 23:05:52
Returned 18
Tweet from 2019-11-15 22:39:40
Returned 19
Tweet from 2019-11-15 22:36:10
Returned 20
Tweet from 2019-11-15 22:28:50
Returned 21
Tweet from 2019-11-15 22:19:28
Returned 22
Tweet 

Unnamed: 0,User,TweetCreated,TweetId
0,BBC,2019-11-16 19:05:27,1195779936050655237
1,BBC,2019-11-16 16:59:06,1195748135580114945
2,BBC,2019-11-16 16:35:28,1195742188929474561
3,BBC,2019-11-16 15:59:02,1195733022680764416
4,BBC,2019-11-16 13:58:03,1195702575997341697
...,...,...,...
356,BBC,2019-10-18 15:13:21,1185212275629740033
357,BBC,2019-10-18 15:12:55,1185212168964399104
358,BBC,2019-10-18 15:11:35,1185211832933474310
359,BBC,2019-10-18 14:31:32,1185201751290454018


In [13]:
# Twitter handles of the Chilean news accounts to collect.
chile_news_users = ['ElMercurio_cl',
                    'latercera',
                    'La_Segunda',
                    'lun',
                    'lacuarta',
                    'meganoticiascl',
                    'canal13',
                    'TVN',
                    '24HorasTVN',
                    'nacioncl',
                    'DFinanciero',
                    'chilevision',
                    'ucvradio',
                    'LaRedTV',
                    'Cooperativa',
                    'RadioPudahuel',
                    'biobio',
                    'FMConquistador',
                    'INFORMADORCHILE',
                    'CNNChile',
                    'CHVNoticias',
                    'elmostrador',
                    'PublimetroChile',
                    'eldesconcierto',
                    'el_dinamo',
                    'El_Ciudadano',
                    'elliberocl',
                    'eo_enlinea']

# Ids of reference tweets published on the dates noted beside them.
start_id = 1185187691857678337  # 2019-10-17
end_id = 1195811463706873857    # 2019-11-16 (not used by the call below)
now_id = 1395883494652383232# 2021-05-21

# This run is bounded by start_id and now_id, i.e. 2019-10-17 to 2021-05-21.
results = get_users_tweet_id_in_range(chile_news_users, start_id, now_id)

# Last expression of the cell: display the resulting DataFrame.
results

ned 2360
Tweet from 2020-11-17 15:13:57
Returned 2361
Tweet from 2020-11-17 15:04:23
Returned 2362
Tweet from 2020-11-17 14:05:57
Returned 2363
Tweet from 2020-11-17 13:37:52
Returned 2364
Tweet from 2020-11-17 13:15:51
Returned 2365
Tweet from 2020-11-17 08:00:01
Returned 2366
Tweet from 2020-11-17 08:00:00
Returned 2367
Tweet from 2020-11-17 01:21:20
Returned 2368
Tweet from 2020-11-17 00:41:18
Returned 2369
Tweet from 2020-11-17 00:38:14
Returned 2370
Tweet from 2020-11-17 00:00:29
Returned 2371
Tweet from 2020-11-16 23:36:14
Returned 2372
Tweet from 2020-11-16 21:09:42
Returned 2373
Tweet from 2020-11-16 20:41:11
Returned 2374
Tweet from 2020-11-16 20:27:00
Returned 2375
Tweet from 2020-11-16 18:57:23
Returned 2376
Tweet from 2020-11-16 18:27:18
Returned 2377
Tweet from 2020-11-16 18:08:35
Returned 2378
Tweet from 2020-11-16 16:42:58
Returned 2379
Tweet from 2020-11-16 16:42:02
Returned 2380
Tweet from 2020-11-16 16:26:49
Returned 2381
Tweet from 2020-11-16 15:30:08
Returned 2382
T

Unnamed: 0,User,TweetCreated,TweetId
0,ElMercurio_cl,2021-05-21 23:00:01,1395877067804266499
1,ElMercurio_cl,2021-05-21 22:30:00,1395869512205930497
2,ElMercurio_cl,2021-05-21 22:00:01,1395861965906472962
3,ElMercurio_cl,2021-05-21 21:30:00,1395854413634355200
4,ElMercurio_cl,2021-05-21 21:00:00,1395846863979532291
...,...,...,...
89949,eo_enlinea,2020-09-15 13:59:02,1305868733290893313
89950,eo_enlinea,2020-09-15 13:24:50,1305860125547257856
89951,eo_enlinea,2020-09-15 13:10:29,1305856513580425222
89952,eo_enlinea,2020-09-15 08:00:01,1305778382064549890


In [14]:
# Sanity check: (rows, columns) of the collected tweets.
results.shape

(89954, 3)

In [15]:
# Persist the collected rows to InitialChile.csv in the working directory.
# With the default to_csv arguments the DataFrame index is written as well.
results.to_csv('InitialChile.csv')

# Chilean news sources

### So far these are the sources that have been identified as potentially interesting:



El Mercurio (@ElMercurio_cl) / Twitter

La Tercera (@latercera) / Twitter

laSegunda (@La_Segunda) / Twitter

Las Últimas Noticias (@lun) / Twitter

La Cuarta (@lacuarta) / Twitter

Meganoticias (@meganoticiascl) / Twitter

Canal 13 (@canal13) / Twitter

TVN (@TVN) / Twitter

24 Horas (@24HorasTVN) / Twitter

La Nación Chile (@nacioncl) / Twitter

Diario Financiero (@DFinanciero) / Twitter

Chilevisión (@chilevision) / Twitter

UCV Radio 103.5 FM (@ucvradio) / Twitter

LaRed (@LaRedTV) / Twitter

Cooperativa (@Cooperativa) / Twitter

Radio Pudahuel (@RadioPudahuel) / Twitter

BioBioChile (@biobio) / Twitter

El Conquistador 91.3 (@FMConquistador) / Twitter

EL INFORMADORCHILE🇨🇱 (@INFORMADORCHILE) / Twitter

CNN Chile (@CNNChile) / Twitter

CHV Noticias (@CHVNoticias) / Twitter

El Mostrador (@elmostrador) / Twitter

Publimetro (@PublimetroChile) / Twitter

El Desconcierto (@eldesconcierto) / Twitter

El Dínamo (@el_dinamo) / Twitter

El Ciudadano (@El_Ciudadano) / Twitter

El Líbero (@elliberocl) / Twitter

Diario El Observador (@eo_enlinea) / Twitter



In terms of the time period, if you could look between October 18, 2019 and November 18, 2019, that would be some cool gravy.




In [23]:
# Same Chilean account list as before, with ElMercurio_cl commented out.
chile_news_users = [#'ElMercurio_cl',
                    'latercera',
                    'La_Segunda',
                    'lun',
                    'lacuarta',
                    'meganoticiascl',
                    'canal13',
                    'TVN',
                    '24HorasTVN',
                    'nacioncl',
                    'DFinanciero',
                    'chilevision',
                    'ucvradio',
                    'LaRedTV',
                    'Cooperativa',
                    'RadioPudahuel',
                    'biobio',
                    'FMConquistador',
                    'INFORMADORCHILE',
                    'CNNChile',
                    'CHVNoticias',
                    'elmostrador',
                    'PublimetroChile',
                    'eldesconcierto',
                    'el_dinamo',
                    'El_Ciudadano',
                    'elliberocl',
                    'eo_enlinea']

# Requested study window, kept for reference:
#start_date = '2019-10-18'
#end_date = '2019-11-18'
# Short test window instead of the full study period.
start_date = '2020-05-10'
end_date = '2020-05-15' # Test window: 2020-05-10 to 2020-05-15

# NOTE(review): get_users_tweet_id_in_range documents first_id / last_id as
# tweet ids, but date strings are passed here - confirm the helper accepts
# dates before relying on this cell.
results = get_users_tweet_id_in_range(chile_news_users, start_date, end_date)

# Last expression of the cell: display the resulting DataFrame.
results

------------------------------
User: latercera Number: 1 of 27
------------------------------



RateLimitError: [{'message': 'Rate limit exceeded', 'code': 88}]

In [None]:
# Collect the UK accounts' tweets for 2021-05-20 to 2021-05-22.
# NOTE(review): get_users_tweet_id_in_range documents first_id / last_id as
# tweet ids, but date strings are passed here - confirm the helper accepts
# dates before relying on this cell.
results = get_users_tweet_id_in_range(uk_news_users, '2021-05-20', '2021-05-22')

In [None]:
# Persist the most recent `results` frame to twitterIDs.csv in the working
# directory. With the default to_csv arguments the DataFrame index is written as well.
results.to_csv('twitterIDs.csv')