In [1]:
# https://docs.tweepy.org/en/v3.5.0/getting_started.html

In [2]:
# Import keys from file not commited to GitHub to keep my credentials secret
from setup_api import consumerKey, consumerSecret, accessToken, accessTokenSecret

import tweepy
import datetime
import pandas as pd

In [3]:
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)

api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [21]:
# https://stackoverflow.com/questions/49731259/tweepy-get-tweets-between-two-dates

# Function to get all tweet ID's from a given date range for a SINGLE USER
def tweets_in_dates(user, startDateStr, endDateStr):
    '''
    user - a twitter user
    startDate - earlist tweet we want in format 'YYYY-MM-DD'
    endDate - most re ent tweet we want in format 'YYYY-MM-DD'
    '''

    # Convert date strings to datetime objects
    startDate = datetime.datetime.strptime(startDateStr, '%Y-%m-%d')
    endDate = datetime.datetime.strptime(endDateStr, '%Y-%m-%d')

    tweets = []
    # tmpTweets = tweepy.Cursor(api.search,
    #                        q = user,
    #                        since = startDateStr,
    #                        until = endDateStr,
    #                        lang = "en"
    #                        #lang = "es"
    #                        ).items()

    tmpTweets = api.user_timeline(
                            username,
                            since_id=1185187691857678337, # tweet 2019-10-17
                            max_id=1195811463706873857  # tweet 2019-11-18
                            )

    for tweet in tmpTweets:
        tweets.append([user, tweet.created_at, tweet.id])
        print(f'Returning {len(tweets)} so far')
        print(f'Tweet from {tweet.created_at}')

    return tweets

In [24]:

# Function to get all tweet ID's from a given date range for a list of users

def get_users_tweet_id_in_range(users, startDate, endDate):
    '''
    users - a list of twitter user
    startDate - earlist tweet we want in format 'YYYY-MM-DD'
    endDate - most re ent tweet we want in format 'YYYY-MM-DD'
    '''

    res = []

    for user in users:
        
        print('-'*30)
        print(f"User: {user} Number: {users.index(user)+1} of {len(users)}")
        print('-'*30)
        print()
        # Append results of function to results
        # Extend will insure everying is in the same list
        # Append would create a list of lists
        res.extend( tweets_in_dates(user, startDate, endDate ) )

    #Convert results from list of list to DataFrame
    tweets_df = pd.DataFrame.from_records(res, columns=['User', 'TweetCreated', 'TweetId'])

    return tweets_df


In [25]:
uk_news_users = ['BBC', 'BBCWales', 'BBCEngland', 'BBCScotland', 'ITVWales']
#uk_news_users = ['BBCWales']

results = get_users_tweet_id_in_range(uk_news_users, '2021-05-20', '2021-05-22')

results

------------------------------
User: BBC Number: 1 of 5
------------------------------

Returning 1 so far
Tweet from 2019-11-16 19:05:27
Returning 2 so far
Tweet from 2019-11-16 16:59:06
Returning 3 so far
Tweet from 2019-11-16 16:35:28
Returning 4 so far
Tweet from 2019-11-16 15:59:02
Returning 5 so far
Tweet from 2019-11-16 13:58:03
Returning 6 so far
Tweet from 2019-11-16 13:30:00
Returning 7 so far
Tweet from 2019-11-16 11:58:03
Returning 8 so far
Tweet from 2019-11-16 11:01:00
Returning 9 so far
Tweet from 2019-11-16 01:33:09
Returning 10 so far
Tweet from 2019-11-16 01:32:53
Returning 11 so far
Tweet from 2019-11-16 01:24:06
Returning 12 so far
Tweet from 2019-11-16 00:41:00
Returning 13 so far
Tweet from 2019-11-16 00:31:09
Returning 14 so far
Tweet from 2019-11-15 23:39:54
Returning 15 so far
Tweet from 2019-11-15 23:15:30
Returning 16 so far
Tweet from 2019-11-15 23:09:09
Returning 17 so far
Tweet from 2019-11-15 23:05:52
Returning 18 so far
Tweet from 2019-11-15 22:39:40
Ret

Unnamed: 0,User,TweetCreated,TweetId
0,BBC,2019-11-16 19:05:27,1195779936050655237
1,BBC,2019-11-16 16:59:06,1195748135580114945
2,BBC,2019-11-16 16:35:28,1195742188929474561
3,BBC,2019-11-16 15:59:02,1195733022680764416
4,BBC,2019-11-16 13:58:03,1195702575997341697
...,...,...,...
95,ITVWales,2019-11-15 23:09:09,1195478877545648128
96,ITVWales,2019-11-15 23:05:52,1195478047912271872
97,ITVWales,2019-11-15 22:39:40,1195471454399291394
98,ITVWales,2019-11-15 22:36:10,1195470576279646208


In [20]:
chile_news_users = ['ElMercurio_cl',
                    'latercera',
                    'La_Segunda',
                    'lun',
                    'lacuarta',
                    'meganoticiascl',
                    'canal13',
                    'TVN',
                    '24HorasTVN',
                    'nacioncl',
                    'DFinanciero',
                    'chilevision',
                    'ucvradio',
                    'LaRedTV',
                    'Cooperativa',
                    'RadioPudahuel',
                    'biobio',
                    'FMConquistador',
                    'INFORMADORCHILE',
                    'CNNChile',
                    'CHVNoticias',
                    'elmostrador',
                    'PublimetroChile',
                    'eldesconcierto',
                    'el_dinamo',
                    'El_Ciudadano',
                    'elliberocl',
                    'eo_enlinea']


results = get_users_tweet_id_in_range(chile_news_users, '2020-12-20', '2020-12-20')

results

0 so far
Tweet from 2019-11-15 22:28:50
------------------------------
User: DFinanciero Number: 11 of 28
------------------------------

Returning 1 so far
Tweet from 2019-11-16 19:05:27
Returning 2 so far
Tweet from 2019-11-16 16:59:06
Returning 3 so far
Tweet from 2019-11-16 16:35:28
Returning 4 so far
Tweet from 2019-11-16 15:59:02
Returning 5 so far
Tweet from 2019-11-16 13:58:03
Returning 6 so far
Tweet from 2019-11-16 13:30:00
Returning 7 so far
Tweet from 2019-11-16 11:58:03
Returning 8 so far
Tweet from 2019-11-16 11:01:00
Returning 9 so far
Tweet from 2019-11-16 01:33:09
Returning 10 so far
Tweet from 2019-11-16 01:32:53
Returning 11 so far
Tweet from 2019-11-16 01:24:06
Returning 12 so far
Tweet from 2019-11-16 00:41:00
Returning 13 so far
Tweet from 2019-11-16 00:31:09
Returning 14 so far
Tweet from 2019-11-15 23:39:54
Returning 15 so far
Tweet from 2019-11-15 23:15:30
Returning 16 so far
Tweet from 2019-11-15 23:09:09
Returning 17 so far
Tweet from 2019-11-15 23:05:52
Retu

Unnamed: 0,User,TweetId
0,ElMercurio_cl,1195779936050655237
1,ElMercurio_cl,1195748135580114945
2,ElMercurio_cl,1195742188929474561
3,ElMercurio_cl,1195733022680764416
4,ElMercurio_cl,1195702575997341697
...,...,...
555,eo_enlinea,1195478877545648128
556,eo_enlinea,1195478047912271872
557,eo_enlinea,1195471454399291394
558,eo_enlinea,1195470576279646208


In [10]:
# Cant look back further than the past 7 days.
# Can hack around this by searching twitterId's from the dates we want
# https://stackoverflow.com/questions/26205102/making-very-specific-time-requests-to-the-second-on-twitter-api-using-python

#tweet from 2019-10-17 # https://twitter.com/harry_styles/status/1185187691857678337?lang=en
#tweet from 2019-11-16 # https://twitter.com/sean_o100/status/1195811463706873857
#tweet from 2021-05-21 # https://twitter.com/saund97/status/1395883494652383232

results = api.search(q="Chile", since_id=1185187691857678337, max_id=1395883494652383232)

for result in results:
    print(result.text)


RT @Hugo_Gutierrez_: Arturo Prat es a la @Armada_Chile, lo que Salvador Allende es al @PSChile y ninguno es digno de ellos! https://t.co/EC…
@MarajToliver time for me to do my detective work chile…
RT @Francis25830521: Plantando arbolitos. Yo no me voy de Chile, nos quedaremos para batallar.

#ChileNoCaerá https://t.co/oAm3P86E8M
RT @Hugo_Gutierrez_: Arturo Prat es a la @Armada_Chile, lo que Salvador Allende es al @PSChile y ninguno es digno de ellos! https://t.co/EC…
@augusto_cesar49 @patriotagd Excelente vox comenzó  así y vimos la pateadura que le dieron a iglesias en madrid, es… https://t.co/aPIWG9Cvbx
RT @rossanablanco: @PDI_CHILE https://t.co/zkuv2bSJ3Y
RT @gaaatoso: ¡las ciencias al servicio de los pueblos de Chile!
RT @FontirroigPablo: ❓"¿Que pasó?" "¡Chile se fue a la p...!"
❓"¡Eran los mejores de Latinoamérica!"
❓"Mis parientes trabajaban en minería…
RT @Akkofemm: Y le habrán contado a esta escombro que las 10 familias ya no tienen ni un 20% de su patrimonio en Chile?
https:/

In [11]:
len(results)

15

In [14]:
username = 'BBC'
startDate = datetime.datetime(2019, 10, 18, 0, 0, 0)
endDate =   datetime.datetime(2019, 11, 18, 0, 0, 0)

tweets = []
tmpTweets = api.user_timeline(
                            username,
                            since_id=1185187691857678337,
                            max_id=1195811463706873857
                            )

for tweet in tmpTweets:
    if tweet.created_at < endDate and tweet.created_at > startDate:
        tweets.append(tweet)
        print(tweet.created_at, tweet.id)

2019-11-16 19:05:27 1195779936050655237
2019-11-16 16:59:06 1195748135580114945
2019-11-16 16:35:28 1195742188929474561
2019-11-16 15:59:02 1195733022680764416
2019-11-16 13:58:03 1195702575997341697
2019-11-16 13:30:00 1195695514467086337
2019-11-16 11:58:03 1195672374844239872
2019-11-16 11:01:00 1195658017552969734
2019-11-16 01:33:09 1195515114788704256
2019-11-16 01:32:53 1195515045989552130
2019-11-16 01:24:06 1195512837415854080
2019-11-16 00:41:00 1195501989179875328
2019-11-16 00:31:09 1195499511952629760
2019-11-15 23:39:54 1195486612349751296
2019-11-15 23:15:30 1195480474661085184
2019-11-15 23:09:09 1195478877545648128
2019-11-15 23:05:52 1195478047912271872
2019-11-15 22:39:40 1195471454399291394
2019-11-15 22:36:10 1195470576279646208
2019-11-15 22:28:50 1195468730832183297


# Chilian News sources

### So far these are the sources that have been identified as potentially interesting:



El Mercurio (@ElMercurio_cl) / Twitter

La Tercera (@latercera) / Twitter

laSegunda (@La_Segunda) / Twitter

Las Últimas Noticias (@lun) / Twitter

La Cuarta (@lacuarta) / Twitter

Meganoticias (@meganoticiascl) / Twitter

Canal 13 (@canal13) / Twitter

TVN (@TVN) / Twitter

24 Horas (@24HorasTVN) / Twitter

La Nación Chile (@nacioncl) / Twitter

Diario Financiero (@DFinanciero) / Twitter

Chilevisión (@chilevision) / Twitter

UCV Radio 103.5 FM (@ucvradio) / Twitter

LaRed (@LaRedTV) / Twitter

Cooperativa (@Cooperativa) / Twitter

Radio Pudahuel (@RadioPudahuel) / Twitter

BioBioChile (@biobio) / Twitter

El Conquistador 91.3 (@FMConquistador) / Twitter

EL INFORMADORCHILE🇨🇱 (@INFORMADORCHILE) / Twitter

CNN Chile (@CNNChile) / Twitter

CHV Noticias (@CHVNoticias) / Twitter

El Mostrador (@elmostrador) / Twitter

Publimetro (@PublimetroChile) / Twitter

El Desconcierto (@eldesconcierto) / Twitter

El Dínamo (@el_dinamo) / Twitter

El Ciudadano (@El_Ciudadano) / Twitter

El Líbero (@elliberocl) / Twitter

(4) Diario El Observador (@eo_enlinea) / Twitter



In terms of the time period, if you could look between October 18, 2019 and November 18, 2019, that would be some cool gravy.




In [23]:
chile_news_users = [#'ElMercurio_cl',
                    'latercera',
                    'La_Segunda',
                    'lun',
                    'lacuarta',
                    'meganoticiascl',
                    'canal13',
                    'TVN',
                    '24HorasTVN',
                    'nacioncl',
                    'DFinanciero',
                    'chilevision',
                    'ucvradio',
                    'LaRedTV',
                    'Cooperativa',
                    'RadioPudahuel',
                    'biobio',
                    'FMConquistador',
                    'INFORMADORCHILE',
                    'CNNChile',
                    'CHVNoticias',
                    'elmostrador',
                    'PublimetroChile',
                    'eldesconcierto',
                    'el_dinamo',
                    'El_Ciudadano',
                    'elliberocl',
                    'eo_enlinea']

#start_date = '2019-10-18'
#end_date = '2019-11-18'
start_date = '2020-05-10'
end_date = '2020-05-15' # As a test lets just run 1 day

results = get_users_tweet_id_in_range(chile_news_users, start_date, end_date)

results

------------------------------
User: latercera Number: 1 of 27
------------------------------



RateLimitError: [{'message': 'Rate limit exceeded', 'code': 88}]

In [None]:
results = get_users_tweet_id_in_range(uk_news_users, '2021-05-20', '2021-05-22')

In [None]:
results.to_csv('twitterIDs.csv')