In [1]:
# https://docs.tweepy.org/en/v3.5.0/getting_started.html

In [1]:
# Import keys from file not commited to GitHub to keep my credentials secret
from setup_api import consumerKey, consumerSecret, accessToken, accessTokenSecret

import tweepy
import datetime
import pandas as pd

In [2]:
# Build the OAuth handler from the consumer key/secret and attach the access
# token pair; all four values are imported from setup_api in the cell above.
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)

# Shared API client; tweets_in_dates() reads this module-level `api` name.
# The two wait_on_rate_limit* flags ask tweepy to wait (and report that it is
# waiting) when a rate limit is hit.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [22]:
# https://stackoverflow.com/questions/49731259/tweepy-get-tweets-between-two-dates

# Function to get all tweet ID's from a given date range for a SINGLE USER
def tweets_in_dates(user, first_id, last_id, page_size=200):
    '''
    Collect the creation time and ID of every tweet on a single user's
    timeline whose ID lies between two bounding tweet IDs.

    Tweet IDs stand in for dates: pass the ID of any tweet published at the
    start of the window and the ID of one published at the end.

    user - a twitter user (e.g. a screen name such as 'BBC')
    first_id - tweet id of a tweet published at the start date + time we want
               (sent to the API as since_id)
    last_id - tweet id of a tweet published at the end date + time we want
              (sent to the API as max_id)
    page_size - number of tweets requested per API call (sent as count).
                Defaults to 200 so a timeline is fetched in far fewer
                rate-limited requests than with the endpoint's default page.

    Returns a list of [user, created_at, tweet_id] rows, in the order the
    cursor yields them.

    Relies on the module-level `api` client.

    NOTE(review): the v1.1 user_timeline endpoint is documented as reaching
    back only a limited number of a user's most recent tweets, which would
    explain empty results for very active accounts - confirm against the
    Twitter API reference.
    '''

    # A bare api.user_timeline() call only returns one page, so let Cursor
    # handle the paging.
    # https://stackoverflow.com/questions/48362198/twitter-user-timeline-not-returning-enough-tweets
    timeline = tweepy.Cursor(
        api.user_timeline,
        user,
        since_id=first_id,
        max_id=last_id,
        count=page_size,
    ).items()

    tweets = []
    for fetched, tweet in enumerate(timeline, start=1):
        tweets.append([user, tweet.created_at, tweet.id])

        # Progress report: only print once for every 100 records
        if fetched % 100 == 0:
            print(f'Tweet {fetched} from {tweet.created_at}')

    return tweets

In [4]:

# Function to get all tweet ID's from a given date range for a list of users

def get_users_tweet_id_in_range(users, first_id, last_id):
    '''
    Gather the tweets in an ID range for several users into one DataFrame.

    users - an iterable of twitter users (e.g. a list of screen names)
    first_id - tweet id of a tweet published at the start date + time we want
    last_id - tweet id of a tweet published at the end date + time we want

    Calls tweets_in_dates() once per user, printing a banner with the user's
    position in the list before each call.

    Returns a DataFrame with one row per tweet and the columns
    'User', 'TweetCreated' and 'TweetId'.
    '''

    # Materialise once so any iterable (not just a list) can be counted.
    users = list(users)
    total = len(users)
    separator = '-' * 30

    res = []

    # enumerate gives the true position even when a user is listed twice,
    # and avoids a linear users.index() search on every iteration.
    for position, user in enumerate(users, start=1):
        print(separator)
        print(f"User: {user} Number: {position} of {total}")
        print(separator)
        print()
        # extend keeps every row in one flat list of records;
        # append would nest one list per user.
        res.extend(tweets_in_dates(user, first_id, last_id))

    # Convert the list of [user, created_at, id] rows to a DataFrame
    tweets_df = pd.DataFrame.from_records(res, columns=['User', 'TweetCreated', 'TweetId'])

    return tweets_df


In [23]:

# Can't look back further than the past 7 days.
# Can work around this by bounding the query with tweet IDs from the dates we want.
# https://stackoverflow.com/questions/26205102/making-very-specific-time-requests-to-the-second-on-twitter-api-using-python

# Example tweet IDs and the dates they were published
# tweet from 2019-10-17 # https://twitter.com/harry_styles/status/1185187691857678337?lang=en
# tweet from 2019-11-16 # https://twitter.com/sean_o100/status/1195811463706873857
# tweet from 2021-05-21 # https://twitter.com/saund97/status/1395883494652383232

start_id = 1185187691857678337  # 2019-10-17
end_id = 1195811463706873857    # 2019-11-16
# end_id = 1395883494652383232  # 2021-05-21

# Initial test on UK news sources, as I can understand that language

uk_news_users = ['BBC', 'BBCWales', 'BBCEngland', 'BBCScotland', 'ITVWales']

results = get_users_tweet_id_in_range(uk_news_users, start_id, end_id)

results

------------------------------
User: BBC Number: 1 of 5
------------------------------

Tweet 100 from 2019-11-10 16:01:00
Tweet 200 from 2019-10-30 13:01:03
Tweet 300 from 2019-10-23 14:25:48
------------------------------
User: BBCWales Number: 2 of 5
------------------------------

------------------------------
User: BBCEngland Number: 3 of 5
------------------------------

------------------------------
User: BBCScotland Number: 4 of 5
------------------------------

------------------------------
User: ITVWales Number: 5 of 5
------------------------------



Unnamed: 0,User,TweetCreated,TweetId
0,BBC,2019-11-16 19:05:27,1195779936050655237
1,BBC,2019-11-16 16:59:06,1195748135580114945
2,BBC,2019-11-16 16:35:28,1195742188929474561
3,BBC,2019-11-16 15:59:02,1195733022680764416
4,BBC,2019-11-16 13:58:03,1195702575997341697
...,...,...,...
356,BBC,2019-10-18 15:13:21,1185212275629740033
357,BBC,2019-10-18 15:12:55,1185212168964399104
358,BBC,2019-10-18 15:11:35,1185211832933474310
359,BBC,2019-10-18 14:31:32,1185201751290454018


# Chilean News sources

### So far these are the sources that have been identified as potentially interesting:



El Mercurio (@ElMercurio_cl) / Twitter

La Tercera (@latercera) / Twitter

laSegunda (@La_Segunda) / Twitter

Las Últimas Noticias (@lun) / Twitter

La Cuarta (@lacuarta) / Twitter

Meganoticias (@meganoticiascl) / Twitter

Canal 13 (@canal13) / Twitter

TVN (@TVN) / Twitter

24 Horas (@24HorasTVN) / Twitter

La Nación Chile (@nacioncl) / Twitter

Diario Financiero (@DFinanciero) / Twitter

Chilevisión (@chilevision) / Twitter

UCV Radio 103.5 FM (@ucvradio) / Twitter

LaRed (@LaRedTV) / Twitter

Cooperativa (@Cooperativa) / Twitter

Radio Pudahuel (@RadioPudahuel) / Twitter

BioBioChile (@biobio) / Twitter

El Conquistador 91.3 (@FMConquistador) / Twitter

EL INFORMADORCHILE🇨🇱 (@INFORMADORCHILE) / Twitter

CNN Chile (@CNNChile) / Twitter

CHV Noticias (@CHVNoticias) / Twitter

El Mostrador (@elmostrador) / Twitter

Publimetro (@PublimetroChile) / Twitter

El Desconcierto (@eldesconcierto) / Twitter

El Dínamo (@el_dinamo) / Twitter

El Ciudadano (@El_Ciudadano) / Twitter

El Líbero (@elliberocl) / Twitter

Diario El Observador (@eo_enlinea) / Twitter



In terms of the time period, if you could look between October 18, 2019 and November 18, 2019, that would be some cool gravy.




In [25]:
# Screen names of the Chilean news accounts listed in the markdown above
chile_news_users = ['ElMercurio_cl',
                    'latercera',
                    'La_Segunda',
                    'lun',
                    'lacuarta',
                    'meganoticiascl',
                    'canal13',
                    'TVN',
                    '24HorasTVN',
                    'nacioncl',
                    'DFinanciero',
                    'chilevision',
                    'ucvradio',
                    'LaRedTV',
                    'Cooperativa',
                    'RadioPudahuel',
                    'biobio',
                    'FMConquistador',
                    'INFORMADORCHILE',
                    'CNNChile',
                    'CHVNoticias',
                    'elmostrador',
                    'PublimetroChile',
                    'eldesconcierto',
                    'el_dinamo',
                    'El_Ciudadano',
                    'elliberocl',
                    'eo_enlinea']

# Tweet IDs used as date boundaries (ID of a tweet published on that date).
# NOTE(review): the requested window is 2019-10-18 to 2019-11-18, but end_id
# comes from a tweet on 2019-11-16 - confirm whether the last two days matter.
start_id = 1185187691857678337  # 2019-10-17
end_id = 1195811463706873857    # 2019-11-16
now_id = 1395883494652383232  # 2021-05-21

# Try getting just the dates we want in 2019
results = get_users_tweet_id_in_range(chile_news_users, start_id, end_id)

results

------------------------------
User: ElMercurio_cl Number: 1 of 28
------------------------------

------------------------------
User: latercera Number: 2 of 28
------------------------------

------------------------------
User: La_Segunda Number: 3 of 28
------------------------------

------------------------------
User: lun Number: 4 of 28
------------------------------

------------------------------
User: lacuarta Number: 5 of 28
------------------------------

------------------------------
User: meganoticiascl Number: 6 of 28
------------------------------

------------------------------
User: canal13 Number: 7 of 28
------------------------------

------------------------------
User: TVN Number: 8 of 28
------------------------------

------------------------------
User: 24HorasTVN Number: 9 of 28
------------------------------

------------------------------
User: nacioncl Number: 10 of 28
------------------------------

------------------------------
User: DFinanciero Numbe

Unnamed: 0,User,TweetCreated,TweetId
0,FMConquistador,2019-11-15 22:33:00,1195469776925155328
1,FMConquistador,2019-11-15 22:15:00,1195465247139581952
2,FMConquistador,2019-11-15 22:14:00,1195464995418435587
3,FMConquistador,2019-11-15 22:02:00,1195461975687323648
4,FMConquistador,2019-11-15 21:04:00,1195447379467485184
...,...,...,...
342,FMConquistador,2019-10-18 22:05:00,1185315870408658944
343,FMConquistador,2019-10-18 22:02:00,1185315115446554626
344,FMConquistador,2019-10-18 21:33:00,1185307817290321920
345,FMConquistador,2019-10-18 21:02:00,1185300016132513792


In [26]:
# Save the 2019-window results to a CSV file (relative path)
results.to_csv('InitialChile2019.csv')

In [None]:
# Widen the window so it ends at now_id (a tweet from 2021-05-21) and see how
# far back Twitter will let me go.
# Note: this rebinds `results`, replacing the 2019-only frame built earlier.
results = get_users_tweet_id_in_range(chile_news_users, start_id, now_id)
results

In [None]:
# Save whatever `results` currently holds to a CSV file (relative path)
results.to_csv('InitialChile.csv')