In [1]:
import requests
import pandas as pd
from anytree import Node, RenderTree
from functions import *

In [2]:
with open('Authentication/database_uri.txt', 'r', encoding="utf8") as f:
    uri = f.read()

In [3]:
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

def connect_to_endpoint(url, headers, next_token = None):    
    response = requests.request("GET", url, headers = headers)
        
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def getTweets(user_id, header):
    tweets_url = f'https://api.twitter.com/2/users/{user_id}/tweets'
    return connect_to_endpoint(tweets_url, header)

# 'conversation_id' is the identifier for the main tweet
def getConversation(conversation_id, max_results, header):
    params = 'in_reply_to_user_id,author_id,created_at,conversation_id'
    getConversation_url = f'https://api.twitter.com/2/tweets/search/recent?query=conversation_id:{conversation_id}&tweet.fields={params}&max_results={max_results}'

    return connect_to_endpoint(getConversation_url, header)

# For now we will return the time only
def getTweetInformation(conversation_id, header):
    params = 'created_at,conversation_id,in_reply_to_user_id,author_id,referenced_tweets'
    tweetInfo_url = f'https://api.twitter.com/2/tweets?tweet.fields={params}&ids={conversation_id}'
    
    result = connect_to_endpoint(tweetInfo_url, header)
    return result['data'][0]['created_at']

def getTweetComments(conversation_data):
    conversation_dict = {'id':[], 'timestamp':[], 'reply_to':[], 'tweet':[]}
    
    for i in range(len(conversation_data['data'])):
        print('User ID:', conversation_data['data'][i]['author_id'], 
              'Time:', conversation_data['data'][i]['created_at'])
        print('In reply to:', conversation_data['data'][i]['in_reply_to_user_id'])
        print(conversation_data['data'][i]['text'], '\n')
        
        conversation_dict['id'].append(conversation_data['data'][i]['author_id'])
        conversation_dict['timestamp'].append(conversation_data['data'][i]['created_at'])
        conversation_dict['reply_to'].append(conversation_data['data'][i]['in_reply_to_user_id'])
        conversation_dict['tweet'].append(conversation_data['data'][i]['text'])
        
    return conversation_dict

In [4]:
with open('Authentication/twitter_bearer_token.txt', 'r', encoding="utf8") as f:
    token = f.read()

header = create_headers(token)
max_results = 100

# The Straits Times
## Get Tweets

In [5]:
ST_id = '37874853'
ST_tweets = getTweets(ST_id, header)

In [6]:
for i in range(len(ST_tweets['data'])):
    print('Tweet ID:', ST_tweets['data'][i]['id'], 
          'Time:', getTweetInformation(ST_tweets['data'][i]['id'], header))
    print(ST_tweets['data'][i]['text'], '\n')
    
#     command = (
#             '''
#             INSERT INTO twitter_data
#             VALUES ('%s', '%s', '%s');
#             ''' % (ST_tweets['data'][i]['id'], getTweetInformation(ST_tweets['data'][i]['id'], header), 
#                    ST_tweets['data'][i]['text'])
#             )
#     setUpDB(command, uri)

Tweet ID: 1478650991931052032 Time: 2022-01-05T08:54:03.000Z
RT @STsportsdesk: MMA: Khabib Nurmagomedov to attend One Championship event in Singapore https://t.co/Vl96tCcwOC 

Tweet ID: 1478649356999741440 Time: 2022-01-05T08:47:33.000Z
COE premiums fall in most categories at end of first tender of 2022 https://t.co/uo4ORsf5qW 

Tweet ID: 1478645962801221635 Time: 2022-01-05T08:34:04.000Z
RT @STForeignDesk: Hong Kong to ban dining-in after 6pm as Omicron spreads https://t.co/7FSDCNErNM 

Tweet ID: 1478644066694750211 Time: 2022-01-05T08:26:32.000Z
RT @STsportsdesk: Football: Star striker Kane committed to Tottenham, says Conte https://t.co/qVYIcaDTlA 

Tweet ID: 1478642445311102976 Time: 2022-01-05T08:20:05.000Z
RT @STForeignDesk: UAE faces risk of inclusion on global watchlist over dirty money https://t.co/jMEDW4xw0w 

Tweet ID: 1478642438264659969 Time: 2022-01-05T08:20:03.000Z
RT @STForeignDesk: Lithuania President rebukes government over China trade spat https://t.co/aRv1GVSOIm 

T

In [7]:
# query = '@straits_times'
# tweets_url = f'https://api.twitter.com/2/tweets/search/recent?query={query}&max_results={max_results}'
# connect_to_endpoint(tweets_url, header)

## Get Conversation

In [8]:
# ST_conversation_data = getConversation('1475383966832021504', max_results, header)
ST_conversation_data = getConversation('1477849910682832897', max_results, header)

In [9]:
ST_result = getTweetComments(ST_conversation_data)

User ID: 297254562 Time: 2022-01-05T07:57:55.000Z
In reply to: 1074078021114519552
@NatheSuter @DrEricDing I’m Kind of at a loss for words with this one. 

User ID: 954325555343773697 Time: 2022-01-05T06:02:11.000Z
In reply to: 18831926
@DrEricDing Those hospitalizations are with COVID not from COVID. Fauci admitted as much 

User ID: 1191277984864505856 Time: 2022-01-05T04:23:53.000Z
In reply to: 18831926
@DrEricDing It’s frustrating to see South Africa’s data being misrepresented this way. And you’re a serial offender. 

User ID: 808713209389715457 Time: 2022-01-05T03:05:32.000Z
In reply to: 3996146667
@Ceri_turns @DrEricDing This is what I was looking for. Hope this is helpful. https://t.co/9G89ZYvj19 

User ID: 777809778 Time: 2022-01-05T00:30:21.000Z
In reply to: 18831926
@DrEricDing Staff shortages. Staff aren't off sick they are off well. 

User ID: 3949435217 Time: 2022-01-04T23:29:24.000Z
In reply to: 18831926
@DrEricDing Thank you Dr. Feigl-Ding! 

User ID: 102918486477444300

In [10]:
ST_df = pd.DataFrame.from_dict(ST_result)
ST_df['id'] = ST_df['id'].astype(str)
ST_df['reply_to'] = ST_df['reply_to'].astype(str)
ST_df.to_csv('Datasets/ST_twitter_data.csv')

ST_df.head()

Unnamed: 0,id,timestamp,reply_to,tweet
0,297254562,2022-01-05T07:57:55.000Z,1074078021114519552,@NatheSuter @DrEricDing I’m Kind of at a loss ...
1,954325555343773697,2022-01-05T06:02:11.000Z,18831926,@DrEricDing Those hospitalizations are with CO...
2,1191277984864505856,2022-01-05T04:23:53.000Z,18831926,@DrEricDing It’s frustrating to see South Afri...
3,808713209389715457,2022-01-05T03:05:32.000Z,3996146667,@Ceri_turns @DrEricDing This is what I was loo...
4,777809778,2022-01-05T00:30:21.000Z,18831926,@DrEricDing Staff shortages. Staff aren't off ...


# Channel NewsAsia
## Get Tweets

In [11]:
CNA_tweets = getTweets('38400130', header)

for i in range(len(CNA_tweets['data'])):
    print('Tweet ID:', CNA_tweets['data'][i]['id'],
          'Time:', getTweetInformation(ST_tweets['data'][i]['id'], header))
    print(CNA_tweets['data'][i]['text'], '\n')
    
#     command = (
#             '''
#             INSERT INTO twitter_data
#             VALUES ('%s', '%s', '%s');
#             ''' % (CNA_tweets['data'][i]['id'], getTweetInformation(CNA_tweets['data'][i]['id'], header), 
#                    CNA_tweets['data'][i]['text'])
#             )
#     setUpDB(command, uri)

Tweet ID: 1478650995571716098 Time: 2022-01-05T08:54:03.000Z
Poland seal semi ATP Cup spot as Hurkacz bests Schwartzman https://t.co/TT4jwaKbSe https://t.co/7Ij0vZdE74 

Tweet ID: 1478648992833572865 Time: 2022-01-05T08:47:33.000Z
Hong Kong leader announces new COVID-19 measures, including flight bans https://t.co/H3Im7MzDqQ https://t.co/m7yqeFSNae 

Tweet ID: 1478648983673135104 Time: 2022-01-05T08:34:04.000Z
Latest COE prices close mostly lower in first exercise of 2022 https://t.co/t9H822er0R https://t.co/VKdWE4cTgK 

Tweet ID: 1478646972424675328 Time: 2022-01-05T08:26:32.000Z
Germany considers more contact limits as infections jump https://t.co/bS2YTY3U2K https://t.co/Dd5eGEPlDl 

Tweet ID: 1478645477218271233 Time: 2022-01-05T08:20:05.000Z
Sri Lanka cenbank allocates US$500 million for bond repayment https://t.co/CNiK0SQLfo https://t.co/c6gMcEqLYV 

Tweet ID: 1478645468003405824 Time: 2022-01-05T08:20:03.000Z
Volvo December sales drop as chip shortage persists https://t.co/PqnCI4

## Get Conversation

In [12]:
# CNA_conversation_data = getConversation('1475390517470896129', max_results, header)
CNA_conversation_data = getConversation('1477849910682832897', max_results, header)

result = getTweetComments(CNA_conversation_data)

User ID: 297254562 Time: 2022-01-05T07:57:55.000Z
In reply to: 1074078021114519552
@NatheSuter @DrEricDing I’m Kind of at a loss for words with this one. 

User ID: 954325555343773697 Time: 2022-01-05T06:02:11.000Z
In reply to: 18831926
@DrEricDing Those hospitalizations are with COVID not from COVID. Fauci admitted as much 

User ID: 1191277984864505856 Time: 2022-01-05T04:23:53.000Z
In reply to: 18831926
@DrEricDing It’s frustrating to see South Africa’s data being misrepresented this way. And you’re a serial offender. 

User ID: 808713209389715457 Time: 2022-01-05T03:05:32.000Z
In reply to: 3996146667
@Ceri_turns @DrEricDing This is what I was looking for. Hope this is helpful. https://t.co/9G89ZYvj19 

User ID: 777809778 Time: 2022-01-05T00:30:21.000Z
In reply to: 18831926
@DrEricDing Staff shortages. Staff aren't off sick they are off well. 

User ID: 3949435217 Time: 2022-01-04T23:29:24.000Z
In reply to: 18831926
@DrEricDing Thank you Dr. Feigl-Ding! 

User ID: 102918486477444300

In [13]:
df = pd.DataFrame.from_dict(result)
df['id'] = df['id'].astype(str)
df['reply_to'] = df['reply_to'].astype(str)
df.to_csv('Datasets/sample_tweet_conversation.csv')

df.head()

Unnamed: 0,id,timestamp,reply_to,tweet
0,297254562,2022-01-05T07:57:55.000Z,1074078021114519552,@NatheSuter @DrEricDing I’m Kind of at a loss ...
1,954325555343773697,2022-01-05T06:02:11.000Z,18831926,@DrEricDing Those hospitalizations are with CO...
2,1191277984864505856,2022-01-05T04:23:53.000Z,18831926,@DrEricDing It’s frustrating to see South Afri...
3,808713209389715457,2022-01-05T03:05:32.000Z,3996146667,@Ceri_turns @DrEricDing This is what I was loo...
4,777809778,2022-01-05T00:30:21.000Z,18831926,@DrEricDing Staff shortages. Staff aren't off ...
