In [1]:
import requests
import pandas as pd
from anytree import Node, RenderTree
from functions import *

In [2]:
with open('Authentication/database_uri.txt', 'r', encoding="utf8") as f:
    uri = f.read()

In [3]:
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

def connect_to_endpoint(url, headers, next_token = None):    
    response = requests.request("GET", url, headers = headers)
        
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def getTweets(user_id, header):
    tweets_url = f'https://api.twitter.com/2/users/{user_id}/tweets'
    return connect_to_endpoint(tweets_url, header)

# 'conversation_id' is the identifier for the main tweet
def getConversation(conversation_id, max_results, header):
    params = 'in_reply_to_user_id,author_id,created_at,conversation_id'
    getConversation_url = f'https://api.twitter.com/2/tweets/search/recent?query=conversation_id:{conversation_id}&tweet.fields={params}&max_results={max_results}'

    return connect_to_endpoint(getConversation_url, header)

# For now we will return the time only
def getTweetInformation(conversation_id, header):
    params = 'created_at,conversation_id,in_reply_to_user_id,author_id,referenced_tweets'
    tweetInfo_url = f'https://api.twitter.com/2/tweets?tweet.fields={params}&ids={conversation_id}'
    
    result = connect_to_endpoint(tweetInfo_url, header)
    return result['data'][0]['created_at']

def getTweetComments(conversation_data):
    conversation_dict = {'id':[], 'timestamp':[], 'reply_to':[], 'tweet':[]}
    
    for i in range(len(conversation_data['data'])):
        print('User ID:', conversation_data['data'][i]['author_id'], 
              'Time:', conversation_data['data'][i]['created_at'])
        print('In reply to:', conversation_data['data'][i]['in_reply_to_user_id'])
        print(conversation_data['data'][i]['text'], '\n')
        
        conversation_dict['id'].append(conversation_data['data'][i]['author_id'])
        conversation_dict['timestamp'].append(conversation_data['data'][i]['created_at'])
        conversation_dict['reply_to'].append(conversation_data['data'][i]['in_reply_to_user_id'])
        conversation_dict['tweet'].append(conversation_data['data'][i]['text'])
        
    return conversation_dict

In [4]:
with open('Authentication/twitter_bearer_token.txt', 'r', encoding="utf8") as f:
    token = f.read()

header = create_headers(token)
max_results = 100

# The Straits Times
## Get Tweets

In [5]:
ST_id = '37874853'
ST_tweets = getTweets(ST_id, header)

In [6]:
for i in range(len(ST_tweets['data'])):
    print('Tweet ID:', ST_tweets['data'][i]['id'], 
          'Time:', getTweetInformation(ST_tweets['data'][i]['id'], header))
    print(ST_tweets['data'][i]['text'], '\n')
    
#     command = (
#             '''
#             INSERT INTO twitter_data
#             VALUES ('%s', '%s', '%s');
#             ''' % (ST_tweets['data'][i]['id'], getTweetInformation(ST_tweets['data'][i]['id'], header), 
#                    ST_tweets['data'][i]['text'])
#             )
#     setUpDB(command, uri)

Tweet ID: 1481205204531908608 Time: 2022-01-12T10:03:34.000Z
RT @ST_LifeTweets: BTOB's Minhyuk shows off chiselled abs in series of posts https://t.co/TI3d1Tnj9T 

Tweet ID: 1481205197724520449 Time: 2022-01-12T10:03:33.000Z
RT @ST_LifeTweets: Actress Zoe Tay turns 54 with cake and many 'likes' https://t.co/5R6nyXMc6i 

Tweet ID: 1481202559192756225 Time: 2022-01-12T09:53:04.000Z
More streamlined and efficient IP registration process with changes to the law https://t.co/2JQuZ7rmlD 

Tweet ID: 1481202553790423042 Time: 2022-01-12T09:53:02.000Z
KFC charged over allowing diners into Far East Plaza outlet without checking for Covid-19 symptoms https://t.co/cRFn5a3mar 

Tweet ID: 1481201171138183173 Time: 2022-01-12T09:47:33.000Z
RT @STForeignDesk: Crisis in Australia as Covid-19 cases explode, questions mount about preparedness https://t.co/1INJGRozi5 

Tweet ID: 1481197403499687938 Time: 2022-01-12T09:32:34.000Z
RT @STForeignDesk: China blasts KFC promotion that saw diner buying 106 meals

In [7]:
# query = '@straits_times'
# tweets_url = f'https://api.twitter.com/2/tweets/search/recent?query={query}&max_results={max_results}'
# connect_to_endpoint(tweets_url, header)

## Get Conversation

In [8]:
# ST_conversation_data = getConversation('1475383966832021504', max_results, header)
ST_conversation_data = getConversation('1480409339080810496', max_results, header)

In [9]:
ST_result = getTweetComments(ST_conversation_data)

User ID: 1097030280248942592 Time: 2022-01-11T07:22:48.000Z
In reply to: 1448184310599589888
@RM17073919 @ChannelNewsAsia Who suppose to go around and report news? LAZY or HANDS/MOUTH TIGHT. LOL. 

User ID: 1448184310599589888 Time: 2022-01-11T06:31:17.000Z
In reply to: 1448184310599589888
@Ahmad69138914 @ChannelNewsAsia Ask around who ? Who is reliable ? Need to be evidence based . 

User ID: 1097030280248942592 Time: 2022-01-11T02:18:18.000Z
In reply to: 1448184310599589888
@RM17073919 @ChannelNewsAsia Why scared of Covid? 99.98% will survive. You should worry about vaccine. Now more issues coming up after getting it. Ask around and you will hear many strange stories. 

User ID: 1448184310599589888 Time: 2022-01-10T17:13:10.000Z
In reply to: 716080004333842436
@smoky_1671 @ChannelNewsAsia Thanks smoky 

User ID: 17081024 Time: 2022-01-10T14:48:13.000Z
In reply to: 38400130
@ChannelNewsAsia 30%  So high! No wonder some doubt if #vaccines work? 
@PGeorge9601 @LawrenceWongST 

User ID: 

In [10]:
ST_df = pd.DataFrame.from_dict(ST_result)
ST_df['id'] = ST_df['id'].astype(str)
ST_df['reply_to'] = ST_df['reply_to'].astype(str)
ST_df.to_csv('Datasets/ST_twitter_data.csv')

ST_df.head()

Unnamed: 0,id,timestamp,reply_to,tweet
0,1097030280248942592,2022-01-11T07:22:48.000Z,1448184310599589888,@RM17073919 @ChannelNewsAsia Who suppose to go...
1,1448184310599589888,2022-01-11T06:31:17.000Z,1448184310599589888,@Ahmad69138914 @ChannelNewsAsia Ask around who...
2,1097030280248942592,2022-01-11T02:18:18.000Z,1448184310599589888,@RM17073919 @ChannelNewsAsia Why scared of Cov...
3,1448184310599589888,2022-01-10T17:13:10.000Z,716080004333842436,@smoky_1671 @ChannelNewsAsia Thanks smoky
4,17081024,2022-01-10T14:48:13.000Z,38400130,@ChannelNewsAsia 30% So high! No wonder some ...


# Channel NewsAsia
## Get Tweets

In [11]:
CNA_tweets = getTweets('38400130', header)

for i in range(len(CNA_tweets['data'])):
    print('Tweet ID:', CNA_tweets['data'][i]['id'],
          'Time:', getTweetInformation(ST_tweets['data'][i]['id'], header))
    print(CNA_tweets['data'][i]['text'], '\n')
    
#     command = (
#             '''
#             INSERT INTO twitter_data
#             VALUES ('%s', '%s', '%s');
#             ''' % (CNA_tweets['data'][i]['id'], getTweetInformation(CNA_tweets['data'][i]['id'], header), 
#                    CNA_tweets['data'][i]['text'])
#             )
#     setUpDB(command, uri)

Tweet ID: 1481205219039997953 Time: 2022-01-12T10:03:34.000Z
DHL opens Middle East's largest robotic sorting centre in Israel https://t.co/61qLxbxPsl https://t.co/Ja3rAc67ll 

Tweet ID: 1481205208952623108 Time: 2022-01-12T10:03:33.000Z
German watchdog says Google agrees to remove Showcase from general searches https://t.co/dMvp7pJWmn https://t.co/OfM2TVXgQw 

Tweet ID: 1481205199687483392 Time: 2022-01-12T09:53:04.000Z
China steps up construction along disputed Bhutan border, satellite images show https://t.co/Fy4aPV9LG2 https://t.co/KJzfSMpXc7 

Tweet ID: 1481203343930187776 Time: 2022-01-12T09:53:02.000Z
Exclusive-Morgan Stanley to award bonus rises of over 20per cent on Thursday to top performers -sources https://t.co/6Z6I7Oarpq https://t.co/IgHSMiZpBt 

Tweet ID: 1481203332580401154 Time: 2022-01-12T09:47:33.000Z
Indonesia waiting for utility's green light before resuming coal exports-minister https://t.co/dWx1kqt6k1 https://t.co/QTukfVyWHI 

Tweet ID: 1481203321427746816 Time: 20

## Get Conversation

In [12]:
# CNA_conversation_data = getConversation('1475390517470896129', max_results, header)
CNA_conversation_data = getConversation('1477849910682832897', max_results, header)

result = getTweetComments(CNA_conversation_data)

User ID: 3383273115 Time: 2022-01-10T17:12:53.000Z
In reply to: 18831926
@DrEricDing It would be very interesting &amp; eye-opening for you to Subscribe to https://t.co/0eylrPkr4A to get their Free Newsletter. 

User ID: 3383273115 Time: 2022-01-10T17:08:21.000Z
In reply to: 18831926
@DrEricDing https://t.co/KZOnH4zEBQ 

User ID: 799291649780252672 Time: 2022-01-09T12:08:08.000Z
In reply to: 18831926
@DrEricDing @GovRonDeSantis 

User ID: 799291649780252672 Time: 2022-01-09T12:07:30.000Z
In reply to: 18831926
@DrEricDing @chicagosmayor @NYCMayor 

User ID: 3609395232 Time: 2022-01-06T19:11:34.000Z
In reply to: 3609395232
@DrEricDing But that does not change the facts that it is MORE MILD. Lying to the people won't solve anything. Saying that the infections are higher and it's causing a surge in the hospitals is necessary but don't lie about the severity of the illness. 

User ID: 3609395232 Time: 2022-01-06T19:10:01.000Z
In reply to: 18831926
@DrEricDing But it is more mild. People are

In [13]:
df = pd.DataFrame.from_dict(result)
df['id'] = df['id'].astype(str)
df['reply_to'] = df['reply_to'].astype(str)
df.to_csv('Datasets/sample_tweet_conversation.csv')

df.head()

Unnamed: 0,id,timestamp,reply_to,tweet
0,3383273115,2022-01-10T17:12:53.000Z,18831926,@DrEricDing It would be very interesting &amp;...
1,3383273115,2022-01-10T17:08:21.000Z,18831926,@DrEricDing https://t.co/KZOnH4zEBQ
2,799291649780252672,2022-01-09T12:08:08.000Z,18831926,@DrEricDing @GovRonDeSantis
3,799291649780252672,2022-01-09T12:07:30.000Z,18831926,@DrEricDing @chicagosmayor @NYCMayor
4,3609395232,2022-01-06T19:11:34.000Z,3609395232,@DrEricDing But that does not change the facts...
