In [1]:
import requests
import pandas as pd
from anytree import Node, RenderTree
from functions import *

In [2]:
with open('Authentication/database_uri.txt', 'r', encoding="utf8") as f:
    uri = f.read()

In [3]:
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers

def connect_to_endpoint(url, headers, next_token = None):    
    response = requests.request("GET", url, headers = headers)
        
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def getTweets(user_id, header):
    tweets_url = f'https://api.twitter.com/2/users/{user_id}/tweets'
    return connect_to_endpoint(tweets_url, header)

# 'conversation_id' is the identifier for the main tweet
def getConversation(conversation_id, max_results, header):
    params = 'in_reply_to_user_id,author_id,created_at,conversation_id'
    getConversation_url = f'https://api.twitter.com/2/tweets/search/recent?query=conversation_id:{conversation_id}&tweet.fields={params}&max_results={max_results}'

    return connect_to_endpoint(getConversation_url, header)

# For now we will return the time only
def getTweetInformation(conversation_id, header):
    params = 'created_at,conversation_id,in_reply_to_user_id,author_id,referenced_tweets'
    tweetInfo_url = f'https://api.twitter.com/2/tweets?tweet.fields={params}&ids={conversation_id}'
    
    result = connect_to_endpoint(tweetInfo_url, header)
    return result['data'][0]['created_at']

def getTweetComments(conversation_data):
    conversation_dict = {'user_id':[], 'timestamp':[], 'reply_to':[], 'tweet':[]}
    
    for i in range(len(conversation_data['data'])):
        print('User ID:', conversation_data['data'][i]['author_id'], 
              'Time:', conversation_data['data'][i]['created_at'])
        print('In reply to:', conversation_data['data'][i]['in_reply_to_user_id'])
        print(conversation_data['data'][i]['text'], '\n')
        
        conversation_dict['user_id'].append(conversation_data['data'][i]['author_id'])
        conversation_dict['timestamp'].append(conversation_data['data'][i]['created_at'])
        conversation_dict['reply_to'].append(conversation_data['data'][i]['in_reply_to_user_id'])
        conversation_dict['tweet'].append(conversation_data['data'][i]['text'])
        
    return conversation_dict

In [4]:
with open('Authentication/twitter_bearer_token.txt', 'r', encoding="utf8") as f:
    token = f.read()

header = create_headers(token)
max_results = 100

# The Straits Times
## Get Tweets

In [5]:
ST_id = '37874853'
ST_tweets = getTweets(ST_id, header)

In [6]:
for i in range(len(ST_tweets['data'])):
    print('Tweet ID:', ST_tweets['data'][i]['id'], 
          'Time:', getTweetInformation(ST_tweets['data'][i]['id'], header))
    print(ST_tweets['data'][i]['text'], '\n')
    
    command = (
            '''
            INSERT INTO twitter_data
            VALUES ('%s', '%s', '%s');
            ''' % (ST_tweets['data'][i]['id'], getTweetInformation(ST_tweets['data'][i]['id'], header), 
                   ST_tweets['data'][i]['text'])
            )
    setUpDB(command, uri)

Tweet ID: 1478565938450472962 Time: 2022-01-05T03:16:04.000Z
RT @stbusinessdesk: Subtle word choice in China IPO rules revives hope for Hong Kong https://t.co/QHRTYD0lp9 

done
Tweet ID: 1478565930925912067 Time: 2022-01-05T03:16:03.000Z
RT @STForeignDesk: Biden to speak about threats to democracy on anniversary of Capitol attack https://t.co/6I4H05nglV 

done
Tweet ID: 1478562669250502658 Time: 2022-01-05T03:03:05.000Z
RT @STForeignDesk: Tuna sold for $197,604 at Tokyo's New Year auction falls in price for third year https://t.co/Xiulj9joOP 

syntax error at or near "s"
LINE 3: ...T @STForeignDesk: Tuna sold for $197,604 at Tokyo's New Year...
                                                             ^

Tweet ID: 1478562660530540546 Time: 2022-01-05T03:03:03.000Z
RT @STForeignDesk: Leaked memo says rusty Qantas pilots making errors: Report https://t.co/MSoAwBwyMg 

done
Tweet ID: 1478561148689543172 Time: 2022-01-05T02:57:02.000Z
RT @STsportsdesk: Badminton: Singapore chief coach M

In [7]:
# query = '@straits_times'
# tweets_url = f'https://api.twitter.com/2/tweets/search/recent?query={query}&max_results={max_results}'
# connect_to_endpoint(tweets_url, header)

## Get Conversation

In [8]:
# ST_conversation_data = getConversation('1475383966832021504', max_results, header)
ST_conversation_data = getConversation('1477849910682832897', max_results, header)

In [9]:
ST_result = getTweetComments(ST_conversation_data)

User ID: 808713209389715457 Time: 2022-01-05T03:05:32.000Z
In reply to: 3996146667
@Ceri_turns @DrEricDing This is what I was looking for. Hope this is helpful. https://t.co/9G89ZYvj19 

User ID: 777809778 Time: 2022-01-05T00:30:21.000Z
In reply to: 18831926
@DrEricDing Staff shortages. Staff aren't off sick they are off well. 

User ID: 3949435217 Time: 2022-01-04T23:29:24.000Z
In reply to: 18831926
@DrEricDing Thank you Dr. Feigl-Ding! 

User ID: 1029184864774443008 Time: 2022-01-04T22:59:15.000Z
In reply to: 18831926
@DrEricDing The people who are pushing for in-person school…@DrLeanaWen @CDCDirector @ashishkjha 

User ID: 1092450453088960513 Time: 2022-01-04T21:00:30.000Z
In reply to: 18831926
@DrEricDing https://t.co/jzXnnzamd7 

User ID: 114155868 Time: 2022-01-04T20:54:27.000Z
In reply to: 18831926
@DrEricDing ICU numbers in South Africa, when viewed province by province, and compared to previous COVID-19 waves, paints a very different picture. Whatever the reasons, hospital &am

In [10]:
ST_df = pd.DataFrame.from_dict(ST_result)
ST_df.head()
ST_df.to_csv('Datasets/ST_twitter_data.csv')

# Channel NewsAsia
## Get Tweets

In [11]:
CNA_tweets = getTweets('38400130', header)

for i in range(len(CNA_tweets['data'])):
    print('Tweet ID:', CNA_tweets['data'][i]['id'],
          'Time:', getTweetInformation(ST_tweets['data'][i]['id'], header))
    print(CNA_tweets['data'][i]['text'], '\n')
    
    command = (
            '''
            INSERT INTO twitter_data
            VALUES ('%s', '%s', '%s');
            ''' % (CNA_tweets['data'][i]['id'], getTweetInformation(CNA_tweets['data'][i]['id'], header), 
                   CNA_tweets['data'][i]['text'])
            )
    setUpDB(command, uri)

Tweet ID: 1478567720580882433 Time: 2022-01-05T03:16:04.000Z
Mominul hails Bangladesh's 'unbelievable' win over NZ https://t.co/4lyEoQZhwb https://t.co/rCgoIKT52I 

syntax error at or near "s"
LINE 3: ...2-01-05T03:23:09.000Z', 'Mominul hails Bangladesh's 'unbelie...
                                                             ^

Tweet ID: 1478567709939961858 Time: 2022-01-05T03:16:03.000Z
Indonesia to review coal export ban that has driven up global prices https://t.co/ubrGKqUgLO https://t.co/7nwHPDUMsg 

done
Tweet ID: 1478567700238503940 Time: 2022-01-05T03:03:05.000Z
Hong Kong hunts COVID-19 patient's contacts, orders cruise ship back to port https://t.co/4mH9l1btbt https://t.co/egOnPMRCAl 

syntax error at or near "s"
LINE 3: ...03:23:04.000Z', 'Hong Kong hunts COVID-19 patient's contacts...
                                                             ^

Tweet ID: 1478564297433255937 Time: 2022-01-05T03:03:03.000Z
Asian shares slip as rising US yields hit tech firms https://t.co/m

## Get Conversation

In [12]:
# CNA_conversation_data = getConversation('1475390517470896129', max_results, header)
CNA_conversation_data = getConversation('1477849910682832897', max_results, header)

result = getTweetComments(CNA_conversation_data)

User ID: 808713209389715457 Time: 2022-01-05T03:05:32.000Z
In reply to: 3996146667
@Ceri_turns @DrEricDing This is what I was looking for. Hope this is helpful. https://t.co/9G89ZYvj19 

User ID: 777809778 Time: 2022-01-05T00:30:21.000Z
In reply to: 18831926
@DrEricDing Staff shortages. Staff aren't off sick they are off well. 

User ID: 3949435217 Time: 2022-01-04T23:29:24.000Z
In reply to: 18831926
@DrEricDing Thank you Dr. Feigl-Ding! 

User ID: 1029184864774443008 Time: 2022-01-04T22:59:15.000Z
In reply to: 18831926
@DrEricDing The people who are pushing for in-person school…@DrLeanaWen @CDCDirector @ashishkjha 

User ID: 1092450453088960513 Time: 2022-01-04T21:00:30.000Z
In reply to: 18831926
@DrEricDing https://t.co/jzXnnzamd7 

User ID: 114155868 Time: 2022-01-04T20:54:27.000Z
In reply to: 18831926
@DrEricDing ICU numbers in South Africa, when viewed province by province, and compared to previous COVID-19 waves, paints a very different picture. Whatever the reasons, hospital &am

In [13]:
df = pd.DataFrame.from_dict(result)
df.head()
df.to_csv('Datasets/sample_tweet_conversation.csv')