In [1]:
import requests
import datetime
import pandas as pd
from anytree import Node, RenderTree
from functions import *

In [2]:
# Load the database connection URI from a local text file.
with open('Authentication/database_uri.txt', 'r', encoding="utf8") as f:
    # Strip surrounding whitespace so a trailing newline in the file does not
    # end up inside the connection string.
    uri = f.read().strip()

In [3]:
# Load the Reddit API credentials, one value per line. Later cells index this
# list positionally: [0] and [1] feed HTTP basic auth, [2] is the username and
# [3] is the password.
with open('Authentication/reddit_credentials.txt', 'r', encoding="utf8") as f:
    # splitlines() does not leave a trailing empty entry when the file ends with
    # a newline, and strip() removes stray whitespace around each value so it is
    # not sent as part of a secret.
    credentials = [line.strip() for line in f.read().splitlines()]

In [4]:
# Basic-auth object for the token request: credentials[0] is the app's
# CLIENT_ID ('personal use script') and credentials[1] is its SECRET_TOKEN ('token').
auth = requests.auth.HTTPBasicAuth(username=credentials[0],
                                   password=credentials[1])

In [5]:
# Request an OAuth access token with the "password" grant: the basic-auth object
# built earlier plus the account's username and password.
data = {
    'grant_type': 'password',
    'username': credentials[2],
    'password': credentials[3],
}

# setup our header info, which gives reddit a brief description of our app
headers = {'User-Agent': 'MyBot/0.0.1'}

# send our request for an OAuth token; the timeout keeps the cell from hanging
# indefinitely on a stalled connection
res = requests.post('https://www.reddit.com/api/v1/access_token',
                    auth=auth, data=data, headers=headers, timeout=30)

# surface HTTP-level failures here instead of as a confusing error further down
res.raise_for_status()

# convert response to JSON and pull access_token value, failing with a readable
# message if the response carries no token (e.g. the credentials were rejected)
token_payload = res.json()
if 'access_token' not in token_payload:
    raise RuntimeError(
        "No access_token in the token response; error reported: "
        f"{token_payload.get('error')!r}"
    )
TOKEN = token_payload['access_token']

# add authorization to our headers dictionary
headers = {**headers, 'Authorization': f"bearer {TOKEN}"}

In [6]:
def getNestedComments(replies, root_node, convo_dict):
    """Recursively record a list of replies and attach them to the tree.

    Every usable reply is printed, appended to the four parallel lists in
    ``convo_dict`` and added as a child ``Node`` of ``root_node``. Its own
    replies, if any, are then processed the same way beneath that new node.

    An entry is skipped when it lacks ``'created_utc'``, ``'parent_id'`` or
    ``'body'``, or when its body contains ``'[deleted]'``. Skipping affects only
    that entry (and the replies beneath it); the remaining siblings are still
    processed.

    Args:
        replies: List of reply dicts, each holding a ``'data'`` dict.
        root_node: Tree node the replies are attached to.
        convo_dict: Dict with list values under ``'id'``, ``'timestamp'``,
            ``'reply_to'`` and ``'comment'``; mutated in place.

    Returns:
        None. Results are delivered through ``convo_dict`` and the tree.
    """
    for reply in replies:
        data = reply['data']

        has_fields = 'created_utc' in data and 'parent_id' in data and 'body' in data
        if not has_fields or '[deleted]' in data['body']:
            # Move on to the next sibling rather than abandoning the whole list.
            continue

        comment_id = data['id']
        # Naive datetime in the machine's local timezone.
        posted_at = datetime.datetime.fromtimestamp(data['created_utc'])
        # Drop the three-character prefix in front of the parent's id.
        parent_id = data['parent_id'][3:]
        body = data['body']

        print('User:', comment_id)
        print('Time:', posted_at)
        print('In reply to:', parent_id)
        print(body, '\n')

        convo_dict['id'].append(comment_id)
        convo_dict['timestamp'].append(posted_at)
        convo_dict['reply_to'].append(parent_id)
        convo_dict['comment'].append(body)

        child = Node(comment_id, parent=root_node)

        # 'replies' is an empty string when the reply has no children.
        if 'replies' in data and data['replies'] != '':
            getNestedComments(data['replies']['data']['children'], child, convo_dict)
        
def createTree(res, post_id):
    """Build a flat comment table and a reply tree from a comments response.

    The decoded response is indexed as a two-element sequence: element 0 holds
    the post (its ``ups`` and ``upvote_ratio`` are printed) and element 1 holds
    the top-level comments. Each usable top-level comment is printed, recorded
    and attached under the root node; its replies are handed to
    ``getNestedComments``.

    Args:
        res: Response object exposing ``.json()``.
        post_id: Identifier used as the name of the tree's root node.

    Returns:
        Tuple ``(conversation_dict, root)``. ``conversation_dict`` maps ``'id'``,
        ``'timestamp'``, ``'reply_to'`` and ``'comment'`` to parallel lists;
        ``root`` is the root ``Node`` of the reply tree.
    """
    root = Node(post_id)

    # Decode the body once and reuse it instead of re-parsing for every lookup.
    payload = res.json()
    post_data = payload[0]['data']['children'][0]['data']
    comments = payload[1]['data']['children']

    conversation_dict = {'id': [], 'timestamp': [], 'reply_to': [], 'comment': []}

    print('-----About the post-----')
    print('No. of Upvotes:', post_data['ups'])
    print('Upvote Ratio:', post_data['upvote_ratio'], '\n')

    for comment in comments:
        try:
            data = comment['data']
            if '[deleted]' in data['body']:
                continue

            comment_id = data['id']
            # Naive datetime in the machine's local timezone.
            posted_at = datetime.datetime.fromtimestamp(data['created_utc'])
            # Drop the three-character prefix in front of the parent's id.
            parent_id = data['parent_id'][3:]
            body = data['body']

            print('User:', comment_id,
                  'Time:', posted_at)
            print('In reply to:', parent_id)
            print(body, '\n')

            conversation_dict['id'].append(comment_id)
            conversation_dict['timestamp'].append(posted_at)
            conversation_dict['reply_to'].append(parent_id)
            conversation_dict['comment'].append(body)

            child = Node(comment_id, parent=root)

            # 'replies' is an empty string when the comment has no children.
            replies = data.get('replies')
            if replies:
                getNestedComments(replies['data']['children'], child, conversation_dict)

        except (KeyError, TypeError):
            # Entries without the expected comment fields are skipped on purpose;
            # anything else (including KeyboardInterrupt) is allowed to propagate.
            continue

    return conversation_dict, root

## Get all Reddit Posts

In [7]:
# Ask Pushshift for up to 50 r/singapore submissions, newest first (see the
# query string). The timeout keeps the cell from hanging on a stalled connection.
all_posts = requests.get(
    'https://api.pushshift.io/reddit/search/submission/?subreddit=singapore&sort=desc&sort_type=created_utc&size=50',
    timeout=30,
)
all_posts.raise_for_status()

# Decode the JSON body once rather than re-parsing it on every loop iteration.
posts = all_posts.json()['data']

for post in posts:
    print('Post ID:', post['id'], 
          'Time:', datetime.datetime.fromtimestamp(post['created_utc']))
    print(post['title'], '\n')
    
# NOTE(review): if this insert is re-enabled, titles containing a single quote
# will break the %-formatted statement; prefer a parameterized query if setUpDB
# supports one.
#     command = (
#             '''
#             INSERT INTO reddit_data
#             VALUES ('%s', '%s', '%s');
#             ''' % (post['id'], datetime.datetime.fromtimestamp(post['created_utc']), 
#                    post['title'])
#             )
#     setUpDB(command, uri)

Post ID: rvw242 Time: 2022-01-04 22:48:55
Weekly Covid-19 infection growth rate rises above 1 for first time since Nov 12; 842 new cases in S’pore 

Post ID: rvw1ef Time: 2022-01-04 22:47:53
Weekly Covid-19 infection growth rate rises above 1; 842 new cases in S’pore 

Post ID: rvw04x Time: 2022-01-04 22:46:03
Scoot investigating after video circulates online showing party on plane 

Post ID: rvvs9l Time: 2022-01-04 22:35:54
Singapore’s COVID-19 weekly infection growth rate back above 1 for first time since Nov 12 

Post ID: rvvmav Time: 2022-01-04 22:28:04
842 new COVID-19 cases in Singapore; 438 Omicron infections confirmed 

Post ID: rvvaco Time: 2022-01-04 22:12:18
This picture made me think of the song: I want to see 👀 the sunshine 🌤 after the rain 🌧 I want to see bluebirds 🐦 flying over the mountains 🌄 again In this case, an awesome 🥺 double rainbow🌈🌈! 😍 

Post ID: rvv1j2 Time: 2022-01-04 22:00:33
Singaporean in UK quiz TV programme 

Post ID: rvuyy5 Time: 2022-01-04 21:57:15
Sin

## Get Comments of Individual Posts

In [8]:
# while the token is valid (~2 hours) we just add headers=headers to our requests
res = requests.get('https://oauth.reddit.com/r/singapore/comments/rmqevj',
                   headers=headers, timeout=30)
# res = requests.get('https://oauth.reddit.com/r/singapore/comments/r08nub', headers=headers)

# Stop here on an HTTP error (for example an expired token) instead of letting
# the parsing code fail later on an unexpected response body.
res.raise_for_status()

In [9]:
# Flatten the fetched thread into parallel lists and build its reply tree,
# rooted at the post id.
conversation_dict, tree = createTree(res=res, post_id='rmqevj')

-----About the post-----
No. of Upvotes: 1863
Upvote Ratio: 0.87 

User: hpr2kav Time: 2021-12-24 08:55:24
In reply to: rmqevj
All I know is - anyone trying to pull a fast one and lie in parliament from now on will think trice,  and that's a good thing for all of us. 

User: hpntm2t Time: 2021-12-23 16:24:16
In reply to: rmqevj
Here’s some perspective: 

1) The public nature and the fact that parliamentarians have to be the one doing this of course puts up lots of visible air time but that does not imply other priorities are forsaken - just like in the private sector there are always people doing the real work while leaders are there for direction and final decisions

2) Isn’t there merit in investigating the circumstances surrounding the lies given that if indeed others were complicit in it that it would be a breach of parliamentary privilege? 

3) If you would like them to stop, regardless of your political views, would you be okay if those complicit are let off? 

User: hpnwekg
Time

In [10]:
# One row per recorded comment, built from the parallel lists in conversation_dict.
df = pd.DataFrame.from_dict(conversation_dict)
# df[df['id'] == 'hpoc9nq']

# Write the file first so that the preview is the cell's last expression;
# a bare df.head() anywhere else in the cell is evaluated and thrown away.
df.to_csv('Datasets/reddit_data.csv')
df.head()

In [11]:
# Print the reply tree one node per line, each name preceded by the branch
# prefix that RenderTree supplies for its depth.
for branch, _continuation, entry in RenderTree(tree):
    print(f"{branch}{entry.name}")

rmqevj
├── hpr2kav
├── hpntm2t
│   ├── hpnwekg
│   │   ├── hpnzb4r
│   │   │   └── hpo5qkg
│   │   │       └── hpokj06
│   │   │           ├── hpp0qgm
│   │   │           ├── hpoqxz5
│   │   │           └── hpraj0s
│   │   │               └── hpshgjh
│   │   ├── hpo2pl5
│   │   │   └── hpo7oxl
│   │   │       └── hpo7u0x
│   │   ├── hpo76sq
│   │   ├── hpo7l9d
│   │   ├── hpradku
│   │   │   ├── hps0rop
│   │   │   └── hprqpon
│   │   ├── hpoya8b
│   │   └── hpnwve6
│   │       └── hpnz3vc
│   │           ├── hpp0vzg
│   │           └── hpo6fwl
│   │               └── hpp7l4j
│   ├── hpo1src
│   │   ├── hpolpfk
│   │   ├── hpo33t8
│   │   │   ├── hpo4que
│   │   │   │   └── hpom3uq
│   │   │   │       └── hprayhb
│   │   │   │           └── hpruuh0
│   │   │   └── hpoq8hi
│   │   │       └── hppkszp
│   │   │           └── hpppv0m
│   │   └── hpo6898
│   │       └── hpohz2e
│   ├── hpnx97j
│   │   ├── hpnxtjg
│   │   │   ├── hpo0tg0
│   │   │   │   ├── hpo1tc3
│   │   │   │   └── hprbg

In [12]:
# comments[10]['data']['replies']['data']