## Import Libraries

In [3]:
import os

import pandas as pd
import praw

from scraper import API_KEY, API_SECRET, USERNAME, PASSWORD

## PRAW/Reddit API

In [4]:
# PRAW client configured with the credentials imported from the local `scraper` module.
reddit = praw.Reddit(
    client_id=API_KEY,
    client_secret=API_SECRET,
    # NOTE(review): 'YOUR_USER_AGENT' looks like a placeholder — replace it with a
    # descriptive, unique user-agent string before running against the API.
    user_agent='YOUR_USER_AGENT',
    username=USERNAME,
    password=PASSWORD
)

In [5]:
# Helper that collects (created_utc, title, selftext) rows from a listing of posts and prints how many were gathered
# Thank you to Bryan Ortiz for this function

def combine_data(posts, label: str) -> list:
    """Collect ``(created_utc, title, selftext)`` rows from an iterable of posts.

    Args:
        posts: Iterable of objects exposing ``created_utc``, ``title`` and
            ``selftext`` attributes. It is iterated exactly once, so a
            generator is consumed by this call.
        label: Name of the listing; it is upper-cased in the printed summary.

    Returns:
        A list with one ``(created_utc, title, selftext)`` tuple per post, in
        iteration order. An empty iterable yields an empty list.
    """
    # Single pass over the posts: only the rows themselves are returned, so no
    # per-row bookkeeping (such as tracking the earliest timestamp) is needed.
    data = [(p.created_utc, p.title, p.selftext) for p in posts]
    print(f'{label.upper()} POSTS :: N = {len(data)}')
    return data

## Collect Data From Subreddit

In [11]:
# Gather up to 1000 posts from each of the four listings of r/existentialism.
subreddit = reddit.subreddit('existentialism')

# Build all four listings first; combine_data iterates each one below.
posts_new, posts_hot, posts_top, posts_con = (
    subreddit.new(limit=1000),
    subreddit.hot(limit=1000),
    subreddit.top(limit=1000),
    subreddit.controversial(limit=1000),
)

# NOTE: the data_* names are reused by the r/nihilism cell, so build the
# existentialism DataFrame from them before running that cell.
data_new = combine_data(posts=posts_new, label='new')
data_hot = combine_data(posts=posts_hot, label='hot')
data_top = combine_data(posts=posts_top, label='top')
data_con = combine_data(posts=posts_con, label='controversial')

NEW POSTS :: N = 958
HOT POSTS :: N = 996
TOP POSTS :: N = 999
CONTROVERSIAL POSTS :: N = 995


In [12]:
# Stack the four listings into one frame and label every row.
# NOTE(review): the same post may show up in more than one listing; rows are
# not de-duplicated here — confirm whether downstream code handles that.
df1 = pd.DataFrame(
    data_new + data_hot + data_top + data_con,
    columns=['time', 'title', 'text'],
).assign(type=1)  # For merging and classifier later

# to_csv does not create missing directories, so make sure ./data exists first
# (otherwise a fresh checkout fails here on Restart & Run All).
os.makedirs('./data', exist_ok=True)
df1.to_csv('./data/existentialism.csv', index=False)
df1.head()

Unnamed: 0,time,title,text,type
0,1704666000.0,Subjectivity is the Starting Point,"*The word subjectivism has two meanings, and o...",1
1,1704665000.0,Is there a possibility of an afterlife?,Title,1
2,1704575000.0,The meaning of existence,\n\nI had a discussion with friend who is an ...,1
3,1704481000.0,the mind and the notion of the self,Quite a while ago i came up with this Idea abo...,1
4,1704472000.0,I made an ambient album based on my initial in...,,1


In [9]:
# Gather up to 1000 posts from each of the four listings of r/nihilism.
subreddit = reddit.subreddit('nihilism')

# Build all four listings first; combine_data iterates each one below.
posts_new, posts_hot, posts_top, posts_con = (
    subreddit.new(limit=1000),
    subreddit.hot(limit=1000),
    subreddit.top(limit=1000),
    subreddit.controversial(limit=1000),
)

# NOTE: the data_* names are shared with the r/existentialism cell, so build the
# nihilism DataFrame from them before re-running that cell.
data_new = combine_data(posts=posts_new, label='new')
data_hot = combine_data(posts=posts_hot, label='hot')
data_top = combine_data(posts=posts_top, label='top')
data_con = combine_data(posts=posts_con, label='controversial')

NEW POSTS :: N = 983
HOT POSTS :: N = 998
TOP POSTS :: N = 998
CONTROVERSIAL POSTS :: N = 997


In [10]:
# Stack the four listings into one frame and label every row.
# NOTE(review): the same post may show up in more than one listing; rows are
# not de-duplicated here — confirm whether downstream code handles that.
df2 = pd.DataFrame(
    data_new + data_hot + data_top + data_con,
    columns=['time', 'title', 'text'],
).assign(type=0)  # For merging and classifier later

# to_csv does not create missing directories, so make sure ./data exists first
# (otherwise a fresh checkout fails here on Restart & Run All).
os.makedirs('./data', exist_ok=True)
df2.to_csv('./data/nihilism.csv', index=False)
df2.head()

Unnamed: 0,time,title,text,type
0,1704666000.0,Cure to nihilism,"I know it's a big claim, trust me and read.\n\...",0
1,1704669000.0,Is the general idea to embrace nihilism until ...,"I understand the idea of nihilism, but I don't...",0
2,1704665000.0,Is there a possibility of an afterlife?,,0
3,1704644000.0,"The reality is you only get 20% of your life, ...","When I turned 30, a stark reality laid before ...",0
4,1704580000.0,why are some minds more eager for meaning? or ...,"after all, those who ease their need for meani...",0


In [14]:
# Stack the two labelled frames row-wise into a single dataset; the `type`
# column (set when df1 and df2 were built) tells the two sources apart.
# ignore_index=True renumbers the rows 0..n-1 instead of repeating each frame's index.
df = pd.concat([df1, df2], ignore_index=True)

# to_csv does not create missing directories, so make sure ./data exists first.
os.makedirs('./data', exist_ok=True)
df.to_csv('./data/total.csv', index=False)
df.head()

Unnamed: 0,time,title,text,type
0,1704666000.0,Subjectivity is the Starting Point,"*The word subjectivism has two meanings, and o...",1
1,1704665000.0,Is there a possibility of an afterlife?,Title,1
2,1704575000.0,The meaning of existence,\n\nI had a discussion with friend who is an ...,1
3,1704481000.0,the mind and the notion of the self,Quite a while ago i came up with this Idea abo...,1
4,1704472000.0,I made an ambient album based on my initial in...,,1
