In [1]:
"""Fetches the top posts (up to 25 per subreddit) from a list of tech support subreddits."""

import os
import pandas as pd
import praw  # Reddit API wrapper
from decouple import config

# Open the Reddit API session. Each credential is looked up through `config`
# (imported from `decouple`) under its upper-case setting name and passed to
# `praw.Reddit` as the matching lower-case keyword argument.
r = praw.Reddit(**{
    setting.lower(): config(setting)
    for setting in ('CLIENT_ID', 'CLIENT_SECRET', 'USERNAME',
                    'PASSWORD', 'USER_AGENT')
})

# Names of the tech support subreddits that posts are pulled from.
subreddits = [
    '24hoursupport',
    'Android',
    'Applehelp',
    'asktechnology',
    'buildapc',
    'linux4noobs',
    'pcgamingtechsupport',
    'talesfromtechsupport',
    'techsupport',
]

# Start from an empty frame holding the two columns that get filled in later:
# the subreddit a post belongs to and the post's text.
df = pd.DataFrame(
    columns=['Subreddit', 'Text'],
)

In [None]:
# Collect the text of the top submissions of every subreddit in `subreddits`.
# `limit=25` caps how many submissions are requested per subreddit.
# NOTE(review): confirm that 25 is the intended count per subreddit.
records = []
for sub in subreddits:
    subreddit = r.subreddit(sub)
    for submission in subreddit.top(limit=25):
        # The submission yielded by the listing is used directly; looking the
        # same id up again via `r.submission(id=...)` would cost an extra
        # request for every post.
        if submission.selftext != '':  # Skip posts whose selftext is empty
            records.append({'Subreddit': subreddit.display_name,
                            'Text': submission.selftext})  # Selftext is the text from the post.

# Build the frame once from the collected rows. `DataFrame.append` was removed
# in pandas 2.0 and copied the whole frame on every call; creating the frame
# from scratch here also means re-running this cell does not duplicate rows.
df = pd.DataFrame(records, columns=['Subreddit', 'Text'])

In [None]:
# Discard every row that has a missing value in any of its columns.
df = df.dropna(axis=0, how='any')

# Preview the first five rows of the cleaned frame.
df.head(5)

In [None]:
# Write the collected posts to CSV, leaving out the index column. The target
# directory is created first (a no-op when it already exists) because `to_csv`
# does not create missing directories and would otherwise raise.
os.makedirs('datasets', exist_ok=True)
df.to_csv('datasets/fetched_data.csv', index=False)  # Sending to CSV