In [1]:
#Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import requests

from datetime import datetime,timezone


In [2]:
def get_posts(subreddit, before):
    """
    Creates API request for 250 submissions from before a date in UTC format
    
    Checks if post has 'selftext' data and not 'removed_by_category' data and then
    adds to a list of posts to return
    
    Returns list of posts and the created time of the last collected post
    """
    post_list = []
    params = {
        'subreddit': subreddit,
        'size': 250,
        'before': before
    }
    url = 'https://api.pushshift.io/reddit/search/submission'
    
    req = requests.get(url, params)
    
    if req.status_code == 200:
        posts = req.json()['data']
        for post in posts:

            if 'selftext' in post.keys() and 'removed_by_category' not in post.keys():
                post_list.append([post['id'], post['title'], post['selftext'], post['created_utc'], post['url'],  subreddit])
            
        return post_list, posts[-1]['created_utc']
    else:
        return None
    
    

In [3]:
def get_x_posts(subreddit, num_posts):
    """
    Creates a time stamp for current time in UTC
    
    Uses the get_posts method to collect posts until it has at least the number of
    posts requested
    
    Creates a dataframe and saves it as a csv
    
    Returns the dataframe created
    """
    post_list = []
    now_utc = int(datetime.now(timezone.utc).timestamp())
    
    while len(post_list) < num_posts:
        
        if get_posts(subreddit, now_utc) != None:
            new_posts, now_utc = get_posts(subreddit, now_utc)
            post_list.extend(new_posts)
        
    df = pd.DataFrame(post_list, columns = ['id', 'title', 'text', 'created_utc', 'url', 'subreddit'])
    df.drop_duplicates()
    df.to_csv(f'../Data/{subreddit}_posts.csv')     
    return pd.DataFrame(post_list, columns = ['id', 'title', 'text', 'created_utc', 'ur', 'subreddit'])

In [4]:
get_x_posts('askwomen', 2000)
get_x_posts('askmen', 2000)

(2041, 6)
(2063, 6)


Unnamed: 0,id,title,text,created_utc,ur,subreddit
0,xv4eg7,How do you guys feel about people who cheat on...,,1664854885,https://www.reddit.com/r/AskMen/comments/xv4eg...,askmen
1,xv4dlk,Why is it so hard to a beat porn addiction?,,1664854820,https://www.reddit.com/r/AskMen/comments/xv4dl...,askmen
2,xv457n,How often have you assumed that a women is fli...,,1664854133,https://www.reddit.com/r/AskMen/comments/xv457...,askmen
3,xv431v,What are good goals to have if you’re trying t...,"I’m in my mid 20’s, just ended a relationship ...",1664853953,https://www.reddit.com/r/AskMen/comments/xv431...,askmen
4,xv3ta4,How do I know if I'm being a pushover or not?,I'm a male 27 and sometimes I find myself in s...,1664853169,https://www.reddit.com/r/AskMen/comments/xv3ta...,askmen
...,...,...,...,...,...,...
2058,xmarl1,What do you want us (your partners) to say aft...,"What do you prefer you be told after you cum, ...",1663970819,https://www.reddit.com/r/AskMen/comments/xmarl...,askmen
2059,xmao68,"Guys, how do you recommend for a 16yo guy to s...","I am weak as shit and need to get strong, part...",1663970587,https://www.reddit.com/r/AskMen/comments/xmao6...,askmen
2060,xma181,"Men, how do you find the discipline to hit the...","I used to be incredibly shredded and muscular,...",1663969065,https://www.reddit.com/r/AskMen/comments/xma18...,askmen
2061,xma14r,Do you find it frustrating how toxic dating is...,With women having thousands and thousands of o...,1663969059,https://www.reddit.com/r/AskMen/comments/xma14...,askmen
