In [30]:
import os
from pathlib import Path
# 3rd party
import praw
import pandas as pd
from requests import Session
from dotenv import load_dotenv

In [14]:
# Load variables from a .env file (when one is found) into the process
# environment so the credentials below never have to be hard-coded.
load_dotenv()

reddit = praw.Reddit(
    # Indexing (rather than .get) makes a missing CLIENT_ID fail right here
    # with a KeyError that names the variable.
    client_id=os.environ["CLIENT_ID"],
    # Kept as .get(): per the PRAW docs, client_secret=None is the valid
    # setting for "installed"-type apps, so absence must not be an error.
    client_secret=os.environ.get("CLIENT_SECRET"),
    # Reddit's API rules require a unique, descriptive user agent and forbid
    # impersonating a web browser; spoofed agents risk throttling or bans.
    # Set USER_AGENT in .env to override, ideally following PRAW's
    # "<platform>:<app ID>:<version> (by u/<username>)" convention.
    user_agent=os.environ.get(
        "USER_AGENT", "python:subreddit-new-posts-notebook:v0.1.0"
    ),
)

In [15]:
# Handle for r/redditdev; the next cell reads a few of its attributes as a
# quick check that the client configured above can talk to Reddit.
test_subreddit = reddit.subreddit("redditdev")

In [16]:
# Show the subreddit's name, title and sidebar description as one tuple.
(
    test_subreddit.display_name,
    test_subreddit.title,
    test_subreddit.description,
)

('redditdev',
 'reddit Development',
 "A subreddit for discussion of Reddit's API and Reddit API clients.\n\n* [Read the API Overview & Rules](https://github.com/reddit/reddit/wiki/API)\n* [Check out the API documentation](http://www.reddit.com/dev/api)\n* [PRAW chat](https://join.slack.com/t/praw/shared_invite/enQtOTUwMDcxOTQ0NzY5LWVkMGQ3ZDk5YmQ5MDEwYTZmMmJkMTJkNjBkNTY3OTU0Y2E2NGRlY2ZhZTAzMWZmMWRiMTMwYjdjODkxOGYyZjY)\n* [Snoowrap chat](https://gitter.im/not-an-aardvark/snoowrap)\n* [Unofficial Discord](https://discord.gg/hVMhxpV)\n* Please do not request bots here. Consider /r/requestabot instead.\n\n\nPlease confine discussion to Reddit's API instead of using this as a soapbox to talk to the admins. In particular, use [/r/ideasfortheadmins](/r/ideasfortheadmins) for feature ideas and [/r/bugs](/r/bugs) for bugs. If you have general reddit questions, try [/r/help](/r/help).\n\nTo see an explanation of recent user-facing changes to reddit (and the code behind them), check out /r/change

In [35]:
# Subreddit to scrape (also used to name the CSV written at the end).
subreddit = "redditdev"

# Submission attributes to extract; the order here is the column order of the
# resulting DataFrame.
submission_headers = [
    "author",
    "created_utc",
    "id",
    "is_original_content",
    "is_self",
    "link_flair_text",
    "locked",
    "name",
    "num_comments",
    "over_18",
    "permalink",
    "score",
    "selftext",
    "spoiler",
    "stickied",
    "subreddit",
    "title",
    "upvote_ratio",
    "url",
]

In [36]:
# Fetch the 55 newest submissions and materialise them into a list right away.
# .new() returns a lazy, one-shot listing generator: if the generator itself
# were stored, it would be exhausted after the first pass, and re-running a
# later cell on its own would silently produce an empty result.
new_posts = list(reddit.subreddit(subreddit).new(limit=55))

In [37]:
# One record per submission, keyed by the attribute names listed in
# submission_headers (each value is read straight off the submission object).
data = [
    dict((field, getattr(submission, field)) for field in submission_headers)
    for submission in new_posts
]

In [38]:
# Build the table from the per-submission records, pinning the column order
# to submission_headers, then preview the first five rows.
df = pd.DataFrame(data, columns=submission_headers)
df.head(5)

Unnamed: 0,author,created_utc,id,is_original_content,is_self,link_flair_text,locked,name,num_comments,over_18,permalink,score,selftext,spoiler,stickied,subreddit,title,upvote_ratio,url
0,Vegetable_Price5537,1688501000.0,14qplta,False,True,Reddit API,False,t3_14qplta,1,False,/r/redditdev/comments/14qplta/how_do_i_see_red...,3,"Like page 1, first 25 posts, page 2, the follo...",False,False,redditdev,how do i see reddit by pages?,0.71,https://www.reddit.com/r/redditdev/comments/14...
1,hiderostash,1688333000.0,14ozjy1,False,True,Reddit API,False,t3_14ozjy1,8,False,/r/redditdev/comments/14ozjy1/webscraping_user...,9,New to scraping reddit. I'm looking for a way ...,False,False,redditdev,Webscraping user comments,0.91,https://www.reddit.com/r/redditdev/comments/14...
2,dejavits,1688272000.0,14oevki,False,True,PRAW,False,t3_14oevki,2,False,/r/redditdev/comments/14oevki/how_to_use_use_p...,1,"Hello all, \n\n\nI have followed a Reddit Oau...",False,False,redditdev,How to use use Praw library with access and re...,0.67,https://www.reddit.com/r/redditdev/comments/14...
3,MonkeMusk1234,1688211000.0,14nsobx,False,True,PRAW,False,t3_14nsobx,8,False,/r/redditdev/comments/14nsobx/i_am_making_a_di...,8,.,False,False,redditdev,I am making a Discord Bot and i want it to pos...,0.83,https://www.reddit.com/r/redditdev/comments/14...
4,pl00h,1688159000.0,14nbw6g,False,True,,False,t3_14nbw6g,98,False,/r/redditdev/comments/14nbw6g/updated_rate_lim...,0,"Hi Devs,\n\nOver the last few months, we’ve sh...",False,True,redditdev,Updated rate limits going into effect over the...,0.42,https://www.reddit.com/r/redditdev/comments/14...


In [39]:
# Column dtypes and non-null counts: a quick check for missing values
# and for columns that came through as generic `object`.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   author               55 non-null     object 
 1   created_utc          55 non-null     float64
 2   id                   55 non-null     object 
 3   is_original_content  55 non-null     bool   
 4   is_self              55 non-null     bool   
 5   link_flair_text      54 non-null     object 
 6   locked               55 non-null     bool   
 7   name                 55 non-null     object 
 8   num_comments         55 non-null     int64  
 9   over_18              55 non-null     bool   
 10  permalink            55 non-null     object 
 11  score                55 non-null     int64  
 12  selftext             55 non-null     object 
 13  spoiler              55 non-null     bool   
 14  stickied             55 non-null     bool   
 15  subreddit            55 non-null     objec

In [40]:
# Ensure the output folder exists (no error if it is already there), then
# write the table to data/<subreddit>.csv without the index column.
Path("data").mkdir(exist_ok=True, parents=True)
df.to_csv(
    f"data/{subreddit}.csv",
    chunksize=1000,
    index=False,
)