In [2]:
import praw
import json
import time
from dotenv import load_dotenv
import os

In [3]:
# Load credentials from a local .env file into the process environment.
load_dotenv()

# NOTE(review): USERNAME is predefined by the OS on Windows, and load_dotenv()
# does not override existing environment variables by default — confirm the
# value read here is the Reddit account name from .env, not the OS login.
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
USERNAME = os.getenv("USERNAME")
PASSWORD = os.getenv("PASSWORD")

# The user agent must be a descriptive string identifying this client; the
# previous value was the boolean True, which is not a meaningful identifier.
USER_AGENT = "python:subreddit-scraper:v0.1"
if USERNAME:
    USER_AGENT += f" (by u/{USERNAME})"

# Authenticated Reddit client shared by every cell below.
reddit = praw.Reddit(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    user_agent=USER_AGENT,
    username=USERNAME,
    password=PASSWORD,
)


In [4]:
# Subreddit to scrape; this name is also embedded in the output file name.
subreddit_name = "frugal"
# Handle on that subreddit, obtained from the shared Reddit client.
subreddit = reddit.subreddit(subreddit_name)

In [5]:
def submission_to_object(submission):
    """Flatten a submission into a plain, JSON-serializable dict.

    Args:
        submission: Object exposing the submission attributes read below
            (``id``, ``author``, ``created_utc``, ``title``, ...).

    Returns:
        dict: One entry per captured field. ``author`` is the author's name as
        a string, or ``None`` when the submission has no author, so that a
        missing author serializes as JSON ``null`` instead of the literal
        string ``"None"``. ``subreddit`` is the lower-cased string form of the
        submission's subreddit.

    Not captured: clicked, distinguished, name, poll_data, spoiler.
    """
    author = submission.author
    return {
        "id": submission.id,
        # str(None) would yield "None", which looks like a real account name.
        "author": None if author is None else str(author),
        "created_utc": submission.created_utc,
        "title": submission.title,
        "selftext": submission.selftext,
        "url": submission.url,
        "score": submission.score,
        "upvote_ratio": submission.upvote_ratio,
        "num_comments": submission.num_comments,
        "subreddit": str(submission.subreddit).lower(),
        "permalink": submission.permalink,
        "link_flair_text": submission.link_flair_text,
        "author_flair_text": submission.author_flair_text,
        "edited": submission.edited,
        "locked": submission.locked,
        "is_original_content": submission.is_original_content,
        "is_self": submission.is_self,
        "over_18": submission.over_18,
        "stickied": submission.stickied,
    }
# Timestamp the scrape (whole seconds) before fetching, so the saved file can be dated.
submissions_scraped_utc = int(time.time())

# Pull the 10 all-time top posts and flatten each one straight into a plain dict.
submissions = [
    submission_to_object(top_post)
    for top_post in subreddit.top(limit=10, time_filter='all')
]

# Show the first record as a quick sanity check.
print(submissions[0])

{'id': '1185njd', 'author': 'bethany_katherine', 'created_utc': 1676992257.0, 'title': 'UPDATE: 30 pounds of bananas', 'selftext': 'Alright y’all. The bananas have all been used.', 'url': 'https://i.redd.it/n4cxhl2zmlja1.jpg', 'score': 50221, 'upvote_ratio': 0.93, 'num_comments': 704, 'subreddit': 'frugal', 'permalink': '/r/Frugal/comments/1185njd/update_30_pounds_of_bananas/', 'link_flair_text': 'Frugal Win 🎉', 'author_flair_text': None, 'edited': False, 'locked': False, 'is_original_content': False, 'is_self': False, 'over_18': False, 'stickied': False}


In [6]:
# Save the submissions to a JSON file, wrapped with the metadata needed to
# identify the scrape later (which subreddit, and when).
submissions_scrape = {
    "subreddit_name": subreddit_name,
    "scraped_utc": submissions_scraped_utc,
    "submissions": submissions,
}

# Create the output directory first: open(..., "w") does not create missing
# parent directories and would raise FileNotFoundError on a fresh checkout.
os.makedirs("data/submissions", exist_ok=True)

with open(
    f"data/submissions/submissions_{subreddit_name}_{submissions_scraped_utc}.json",
    "w",
    encoding="utf-8",
) as f:
    json.dump(submissions_scrape, f)

print("Scraped", len(submissions), "submissions from", subreddit_name)

Scraped 10 submissions from frugal


In [7]:
# Inspect the API rate-limit counters reported by the client.
lim_dict = reddit.auth.limits
print(lim_dict)

# Convert the reset time from a unix timestamp to a human-readable string.
# The timestamp can be None when no request has been issued yet; passing None
# to time.ctime() would silently print the *current* time, so handle it apart.
unix_timestamp = lim_dict["reset_timestamp"]
if unix_timestamp is None:
    human_readable_time = None
    print("Rate-limit reset time is not known yet (no API request made).")
else:
    human_readable_time = time.ctime(unix_timestamp)
    print(human_readable_time)

{'remaining': 999.0, 'reset_timestamp': 1726991999.6067772, 'used': 1}
Sun Sep 22 09:59:59 2024


In [8]:
# Thread to scrape, and the moment (whole seconds) the comment scrape started;
# both values end up in the name of the saved comments file.
post_id = "1185njd"
scraped_utc = int(time.time())
post = reddit.submission(id=post_id)

# Quick checks: whether the client is in read-only mode, and the length of the
# post's comment collection as currently loaded.
print(reddit.read_only)
print(len(post.comments))

False
384


In [9]:
# Resolve the "load more comments" placeholders in the comment tree so the
# serialization below sees real comments; limit=None asks PRAW to resolve all
# of them (per the PRAW docs this can take many requests on large threads).
post.comments.replace_more(limit=None)

[]

In [20]:

def comments_to_json(comment):
    """Flatten a comment into a plain, JSON-serializable dict.

    Despite the name, this returns a dict, not a JSON string.

    Args:
        comment: Object exposing the comment attributes read below (``id``,
            ``body``, ``author``, ``created_utc``, ``replies``, ...).

    Returns:
        dict: One entry per captured field. ``author`` is the author's name as
        a string, or ``None`` when the comment has no author, so that a
        missing author serializes as JSON ``null`` instead of the literal
        string ``"None"``. ``created_utc`` is truncated to whole seconds.
        ``subreddit_id``, ``link_id`` and ``parent_id`` have everything up to
        the first underscore removed. ``reply_ids`` lists the ``id`` of each
        item in ``comment.replies``.
    """

    def _strip_prefix(fullname):
        # "t1_abc" -> "abc": keep only what follows the first underscore.
        return fullname.split("_", 1)[1]

    author = comment.author
    return {
        "id": comment.id,
        "body": comment.body,
        # str(None) would yield "None", which looks like a real account name.
        "author": None if author is None else str(author),
        "created_utc": int(comment.created_utc),
        "edited": comment.edited,
        "is_submitter": comment.is_submitter,
        "score": comment.score,
        "stickied": comment.stickied,
        "subreddit_id": _strip_prefix(comment.subreddit_id),
        "link_id": _strip_prefix(comment.link_id),
        "parent_id": _strip_prefix(comment.parent_id),
        "reply_ids": [reply.id for reply in comment.replies],
    }

# Serialize every comment returned by the post's comment list, then report
# how many records were produced.
comments = list(map(comments_to_json, post.comments.list()))
print(len(comments))


bethany_katherine
MamaMidgePidge
AdministrativeSky236
Boomboooom
BurnerJerkzog
TheRealTofuey
None


AttributeError: 'NoneType' object has no attribute 'id'

In [11]:
# Save the comments to a JSON file, wrapped with the metadata needed to
# identify the scrape later (which post, and when).
comments_scrape = {
    "post_id": post_id,
    "scraped_utc": scraped_utc,
    "comments": comments,
}

# Create the output directory first: open(..., "w") does not create missing
# parent directories and would raise FileNotFoundError on a fresh checkout.
os.makedirs("data/comments", exist_ok=True)

with open(
    f'data/comments/comments_{post_id}_{scraped_utc}.json',
    'w',
    encoding="utf-8",
) as f:
    json.dump(comments_scrape, f)

In [13]:
# Example account handle used to try out redditor_to_object.
redditor = reddit.redditor("spez")

In [16]:
def redditor_to_object(redditor):
    """Flatten a redditor into a plain dict of profile fields.

    Attributes that the account does not expose are recorded as ``None``
    instead of raising ``AttributeError``. Per the PRAW documentation,
    suspended accounts only provide a small subset of attributes, so direct
    attribute access would fail for them.

    Args:
        redditor: Object exposing (some of) the redditor attributes read
            below.

    Returns:
        dict: The same keys, in the same order, for every account. Each value
        is the attribute's value, or ``None`` when the attribute is missing.
        ``subreddit`` is the lower-cased string form of the account's
        ``subreddit`` attribute.
    """

    def _attr(name):
        # getattr's default also covers attributes that fail on lookup.
        return getattr(redditor, name, None)

    profile_subreddit = _attr("subreddit")
    return {
        "id": _attr("id"),
        "name": _attr("name"),
        "comment_karma": _attr("comment_karma"),
        "created_utc": _attr("created_utc"),
        "has_verified_email": _attr("has_verified_email"),
        "icon_img": _attr("icon_img"),
        "is_employee": _attr("is_employee"),
        "is_mod": _attr("is_mod"),
        "is_gold": _attr("is_gold"),
        "link_karma": _attr("link_karma"),
        # Avoid str(None).lower() == "none" when there is no subreddit.
        "subreddit": (
            None if profile_subreddit is None else str(profile_subreddit).lower()
        ),
        "verified": _attr("verified"),
    }

In [17]:
# Display the flattened profile for the example account.
redditor_to_object(redditor)

{'id': '1w72',
 'name': 'spez',
 'comment_karma': 751294,
 'created_utc': 1118030400.0,
 'has_verified_email': True,
 'icon_img': 'https://styles.redditmedia.com/t5_3k30p/styles/profileIcon_snoo-nftv2_bmZ0X2VpcDE1NToxMzdfNDY2YTMzMDg4N2JkZjYyZDUzZjk2OGVhODI0NzkzMTUwZjA3NzYyZV8zNTIy_rare_4a74ad4e-f76b-458c-86ce-ed9202163a57-headshot.png?width=256&height=256&crop=256:256,smart&s=fb07ab998bb955877134c19f3c766d71ba7b880e',
 'is_employee': True,
 'is_mod': True,
 'is_gold': True,
 'link_karma': 176236,
 'subreddit': 'u_spez',
 'verified': True}