In [None]:
from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import warnings
warnings.filterwarnings("ignore")
import os
from dotenv import load_dotenv

In [None]:
# Load environment variables (expected to hold the Reddit credentials)
# before anything below reads them.
load_dotenv()

import praw

# Credentials come from the environment; either value is None when unset.
user_agent = "TechSentimentApp"
client_id = os.environ.get('REDDIT_CLIENT_ID')
client_secret = os.environ.get('REDDIT_CLIENT_SECRET')

# Reddit API client shared by the cells that follow
reddit = praw.Reddit(client_id = client_id, client_secret = client_secret, user_agent = user_agent)

In [None]:
# Handle for the subreddit that will be scraped
subreddit = reddit.subreddit("technology")

# Print the subreddit's name, title and description, one labelled line each
for label, attribute in (
  ("Display Name:", "display_name"),
  ("Title:", "title"),
  ("Description:", "description"),
):
  print(label, getattr(subreddit, attribute))

In [None]:
# Scrape settings, named so they can be tuned without editing the loop
POST_LIMIT = 100            # hot posts requested from the subreddit
REPLACE_MORE_LIMIT = 5      # "load more comments" placeholders expanded per post
REQUEST_DELAY_SECONDS = 2   # pause after each post

# Column order shared by post rows and comment rows
RECORD_COLUMNS = [
  'type', 'post_id', 'title', 'timestamp',
  'text', 'score', 'total_comments', 'post_url',
]


def _post_record(post):
  """Return the table row (a dict) describing one submission."""
  return {
    'type': 'Post',
    'post_id': post.id,
    'title': post.title,
    'timestamp': post.created_utc,
    'text': post.selftext,
    'score': post.score,
    'total_comments': post.num_comments,
    'post_url': post.url
  }


def _comment_record(comment, post_id):
  """Return the table row (a dict) for one comment under the post `post_id`.

  Post-only fields are filled with None (title, post_url) or 0 (total_comments).
  """
  # NOTE(review): 'comment' is lower-case while post rows use 'Post'. Left
  # unchanged because consumers of the saved data may filter on these values.
  return {
    'type': 'comment',
    'post_id': post_id,
    'title': None,
    'timestamp': comment.created_utc,
    'text': comment.body,
    'score': comment.score,
    'total_comments': 0,
    'post_url': None
  }


def scrape_hot_posts(subreddit, post_limit = POST_LIMIT,
                     replace_more_limit = REPLACE_MORE_LIMIT,
                     delay_seconds = REQUEST_DELAY_SECONDS):
  """Collect hot posts and their comments from `subreddit` as a list of dicts.

  Each post contributes one row, followed by one row for every comment
  returned by `post.comments.list()` after `replace_more` has run.

  Parameters:
    subreddit: object exposing `hot(limit=...)` that yields submissions.
    post_limit: value passed to `subreddit.hot(limit=...)`.
    replace_more_limit: value passed to `post.comments.replace_more(limit=...)`.
    delay_seconds: seconds to sleep after each post.

  Returns:
    list of dicts whose keys match RECORD_COLUMNS.
  """
  records = []
  for post in subreddit.hot(limit = post_limit):
    records.append(_post_record(post))

    # Only touch the comment tree when the post reports having comments
    if post.num_comments > 0:
      post.comments.replace_more(limit = replace_more_limit)
      records.extend(
        _comment_record(comment, post.id) for comment in post.comments.list()
      )

    # delay between requests to avoid rate limiting
    time.sleep(delay_seconds)
  return records


# Scraping top posts and comments
reddit_data = scrape_hot_posts(subreddit)

# Creating pandas DataFrame for posts and comments; the explicit column list
# keeps the schema intact even when nothing was scraped.
technology_data = pd.DataFrame(reddit_data, columns = RECORD_COLUMNS)
technology_data

In [None]:
# Save the raw scrape. The target folder is created first because to_csv
# does not create missing directories.
raw_data_path = "../data/reddit_tech_raw_data.csv"
os.makedirs(os.path.dirname(raw_data_path), exist_ok = True)
technology_data.to_csv(raw_data_path, index = False)

##### Overall details:
- Successfully connected to the Reddit API
- Scraped 100 hot posts from the r/technology subreddit
- Retrieved the comments for each of those posts, expanding up to 5 "load more comments" placeholders per post