In [1]:
from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import warnings
# NOTE(review): called with no category/module arguments, this filter hides
# every warning for the rest of the kernel session, deprecation notices
# included. Consider narrowing it to the specific warning being silenced.
warnings.filterwarnings("ignore")
import os
from dotenv import load_dotenv

In [2]:
# Populate the process environment from a local .env file (when one is found)
# so the Reddit credentials never have to be written into the notebook.
load_dotenv()

import praw

client_id = os.getenv('REDDIT_CLIENT_ID')
client_secret = os.getenv('REDDIT_CLIENT_SECRET')
user_agent = "TechSentimentApp"

# os.getenv returns None for an unset variable. Stop here with an explicit
# message instead of handing None to praw and failing later on an API call.
missing_credentials = [
  name
  for name, value in (
    ('REDDIT_CLIENT_ID', client_id),
    ('REDDIT_CLIENT_SECRET', client_secret),
  )
  if not value
]
if missing_credentials:
  raise RuntimeError(
    "Missing Reddit API credential(s): "
    + ", ".join(missing_credentials)
    + ". Define them in the environment or in a .env file before running this cell."
  )

# initializing Reddit instance (no username/password is supplied here)
reddit = praw.Reddit(
  client_id = client_id,
  client_secret = client_secret,
  user_agent = user_agent
)

In [3]:
# Handle for the subreddit that is scraped further down
subreddit = reddit.subreddit("technology")

# Print the subreddit's identifying metadata, one labelled line per attribute,
# in the order: display name, title, description.
# Attributes are looked up one at a time so each line is printed before the
# next attribute is read.
for label, attribute in (
  ("Display Name:", "display_name"),
  ("Title:", "title"),
  ("Description:", "description"),
):
  print(label, getattr(subreddit, attribute))

Display Name: technology
Title: /r/Technology 
Description: /r/technology is a place to share and discuss the latest developments, happenings and curiosities in the world of technology; a broad spectrum of conversation as to the innovations, aspirations, applications and machinations that define our age and shape our future. 

---
---

###Browse categories:

 | |
-|-
[Artificial Intelligence](https://bit.ly/2O1fsOK)|[Business](https://goo.gl/tmoLMB#Business)
[Biotechnology](https://bit.ly/2M1cGGv)|[Crypto](https://bit.ly/2LWVkuv)
[Energy](https://goo.gl/urO5Pe#Energy)|[Hardware](https://goo.gl/IIGWMH#Hardware)
[Machine Learning](https://bit.ly/2NWNaEW)|[Nanotech/Materials](https://bit.ly/2O2P9aP)
[Networking/Telecom](https://bit.ly/2LYW5TM)|[Net Neutrality](https://bit.ly/2Sl0dhV)
[Politics](https://goo.gl/aoMkzE#Politics)|[Privacy](https://bit.ly/2XPUGWH)
[Robotics/Automation](https://bit.ly/2xNXFQb)|[Security](https://goo.gl/9r2mbh#Security)
[Social Media](https://bit.ly/2XV9oqA)|[So

In [6]:
# Scrape settings, grouped here so a re-run can be tuned in one place
POST_LIMIT = 100            # submissions requested from the "hot" listing
MORE_COMMENTS_LIMIT = 5     # max "load more comments" placeholders expanded per post
REQUEST_DELAY_SECONDS = 2   # pause after each post has been processed

# Column order of the resulting table. Passing it to the DataFrame constructor
# keeps the schema (and the CSV header) stable even if nothing was scraped.
RECORD_COLUMNS = [
  'type', 'post_id', 'title', 'timestamp',
  'text', 'score', 'total_comments', 'post_url'
]

# one flat list of records: a row for each post, followed by a row for each
# of its comments
reddit_data = []

# scraping the subreddit's current "hot" listing (not the all-time top posts)
for post in subreddit.hot(limit = POST_LIMIT):
  # NOTE: 'Post' is capitalised while 'comment' below is not. The labels are
  # kept exactly as they are because the saved dataset already uses them.
  reddit_data.append({
    'type': 'Post',
    'post_id': post.id,
    'title': post.title,
    'timestamp': post.created_utc,
    'text': post.selftext,
    'score': post.score,
    'total_comments': post.num_comments,
    'post_url':post.url
  })

  # checking if the post has comments
  if post.num_comments > 0:
    # The limit caps how many "more comments" placeholders are expanded.
    # It is NOT a cap on the number of comments collected per post.
    post.comments.replace_more(limit = MORE_COMMENTS_LIMIT)
    # .list() flattens the whole comment tree, replies included
    for comment in post.comments.list():
      reddit_data.append({
        'type': 'comment',
        'post_id': post.id,   # links the comment back to its parent post row
        'title': None,
        'timestamp': comment.created_utc,
        'text': comment.body,
        'score': comment.score,
        'total_comments': 0,
        'post_url': None
      })
  # delay between requests to avoid rate limiting
  time.sleep(REQUEST_DELAY_SECONDS)

# Creating pandas Dataframe for posts and comments
technology_data = pd.DataFrame(reddit_data, columns = RECORD_COLUMNS)
technology_data

Unnamed: 0,type,post_id,title,timestamp,text,score,total_comments,post_url
0,Post,1njvxsb,"Sinclair Says Kimmel Suspension is Not Enough,...",1.758161e+09,,15092,1521,https://sbgi.net/sinclair-says-kimmel-suspensi...
1,comment,1njvxsb,,1.758163e+09,"""Kimmel to make a meaningful personal donation...",776,0,
2,comment,1njvxsb,,1.758162e+09,This company never addressed [that they had a ...,6337,0,
3,comment,1njvxsb,,1.758162e+09,And this is why kneeling to totalitarians to s...,1059,0,
4,comment,1njvxsb,,1.758161e+09,>Sinclair will not lift the suspension of “Ji...,12776,0,
...,...,...,...,...,...,...,...,...
14048,comment,1nis6io,,1.758070e+09,Considering you can bypass TPM and install 11 ...,6,0,
14049,Post,1nj3dc2,"Stop Killing Games' EU hearing is seemingly ""a...",1.758083e+09,,104,3,https://www.gamesradar.com/games/stop-killing-...
14050,comment,1nj3dc2,,1.758102e+09,So long as the hearing isn't UK government's s...,12,0,
14051,comment,1nj3dc2,,1.758142e+09,There is unfortunately a non-zero chance that ...,3,0,


In [7]:
# Persist the raw scrape. to_csv does not create missing parent folders, so
# make sure the target directory exists before writing.
output_path = "../data/reddit_tech_raw_data.csv"
os.makedirs(os.path.dirname(output_path), exist_ok = True)
technology_data.to_csv(output_path, index = False)

##### Overall details:
- Successfully connected to the Reddit API
- Scraped 100 hot posts from the r/technology subreddit
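- Retrieved the comments for each of those posts, expanding at most 5 "load more comments" placeholders per post (about 14,000 rows in total, not just 5 comments per post)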