In [1]:
from dotenv import dotenv_values
import praw
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
# Make sure the VADER lexicon used by SentimentIntensityAnalyzer is available locally
nltk.download(info_or_id='vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\seb\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
# Read key/value settings from the local .env file into a mapping
# (the Reddit credentials are looked up in it below)
env = dotenv_values(dotenv_path='.env')

In [4]:
# Build the Reddit API client from the credentials stored in the .env mapping.
# Indexing with [] raises KeyError right here if a required entry is missing.
reddit_credentials = {
    'client_id': env['R_CLIENT_ID'],
    'client_secret': env['R_CLIENT_SECRET'],
    'user_agent': env['R_USER_AGENT'],
}
reddit = praw.Reddit(**reddit_credentials)

In [5]:
# Several subreddit names joined with "+" — presumably queried by PRAW as one
# combined listing (confirm against the PRAW docs)
subreddit_name = 'Economics+economy+stocks'
subreddit = reddit.subreddit(display_name=subreddit_name)

Reference: the attributes available on each `praw.models.Submission` object are documented at
https://praw.readthedocs.io/en/v7.7.1/code_overview/models/submission.html

In [None]:
import database

In [None]:
import time

# Collect submissions page by page: each request asks for the posts that come
# after the last one already collected, until a request comes back empty.
submissions = []
last_submission = None  # fullname of the last collected post; None on the first request

while True:
    new_submissions = list(subreddit.new(limit=1000, params={'after': last_submission}))
    if new_submissions:
        submissions += new_submissions
        last_submission = submissions[-1].fullname
        time.sleep(2)  # pause between requests to stay clear of rate limits
    else:
        break



In [6]:
def extract_submission_data(submission):
    """Flatten a submission object into a plain dict of the fields kept for analysis.

    Args:
        submission: Object exposing the attributes read below (``id``,
            ``created_utc``, ``subreddit.display_name``, ``author``, ``title``,
            ``selftext``, ``url``, ``num_comments``, ``score``, ``upvote_ratio``).

    Returns:
        dict: One record per submission. ``created_utc`` is truncated to an int
        and ``author_name`` is ``None`` when the submission has no author.
    """
    poster = submission.author
    if poster:
        author_name = poster.name
    else:
        # No author object on the submission, so there is no name to record
        author_name = None

    return dict(
        id=submission.id,
        created_utc=int(submission.created_utc),
        subreddit=submission.subreddit.display_name,
        author_name=author_name,
        title=submission.title,
        selftext=submission.selftext,
        url=submission.url,
        num_comments=submission.num_comments,
        score=submission.score,
        upvote_ratio=submission.upvote_ratio,
    )

In [7]:
_VADER_ANALYZER = None  # shared analyzer instance, created on first use


def _get_vader_analyzer():
    """Return the shared SentimentIntensityAnalyzer, building it on the first call."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def apply_sentiment_score_vader(submission_data: dict, analyzer=None) -> dict:
    """Add VADER compound sentiment scores for the title and selftext.

    The input dict is updated in place and also returned, so both
    ``apply_sentiment_score_vader(d)`` and ``d = apply_sentiment_score_vader(d)``
    work.

    Args:
        submission_data: Record containing at least the ``'title'`` and
            ``'selftext'`` keys.
        analyzer: Optional object with a ``polarity_scores(text)`` method that
            returns a mapping with a ``'compound'`` entry. When omitted, one
            shared ``SentimentIntensityAnalyzer`` is created lazily and reused,
            so the analyzer is not rebuilt for every submission.

    Returns:
        dict: The same ``submission_data`` object with two added keys:
        ``'sentiment_title_vader'`` (compound score of the title) and
        ``'sentiment_selftext_vader'`` (compound score of the selftext, or
        ``None`` when the selftext is empty or missing a value).

    Raises:
        KeyError: If ``'title'`` or ``'selftext'`` is absent from the record.
    """
    if analyzer is None:
        analyzer = _get_vader_analyzer()

    title = submission_data['title']
    selftext = submission_data['selftext']

    submission_data['sentiment_title_vader'] = analyzer.polarity_scores(title)['compound']

    # Only score the body when there is one: link posts have an empty selftext,
    # and scoring it would be wasted work (and would fail on None).
    if selftext:
        submission_data['sentiment_selftext_vader'] = analyzer.polarity_scores(selftext)['compound']
    else:
        submission_data['sentiment_selftext_vader'] = None

    return submission_data

In [None]:
# Turn every fetched submission into its plain-dict record, then display the list
submissions_data = list(map(extract_submission_data, submissions))
submissions_data

In [8]:
# Score and print each submission delivered by the subreddit stream
for submission in subreddit.stream.submissions():
    if not submission.stickied:  # pinned posts are ignored
        submission_data = apply_sentiment_score_vader(
            extract_submission_data(submission)
        )

        # TODO: collect/persist the record instead of only printing it, e.g.
        #   submissions_data.append(submission_data)
        #   database.insert_rows('sentiment', submission_data)

        print(submission_data)



{'id': '1f1ytke', 'created_utc': 1724705193, 'subreddit': 'stocks', 'author_name': 'Puginator', 'title': 'Apple to replace CFO Luca Maestri on Jan. 1 ', 'selftext': 'Apple announced Monday that it will replace Chief Financial Officer Luca Maestri on Jan. 1 with current Apple insider Kevan Parekh.\n\nMaestri will continue to lead teams focusing on IT, security, and real estate development, Apple said. He had been Apple CFO since 2014.\n\nParekh, the incoming CFO, has been on Maestri’s finance leadership team for years as the company’s VP of Financial Planning and Analysis.\n\n“For more than a decade, Kevan has been an indispensable member of Apple’s finance leadership team, and he understands the company inside and out. His sharp intellect, wise judgment, and financial brilliance make him the perfect choice to be Apple’s next CFO,” Apple CEO Tim Cook said in a statement.\n\nMaestri was named Apple CFO in 2014, before the stock began a torrid run, partially powered by strong demand for i

In [None]:
# Score each streamed submission and print it wrapped in a one-element list
# (a list of rows, presumably the shape database.insert_rows expects — TODO confirm).
for submission in subreddit.stream.submissions():
    if submission.stickied:  # if post is pinned, skip it
        continue

    # Use the scoring helper rather than an inline copy of its logic: the inline
    # version referenced `sia`, which is never defined at notebook scope and
    # therefore raised NameError on a fresh kernel.
    submission_data = apply_sentiment_score_vader(extract_submission_data(submission))

    # The record is always a dict here, so wrap it unconditionally; a separate
    # name keeps `submission_data` from switching type between dict and list.
    submission_rows = [submission_data]

    # TODO: persist instead of only printing, e.g.
    #   database.insert_rows('sentiment', submission_rows)

    print(submission_rows)
