In [7]:
import os
from datetime import datetime, timezone

import pandas as pd
import praw
from dotenv import load_dotenv
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


# Load environment variables from a .env file; the REDDIT_* settings read via os.getenv below are expected to come from it
load_dotenv()


True

In [2]:
# Create the Reddit client; every credential is read from the environment
# so that no secret is stored in the notebook itself.
reddit = praw.Reddit(
    **{
        "client_id": os.getenv("REDDIT_CLIENT_ID"),
        "client_secret": os.getenv("REDDIT_CLIENT_SECRET"),
        "user_agent": os.getenv("REDDIT_USER_AGENT"),
    }
)
# Report whether the client is operating in read-only mode
print(f"Reddit Read-Only: {reddit.read_only}")


Reddit Read-Only: True


In [3]:
# Subreddit to search and the search terms
query = "Kamala Harris Trump election"
politics_sub = reddit.subreddit("politics")

# Date window used to filter results.
# These are naive datetimes, so .timestamp() interprets them in the
# machine's local timezone when converting to epoch seconds.
start_date, end_date = datetime(2024, 11, 1), datetime(2024, 11, 10)
start_timestamp, end_timestamp = (
    boundary.timestamp() for boundary in (start_date, end_date)
)


In [4]:
# Print the top-ranked search hits that fall inside the date window
for post in politics_sub.search(query, sort="top", time_filter="month", limit=10):
    # Skip anything created outside [start_timestamp, end_timestamp]
    if not (start_timestamp <= post.created_utc <= end_timestamp):
        continue
    print(
        f"Title: {post.title}",
        f"Created at: {datetime.fromtimestamp(post.created_utc)}",
        f"Score: {post.score}",
        f"URL: {post.url}\n",
        sep="\n",
    )


Title: Election Deniers Went Suddenly Quiet When Trump Won
Created at: 2024-11-07 18:54:08
Score: 33419
URL: https://www.thedailybeast.com/election-deniers-went-suspiciously-quiet-when-trump-won/

Title: Kamala Harris’ campaign didn’t ignore working class voters
Created at: 2024-11-08 16:56:43
Score: 9892
URL: https://www.msnbc.com/opinion/msnbc-opinion/biden-harris-working-class-vote-trump-election-rcna179186

Title: After Trump Took the Lead, Election Deniers Went Suddenly Silent • Trump supporters spent years fomenting concern about election integrity. On Tuesday, they set it all aside.
Created at: 2024-11-06 21:48:09
Score: 7739
URL: https://www.nytimes.com/2024/11/06/technology/trump-election-denial.html



In [None]:
# Run the same search again, keeping the in-window hits as records
# (one dict per post) so they can be written to CSV afterwards.
posts_data = [
    {
        'title': post.title,
        'created_at': datetime.fromtimestamp(post.created_utc),
        'score': post.score,
        'url': post.url,
    }
    for post in politics_sub.search(query, sort="top", time_filter="month", limit=10)
    if start_timestamp <= post.created_utc <= end_timestamp
]


In [None]:
# Write the filtered posts to CSV. The path is defined once so the
# confirmation message always names the file that was actually written.
output_path = 'data/reddit_posts_test.csv'

# to_csv does not create missing directories, so make sure data/ exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)

df = pd.DataFrame(posts_data)
df.to_csv(output_path, index=False)
print(f"Saved {len(posts_data)} posts to {output_path}")

Rotten Tomatoes Investigation

Movies: Nightbitch, Mufasa, Nosferatu, Lord of the Rings: War of the Rohirrim 

In [8]:
# Sentiment analyzer instance from the vaderSentiment package; collect_movie_data calls its polarity_scores() on each post
analyzer = SentimentIntensityAnalyzer()

# Function to collect and analyze Reddit data
def collect_movie_data(movie_name, time_filter="month", limit=100):
    """
    Collect Reddit posts that mention a movie and score each one for sentiment.

    Searches r/all for the exact phrase ``movie_name`` (the query is wrapped
    in double quotes), sorted by relevance, using the module-level ``reddit``
    client. Each post's title plus self-text is scored with the module-level
    ``analyzer``.

    Args:
        movie_name: Movie title to search for.
        time_filter: Forwarded to the subreddit search as ``time_filter``.
        limit: Forwarded to the subreddit search as ``limit``.

    Returns:
        A list with one dict per submission, with the keys ``title``,
        ``text``, ``upvotes``, ``comments``, ``created_utc``,
        ``sentiment_score`` (compound), ``positive_score``,
        ``negative_score``, ``neutral_score`` and ``url``.
        ``created_utc`` is a timezone-aware datetime in UTC.
    """
    query = f'"{movie_name}"'  # Quoted so the search matches the exact movie name
    print(f"Searching for posts about '{movie_name}'...")

    results = reddit.subreddit("all").search(
        query, sort="relevance", time_filter=time_filter, limit=limit
    )

    sentiment_data = []
    for submission in results:
        # Link posts have no self-text, so fall back to an empty string
        scores = analyzer.polarity_scores(
            submission.title + " " + (submission.selftext or "")
        )
        sentiment_data.append({
            "title": submission.title,
            "text": submission.selftext,
            "upvotes": submission.score,
            "comments": submission.num_comments,
            # Convert the epoch seconds explicitly to UTC. A bare
            # datetime.fromtimestamp() would return machine-local time under
            # a key named "created_utc", and utcfromtimestamp() is deprecated.
            "created_utc": datetime.fromtimestamp(
                submission.created_utc, tz=timezone.utc
            ),
            "sentiment_score": scores["compound"],
            "positive_score": scores["pos"],
            "negative_score": scores["neg"],
            "neutral_score": scores["neu"],
            "url": submission.url,
        })

    print(f"Collected {len(sentiment_data)} posts for '{movie_name}'.")
    return sentiment_data

In [13]:
# Example: collect posts for one movie
movie_name = "Nosferatu"
data = collect_movie_data(movie_name, time_filter="month", limit=200)

# Show a three-line summary for every collected post
for post in data:
    print(
        f"Title: {post['title']}",
        f"Sentiment: {post['sentiment_score']}, Upvotes: {post['upvotes']}, Comments: {post['comments']}",
        f"URL: {post['url']}\n",
        sep="\n",
    )

Searching for posts about 'Nosferatu'...


  "created_utc": datetime.utcfromtimestamp(submission.created_utc),


Collected 200 posts for 'Nosferatu'.
Title: Robert Eggers' 'Nosferatu' - Review Thread
Sentiment: 0.9796, Upvotes: 2828, Comments: 419
URL: https://www.reddit.com/r/movies/comments/1h56dn8/robert_eggers_nosferatu_review_thread/

Title: 'Nosferatu' first reactions
Sentiment: 0.0, Upvotes: 506, Comments: 147
URL: https://www.reddit.com/gallery/1gmaqip

Title: Nosferatu (2024) [No Spoilers]
Sentiment: 0.4073, Upvotes: 307, Comments: 483
URL: https://www.reddit.com/r/horror/comments/1gvgcit/nosferatu_2024_no_spoilers/

Title: ‘Nosferatu’ Rated “R” for Bloody Violence and “Graphic Nudity”
Sentiment: -0.7906, Upvotes: 5040, Comments: 455
URL: https://bloody-disgusting.com/movie/3840111/nosferatu-rated-r-for-bloody-violence-and-graphic-nudity/

Title: Nosferatu is one of the most profoundly frightening horror films in years. A magnificent Lily-Rose Depp is the convulsing, hysteric target of Bill Skarsgård’s vampire. (5/5)
Sentiment: -0.5487, Upvotes: 1021, Comments: 136
URL: https://www.indep

In [19]:
# Build a DataFrame from the collected post records
df = pd.DataFrame(data)

# Method 1: group by calendar week.
# freq='W' creates weekly bins anchored on Sunday and labels each bin with
# its END date, so the date printed below is the last day of that week.
weekly_sentiment = (
    df.groupby(pd.Grouper(key='created_utc', freq='W'))
    .agg({
        'sentiment_score': 'mean',
        'title': 'count',  # Number of posts in the week
    })
    .reset_index()
)

print("\nWeekly Sentiment Analysis:")
for week in weekly_sentiment.itertuples(index=False):
    print(f"Week ending {week.created_utc.strftime('%Y-%m-%d')}:")
    print(f"  Average Sentiment: {week.sentiment_score:.3f}")
    print(f"  Number of Posts: {week.title}")
    print()



Weekly Sentiment Analysis:
Week of 2024-11-10:
  Average Sentiment: -0.050
  Number of Posts: 17

Week of 2024-11-17:
  Average Sentiment: 0.070
  Number of Posts: 24

Week of 2024-11-24:
  Average Sentiment: -0.017
  Number of Posts: 47

Week of 2024-12-01:
  Average Sentiment: 0.109
  Number of Posts: 36

Week of 2024-12-08:
  Average Sentiment: 0.065
  Number of Posts: 76



In [20]:
def group_by_custom_period(df, days):
    """
    Aggregate post sentiment into fixed-length periods of ``days`` days.

    Every ``created_utc`` value is floored to a multiple of ``days`` days.
    The flooring counts those multiples from the Unix epoch (1970-01-01),
    not from the earliest post in ``df``, so period boundaries do not shift
    when the data changes.

    Args:
        df: DataFrame with a datetime ``created_utc`` column and the columns
            ``sentiment_score`` and ``title``. It is not modified.
        days: Length of each period in days; must be greater than zero.

    Returns:
        DataFrame with one row per non-empty period, in chronological order,
        with the columns ``period`` (period start as 'YYYY-MM-DD'),
        ``sentiment_score`` (mean over the period) and ``title`` (number of
        posts in the period).

    Raises:
        ValueError: If ``days`` is not positive.
    """
    if days <= 0:
        raise ValueError(f"days must be positive, got {days!r}")

    # Label each post with the start date of the period it falls in
    period_labels = (
        df['created_utc']
        .dt.floor(f'{days}D')
        .dt.strftime('%Y-%m-%d')
        .rename('period')
    )

    # Grouping by an aligned Series leaves the caller's frame untouched.
    # groupby returns its keys sorted, and ISO-formatted dates sort
    # chronologically as strings, so no extra sorting is needed.
    return (
        df.groupby(period_labels)
        .agg({
            'sentiment_score': 'mean',
            'title': 'count',
        })
        .reset_index()
    )

# Summarise sentiment over consecutive 3-day periods
custom_period_sentiment = group_by_custom_period(df, days=3)

print("\n3-Day Period Sentiment Analysis:")
for period_row in custom_period_sentiment.itertuples(index=False):
    print(f"Period starting {period_row.period}:")
    print(f"  Average Sentiment: {period_row.sentiment_score:.3f}")
    print(f"  Number of Posts: {period_row.title}")
    print()


3-Day Period Sentiment Analysis:
Period starting 2024-11-07:
  Average Sentiment: -0.050
  Number of Posts: 17

Period starting 2024-11-10:
  Average Sentiment: 0.069
  Number of Posts: 7

Period starting 2024-11-13:
  Average Sentiment: -0.063
  Number of Posts: 9

Period starting 2024-11-16:
  Average Sentiment: 0.166
  Number of Posts: 14

Period starting 2024-11-19:
  Average Sentiment: -0.042
  Number of Posts: 19

Period starting 2024-11-22:
  Average Sentiment: -0.025
  Number of Posts: 22

Period starting 2024-11-25:
  Average Sentiment: 0.135
  Number of Posts: 17

Period starting 2024-11-28:
  Average Sentiment: 0.090
  Number of Posts: 18

Period starting 2024-12-01:
  Average Sentiment: -0.030
  Number of Posts: 30

Period starting 2024-12-04:
  Average Sentiment: 0.242
  Number of Posts: 28

Period starting 2024-12-07:
  Average Sentiment: -0.051
  Number of Posts: 19

