<a href="https://colab.research.google.com/github/MrAdithya21/Real-Time-Reddit-Sentiment-Analysis/blob/main/extract_reddit_comments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import praw
import json
import os
from azure.storage.queue import QueueClient

# Stream new comments from a subreddit and forward them, in JSON batches, to an
# Azure Storage Queue. All credentials come from environment variables so that
# no secret is hard-coded in the notebook.
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")

AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
QUEUE_NAME = "reddit-comments"

# Placeholder stored in the "user" field when Reddit reports no author
# (PRAW sets `comment.author` to None for deleted accounts).
DELETED_AUTHOR = "[deleted]"

# Initialize the Reddit API client. PRAW itself reports missing required
# settings, so the Reddit values are passed through unchanged.
reddit = praw.Reddit(client_id=REDDIT_CLIENT_ID,
                     client_secret=REDDIT_CLIENT_SECRET,
                     user_agent=REDDIT_USER_AGENT)

# The queue client cannot be built without a connection string; fail with a
# readable message instead of an error raised from inside the SDK.
if not AZURE_STORAGE_CONNECTION_STRING:
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING environment variable is not set."
    )

# Azure Storage Queue connection
queue_client = QueueClient.from_connection_string(AZURE_STORAGE_CONNECTION_STRING, QUEUE_NAME)

# Streaming parameters
max_comments = 100  # Stop after this many comments have been collected
batch_size = 10  # Number of comments bundled into one queue message
buffer = []  # Comments collected since the last send
comment_count = 0  # Total comments collected so far

# NOTE(review): Azure queue messages have a documented size cap (64 KiB);
# a batch of very long comments could exceed it - confirm whether batches
# need to be split by encoded size rather than by count.

# Stream only comments posted after the stream starts (skip_existing=True).
subreddit = reddit.subreddit("technology")

try:
    for comment in subreddit.stream.comments(skip_existing=True):
        # Deleted accounts have no author object; reading `.name` on None
        # would raise AttributeError and abort the whole run.
        author = comment.author
        comment_data = {
            "text": comment.body,
            "timestamp": comment.created_utc,
            "user": author.name if author is not None else DELETED_AUTHOR,
        }
        buffer.append(comment_data)
        comment_count += 1

        # Send one queue message per `batch_size` comments
        if len(buffer) >= batch_size:
            queue_client.send_message(json.dumps(buffer))
            print(f"Sent {len(buffer)} comments to Azure Queue.")
            buffer = []  # Start a fresh batch after a successful send

        # Stop once `max_comments` comments have been collected
        if comment_count >= max_comments:
            print(f"Collected {max_comments} comments. Stopping now.")
            break

except Exception as e:
    # Top-level boundary of the script: report the failure and fall through
    # to the final flush below.
    print(f"Error: {e}")

finally:
    # Send whatever is still buffered. Running this in `finally` means a
    # manual interrupt of the stream (KeyboardInterrupt) does not silently
    # drop the partially filled batch.
    if buffer:
        queue_client.send_message(json.dumps(buffer))
        print(f"Sent remaining {len(buffer)} comments.")
