# Reddit Streaming Producer

In [1]:
import os
import socket
import json
import time
import praw
from threading import Thread
import queue

In [2]:
# Runtime settings, each read from the environment with a fallback where one
# makes sense. CLIENT_ID and CLIENT_SECRET have no fallback and are None when
# the corresponding variable is unset.
HOST = os.environ.get('PRODUCER_HOST', '127.0.0.1')
PORT = int(os.environ.get('PRODUCER_PORT', '9998'))
CLIENT_ID = os.environ.get('CLIENT_ID')
CLIENT_SECRET = os.environ.get('SECRET_TOKEN')
USER_AGENT = os.environ.get('USER_AGENT', 'MyRedditApp/0.0.1')
SUBREDDITS = os.environ.get('SUBREDDITS', 'dataisbeautiful')

# Echo the effective settings; only report whether CLIENT_ID is present,
# never its value.
print("\n".join([
    "Configuration:",
    f"HOST: {HOST}, PORT: {PORT}",
    f"SUBREDDITS: {SUBREDDITS}",
    f"CLIENT_ID configured: {'Yes' if CLIENT_ID else 'No'}",
]))

Configuration:
HOST: 127.0.0.1, PORT: 9998
SUBREDDITS: dataisbeautiful
CLIENT_ID configured: Yes


In [3]:
# Module-level PRAW client, built from the credentials and user agent read
# from the environment; the stream functions below look it up by name.
reddit = praw.Reddit(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, user_agent=USER_AGENT)

In [None]:
def stream_comments(data_queue):
    """Queue a record for each comment streamed from the configured subreddits.

    Every comment yielded by the stream is turned into a plain dict with the
    keys ``type``, ``subreddit``, ``id``, ``text``, ``created_utc`` and
    ``author`` and put on ``data_queue``.

    Any exception raised while setting up or reading the stream is printed
    and ends the function; nothing is re-raised to the caller.
    """
    try:
        comment_feed = reddit.subreddit(SUBREDDITS).stream.comments()

        for item in comment_feed:
            record = {
                'type': 'comment',
                'subreddit': str(item.subreddit),
                'id': item.id,
                'text': item.body,
                'created_utc': item.created_utc,
            }
            # A falsy author is reported with a placeholder name.
            record['author'] = str(item.author) if item.author else '[deleted]'
            data_queue.put(record)
    except Exception as err:
        print(f"Comment stream error: {err}")

In [5]:
def stream_submissions(data_queue):
    """Queue a record for each submission streamed from the configured subreddits.

    Every submission yielded by the stream is turned into a plain dict with
    the keys ``type``, ``subreddit``, ``id``, ``text``, ``created_utc`` and
    ``author`` and put on ``data_queue``. ``text`` is the title, followed by
    a space and the self-text when the self-text is non-empty.

    Any exception raised while setting up or reading the stream is printed
    and ends the function; nothing is re-raised to the caller.
    """
    try:
        post_feed = reddit.subreddit(SUBREDDITS).stream.submissions()

        for post in post_feed:
            # Title first, then the body only when there is one.
            pieces = [post.title]
            if post.selftext:
                pieces.append(post.selftext)

            record = {
                'type': 'submission',
                'subreddit': str(post.subreddit),
                'id': post.id,
                'text': ' '.join(pieces),
                'created_utc': post.created_utc,
            }
            # A falsy author is reported with a placeholder name.
            record['author'] = str(post.author) if post.author else '[deleted]'
            data_queue.put(record)
    except Exception as err:
        print(f"Submission stream error: {err}")


In [6]:
def start_producer():
    """Serve streamed Reddit records to a single TCP consumer.

    Listens on ``HOST:PORT``, waits for one client to connect, then starts
    the comment and submission stream functions in daemon threads. Records
    they put on a shared queue are sent to the client as newline-delimited
    UTF-8 JSON, one record per line.

    The function returns when the client connection fails (any
    ``ConnectionError``: reset, aborted, broken pipe) or when a
    ``KeyboardInterrupt`` arrives, whether while waiting for the client or
    while streaming. Both sockets are closed on the way out.
    """
    # Create queue - no maxsize limit so the stream threads never block on put()
    data_queue = queue.Queue()

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_sock:
        server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server_sock.bind((HOST, PORT))
        server_sock.listen(1)
        print(f'Producer: Listening on {HOST}:{PORT}')

        # accept() blocks until a consumer connects; an interrupt here should
        # shut down as quietly as one received while streaming.
        try:
            conn, addr = server_sock.accept()
        except KeyboardInterrupt:
            print("Shutting down...")
            return
        print(f'Producer: Connected to {addr}')

        with conn:
            # Start both streams in separate threads for concurrent processing
            # (producer/consumer model with multiple streaming sources).
            # Daemon threads do not keep the process alive on exit.
            Thread(target=stream_comments, args=(data_queue,), daemon=True).start()
            Thread(target=stream_submissions, args=(data_queue,), daemon=True).start()
            print("Finished setting up threads, started streaming...")

            try:
                print("Producer: Ready to send data to consumer...")
                while True:
                    payload = data_queue.get()
                    print(f"Received: {payload['type']} from r/{payload['subreddit']}")
                    # The trailing '\n' is the message delimiter: without it
                    # the consumer cannot tell where one JSON document ends
                    # and the next begins, and may read malformed JSON.
                    json_str = json.dumps(payload) + '\n'
                    conn.sendall(json_str.encode('utf-8'))
                    print(f"Sent: {payload['type']} from r/{payload['subreddit']}")
                    # Brief pause between messages to pace the output.
                    time.sleep(0.1)

            except ConnectionError:
                # Base class of ConnectionResetError, BrokenPipeError and
                # ConnectionAbortedError, so every kind of dropped client
                # connection is handled the same way.
                print("Client disconnected")
            except KeyboardInterrupt:
                print("Shutting down...")

In [None]:
# Run the producer. This call blocks: first waiting for a consumer to connect,
# then streaming until the client disconnects or the cell is interrupted.
start_producer()

Producer: Listening on 127.0.0.1:9998
