## KAFKA PRODUCER ##

In [1]:
import json
import time
from datetime import datetime
import praw
from confluent_kafka import Producer

# Kafka & Reddit Configuration
KAFKA_BOOTSTRAP_SERVER = "kafka:9092"
KAFKA_TOPIC = "reddit-bitcoin"

REDDIT_CLIENT_ID = "Cats3X0jdhZSVG8r9Wsokg"
REDDIT_SECRET = "W-zJOVDj4m1kPHip-MzBqxt0kmGdPA"
REDDIT_USER_AGENT = "Puzzleheaded_Oil288"

# Initialize Reddit API
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_SECRET,
    user_agent=REDDIT_USER_AGENT,
    ratelimit_seconds=600  # Ensuring API rate limit compliance
)

# Kafka Producer Configuration
producer_config = {
    "bootstrap.servers": KAFKA_BOOTSTRAP_SERVER,
    "acks": "all",  # Ensures message durability
    "retries": 5,  # Retry on failure
    "linger.ms": 500,  # Optimize batch sending
    "delivery.timeout.ms": 10000  # Retry before failing
}
producer = Producer(producer_config)

# Kafka Delivery Callback
def delivery_report(err, msg):
    """Callback to confirm Kafka message delivery."""
    if err:
        print(f"[ERROR] Kafka message delivery failed: {err}")
    else:
        print(f"[INFO] Message delivered to {msg.topic()} [{msg.partition()}]")

# Function to Stream Reddit Posts to Kafka
def stream_reddit_to_kafka(duration_minutes=2):
    start_time = time.time()
    print(f"[INFO] Start Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    total_posts_collected = 0

    while (time.time() - start_time) < (duration_minutes * 60):
        for subreddit_name in ["Bitcoin", "CryptoCurrency", "BitcoinMarkets"]:
            try:
                subreddit = reddit.subreddit(subreddit_name)
                print(f"[INFO] Fetching posts from {subreddit_name} at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}...")

                for submission in subreddit.stream.submissions():
                    # Exit if duration limit is reached
                    if (time.time() - start_time) >= (duration_minutes * 60):
                        print(f"[INFO] End Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
                        print("[INFO] Time limit reached. Stopping producer.")
                        print(f"[INFO] Total posts collected: {total_posts_collected}")
                        return

                    # Check for keywords and send to Kafka
                    if any(keyword in submission.title.lower() for keyword in ["btc", "bitcoin", "whale alert", "crypto", "bullish", "bearish"]):
                        data = {
                            "id": submission.id,
                            "title": submission.title,
                            "score": submission.score,
                            "num_comments": submission.num_comments,
                            "created_utc": submission.created_utc,
                            "url": submission.url,
                            "subreddit": subreddit_name,
                        }
                        producer.produce(KAFKA_TOPIC, json.dumps(data), callback=delivery_report)
                        producer.flush()
                        total_posts_collected += 1
                        print(f"[SENT] {submission.title[:50]}... | Score: {submission.score} at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
                    
                    time.sleep(0.5)  # Respect API limits
            except Exception as e:
                print(f"[ERROR] Exception occurred while processing {subreddit_name}: {e}")

    print(f"[INFO] Total posts collected: {total_posts_collected}")
    print(f"[INFO] End Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Start Streaming
stream_reddit_to_kafka(duration_minutes=2)

[INFO] Start Time: 2025-02-09 06:49:09
[INFO] Fetching posts from Bitcoin at 2025-02-09 06:49:09...
[INFO] Message delivered to reddit-bitcoin [0]
[SENT] How much conviction do you have for bitcoin?... | Score: 90 at 2025-02-09 06:49:13
[INFO] Message delivered to reddit-bitcoin [0]
[SENT] .01 BTC acquired!! lets go!... | Score: 861 at 2025-02-09 06:49:14
[INFO] Message delivered to reddit-bitcoin [0]
[SENT] El Salvador 🇸🇻 + bitcoin + golf 🏌️‍♂️ = MAXimum vi... | Score: 0 at 2025-02-09 06:49:14
[INFO] Message delivered to reddit-bitcoin [0]
[SENT] How porn links and Ben Bernanke snuck into Bitcoin... | Score: 0 at 2025-02-09 06:49:15
[INFO] Message delivered to reddit-bitcoin [0]
[SENT] Jeff Booth says use your Bitcoin... | Score: 8 at 2025-02-09 06:49:16
[INFO] Message delivered to reddit-bitcoin [0]
[SENT] Why does it feel so cringy when normies at work ta... | Score: 0 at 2025-02-09 06:49:16
[INFO] Message delivered to reddit-bitcoin [0]
[SENT] Bitcoin: The Only Asset That Makes Me 

## KAFKA CONSUMER ##

### KAFKA Consumer - view it in Docker Terminal ###
docker exec -it kafka kafka-console-consumer --bootstrap-server kafka:9092 --topic reddit-bitcoin --from-beginning