# Streaming Reddit Posts to Kafka (Azure Event Hubs) for the Pipeline

##### Config

In [14]:
import configparser

# Load configuration
# NOTE: the path is relative to the current working directory of the kernel.
CONFIG_PATH = '../config/config.ini'

config = configparser.RawConfigParser()
# read() silently skips files it cannot open and returns the list of files it
# actually parsed, so an empty result means the config file was not found.
# Fail here with a clear message instead of a confusing KeyError below.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Could not read configuration file: {CONFIG_PATH}")

# Azure Event Hub Kafka settings
KAFKA_BROKER = config['AZURE_EVENTHUB']['BROKER']
EVENT_HUB_NAME = config['AZURE_EVENTHUB']['EVENT_HUB_NAME']
KAFKA_SASL_USERNAME = config['AZURE_EVENTHUB']['SASL_USERNAME']
KAFKA_SASL_PASSWORD = config['AZURE_EVENTHUB']['SASL_PASSWORD']

# Reddit API Settings
REDDIT_CLIENT_ID = config['REDDIT_API']['CLIENT_ID']
REDDIT_CLIENT_SECRET = config['REDDIT_API']['CLIENT_SECRET']
REDDIT_USER_AGENT = config['REDDIT_API']['USER_AGENT']
REDDIT_USERNAME = config['REDDIT_API']['USERNAME']
REDDIT_PASSWORD = config['REDDIT_API']['PASSWORD']

print("Config loaded successfully!")

Config loaded successfully!


In [15]:
import praw
from kafka import KafkaProducer
import json
import time

In [16]:
# Reddit client (PRAW), built from the application credentials held in the
# REDDIT_* constants.
reddit = praw.Reddit(
    user_agent=REDDIT_USER_AGENT,
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    redirect_uri="http://localhost",
)

# Kafka producer: connects to KAFKA_BROKER using SASL/PLAIN over SSL.
producer = KafkaProducer(
    bootstrap_servers=KAFKA_BROKER,
    security_protocol="SASL_SSL",
    sasl_mechanism="PLAIN",
    sasl_plain_username=KAFKA_SASL_USERNAME,
    sasl_plain_password=KAFKA_SASL_PASSWORD,
    # Every message value is JSON-encoded and sent as UTF-8 bytes.
    value_serializer=lambda payload: json.dumps(payload).encode('utf-8'),
)

print("Kafka Producer configured successfully!")


Kafka Producer configured successfully!


In [17]:
def stream_reddit_to_kafka(subreddit_name, num_posts=20):
    """
    Stream submissions from a subreddit and publish each one to Kafka.

    Every submission is reduced to a small dict (id, title, author,
    created_utc, url, num_comments, score) and sent to the ``EVENT_HUB_NAME``
    topic through the module-level ``producer``. Buffered messages are flushed
    before the function exits, including when it is interrupted.

    :param subreddit_name: Name of the subreddit to stream from.
    :param num_posts: Maximum number of posts to fetch before returning.
        Pass ``None`` to keep streaming until interrupted; a value <= 0
        fetches nothing.
    """
    subreddit = reddit.subreddit(subreddit_name)
    print(f"Streaming posts from r/{subreddit_name}...")

    # Nothing to do: avoid opening the stream at all.
    if num_posts is not None and num_posts <= 0:
        return

    def report_failure(exc):
        print(f"Failed to send message to Kafka: {exc}")

    fetched = 0
    try:
        for submission in subreddit.stream.submissions():
            post_data = {
                "id": submission.id,
                "title": submission.title,
                # author is converted to its string form so it is JSON-serializable
                "author": str(submission.author),
                "created_utc": submission.created_utc,
                "url": submission.url,
                "num_comments": submission.num_comments,
                "score": submission.score
            }

            # Send to Kafka. send() is asynchronous: it only queues the record
            # and returns a future, so delivery errors are reported through the
            # errback rather than through this try/except.
            try:
                future = producer.send(EVENT_HUB_NAME, value=post_data)
                future.add_errback(report_failure)
                print(f"Sent to Kafka: {post_data['title']}")
            except Exception as e:
                report_failure(e)

            fetched += 1
            # Check the limit before sleeping or pulling the next item, since
            # the stream blocks while waiting for new submissions.
            if num_posts is not None and fetched >= num_posts:
                break

            time.sleep(0.5)  # Sleep for a short duration to simulate a streaming flow
    finally:
        # Push out anything still buffered in the producer, even when the loop
        # is stopped by KeyboardInterrupt or an unexpected error.
        producer.flush()


In [18]:
# Smoke test: stream r/fashion. Interrupting the kernel stops the run and
# prints a short notice; any other error is reported rather than re-raised.
try:
    stream_reddit_to_kafka("fashion", num_posts=20)
except Exception as err:
    print(f"Error: {err}")
except KeyboardInterrupt:
    print("Streaming stopped.")


Streaming posts from r/fashion...
Sent to Kafka: Did I Kill The Look ? 
Sent to Kafka: McDonald’s, bolsos emulan Egg McMuffins y papas fritas
Sent to Kafka: Inspiration from classic European fashion
Sent to Kafka: Mom wants to wear this outfit for the Christmas party. 
Sent to Kafka: Love the colour and lenght of this dress
Sent to Kafka: Birthday look
Sent to Kafka: Smart Casual outfit for company party
Sent to Kafka: Pink goes well with green.
Sent to Kafka: Too black? 
Sent to Kafka: Is this blazer too casual/noisey for an interview? 
Sent to Kafka: All black 🖤
Sent to Kafka: Are you a fan of dress sweaters? I found this one in an exclusive vintage store
Sent to Kafka: White jeans and navy sweater 
Sent to Kafka: Some cute selfies :3
Sent to Kafka: Fashion crimes 
Sent to Kafka: How would you style these?
Sent to Kafka: Winter Light: Soft Lines, a Moment of Warmth in the Shadows. 
Sent to Kafka: Never used a fanny pack before, thoughts?
Sent to Kafka: “Mob wife aesthetic” inspired w