Spark project

In [None]:
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from textblob import TextBlob

# Driver-side entry point to Spark, registered under this application name.
sc = SparkContext(
    appName="RedditStreamingSentimentAnalysis",
)

# Streaming layer on top of the SparkContext; incoming data is grouped
# into micro-batches of 10 seconds each.
ssc = StreamingContext(sc, batchDuration=10)

# Text stream read from a TCP socket on this machine, port 12345.
dstream = ssc.socketTextStream(hostname="localhost", port=12345)

# Define a function to perform sentiment analysis on each message
def analyze_sentiment(message):
    """Label a text message by the sign of its TextBlob polarity.

    Args:
        message: The text to analyse.

    Returns:
        A ``(sentiment, polarity, subjectivity)`` tuple, where
        ``sentiment`` is ``"positive"`` when polarity is above zero,
        ``"negative"`` when it is below zero, and ``"neutral"``
        otherwise. ``polarity`` and ``subjectivity`` are the raw scores
        reported by TextBlob.
    """
    scores = TextBlob(message).sentiment
    polarity = scores.polarity
    subjectivity = scores.subjectivity

    # Guard clauses: only the sign of the polarity decides the label.
    if polarity > 0:
        return ("positive", polarity, subjectivity)
    if polarity < 0:
        return ("negative", polarity, subjectivity)
    return ("neutral", polarity, subjectivity)

# Tag every incoming message with its (sentiment, polarity, subjectivity)
# tuple by mapping the analysis function over the stream.
tagged_dstream = dstream.map(analyze_sentiment)

# Print the first 10 tagged records of each batch.
tagged_dstream.pprint(num=10)

# Kick off the streaming computation defined above.
ssc.start()

# Block here until the streaming computation terminates.
ssc.awaitTermination()