In [1]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import functions as F

def preprocessing(lines):
    words = lines.select(explode(split(lines.value, "t_end")).alias("word"))
    words = words.na.replace('', None)
    words = words.na.drop()
    words = words.withColumn('word', F.regexp_replace('word', r'http\S+', ''))
    words = words.withColumn('word', F.regexp_replace('word', '@\w+', ''))
    words = words.withColumn('word', F.regexp_replace('word', '#', ''))
    words = words.withColumn('word', F.regexp_replace('word', 'RT', ''))
    words = words.withColumn('word', F.regexp_replace('word', ':', ''))
    return words

In [2]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("TwitterSentimentAnalysis").getOrCreate()
# read the tweet data from socket
lines = spark.readStream.format("socket").option("host", "0.0.0.0").option("port", 5555).load()
# Preprocess the data
words = preprocessing(lines)
#query = words.writeStream.queryName("all_tweets")\
#        .outputMode("append").format("parquet")\
#        .option("path", "s3a://686bucket/twitter_stream_out/")\
#        .option("checkpointLocation", "./check")\
#        .trigger(processingTime='60 seconds').start()
#query.awaitTermination()
#query = words.writeStream.format("console").start()
query = words \
    .writeStream \
    .outputMode("append") \
    .format("memory") \
    .queryName("all_tweets") \
    .trigger(processingTime='5 seconds').start()

In [None]:
from IPython.display import display, clear_output
from time import sleep

while True:
    clear_output(wait=True)
    display(query.status)
    display(spark.sql('SELECT * FROM all_tweets').show())
    sleep(5)

{'message': 'Waiting for next trigger',
 'isDataAvailable': False,
 'isTriggerActive': False}

+--------------------+
|                word|
+--------------------+
|  No surprise her...|
|   clouds that look |
|like snowy mountains|
|clouds sky dusk s...|
|  HOW TO SOUND LI...|
|1. make a simple ...|
|2. in between em-...|
|              3. ad…|
|The view to be fo...|
|"instead of going...|
|You can plant a t...|
|                    |
|  "Please picture me|
|       In the trees"|
|                    |
|Imagine it’s 2005...|
|Trees cover up a ...|
|  It’s my first t...|
|  The Gond tribe ...|
|This is what has ...|
+--------------------+
only showing top 20 rows



None