In [1]:
import threading

# Helper thread that keeps the Spark StreamingContext from blocking the Jupyter kernel
        
class StreamingThread(threading.Thread):
    """Thread wrapper that runs a Spark ``StreamingContext`` off the main thread.

    ``run`` starts the context and then waits on ``awaitTermination``, so
    calling ``start()`` on this thread lets the notebook cell return
    immediately while the stream keeps being processed.

    Attributes:
        ssc: The streaming context handed to the constructor.
    """

    def __init__(self, ssc):
        """Store the streaming context; nothing is started yet."""
        threading.Thread.__init__(self)
        self.ssc = ssc

    def run(self):
        """Thread body: start the stream and wait until it terminates."""
        streaming_context = self.ssc
        streaming_context.start()
        streaming_context.awaitTermination()

    def stop(self):
        """Announce the shutdown and stop the stream, keeping the SparkContext."""
        banner = '----- Stopping... this may take a few seconds -----'
        print(banner)
        # `stopGraceFully` (capital F) is the keyword name PySpark uses here.
        self.ssc.stop(stopSparkContext=False, stopGraceFully=True)

In [2]:
# Display the SparkContext handle. `sc` is not created anywhere in this
# notebook -- presumably it is injected by the PySpark kernel (TODO confirm).
sc

In [3]:
# Display the SparkSession handle used later by `process` (`spark.read.json`).
# Like `sc`, it is not created in this notebook -- presumably injected by the
# PySpark kernel (TODO confirm).
spark

In [6]:
import os

from pyspark.streaming import StreamingContext
from pyspark.sql import Row
from pyspark.sql.functions import udf, struct, array, col, lit
from pyspark.sql.types import StringType
from pyspark.ml import PipelineModel

In [19]:
# Load the fitted Spark ML pipeline used for scoring.
#
# The directory holding the saved models can be overridden with the
# SPARK_MODELS_DIR environment variable, so the notebook also runs on machines
# other than the author's. When the variable is unset, the original location
# is used.
MODEL_DIR = os.environ.get(
    "SPARK_MODELS_DIR",
    r"C:\Users\jef-w\Desktop\Uni\KUL\Year_1\Advanced_Analytics\Assignments\Assignment_3\spark\notebooks",
)
path1 = os.path.join(MODEL_DIR, "GBT_Model")  # model actually loaded below
path2 = os.path.join(MODEL_DIR, "MLP_Model")  # alternative model; not loaded in this cell
final_model = PipelineModel.load(path1)

In [20]:
# Module-level state read by `process`: the pipeline has already been loaded
# into `final_model`, so flag it as loaded and publish it under `my_model`.
models_loaded = True
my_model = final_model

def process(time, rdd):
    """Score one micro-batch of the stream and print the predictions.

    Written to be passed to ``DStream.foreachRDD``.

    Args:
        time: Batch timestamp; only used (through ``str``) in the printed header.
        rdd: Batch contents, parsed with ``spark.read.json``. Empty batches
            are skipped without printing anything.

    Returns:
        None. The raw batch and the selected prediction columns are printed
        with ``DataFrame.show``.

    Relies on the module-level names ``spark``, ``models_loaded`` and
    ``my_model`` (plus ``PipelineModel`` and ``path1`` when the model still
    has to be loaded).
    """
    # Nothing arrived during this batch interval.
    if rdd.isEmpty():
        return

    header = "========= %s =========" % str(time)
    print(header)

    # Parse the records into a DataFrame and echo the raw batch.
    df = spark.read.json(rdd)
    df.show()

    # Load the model on first use if an earlier cell has not done so already.
    state = globals()
    if not state['models_loaded']:
        state['my_model'] = PipelineModel.load(path1)
        state['models_loaded'] = True

    # Run the pipeline and keep only the columns worth displaying.
    scored = state['my_model'].transform(df)
    scored = scored.select('label', 'review_id', 'pred', 'prediction')
    scored.show(truncate=False)

In [21]:
# Create the streaming context on top of the existing SparkContext.
# The second argument is the batch interval in seconds (one micro-batch every 10 s).
ssc = StreamingContext(sc, 10)

In [22]:
# Open a text stream from the TCP socket at seppe.net:7778 and register
# `process` as the callback invoked for every micro-batch RDD.
lines = ssc.socketTextStream("seppe.net", 7778)
lines.foreachRDD(process)

In [23]:
# Run the streaming context in a background thread so this cell returns
# immediately; batch output is printed below as it arrives.
ssc_t = StreamingThread(ssc)
ssc_t.start()

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+--------------------+
|2166050|    1|136673824|Great puzzle desi...|
|1949030|    1|136673040|            БАВОВНА)|
|2156300|    1|136673106| Interesting so far.|
|1811990|    1|136674510|It's really cute,...|
|1811990|    1|136673642|Cool cartoonish d...|
+-------+-----+---------+--------------------+

+-----+---------+----------------------------------------+----------+
|label|review_id|pred                                    |prediction|
+-----+---------+----------------------------------------+----------+
|1.0  |136673824|[0.10520404640163447,0.8947959535983655]|1.0       |
|1.0  |136673040|[0.42292916299369804,0.577070837006302] |1.0       |
|1.0  |136673106|[0.13572538040476032,0.8642746195952395]|1.0       |
|1.0  |136674510|[0.07823147177598469,0.9217685282240152]|1.0       |
|1.0  |136673642|[0.21807198269903985,0.7819280173009602]|1.0       |
+-----+---------

+---------------+-----+---------+--------------------+
|         app_id|label|review_id|         review_text|
+---------------+-----+---------+--------------------+
|        1798020|    1|136673356|I'll jack in to y...|
|1798010,1798020|    1|136674910|Thank you capcom ...|
|1798010,1798020|    1|136674608|When I saw the ti...|
+---------------+-----+---------+--------------------+

+-----+---------+----------------------------------------+----------+
|label|review_id|pred                                    |prediction|
+-----+---------+----------------------------------------+----------+
|1.0  |136673356|[0.24761707077861345,0.7523829292213866]|1.0       |
|1.0  |136674910|[0.40003504257620914,0.5999649574237909]|1.0       |
|1.0  |136674608|[0.1658584251607454,0.8341415748392546] |1.0       |
+-----+---------+----------------------------------------+----------+

+-------+-----+---------+--------------------+
| app_id|label|review_id|         review_text|
+-------+-----+---------+----

In [24]:
# Stop the stream gracefully while keeping the SparkContext alive
# (StreamingThread.stop passes stopSparkContext=False, stopGraceFully=True).
# NOTE: a stopped StreamingContext cannot be restarted; to stream again,
# re-run the cells that create `ssc`, `lines` and `ssc_t`.
ssc_t.stop()

----- Stopping... this may take a few seconds -----
