In [1]:
import threading

# Helper thread to avoid the Spark StreamingContext from blocking Jupyter
        
class StreamingThread(threading.Thread):
    def __init__(self, ssc):
        super().__init__()
        self.ssc = ssc
    def run(self):
        self.ssc.start()
        self.ssc.awaitTermination()
    def stop(self):
        print('----- Stopping... this may take a few seconds -----')
        self.ssc.stop(stopSparkContext=False, stopGraceFully=True)

In [2]:
sc

In [3]:
spark

In [4]:
from pyspark.streaming import StreamingContext
from pyspark.sql import Row
from pyspark.sql.functions import udf, struct, array, col, lit
from pyspark.sql.types import StringType

In [5]:
from pyspark.ml import PipelineModel
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

globals()['models_loaded'] = False
globals()['my_model'] = None

# Create evaluator
evaluatorMulti = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction")

def process(time, rdd):
    if rdd.isEmpty():
        return
    
    print("========= %s =========" % str(time))
    
    # Convert to data frame
    df = spark.read.json(rdd)
    df.show()
    
    # Load in the model if not yet loaded:
    if not globals()['models_loaded']:
        # load in your models here
        globals()['my_model'] =  PipelineModel.load("pipeline_model")
        globals()['models_loaded'] = True
        
    # And then predict using the loaded model: 
    df_result = globals()['my_model'].transform(df)
    
    # Evaluation 
    predictionAndTarget = df_result.select("label", "prediction")
    acc = evaluatorMulti.evaluate(predictionAndTarget, {evaluatorMulti.metricName: "accuracy"})
    f1 = evaluatorMulti.evaluate(predictionAndTarget, {evaluatorMulti.metricName: "f1"})
    f1_label_0 = evaluatorMulti.evaluate(predictionAndTarget, 
                                {evaluatorMulti.metricName: "fMeasureByLabel", evaluatorMulti.metricLabel:0.0})
    f1_label_1 = evaluatorMulti.evaluate(predictionAndTarget, 
                                {evaluatorMulti.metricName: "fMeasureByLabel", evaluatorMulti.metricLabel:1.0})
    f1_label_2 = evaluatorMulti.evaluate(predictionAndTarget, 
                                {evaluatorMulti.metricName: "fMeasureByLabel", evaluatorMulti.metricLabel:2.0})
    weightedPrecision = evaluatorMulti.evaluate(predictionAndTarget, {evaluatorMulti.metricName: "weightedPrecision"})
    weightedRecall = evaluatorMulti.evaluate(predictionAndTarget, {evaluatorMulti.metricName: "weightedRecall"})
    
    print(f"F1 Score for #loltyler1: {f1_label_0}")
    print(f"F1 Score for #easportsfifa: {f1_label_1}")
    print(f"F1 Score for #trainwreckstv: {f1_label_2}\n")
    print(f"Accuracy: {acc}")
    print(f"F1 Score: {f1}")
    print(f"Weighted Precision: {weightedPrecision}")
    print(f"Weighted Recall: {weightedRecall}")
    df_result.select("channel", "message", "label", "prediction").show()
    

In [6]:
ssc = StreamingContext(sc, 10)

In [7]:
lines = ssc.socketTextStream("localhost", 8080)
lines.foreachRDD(process)

In [9]:
ssc_t = StreamingThread(ssc)
ssc_t.start()

22/05/27 18:12:08 WARN StreamingContext: StreamingContext has already been started
22/05/27 18:12:09 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:09 WARN BlockManager: Block input-0-1653667929400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:10 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:10 WARN BlockManager: Block input-0-1653667930400 replicated to only 0 peer(s) instead of 1 peers
                                                                                



22/05/27 18:12:11 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:11 WARN BlockManager: Block input-0-1653667931400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:12 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:12 WARN BlockManager: Block input-0-1653667932400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:13 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:13 WARN BlockManager: Block input-0-1653667933400 replicated to only 0 peer(s) instead of 1 peers


+--------------+--------------------+--------------------+-----------------+
|       channel|            datetime|             message|         username|
+--------------+--------------------+--------------------+-----------------+
|#trainwreckstv|2022-05-27T16:12:...|https://twitter.c...|       streamlabs|
| #easportsfifa|2022-05-27T16:12:...|          Serie A <3|         kishod25|
|    #loltyler1|2022-05-27T16:12:...|               !opgg|         zorkio92|
| #easportsfifa|2022-05-27T16:12:...|argentineans dont...|        dezorisfc|
|    #loltyler1|2022-05-27T16:12:...|@zorkio92 https:/...|         fossabot|
|#trainwreckstv|2022-05-27T16:12:...|this game blows  ...|       drewzus520|
|    #loltyler1|2022-05-27T16:12:...|           PotFriend|     rand0mguy360|
|    #loltyler1|2022-05-27T16:12:...|           PartyTime|falloutpipboy3000|
|    #loltyler1|2022-05-27T16:12:...|I want to squeeze...|         gaoferry|
|    #loltyler1|2022-05-27T16:12:...|PotFriend PotFrie...|      tructrucker|

22/05/27 18:12:14 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:14 WARN BlockManager: Block input-0-1653667934400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:15 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:15 WARN BlockManager: Block input-0-1653667935400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:16 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:16 WARN BlockManager: Block input-0-1653667936400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:17 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:17 WARN BlockManager: Block input-0-1653667937400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:18 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:18 WARN BlockManager: Block input-0-1653667938400 replicated to

F1 Score for #loltyler1: 0.7272727272727274
F1 Score for #easportsfifa: 0.6666666666666666
F1 Score for #trainwreckstv: 0.4000000000000001

Accuracy: 0.6363636363636364
F1 Score: 0.6418732782369148
Weighted Precision: 0.707070707070707
Weighted Recall: 0.6363636363636364


22/05/27 18:12:20 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:20 WARN BlockManager: Block input-0-1653667940400 replicated to only 0 peer(s) instead of 1 peers


+--------------+--------------------+-----+----------+
|       channel|             message|label|prediction|
+--------------+--------------------+-----+----------+
|#trainwreckstv|https://twitter.c...|  2.0|       2.0|
| #easportsfifa|          Serie A <3|  1.0|       1.0|
|    #loltyler1|               !opgg|  0.0|       0.0|
| #easportsfifa|argentineans dont...|  1.0|       1.0|
|    #loltyler1|@zorkio92 https:/...|  0.0|       0.0|
|#trainwreckstv|this game blows  ...|  2.0|       1.0|
|    #loltyler1|           PotFriend|  0.0|       0.0|
|    #loltyler1|           PartyTime|  0.0|       1.0|
|    #loltyler1|I want to squeeze...|  0.0|       2.0|
|    #loltyler1|PotFriend PotFrie...|  0.0|       0.0|
|#trainwreckstv|      baow baow baow|  2.0|       1.0|
|    #loltyler1|@tarzanjesus1 Pot...|  0.0|       0.0|
|    #loltyler1|Hardstuck streame...|  0.0|       2.0|
|    #loltyler1|FeelsAmazingMan J...|  0.0|       1.0|
| #easportsfifa|servidores en Sur...|  1.0|       1.0|
| #easport

22/05/27 18:12:21 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:21 WARN BlockManager: Block input-0-1653667941400 replicated to only 0 peer(s) instead of 1 peers


F1 Score for #loltyler1: 0.7272727272727272
F1 Score for #easportsfifa: 0.9090909090909091
F1 Score for #trainwreckstv: 0.8

Accuracy: 0.8421052631578947
F1 Score: 0.8440191387559809
Weighted Precision: 0.8596491228070174
Weighted Recall: 0.8421052631578947
+--------------+--------------------+-----+----------+
|       channel|             message|label|prediction|
+--------------+--------------------+-----+----------+
|    #loltyler1|@pizzaeater434, A...|  0.0|       0.0|
| #easportsfifa|           GlitchLit|  1.0|       1.0|
|#trainwreckstv|something needs t...|  2.0|       2.0|
| #easportsfifa|@bothunter25 baib...|  1.0|       1.0|
|    #loltyler1|BatChest I LOOOVE...|  0.0|       0.0|
|#trainwreckstv|    @Big_DV OMEGALUL|  2.0|       0.0|
| #easportsfifa|        eafifaLetsGo|  1.0|       1.0|
| #easportsfifa|DALE CAMPEÓN! DAL...|  1.0|       1.0|
| #easportsfifa|       final argenta|  1.0|       1.0|
|#trainwreckstv|squadPains squadP...|  2.0|       2.0|
|    #loltyler1|      @gaof

22/05/27 18:12:22 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:22 WARN BlockManager: Block input-0-1653667942400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:23 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:23 WARN BlockManager: Block input-0-1653667943400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:23 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:23 WARN BlockManager: Block input-0-1653667943600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:24 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:24 WARN BlockManager: Block input-0-1653667944400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:24 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:24 WARN BlockManager: Block input-0-1653667944600 replicated to

+--------------+--------------------+--------------------+----------------+
|       channel|            datetime|             message|        username|
+--------------+--------------------+--------------------+----------------+
|    #loltyler1|2022-05-27T16:12:...|         ???????????|       nikla1997|
|    #loltyler1|2022-05-27T16:12:...|      howd that hit?|       goofyguyy|
| #easportsfifa|2022-05-27T16:12:...|           lets gooo|          trenk9|
|#trainwreckstv|2022-05-27T16:12:...|I still don’t und...|      sinofino86|
| #easportsfifa|2022-05-27T16:12:...|           rip tekkz|          nxifra|
| #easportsfifa|2022-05-27T16:12:...|  y chile? y los br?|        el_estii|
| #easportsfifa|2022-05-27T16:12:...|NEW TIMES NEW PLA...|          toze10|
| #easportsfifa|2022-05-27T16:12:...|     amigo que final|      joacosky10|
| #easportsfifa|2022-05-27T16:12:...|               hello|zextore_fgsswaps|
|    #loltyler1|2022-05-27T16:12:...|           Lollipop!|     tom_cat1950|
|    #loltyl

22/05/27 18:12:30 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:30 WARN BlockManager: Block input-0-1653667950600 replicated to only 0 peer(s) instead of 1 peers


F1 Score for #loltyler1: 0.7936507936507936
F1 Score for #easportsfifa: 0.7027027027027027
F1 Score for #trainwreckstv: 0.5

Accuracy: 0.7407407407407407
F1 Score: 0.7398986287875177
Weighted Precision: 0.7396476337448559
Weighted Recall: 0.7407407407407407
+--------------+--------------------+-----+----------+
|       channel|             message|label|prediction|
+--------------+--------------------+-----+----------+
|    #loltyler1|         ???????????|  0.0|       1.0|
|    #loltyler1|      howd that hit?|  0.0|       2.0|
| #easportsfifa|           lets gooo|  1.0|       2.0|
|#trainwreckstv|I still don’t und...|  2.0|       0.0|
| #easportsfifa|           rip tekkz|  1.0|       1.0|
| #easportsfifa|  y chile? y los br?|  1.0|       1.0|
| #easportsfifa|NEW TIMES NEW PLA...|  1.0|       0.0|
| #easportsfifa|     amigo que final|  1.0|       1.0|
| #easportsfifa|               hello|  1.0|       0.0|
|    #loltyler1|           Lollipop!|  0.0|       1.0|
|    #loltyler1|!challenger

22/05/27 18:12:31 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:31 WARN BlockManager: Block input-0-1653667951400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:31 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:31 WARN BlockManager: Block input-0-1653667951600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:32 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:32 WARN BlockManager: Block input-0-1653667952400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:32 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:32 WARN BlockManager: Block input-0-1653667952600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:33 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:33 WARN BlockManager: Block input-0-1653667953400 replicated to

+--------------+--------------------+--------------------+--------------------+
|       channel|            datetime|             message|            username|
+--------------+--------------------+--------------------+--------------------+
|    #loltyler1|2022-05-27T16:12:...|              SO BAD|              fleacr|
|    #loltyler1|2022-05-27T16:12:...|               ?????|            au_kappa|
|#trainwreckstv|2022-05-27T16:12:...|                yes?|           threestax|
|    #loltyler1|2022-05-27T16:12:...|                  ff|             lloneki|
| #easportsfifa|2022-05-27T16:12:...|🇦🇷🇦🇷🇦🇷🇦🇷🇦🇷|         dubsgab2045|
|    #loltyler1|2022-05-27T16:12:...|            OMEGALUL|           nl_jewfro|
|    #loltyler1|2022-05-27T16:12:...|      kr jhin monkaS|             foker01|
|    #loltyler1|2022-05-27T16:12:...|           flash 4th|           based_one|
| #easportsfifa|2022-05-27T16:12:...|                  hi|             dav1027|
|    #loltyler1|2022-05-27T16:12:...|             

22/05/27 18:12:40 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:40 WARN BlockManager: Block input-0-1653667960400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:40 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:40 WARN BlockManager: Block input-0-1653667960600 replicated to only 0 peer(s) instead of 1 peers


F1 Score for #loltyler1: 0.8749999999999999
F1 Score for #easportsfifa: 0.7407407407407406
F1 Score for #trainwreckstv: 0.28571428571428575

Accuracy: 0.7959183673469388
F1 Score: 0.7967552100205161
Weighted Precision: 0.8151360544217687
Weighted Recall: 0.7959183673469388
+--------------+--------------------+-----+----------+
|       channel|             message|label|prediction|
+--------------+--------------------+-----+----------+
|    #loltyler1|              SO BAD|  0.0|       0.0|
|    #loltyler1|               ?????|  0.0|       1.0|
|#trainwreckstv|                yes?|  2.0|       1.0|
|    #loltyler1|                  ff|  0.0|       0.0|
| #easportsfifa|🇦🇷🇦🇷🇦🇷🇦🇷🇦🇷|  1.0|       1.0|
|    #loltyler1|            OMEGALUL|  0.0|       0.0|
|    #loltyler1|      kr jhin monkaS|  0.0|       0.0|
|    #loltyler1|           flash 4th|  0.0|       0.0|
| #easportsfifa|                  hi|  1.0|       1.0|
|    #loltyler1|              KR RDY|  0.0|       0.0|
|    #loltyler1|     

22/05/27 18:12:41 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:41 WARN BlockManager: Block input-0-1653667961400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:41 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:41 WARN BlockManager: Block input-0-1653667961600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:42 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:42 WARN BlockManager: Block input-0-1653667962400 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:42 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:42 WARN BlockManager: Block input-0-1653667962600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:43 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:43 WARN BlockManager: Block input-0-1653667963600 replicated to

+--------------+--------------------+--------------------+-------------------+
|       channel|            datetime|             message|           username|
+--------------+--------------------+--------------------+-------------------+
|    #loltyler1|2022-05-27T16:12:...|               !rank|        jimboslic33|
| #easportsfifa|2022-05-27T16:12:...|LO DIJO BIEN CLAR...|            sebam75|
|    #loltyler1|2022-05-27T16:12:...|        time to roam|          falls2010|
|    #loltyler1|2022-05-27T16:12:...|  Shot like dogs LUL|            grimeto|
|#trainwreckstv|2022-05-27T16:12:...|What is the max w...|     thedarknighttt|
|    #loltyler1|2022-05-27T16:12:...|lmao what was tha...|        bunnyzqueen|
|    #loltyler1|2022-05-27T16:12:...|               sez u|pepperspraylaughing|
|    #loltyler1|2022-05-27T16:12:...|            GET GOOD|          offens1v3|
| #easportsfifa|2022-05-27T16:12:...|        eafifaLetsGo|           josetej1|
|    #loltyler1|2022-05-27T16:12:...|        JESUS C

22/05/27 18:12:50 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:50 WARN BlockManager: Block input-0-1653667970600 replicated to only 0 peer(s) instead of 1 peers


F1 Score for #loltyler1: 0.816326530612245
F1 Score for #easportsfifa: 0.6153846153846153
F1 Score for #trainwreckstv: 0.5333333333333333

Accuracy: 0.7111111111111111
F1 Score: 0.7106087563230421
Weighted Precision: 0.7266666666666667
Weighted Recall: 0.711111111111111
+--------------+--------------------+-----+----------+
|       channel|             message|label|prediction|
+--------------+--------------------+-----+----------+
|    #loltyler1|               !rank|  0.0|       0.0|
| #easportsfifa|LO DIJO BIEN CLAR...|  1.0|       2.0|
|    #loltyler1|        time to roam|  0.0|       0.0|
|    #loltyler1|  Shot like dogs LUL|  0.0|       0.0|
|#trainwreckstv|What is the max w...|  2.0|       1.0|
|    #loltyler1|lmao what was tha...|  0.0|       0.0|
|    #loltyler1|               sez u|  0.0|       0.0|
|    #loltyler1|            GET GOOD|  0.0|       1.0|
| #easportsfifa|        eafifaLetsGo|  1.0|       1.0|
|    #loltyler1|        JESUS CHRIST|  0.0|       0.0|
|    #loltyler

22/05/27 18:12:51 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:51 WARN BlockManager: Block input-0-1653667971600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:52 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:52 WARN BlockManager: Block input-0-1653667972600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:53 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:53 WARN BlockManager: Block input-0-1653667973600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:54 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:54 WARN BlockManager: Block input-0-1653667974600 replicated to only 0 peer(s) instead of 1 peers
22/05/27 18:12:55 WARN RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
22/05/27 18:12:55 WARN BlockManager: Block input-0-1653667975600 replicated to

In [11]:
ssc_t.stop()

----- Stopping... this may take a few seconds -----


22/05/27 18:14:08 WARN StreamingContext: StreamingContext has already been stopped
