# LISTEN FOR TOPIC ML MODEL 2 , AND PUT INTO CASANDRA 

In [16]:
from pyspark.sql import SparkSession

# Spark session & context
spark = (SparkSession
         .builder
         .master('local')
         .appName('json-changes-event-consumer')
         # Add kafka package
         .config("spark.jars.packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.1.2,mysql:mysql-connector-java:8.0.11")
         .getOrCreate())

sc = spark.sparkContext

In [17]:
# Create stream dataframe setting kafka server, topic and offset option
dfm = (spark
  .readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "185.185.126.143:9092") # kafka server
  .option("subscribe", "ML_Topic2") # topic name matching the producer 
  .option("startingOffsets", "latest") # start from beginning select  "latest" or earliest
  .load())

In [18]:
from pyspark.sql.types import StringType

# Convert binary to string key and value
dfm1 = (dfm
    .withColumn("key", dfm["key"].cast(StringType()))
    .withColumn("value", dfm["value"].cast(StringType())))

In [26]:
#Comma seprated is dangerous because text column has many commas. 
dfm2=dfm1\
      .selectExpr("split(value,',')[0] as probability" \
                  ,"split(value,',')[1] as result" \
                  ,"split(value,',')[2] as review_id" \
                  ,"split(value,',')[3] as text") 

from pyspark.sql.functions import *
from pyspark.sql.types import *

      # Create stream dataframe setting kafka server, topic and offset option

schema = StructType() \
        .add("probability", FloatType()) \
        .add("result",StringType()) \
        .add("review_id", StringType()) \
        .add("text", StringType()) 

In [28]:
dfm2.writeStream.outputMode("append") \
            .format("console") \
            .start()

21/07/31 09:33:11 WARN StreamingQueryManager: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-dfb2edef-a7c2-41fd-8212-527bc92630ae. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.


<pyspark.sql.streaming.StreamingQuery at 0x7f61a0eb0ca0>

-------------------------------------------
Batch: 0
-------------------------------------------
+-----------+------+---------+----+
|probability|result|review_id|text|
+-----------+------+---------+----+
+-----------+------+---------+----+



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
| "0.9357223|Reject|\"hlG91YhkxoXTOAc...|Been 10 years sin...|
+-----------+------+--------------------+--------------------+



[Stage 2:>                                                          (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|probability|  result|           review_id|                text|
+-----------+--------+--------------------+--------------------+
|"0.25759214|Accepted|\"xeDy8P98DQZ-GG3...|We used the hotel...|
+-----------+--------+--------------------+--------------------+



[Stage 3:>                                                          (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
|"0.55590594|Neutral|\"4rpNAsgSPpXSr08...|My first experien...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 4
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
|"0.38104686|Neutral|\"DK1khRMR3V64JkO...|Wow!! Just Wow! F...|
+-----------+-------+--------------------+--------------------+



# Alert assesment of scores>80% 

In [29]:
queryStreamMem4 = (dfm2
 .writeStream
 .format("memory")
 .queryName("scored") #name for this query
 .outputMode("update")
 .start())

21/07/31 09:33:55 WARN StreamingQueryManager: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-60e58142-d544-4b86-8a1e-a2edf7408a26. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.


In [30]:
from time import sleep
from IPython.display import clear_output

# Count rows every 5 seconds while stream is active
try:
    i=1
    # While stream is active, print count
    while len(spark.streams.active) > 0:
        
        # Clear output
        clear_output(wait=True)
        print("Run:{}".format(i))
        
        lst_queries = []
        for s in spark.streams.active:
            lst_queries.append(s.name)

        # Verify if wiki_changes_count query is active before count
        if "scored" in lst_queries:
            # Count number of events
             #spark.sql("select count(1) as Total_Posts from base").show()
            spark.sql("select * from scored where result='Reject'").show()
        else:
            print("'scored' query not found.")

        sleep(2)      #Report every 10 seconds 
        i=i+1
        
except KeyboardInterrupt:
    # Stop Query Stream
    queryStreamMem.stop()
    
    print("stream process interrupted")

Run:76
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
|"0.79686004|Reject|\"ICPWU8YKPL16rP5...|Illegal Pete's se...|
| "0.7108054|Reject|\"3kA6TqOHRUYN5GC...|My kinda Chinese ...|
|"0.79834443|Reject|\"Kd9NI0co5N9cdzG...|Oh boy where do I...|
| "0.8346453|Reject|\"bD2yeQeY7VUiGnL...|Haven't read any ...|
+-----------+------+--------------------+--------------------+



NameError: name 'queryStreamMem' is not defined

# Save all results data set TO Parque file every x minutes 

In [31]:
raw_path = "/home/jovyan/work/Data/detail"
checkpoint_path = "/home/jovyan/work/Data/cp"


queryStream =(
     dfm2.repartition(1).writeStream.partitionBy('result')  \
    .trigger(processingTime='60 seconds') \
    .format("parquet") \
    .queryName("base3") \
    .option("checkpointLocation", checkpoint_path)\
    .option("path", raw_path)\
    .outputMode("append") \
    .start())

-------------------------------------------
Batch: 21
-------------------------------------------
+-----------+--------+--------------------+---------------+
|probability|  result|           review_id|           text|
+-----------+--------+--------------------+---------------+
|"0.08196679|Accepted|\"kOtFIhRknMN1dh3...|Incredible food|
+-----------+--------+--------------------+---------------+



[Stage 115:>                (0 + 1) / 1][Stage 116:>                (0 + 0) / 1]                                                                                

-------------------------------------------
Batch: 22
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
|"0.50769204|Neutral|\"t-fbCqJcgYFhc-a...|I didn't have any...|
+-----------+-------+--------------------+--------------------+



[Stage 117:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 23
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|probability|  result|           review_id|                text|
+-----------+--------+--------------------+--------------------+
| "0.2765059|Accepted|\"_cgjf32-2Fci6vg...|Absolutely amazin...|
+-----------+--------+--------------------+--------------------+



                                                                                

-------------------------------------------
Batch: 24
-------------------------------------------
+-------------+--------+--------------------+--------------------+
|  probability|  result|           review_id|                text|
+-------------+--------+--------------------+--------------------+
|"0.0016444623|Accepted|\"4I_ozOq29kcVBPC...|Always amazing fo...|
+-------------+--------+--------------------+--------------------+



[Stage 123:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 25
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
|"0.75760573|Reject|\"Cnj64c2R20y3OC4...|I ordered the veg...|
+-----------+------+--------------------+--------------------+

-------------------------------------------
Batch: 26
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
|"0.81430745|Reject|\"6d48MQOGMAim4oc...|This was Ruth Chr...|
+-----------+------+--------------------+--------------------+



[Stage 127:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 27
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
|"0.75733125|Reject|\"hxWZDD6XNK-qzrl...|Just had 1 junior...|
+-----------+------+--------------------+--------------------+



[Stage 129:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 28
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
|"0.80250967|Reject|\"ZxT8BR7jo3O5iX7...|I have been a pat...|
+-----------+------+--------------------+--------------------+

-------------------------------------------
Batch: 29
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
| "0.4673072|Neutral|\"TRu2bJUMQrIkmi-...|This is probably ...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 30
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|pro

                                                                                

-------------------------------------------
Batch: 41
-------------------------------------------
+------------+--------+--------------------+--------------------+
| probability|  result|           review_id|                text|
+------------+--------+--------------------+--------------------+
|"0.007544458|Accepted|\"ICc2rQn6yJERf2X...|We were served by...|
+------------+--------+--------------------+--------------------+

-------------------------------------------
Batch: 42
-------------------------------------------
+------------+--------+--------------------+--------------------+
| probability|  result|           review_id|                text|
+------------+--------+--------------------+--------------------+
|"0.009181529|Accepted|\"3M8FJ8r_zQpV8AX...|Couldn't be more ...|
+------------+--------+--------------------+--------------------+

-------------------------------------------
Batch: 43
-------------------------------------------
+-----------+-------+--------------------+--

[Stage 168:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 44
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
| "0.4818336|Neutral|\"7tJucigzQKzp0EH...|Tightly packed wi...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 45
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
| "0.8743521|Reject|\"7yCtkfWIJN2jY0c...|Grilled ham and c...|
+-----------+------+--------------------+--------------------+

-------------------------------------------
Batch: 46
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|prob

[Stage 175:>                (0 + 1) / 1][Stage 176:>                (0 + 0) / 1]                                                                                

-------------------------------------------
Batch: 47
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|probability|  result|           review_id|                text|
+-----------+--------+--------------------+--------------------+
|"0.16897982|Accepted|\"lM4h9lqOibBr8tQ...|Great place to ea...|
+-----------+--------+--------------------+--------------------+

-------------------------------------------
Batch: 48
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
|"0.59923935|Neutral|\"ROgVOBBNZY0-0R0...|Sometimes they ca...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 49
-------------------------------------------
+-----------+-------+--------------------+-----------------

[Stage 194:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 55
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
|"0.72730446|Reject|\"psi4EvCpgrrG1ti...|I have always enj...|
+-----------+------+--------------------+--------------------+

-------------------------------------------
Batch: 56
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
| "0.8831718|Reject|\"24YP590WONuv9eC...|Located in the be...|
+-----------+------+--------------------+--------------------+

-------------------------------------------
Batch: 57
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probabili

[Stage 198:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 58
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
|"0.90087044|Reject|\"-EYvwzkPmvKPeQe...|Staffs are helpfu...|
+-----------+------+--------------------+--------------------+





-------------------------------------------
Batch: 59
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|probability|  result|           review_id|                text|
+-----------+--------+--------------------+--------------------+
|"0.23004344|Accepted|\"8jz0PemjkcfrY13...|Since I moved to ...|
+-----------+--------+--------------------+--------------------+

-------------------------------------------
Batch: 60
-------------------------------------------
+------------+--------+--------------------+--------------------+
| probability|  result|           review_id|                text|
+------------+--------+--------------------+--------------------+
|"0.025977343|Accepted|\"G9xPreJj-JV3jFN...|I love this resta...|
+------------+--------+--------------------+--------------------+

-------------------------------------------
Batch: 61
-------------------------------------------
+-----------+-------+--------------------+-------

[Stage 208:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 62
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|probability|  result|           review_id|                text|
+-----------+--------+--------------------+--------------------+
|"0.24846756|Accepted|\"vQy9poboFxj3skR...|El Gaucho is an u...|
+-----------+--------+--------------------+--------------------+

-------------------------------------------
Batch: 63
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|probability|  result|           review_id|                text|
+-----------+--------+--------------------+--------------------+
|  "0.137303|Accepted|\"bOM1brhLzx378WQ...|Definitely not th...|
+-----------+--------+--------------------+--------------------+



[Stage 213:>                                                        (0 + 1) / 1]                                                                                

-------------------------------------------
Batch: 64
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
| "0.9703341|Reject|\"LKmDNYg-uxOX4-L...|Jessi and her tea...|
+-----------+------+--------------------+--------------------+

-------------------------------------------
Batch: 65
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
|"0.33575505|Neutral|\"30H7dyMcw3kyqDe...|My favorite Mexic...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 66
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|prob

[Stage 245:>                (0 + 1) / 1][Stage 246:>                (0 + 0) / 1]                                                                                

-------------------------------------------
Batch: 77
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
| "0.6052824|Neutral|\"RyCheLcdHk9ZDEl...|I had a great tim...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 78
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
|"0.51152563|Neutral|\"OLbfUmfwSPzRLBP...|This is my favori...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 79
-------------------------------------------
+-----------+------+--------------------+--------------------+
|

[Stage 251:>                (0 + 1) / 1][Stage 252:>                (0 + 0) / 1]                                                                                

-------------------------------------------
Batch: 80
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
| "0.5904136|Neutral|\"9nn57hvSrRK-4lT...|Best Thai in midt...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 81
-------------------------------------------
+-----------+------+--------------------+--------------------+
|probability|result|           review_id|                text|
+-----------+------+--------------------+--------------------+
| "0.9031127|Reject|\"snU4XtuvHyAv4pD...|Excellent food. I...|
+-----------+------+--------------------+--------------------+

-------------------------------------------
Batch: 82
-------------------------------------------
+-----------+------+--------------------+--------------------+
|proba

[Stage 259:>                (0 + 1) / 1][Stage 260:>                (0 + 0) / 1]                                                                                

-------------------------------------------
Batch: 83
-------------------------------------------
+-----------+--------+--------------------+--------------------+
|probability|  result|           review_id|                text|
+-----------+--------+--------------------+--------------------+
|"0.08058247|Accepted|\"P-eERE80M-PRvsd...|Very good environ...|
+-----------+--------+--------------------+--------------------+

-------------------------------------------
Batch: 84
-------------------------------------------
+-----------+-------+--------------------+--------------------+
|probability| result|           review_id|                text|
+-----------+-------+--------------------+--------------------+
|"0.43452233|Neutral|\"1MaNxv3Kfxlui2a...|I had high hopes ...|
+-----------+-------+--------------------+--------------------+

-------------------------------------------
Batch: 85
-------------------------------------------
+-----------+------+--------------------+------------------