In [1]:
!pip install neo4j

Collecting neo4j
  Downloading neo4j-4.4.1.tar.gz (89 kB)
[K     |████████████████████████████████| 89 kB 3.1 MB/s eta 0:00:011
Building wheels for collected packages: neo4j
  Building wheel for neo4j (setup.py) ... [?25ldone
[?25h  Created wheel for neo4j: filename=neo4j-4.4.1-py3-none-any.whl size=114759 sha256=3481cf45e797f450dd94034b25ce4fcae5b92a00bf983e94cc28078dfd56357a
  Stored in directory: /home/jovyan/.cache/pip/wheels/1a/38/4b/0876d24f853fdfe40b2440c8c03332ec2d7f1f88b2446dc694
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-4.4.1


In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType, DoubleType
from pyspark.sql.functions import from_json
from pyspark.sql.functions import from_csv
from pyspark.sql.functions import *
from pyspark.sql.window import Window
from neo4j import GraphDatabase

In [3]:
# Spark session & context
def init():
    spark = (SparkSession
         .builder
         .master('local')
         .appName('AndMalware-consumer')
         # Add kafka package  
         .config("spark.jars.packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.1.1")\
         .getOrCreate())
    return spark

In [4]:
sc = init()
# Create stream dataframe setting kafka server, topic and offset option
def getReadStream(spark):
    df = spark \
      .readStream \
      .format("kafka") \
      .option("kafka.bootstrap.servers", "kafka-server:9092") \
      .option("startingOffsets", "earliest") \
      .option("subscribe", "AndMalwer") \
      .load()
    
    #df.selectExpr("CAST(value AS STRING)", "timestamp")
    dff = (df
    .withColumn("key", df["key"].cast(StringType()))
    .withColumn("value", df["value"].cast(StringType())))
    return dff


In [5]:
def foreach_batch_function(df, epoch_id):
    # Transform and write batchDF
    df.write\
    .format("org.neo4j.spark.DataSource")\
    .mode("Append")\
    .option("url", "bolt://neo4j:7687")\
    .option("authentication.type", "basic")\
    .option("authentication.basic.username", "neo4j")\
    .option("authentication.basic.password", "neo")\
    .option("query", "CREATE (n:Malware {DIP: event.Destination_IP,maxFD: event.MaxFD,minFD: event.MinFD,sumFD: event.SumFD,meanFD: event.MeanFD,stddevFD: event.StddevFD,maxTFWD: event.MaxTFWD,minTFWD: event.MinTFWD,sumTFWD: event.SumTFWD,meanTFWD: event.MeanTFWD,stddevTFWD: event.StddevTFWD,maxTBWD: event.MaxTBWD,minTBWD: event.MinTBWD,sumTBWD: event.SumTBWD,meanTBWD: event.MeanTBWD,stddevTBWD: event.StddevTBWD,maxFB: event.MaxFB,minFB: event.MinFB,sumFB: event.SumFB,meanFB: event.MeanFB,stddevFB: event.StddevFB,maxFP: event.MaxFB,minFP: event.MinFP,sumFP: event.SumFP,meanFP: event.MeanFP,stddevFP: event.StddevFP})")\
    .save()
    pass

In [6]:
#Geting readsteram
df1 = getReadStream(sc)

#Parsing datta from value string.

df2 = df1.selectExpr("split(value,',')[0] as Source_IP" \
                 ,"split(value,',')[1] as Source_Port" \
                 ,"split(value,',')[2] as Destination_IP" \
                ,"split(value,',')[3] as Destination_Port" \
                ,"split(value,',')[4] as Timestamp" \
                ,"split(value,',')[5] as Flow_Duration" \
                ,"split(value,',')[6] as Total_Fwd_Packets" \
                ,"split(value,',')[7] as Total_Bwd_Packets" \
                ,"split(value,',')[8] as Total_Length_of_Fwd_Packets" \
                ,"split(value,',')[9] as Total_Length_of_Bwd_Packets" \
                ,"split(value,',')[10] as Flow_Bytess" \
                ,"split(value,',')[11] as Flow_Packetss" \
                    )


#Formating data.

df3 = df2.withColumn("Source_Port", df2["Source_Port"].cast(IntegerType()))\
        .withColumn("Destination_Port", df2["Destination_Port"].cast(IntegerType()))\
        .withColumn("Destination_IP", df2["Destination_IP"].alias("DIP"))\
        .withColumn("Timestamp", df2["Timestamp"].cast(TimestampType()))\
        .withColumn("Flow_Duration", df2["Flow_Duration"].cast(IntegerType()))\
        .withColumn("Total_Fwd_Packets", df2["Total_Fwd_Packets"].cast(IntegerType()))\
        .withColumn("Total_Bwd_Packets", df2["Total_Bwd_Packets"].cast(IntegerType()))\
        .withColumn("Total_Length_of_Fwd_Packets", df2["Total_Length_of_Fwd_Packets"].cast(IntegerType()))\
        .withColumn("Total_Length_of_Bwd_Packets", df2["Total_Length_of_Bwd_Packets"].cast(IntegerType()))\
        .withColumn("Flow_Bytess", df2["Flow_Bytess"].cast(IntegerType()))\
        .withColumn("Flow_Packetss", df2["Flow_Packetss"].cast(DoubleType()))

#Creating a window duration 10 minutes.

wind = window(df3.Timestamp,"10 minutes")

wdf = df3.groupBy(wind,col("Destination_IP")).agg(sum(col("Flow_Duration")).alias("SumFD")\
                                                         ,max(col("Flow_Duration")).alias("MaxFD")\
                                                         ,min(col("Flow_Duration")).alias("MinFD")\
                                                         ,mean(col("Flow_Duration")).alias("MeanFD")\
                                                         ,stddev(col("Flow_Duration")).alias("StddevFD")\
                                                         ,sum(col("Total_Fwd_Packets")).alias("SumTFWD")\
                                                         ,max(col("Total_Fwd_Packets")).alias("MaxTFWD")\
                                                         ,min(col("Total_Fwd_Packets")).alias("MinTFWD")\
                                                         ,mean(col("Total_Fwd_Packets")).alias("MeanTFWD")\
                                                         ,stddev(col("Total_Fwd_Packets")).alias("StddevTFWD")
                                                         ,sum(col("Total_Bwd_Packets")).alias("SumTBWD")\
                                                         ,max(col("Total_Bwd_Packets")).alias("MaxTBWD")\
                                                         ,min(col("Total_Bwd_Packets")).alias("MinTBWD")\
                                                         ,mean(col("Total_Bwd_Packets")).alias("MeanTBWD")\
                                                         ,stddev(col("Total_Bwd_Packets")).alias("StddevTBWD")
                                                         ,sum(col("Flow_Bytess")).alias("SumFB")\
                                                         ,max(col("Flow_Bytess")).alias("MaxFB")\
                                                         ,min(col("Flow_Bytess")).alias("MinFB")\
                                                         ,mean(col("Flow_Bytess")).alias("MeanFB")\
                                                         ,stddev(col("Flow_Bytess")).alias("StddevFB")
                                                         ,sum(col("Flow_Packetss")).alias("SumFP")\
                                                         ,max(col("Flow_Packetss")).alias("MaxFP")\
                                                         ,min(col("Flow_Packetss")).alias("MinFP")\
                                                         ,mean(col("Flow_Packetss")).alias("MeanFP")\
                                                         ,stddev(col("Flow_Packetss")).alias("StddevFP"))\
                                                        .where(col("Destination_IP") ==" 10.42.0.151")

#Write stream.
query = (wdf.writeStream\
        .foreachBatch(foreach_batch_function)\
        .outputMode('update')\
        .trigger(processingTime='3 seconds')\
        .start())

query.awaitTermination()


StreamingQueryException: An exception was raised by the Python Proxy. Return Message: Traceback (most recent call last):
  File "/usr/local/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 2442, in _call_proxy
    return_value = getattr(self.pool[obj_id], method)(*params)
  File "/usr/local/spark/python/pyspark/sql/utils.py", line 196, in call
    raise e
  File "/usr/local/spark/python/pyspark/sql/utils.py", line 193, in call
    self.func(DataFrame(jdf, self.sql_ctx), batch_id)
  File "<ipython-input-5-dda643ea7dcf>", line 3, in foreach_batch_function
    df.write\
  File "/usr/local/spark/python/pyspark/sql/readwriter.py", line 1107, in save
    self._jwrite.save()
  File "/usr/local/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
    return_value = get_return_value(
  File "/usr/local/spark/python/pyspark/sql/utils.py", line 111, in deco
    return f(*a, **kw)
  File "/usr/local/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 326, in get_return_value
    raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o4143.save.
: org.apache.spark.SparkException: Writing job aborted.
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:388)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:336)
	at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:218)
	at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:225)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:40)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:40)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.doExecute(V2CommandExec.scala:55)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
	at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:132)
	at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:131)
	at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:370)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:301)
	at jdk.internal.reflect.GeneratedMethodAccessor77.invoke(Unknown Source)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 861.0 failed 1 times, most recent failure: Lost task 0.0 in stage 861.0 (TID 86431) (bd3602febbde executor driver): org.neo4j.driver.exceptions.ServiceUnavailableException: Unable to connect to neo4j:7687, ensure the database is running and that there is a working network connection to it.
	at org.neo4j.driver.internal.util.Futures.blockingGet(Futures.java:143)
	at org.neo4j.driver.internal.InternalSession.beginTransaction(InternalSession.java:98)
	at org.neo4j.driver.internal.InternalSession.beginTransaction(InternalSession.java:92)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:55)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:93)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:93)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:93)
	at org.neo4j.spark.writer.BaseDataWriter.commit(BaseDataWriter.scala:124)
	at org.neo4j.spark.writer.Neo4jDataWriter.commit(Neo4jDataWriter.scala:9)
	at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:425)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1473)
	at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:452)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:360)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
	Suppressed: org.neo4j.driver.internal.util.ErrorUtil$InternalExceptionCause
		at org.neo4j.driver.internal.async.connection.ChannelConnectedListener.databaseUnavailableError(ChannelConnectedListener.java:76)
		at org.neo4j.driver.internal.async.connection.ChannelConnectedListener.operationComplete(ChannelConnectedListener.java:70)
		at org.neo4j.driver.internal.async.connection.ChannelConnectedListener.operationComplete(ChannelConnectedListener.java:37)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:578)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:571)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:550)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:491)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:616)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:609)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.fulfillConnectPromise(AbstractNioChannel.java:321)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:337)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:707)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:655)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:581)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
		at org.neo4j.driver.internal.shaded.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
		... 1 more
Caused by: org.neo4j.driver.internal.shaded.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: neo4j/172.20.0.6:7687
Caused by: java.net.ConnectException: Connection refused
	at java.base/sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
	at java.base/sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:777)
	at org.neo4j.driver.internal.shaded.io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:330)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:334)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:707)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:655)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:581)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
	at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
	at org.neo4j.driver.internal.shaded.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
	at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
	at java.base/java.lang.Thread.run(Thread.java:829)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2253)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2202)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2201)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2201)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2440)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2382)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2371)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:357)
	... 32 more
Caused by: org.neo4j.driver.exceptions.ServiceUnavailableException: Unable to connect to neo4j:7687, ensure the database is running and that there is a working network connection to it.
	at org.neo4j.driver.internal.util.Futures.blockingGet(Futures.java:143)
	at org.neo4j.driver.internal.InternalSession.beginTransaction(InternalSession.java:98)
	at org.neo4j.driver.internal.InternalSession.beginTransaction(InternalSession.java:92)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:55)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:93)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:93)
	at org.neo4j.spark.writer.BaseDataWriter.writeBatch(BaseDataWriter.scala:93)
	at org.neo4j.spark.writer.BaseDataWriter.commit(BaseDataWriter.scala:124)
	at org.neo4j.spark.writer.Neo4jDataWriter.commit(Neo4jDataWriter.scala:9)
	at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.$anonfun$run$1(WriteToDataSourceV2Exec.scala:425)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1473)
	at org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:452)
	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:360)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	... 1 more
	Suppressed: org.neo4j.driver.internal.util.ErrorUtil$InternalExceptionCause
		at org.neo4j.driver.internal.async.connection.ChannelConnectedListener.databaseUnavailableError(ChannelConnectedListener.java:76)
		at org.neo4j.driver.internal.async.connection.ChannelConnectedListener.operationComplete(ChannelConnectedListener.java:70)
		at org.neo4j.driver.internal.async.connection.ChannelConnectedListener.operationComplete(ChannelConnectedListener.java:37)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:578)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:571)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:550)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:491)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:616)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.setFailure0(DefaultPromise.java:609)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:117)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.fulfillConnectPromise(AbstractNioChannel.java:321)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:337)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:707)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:655)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:581)
		at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
		at org.neo4j.driver.internal.shaded.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
		at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
		... 1 more
Caused by: org.neo4j.driver.internal.shaded.io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: neo4j/172.20.0.6:7687
Caused by: java.net.ConnectException: Connection refused
	at java.base/sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
	at java.base/sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:777)
	at org.neo4j.driver.internal.shaded.io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:330)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:334)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:707)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:655)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:581)
	at org.neo4j.driver.internal.shaded.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
	at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
	at org.neo4j.driver.internal.shaded.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
	at org.neo4j.driver.internal.shaded.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
	at java.base/java.lang.Thread.run(Thread.java:829)


=== Streaming Query ===
Identifier: [id = 230adad4-e408-4040-a692-45bc16fb42bf, runId = 5f106e52-76c5-491e-9030-83ff07ed26d0]
Current Committed Offsets: {KafkaV2[Subscribe[AndMalwer]]: {"AndMalwer":{"0":1652}}}
Current Available Offsets: {KafkaV2[Subscribe[AndMalwer]]: {"AndMalwer":{"0":1654}}}

Current State: ACTIVE
Thread State: RUNNABLE

Logical Plan:
Filter (Destination_IP#88 =  10.42.0.151)
+- Aggregate [window#348, Destination_IP#88], [window#348 AS window#205, Destination_IP#88, sum(cast(Flow_Duration#114 as bigint)) AS SumFD#219L, max(Flow_Duration#114) AS MaxFD#221, min(Flow_Duration#114) AS MinFD#223, avg(cast(Flow_Duration#114 as bigint)) AS MeanFD#225, stddev_samp(cast(Flow_Duration#114 as double)) AS StddevFD#235, sum(cast(Total_Fwd_Packets#127 as bigint)) AS SumTFWD#237L, max(Total_Fwd_Packets#127) AS MaxTFWD#239, min(Total_Fwd_Packets#127) AS MinTFWD#241, avg(cast(Total_Fwd_Packets#127 as bigint)) AS MeanTFWD#243, stddev_samp(cast(Total_Fwd_Packets#127 as double)) AS StddevTFWD#253, sum(cast(Total_Bwd_Packets#140 as bigint)) AS SumTBWD#255L, max(Total_Bwd_Packets#140) AS MaxTBWD#257, min(Total_Bwd_Packets#140) AS MinTBWD#259, avg(cast(Total_Bwd_Packets#140 as bigint)) AS MeanTBWD#261, stddev_samp(cast(Total_Bwd_Packets#140 as double)) AS StddevTBWD#271, sum(cast(Flow_Bytess#179 as bigint)) AS SumFB#273L, max(Flow_Bytess#179) AS MaxFB#275, min(Flow_Bytess#179) AS MinFB#277, avg(cast(Flow_Bytess#179 as bigint)) AS MeanFB#279, stddev_samp(cast(Flow_Bytess#179 as double)) AS StddevFB#289, sum(Flow_Packetss#192) AS SumFP#291, max(Flow_Packetss#192) AS MaxFP#293, ... 3 more fields]
   +- Filter isnotnull(Timestamp#101)
      +- Project [named_struct(start, precisetimestampconversion(((((CASE WHEN (cast(CEIL((cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) as double) = (cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) THEN (CEIL((cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) + cast(1 as bigint)) ELSE CEIL((cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) END + cast(0 as bigint)) - cast(1 as bigint)) * 600000000) + 0), LongType, TimestampType), end, precisetimestampconversion((((((CASE WHEN (cast(CEIL((cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) as double) = (cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) THEN (CEIL((cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) + cast(1 as bigint)) ELSE CEIL((cast((precisetimestampconversion(Timestamp#101, TimestampType, LongType) - 0) as double) / cast(600000000 as double))) END + cast(0 as bigint)) - cast(1 as bigint)) * 600000000) + 0) + 600000000), LongType, TimestampType)) AS window#348, Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, Flow_Duration#114, Total_Fwd_Packets#127, Total_Bwd_Packets#140, Total_Length_of_Fwd_Packets#153, Total_Length_of_Bwd_Packets#166, Flow_Bytess#179, Flow_Packetss#192]
         +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, Flow_Duration#114, Total_Fwd_Packets#127, Total_Bwd_Packets#140, Total_Length_of_Fwd_Packets#153, Total_Length_of_Bwd_Packets#166, Flow_Bytess#179, cast(Flow_Packetss#48 as double) AS Flow_Packetss#192]
            +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, Flow_Duration#114, Total_Fwd_Packets#127, Total_Bwd_Packets#140, Total_Length_of_Fwd_Packets#153, Total_Length_of_Bwd_Packets#166, cast(Flow_Bytess#47 as int) AS Flow_Bytess#179, Flow_Packetss#48]
               +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, Flow_Duration#114, Total_Fwd_Packets#127, Total_Bwd_Packets#140, Total_Length_of_Fwd_Packets#153, cast(Total_Length_of_Bwd_Packets#46 as int) AS Total_Length_of_Bwd_Packets#166, Flow_Bytess#47, Flow_Packetss#48]
                  +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, Flow_Duration#114, Total_Fwd_Packets#127, Total_Bwd_Packets#140, cast(Total_Length_of_Fwd_Packets#45 as int) AS Total_Length_of_Fwd_Packets#153, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                     +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, Flow_Duration#114, Total_Fwd_Packets#127, cast(Total_Bwd_Packets#44 as int) AS Total_Bwd_Packets#140, Total_Length_of_Fwd_Packets#45, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                        +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, Flow_Duration#114, cast(Total_Fwd_Packets#43 as int) AS Total_Fwd_Packets#127, Total_Bwd_Packets#44, Total_Length_of_Fwd_Packets#45, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                           +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, Timestamp#101, cast(Flow_Duration#42 as int) AS Flow_Duration#114, Total_Fwd_Packets#43, Total_Bwd_Packets#44, Total_Length_of_Fwd_Packets#45, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                              +- Project [Source_IP#37, Source_Port#61, Destination_IP#88, Destination_Port#74, cast(Timestamp#41 as timestamp) AS Timestamp#101, Flow_Duration#42, Total_Fwd_Packets#43, Total_Bwd_Packets#44, Total_Length_of_Fwd_Packets#45, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                                 +- Project [Source_IP#37, Source_Port#61, Destination_IP#39 AS Destination_IP#88, Destination_Port#74, Timestamp#41, Flow_Duration#42, Total_Fwd_Packets#43, Total_Bwd_Packets#44, Total_Length_of_Fwd_Packets#45, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                                    +- Project [Source_IP#37, Source_Port#61, Destination_IP#39, cast(Destination_Port#40 as int) AS Destination_Port#74, Timestamp#41, Flow_Duration#42, Total_Fwd_Packets#43, Total_Bwd_Packets#44, Total_Length_of_Fwd_Packets#45, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                                       +- Project [Source_IP#37, cast(Source_Port#38 as int) AS Source_Port#61, Destination_IP#39, Destination_Port#40, Timestamp#41, Flow_Duration#42, Total_Fwd_Packets#43, Total_Bwd_Packets#44, Total_Length_of_Fwd_Packets#45, Total_Length_of_Bwd_Packets#46, Flow_Bytess#47, Flow_Packetss#48]
                                          +- Project [split(value#29, ,, -1)[0] AS Source_IP#37, split(value#29, ,, -1)[1] AS Source_Port#38, split(value#29, ,, -1)[2] AS Destination_IP#39, split(value#29, ,, -1)[3] AS Destination_Port#40, split(value#29, ,, -1)[4] AS Timestamp#41, split(value#29, ,, -1)[5] AS Flow_Duration#42, split(value#29, ,, -1)[6] AS Total_Fwd_Packets#43, split(value#29, ,, -1)[7] AS Total_Bwd_Packets#44, split(value#29, ,, -1)[8] AS Total_Length_of_Fwd_Packets#45, split(value#29, ,, -1)[9] AS Total_Length_of_Bwd_Packets#46, split(value#29, ,, -1)[10] AS Flow_Bytess#47, split(value#29, ,, -1)[11] AS Flow_Packetss#48]
                                             +- Project [key#21, cast(value#8 as string) AS value#29, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]
                                                +- Project [cast(key#7 as string) AS key#21, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]
                                                   +- StreamingDataSourceV2Relation [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@d2890ea, KafkaV2[Subscribe[AndMalwer]]
