In [1]:
// Sanity check: report the version of the Spark session the kernel provides.
// The block keeps the helper value out of the notebook's top-level scope.
{
  val version = spark.version
  println(s"Current spark version is $version")
}

Current spark version is 2.4.4


In [2]:
import org.apache.spark.sql.types.{StructType, StructField, IntegerType, LongType, StringType}

// Explicit column layout for the headerless CSV file. Every field is left
// nullable (the StructField default), exactly as StructType.add would do.
val dataSchema = StructType(Seq(
  StructField("target", IntegerType),
  StructField("id", LongType),
  StructField("raw_timestamp", StringType),
  StructField("query_status", StringType),
  StructField("author", StringType),
  StructField("tweet", StringType)
))

// Location of the raw tweets file inside the notebook container.
val dataPath = "/home/jovyan/data/training.1600000.processed.noemoticon.csv"

// Load the file with the schema above and keep only the tweet text column;
// the remaining columns are not used by the later cells shown here.
val raw_sentiment = spark.read
  .schema(dataSchema)
  .option("header", "false")
  .csv(dataPath)
  .select("tweet")

// count() triggers a full scan of the file.
println(s"Total tweets in file: ${raw_sentiment.count()}")


Total tweets in file: 1600000


dataSchema = StructType(StructField(target,IntegerType,true), StructField(id,LongType,true), StructField(raw_timestamp,StringType,true), StructField(query_status,StringType,true), StructField(author,StringType,true), StructField(tweet,StringType,true))
dataPath = /home/jovyan/data/training.1600000.processed.noemoticon.csv
raw_sentiment = [tweet: string]


[tweet: string]

In [4]:
import java.util.{Calendar, Timer, TimerTask}

// Simulates a live event stream: a background timer repeatedly appends a
// tiny random sample of `raw_sentiment` as JSON files to the events-stream
// directory.
val timer = new Timer()

val task = new TimerTask {
  import scala.util.control.NonFatal

  /**
   * Writes one small batch of sampled tweets to the events stream.
   *
   * Any non-fatal failure is logged and swallowed on purpose: an exception
   * escaping `run` would terminate the Timer thread and silently stop every
   * subsequent execution, ending the simulated stream.
   */
  override def run(): Unit =
    try {
      // With 1,600,000 input rows a fraction of 0.00001 yields roughly
      // 16 tweets per batch on average (sampling is with replacement).
      val data = raw_sentiment.sample(withReplacement = true, fraction = 0.00001)
      // coalesce(1) so each batch is written as a single JSON part file.
      data.coalesce(1).write.format("json").mode("append").save("/home/jovyan/data/events-stream")
      println(s"${Calendar.getInstance().toInstant} - saved some data to the events stream!")
    } catch {
      case NonFatal(e) =>
        println(s"${Calendar.getInstance().toInstant} - failed to save data to the events stream: $e")
    }
}

println("Streaming started!")

// First run after 1 s, then fixed-delay scheduling with a 1 s period.
// NOTE: the recorded output shows each run taking several seconds, i.e. longer
// than the period, so in practice executions happen back-to-back.
timer.schedule(task, 1000L, 1000L)

Streaming started!


timer = java.util.Timer@424664ce
task = $anon$1@1e8f429b


$anon$1@1e8f429b

2019-10-16T11:04:31.670Z - saved some data to the events stream!
2019-10-16T11:04:40.062Z - saved some data to the events stream!
2019-10-16T11:04:47.413Z - saved some data to the events stream!
2019-10-16T11:04:54.424Z - saved some data to the events stream!
2019-10-16T11:05:01.004Z - saved some data to the events stream!
2019-10-16T11:05:07.728Z - saved some data to the events stream!
2019-10-16T11:05:14.347Z - saved some data to the events stream!
2019-10-16T11:05:20.810Z - saved some data to the events stream!
2019-10-16T11:05:27.440Z - saved some data to the events stream!
2019-10-16T11:05:34.033Z - saved some data to the events stream!
2019-10-16T11:05:40.843Z - saved some data to the events stream!
2019-10-16T11:05:47.543Z - saved some data to the events stream!
2019-10-16T11:05:54.092Z - saved some data to the events stream!
2019-10-16T11:06:00.595Z - saved some data to the events stream!
2019-10-16T11:06:07.568Z - saved some data to the events stream!
2019-10-16T11:06:14.322Z 

2019-10-16T11:20:10.896Z - saved some data to the events stream!
2019-10-16T11:20:18.376Z - saved some data to the events stream!
2019-10-16T11:20:25.793Z - saved some data to the events stream!
2019-10-16T11:20:33.284Z - saved some data to the events stream!
2019-10-16T11:20:40.540Z - saved some data to the events stream!
2019-10-16T11:20:47.666Z - saved some data to the events stream!
2019-10-16T11:20:55.005Z - saved some data to the events stream!
2019-10-16T11:21:03.749Z - saved some data to the events stream!
2019-10-16T11:21:10.940Z - saved some data to the events stream!
2019-10-16T11:21:18.138Z - saved some data to the events stream!
2019-10-16T11:21:25.595Z - saved some data to the events stream!
2019-10-16T11:21:32.669Z - saved some data to the events stream!
2019-10-16T11:21:39.847Z - saved some data to the events stream!
2019-10-16T11:21:47.485Z - saved some data to the events stream!
2019-10-16T11:21:55.066Z - saved some data to the events stream!
2019-10-16T11:22:02.552Z 

2019-10-16T11:35:15.133Z - saved some data to the events stream!
2019-10-16T11:35:22.070Z - saved some data to the events stream!
2019-10-16T11:35:29.422Z - saved some data to the events stream!
2019-10-16T11:35:36.339Z - saved some data to the events stream!
2019-10-16T11:35:43.604Z - saved some data to the events stream!
2019-10-16T11:35:50.947Z - saved some data to the events stream!
2019-10-16T11:35:58.270Z - saved some data to the events stream!
2019-10-16T11:36:04.933Z - saved some data to the events stream!
2019-10-16T11:36:12.172Z - saved some data to the events stream!
2019-10-16T11:36:18.975Z - saved some data to the events stream!
2019-10-16T11:36:25.802Z - saved some data to the events stream!
2019-10-16T11:36:33.088Z - saved some data to the events stream!
2019-10-16T11:36:39.876Z - saved some data to the events stream!
2019-10-16T11:36:47.032Z - saved some data to the events stream!
2019-10-16T11:36:54.104Z - saved some data to the events stream!
2019-10-16T11:37:01.396Z 

2019-10-16T11:50:08.082Z - saved some data to the events stream!
2019-10-16T11:50:14.793Z - saved some data to the events stream!
2019-10-16T11:50:21.742Z - saved some data to the events stream!
2019-10-16T11:50:29.016Z - saved some data to the events stream!
2019-10-16T11:50:35.968Z - saved some data to the events stream!
2019-10-16T11:50:43.180Z - saved some data to the events stream!
2019-10-16T11:50:50.133Z - saved some data to the events stream!
2019-10-16T11:50:57.209Z - saved some data to the events stream!
2019-10-16T11:51:05.106Z - saved some data to the events stream!
2019-10-16T11:51:12.412Z - saved some data to the events stream!
2019-10-16T11:51:20.164Z - saved some data to the events stream!
2019-10-16T11:51:27.260Z - saved some data to the events stream!
2019-10-16T11:51:34.295Z - saved some data to the events stream!
2019-10-16T11:51:41.192Z - saved some data to the events stream!
2019-10-16T11:51:48.130Z - saved some data to the events stream!
2019-10-16T11:51:55.288Z 

2019-10-16T12:05:17.346Z - saved some data to the events stream!
2019-10-16T12:05:24.023Z - saved some data to the events stream!
2019-10-16T12:05:30.520Z - saved some data to the events stream!
2019-10-16T12:05:36.971Z - saved some data to the events stream!
2019-10-16T12:05:43.477Z - saved some data to the events stream!
2019-10-16T12:05:50.013Z - saved some data to the events stream!
2019-10-16T12:05:56.495Z - saved some data to the events stream!
2019-10-16T12:06:03.236Z - saved some data to the events stream!
2019-10-16T12:06:09.582Z - saved some data to the events stream!
2019-10-16T12:06:16.499Z - saved some data to the events stream!
2019-10-16T12:06:22.967Z - saved some data to the events stream!
2019-10-16T12:06:29.614Z - saved some data to the events stream!
2019-10-16T12:06:36.050Z - saved some data to the events stream!
2019-10-16T12:06:42.810Z - saved some data to the events stream!
2019-10-16T12:06:49.576Z - saved some data to the events stream!
2019-10-16T12:06:56.409Z 

2019-10-16T12:20:17.056Z - saved some data to the events stream!
2019-10-16T12:20:25.874Z - saved some data to the events stream!
2019-10-16T12:20:34.602Z - saved some data to the events stream!
2019-10-16T12:20:42.407Z - saved some data to the events stream!
2019-10-16T12:20:50.108Z - saved some data to the events stream!
2019-10-16T12:20:57.846Z - saved some data to the events stream!
2019-10-16T12:21:06.707Z - saved some data to the events stream!
2019-10-16T12:21:14.687Z - saved some data to the events stream!
2019-10-16T12:21:23.444Z - saved some data to the events stream!
2019-10-16T12:21:32.361Z - saved some data to the events stream!
2019-10-16T12:21:41.556Z - saved some data to the events stream!
2019-10-16T12:21:49.424Z - saved some data to the events stream!
2019-10-16T12:21:57.260Z - saved some data to the events stream!
2019-10-16T12:22:05.528Z - saved some data to the events stream!
2019-10-16T12:22:15.954Z - saved some data to the events stream!
2019-10-16T12:22:25.665Z 

In [5]:
// Stop the simulated stream. Cancelling only the task leaves the Timer's
// (non-daemon) background thread alive, so the timer itself is cancelled too.
// The Boolean returned by task.cancel() is kept as the cell's result.
val wasScheduled = task.cancel()
timer.cancel()
wasScheduled

true