<a id="import-libraries"></a>
###  Import the correct libraries

In [23]:
//import libraries
import org.apache.spark.{SparkConf, SparkContext, SparkFiles}
import org.apache.spark.sql.{SQLContext, SparkSession, Row}
import org.apache.spark.SparkFiles

import org.apache.spark.ml.feature.{StringIndexer, IndexToString, VectorIndexer, VectorAssembler}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.classification.{LogisticRegression, DecisionTreeClassifier}
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.ml.ibm.transformers.RenameColumn

import com.ibm.analytics.ngp.repository._
import com.ibm.analytics.ngp.ingest.Sampling
import com.ibm.analytics.ngp.util._
import com.ibm.analytics.ngp.pipeline.evaluate.{Evaluator,MLProblemType}

<a id="open-existing-db"></a>
###  Open the IBM Db2 Event store database

In [24]:
import com.ibm.event.oltp.EventContext
// Obtain the EventContext for the "KillrWeather" database; later cells use it to look up tables.
val eContext = EventContext.getEventContext("KillrWeather")

In [25]:
// Report the version string of the SparkContext this notebook is running on.
spark.sparkContext.version

2.0.2

<a id="validate-db"></a>
###  Validate that the tables have been created

In [26]:
// Look up every table this notebook expects in the database, one handle per table.
// NOTE(review): presumably getTable fails when a table is missing, which is what makes
// this cell a validation step — confirm against the EventContext API.
val raw_weather_data = eContext.getTable("raw_weather_data")
val sky_condition_lookup = eContext.getTable("sky_condition_lookup")
// Monthly aggregates.
val monthly_aggregate_precip = eContext.getTable("monthly_aggregate_precip")
val monthly_aggregate_windspeed = eContext.getTable("monthly_aggregate_windspeed")
val monthly_aggregate_pressure = eContext.getTable("monthly_aggregate_pressure")
val monthly_aggregate_temperature = eContext.getTable("monthly_aggregate_temperature")
// Daily aggregates.
val daily_aggregate_precip = eContext.getTable("daily_aggregate_precip")
val daily_aggregate_windspeed = eContext.getTable("daily_aggregate_windspeed")
val daily_aggregate_pressure = eContext.getTable("daily_aggregate_pressure")
val daily_aggregate_temperature = eContext.getTable("daily_aggregate_temperature")
val daily_predicted_temperature = eContext.getTable("daily_predicted_temperature")

<a id="create-sqlContext"></a>
### Create the IBM Db2 EventSession

In [27]:
import java.io.File
import com.ibm.event.oltp.EventContext
import org.apache.log4j.{Level, LogManager, Logger}
import org.apache.spark._
import org.apache.spark.sql.ibm.event.EventSession

// Create an EventSession for the "KillrWeather" database on the notebook's SparkContext.
// Later cells use it to load tables and to import its implicits.
val sqlContext = new EventSession(spark.sparkContext, "KillrWeather")

<a id="prepare-DataFrame"></a>
### Prepare a DataFrame for the query 
The following API call loads an IBM Db2 Event Store table into a DataFrame that can then be queried.

In [28]:
// Load the daily_aggregate_temperature table; the cells below inspect and train on it.
val dfDailyTemp = sqlContext.loadEventTable("daily_aggregate_temperature")

In [29]:
// Print the column names, types and nullability of dfDailyTemp.
dfDailyTemp.printSchema()

root
 |-- wsid: string (nullable = false)
 |-- year: integer (nullable = false)
 |-- month: integer (nullable = false)
 |-- day: integer (nullable = false)
 |-- ts: long (nullable = false)
 |-- high: double (nullable = false)
 |-- low: double (nullable = false)
 |-- mean: double (nullable = false)
 |-- variance: double (nullable = false)
 |-- stdev: double (nullable = false)



In [30]:
// Count the rows currently in dfDailyTemp.
dfDailyTemp.count()

343

In [31]:
// Display the first 5 rows of dfDailyTemp.
dfDailyTemp.show(5)

+------------+----+-----+---+-------------+----+---+------------------+------------------+------------------+
|        wsid|year|month|day|           ts|high|low|              mean|          variance|             stdev|
+------------+----+-----+---+-------------+----+---+------------------+------------------+------------------+
|725030:14732|2011|    9|  1|1317452400183| 3.9|0.0|2.1541666666666663|1.0841493055555556| 1.041224906326945|
|725030:14732|2011|    9|  2|1317538800183|12.2|0.0| 6.216666666666667|  6.12888888888889| 2.475659283683619|
|725030:14732|2011|    9|  3|1317625200895|10.0|0.0|            4.4375| 8.139010416666666|2.8528950938768616|
|725030:14732|2011|    9|  4|1317711600896| 7.8|0.0| 3.858333333333333| 4.009097222222221|2.0022730139074993|
|725030:14732|2011|    9|  5|1317798000659|11.1|0.0| 6.929166666666666|10.974565972222223|3.3127882474167016|
+------------+----+-----+---+-------------+----+---+------------------+------------------+------------------+
only showing top 5 rows

In [32]:
// Collect the distinct wsid values to the driver as a plain array (one value per station).
val weatherStations = dfDailyTemp.
    select("wsid").
    distinct.
    collect.
    map(row => row.get(0))

In [33]:
import sqlContext.implicits._
// One DataFrame per station id: the rows of dfDailyTemp whose wsid matches that id.
val weatherStationsArray =
    for (stationId <- weatherStations) yield dfDailyTemp.filter($"wsid" <=> stationId)

In [22]:
import org.apache.spark.sql.functions.round
import org.apache.spark.sql.functions.lag
import org.apache.spark.sql.functions.col 
import play.api.libs.json._
import scalaj.http.{Http, HttpOptions}
import com.ibm.spss.ml.classificationandregression.LinearRegression

// For every weather station: derive lag features from its daily mean temperature,
// fit a linear regression on them, and POST the resulting model (as PMML) to the
// online model endpoint.
System.out.println(weatherStationsArray.length)
for (weatherStation <- weatherStationsArray) {

    // Every row of this DataFrame carries the same wsid, so the first row identifies the station.
    val weatherStationID = weatherStation.first()(0)
    System.out.println(s"""Weather Station ID is ${weatherStationID}""")

    // Chronological window; partitioning by wsid guarantees lags never cross stations.
    val w = org.apache.spark.sql.expressions.Window.partitionBy("wsid").orderBy("year", "month", "day")
    // Features must come from this station's rows only, not the full table,
    // otherwise every station would be trained on the same mixed data.
    // day-N is the mean temperature N days earlier (null for the first N rows).
    val dfTrain = weatherStation.withColumn("day-1", lag(col("mean"), 1, null).over(w)).
        withColumn("day-2", lag(col("mean"), 2, null).over(w)).
        withColumn("day-3", lag(col("mean"), 3, null).over(w))

    dfTrain.select("mean", "day-1", "day-2", "day-3").show()

    // Round the lag features to one decimal place.
    val dfTrain2 = dfTrain.withColumn("day-1", round(col("day-1"), 1)).
        withColumn("day-2", round(col("day-2"), 1)).
        withColumn("day-3", round(col("day-3"), 1))

    // Drop the leading rows whose lag features are null.
    val dfTrain3 = dfTrain2.na.drop()

    dfTrain3.select("day-1", "day-2", "day-3").show()

    // Fixed seed so the 80/20 train/test split is reproducible.
    val splits = dfTrain3.randomSplit(Array(0.8, 0.20), seed = 24L)
    val training_data = splits(0)
    val test_data = splits(1)

    val linearRegression = LinearRegression().
        setInputFieldList(Array("day-1", "day-2", "day-3")).
        setTargetField("mean")

    val linearRegressionModel = linearRegression.fit(training_data)

    val predictions = linearRegressionModel.transform(test_data)
    predictions.select("prediction").show()

    // Double quotes are swapped for single quotes so the PMML can sit inside a JSON string.
    // NOTE(review): the payload is assembled by string interpolation, so other characters
    // (newlines, backslashes) are not escaped — confirm the endpoint tolerates this.
    val pmml = linearRegressionModel.toPMML().replace('\"', '\'')

    val online_path = "http://think-demo.lightbend.com/model"
    val modelString = s"""{"wsid":"${weatherStationID}","pmml":"${pmml.toString}"}"""
    System.out.println (modelString)

    // Publish the model; timeouts are in milliseconds (10 s connect, 50 s read).
    val response_online = Http(online_path).postData(modelString).header("Content-Type", "application/json").option(HttpOptions.connTimeout(10000)).option(HttpOptions.readTimeout(50000)).asString
    print (response_online)
}

1
Weather Station ID is 725030:14732
+------------------+------------------+------------------+------------------+
|              mean|             day-1|             day-2|             day-3|
+------------------+------------------+------------------+------------------+
|          -1.84375|              null|              null|              null|
|             2.925|          -1.84375|              null|              null|
| 4.887499999999999|             2.925|          -1.84375|              null|
| 9.508333333333333| 4.887499999999999|             2.925|          -1.84375|
|13.583333333333332| 9.508333333333333| 4.887499999999999|             2.925|
|12.112499999999999|13.583333333333332| 9.508333333333333| 4.887499999999999|
| 8.254166666666666|12.112499999999999|13.583333333333332| 9.508333333333333|
| 8.504166666666666| 8.254166666666666|12.112499999999999|13.583333333333332|
| 8.283333333333331| 8.504166666666666| 8.254166666666666|12.112499999999999|
| 5.195833333333334| 8.2833

<hr>
Copyright &copy; IBM Corp. 2018. Released as licensed Sample Materials.