<a id="open-existing-db"></a>
###  Import the correct libraries

In [1]:
//import libraries
import org.apache.spark.{SparkConf, SparkContext, SparkFiles}
import org.apache.spark.sql.{SQLContext, SparkSession, Row}
import org.apache.spark.SparkFiles

import org.apache.spark.ml.feature.{StringIndexer, IndexToString, VectorIndexer, VectorAssembler}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.classification.{LogisticRegression, DecisionTreeClassifier}
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.ml.ibm.transformers.RenameColumn

import com.ibm.analytics.ngp.repository._
import com.ibm.analytics.ngp.ingest.Sampling
import com.ibm.analytics.ngp.util._
import com.ibm.analytics.ngp.pipeline.evaluate.{Evaluator,MLProblemType}

<a id="open-existing-db"></a>
###  Open the IBM Db2 Event store database

In [2]:
import com.ibm.event.oltp.EventContext
val eContext = EventContext.getEventContext("KillrWeather")

In [3]:
spark.sparkContext.version

2.0.2

<a id="validate-db"></a>
###  Validate that the table have been created

In [4]:
val raw_weather_data = eContext.getTable("raw_weather_data")
val sky_condition_lookup = eContext.getTable("sky_condition_lookup")
val monthly_aggregate_precip = eContext.getTable("monthly_aggregate_precip")
val monthly_aggregate_windspeed = eContext.getTable("monthly_aggregate_windspeed")
val monthly_aggregate_pressure = eContext.getTable("monthly_aggregate_pressure")
val monthly_aggregate_temperature = eContext.getTable("monthly_aggregate_temperature")
val daily_aggregate_precip = eContext.getTable("daily_aggregate_precip")
val daily_aggregate_windspeed = eContext.getTable("daily_aggregate_windspeed")
val daily_aggregate_pressure = eContext.getTable("daily_aggregate_pressure")
val daily_aggregate_temperature = eContext.getTable("daily_aggregate_temperature")
val daily_predicted_temperature = eContext.getTable("daily_predicted_temperature")

<a id="create-sqlContext"></a>
### Create the IBM Db2 EventSession

In [5]:
import java.io.File
import com.ibm.event.oltp.EventContext
import org.apache.log4j.{Level, LogManager, Logger}
import org.apache.spark._
import org.apache.spark.sql.ibm.event.EventSession

val sqlContext = new EventSession(spark.sparkContext, "KillrWeather")

<a id="prepare-DataFrame"></a>
### Prepare a DataFrame for the query 
The following API provides a DataFrame that holds the query results on the IBM Db2 Event Store table. 

In [6]:
val dfDailyTemp = sqlContext.loadEventTable("daily_aggregate_temperature")

In [7]:
dfDailyTemp.printSchema()

root
 |-- wsid: string (nullable = false)
 |-- year: integer (nullable = false)
 |-- month: integer (nullable = false)
 |-- day: integer (nullable = false)
 |-- ts: long (nullable = false)
 |-- high: double (nullable = false)
 |-- low: double (nullable = false)
 |-- mean: double (nullable = false)
 |-- variance: double (nullable = false)
 |-- stdev: double (nullable = false)



In [8]:
dfDailyTemp.count()

290

In [9]:
dfDailyTemp.show(5)

+------------+----+-----+---+-------------+----+---+------------------+------------------+------------------+
|        wsid|year|month|day|           ts|high|low|              mean|          variance|             stdev|
+------------+----+-----+---+-------------+----+---+------------------+------------------+------------------+
|725030:14732|2017|    2|  1|1488355200333| 3.9|0.0|2.1541666666666663|1.0841493055555556| 1.041224906326945|
|725030:14732|2017|    2|  2|1488441600334|12.2|0.0| 6.216666666666667|  6.12888888888889| 2.475659283683619|
|725030:14732|2017|    2|  3|1488528000334|10.0|0.0|            4.4375| 8.139010416666666|2.8528950938768616|
|725030:14732|2017|    2|  4|1488614400335| 7.8|0.0| 3.858333333333333| 4.009097222222221|2.0022730139074993|
|725030:14732|2017|    2|  5|1488700800336|11.1|0.0| 6.929166666666666|10.974565972222223|3.3127882474167016|
+------------+----+-----+---+-------------+----+---+------------------+------------------+------------------+
only showi

In [10]:
import org.apache.spark.sql.functions.lag
import org.apache.spark.sql.functions.col 

val w = org.apache.spark.sql.expressions.Window.orderBy("year", "month", "day")  

val dfTrain = dfDailyTemp.withColumn("mean1", lag(col("mean"), 1, null).over(w)).
    withColumn("mean2", lag(col("mean"), 2, null).over(w)).
    withColumn("mean3", lag(col("mean"), 3, null).over(w))

In [11]:
dfTrain.select("mean", "mean1", "mean2", "mean3").show()

+-------------------+------------------+------------------+------------------+
|               mean|             mean1|             mean2|             mean3|
+-------------------+------------------+------------------+------------------+
|  5.345833333333333|              null|              null|              null|
| 1.5708333333333329| 5.345833333333333|              null|              null|
|             -6.875|1.5708333333333329| 5.345833333333333|              null|
| -3.287500000000001|            -6.875|1.5708333333333329| 5.345833333333333|
|              2.925|-3.287500000000001|            -6.875|1.5708333333333329|
|  4.887499999999999|             2.925|-3.287500000000001|            -6.875|
|  9.508333333333333| 4.887499999999999|             2.925|-3.287500000000001|
| 13.583333333333332| 9.508333333333333| 4.887499999999999|             2.925|
| 12.112499999999999|13.583333333333332| 9.508333333333333| 4.887499999999999|
|  8.254166666666666|12.112499999999999|13.583333333

In [12]:
import org.apache.spark.sql.functions.round

val dfTrain2 = dfTrain.withColumn("mean", round(col("mean"), 1)).
    withColumn("mean1", round(col("mean1"), 1)).
    withColumn("mean2", round(col("mean2"), 1)).
    withColumn("mean3", round(col("mean3"), 1))

In [13]:
val dfTrain3 = dfTrain2.na.drop()

In [14]:
dfTrain3.select("mean", "mean1", "mean2", "mean3").show()

+----+-----+-----+-----+
|mean|mean1|mean2|mean3|
+----+-----+-----+-----+
|-3.3| -6.9|  1.6|  5.3|
| 2.9| -3.3| -6.9|  1.6|
| 4.9|  2.9| -3.3| -6.9|
| 9.5|  4.9|  2.9| -3.3|
|13.6|  9.5|  4.9|  2.9|
|12.1| 13.6|  9.5|  4.9|
| 8.3| 12.1| 13.6|  9.5|
| 8.5|  8.3| 12.1| 13.6|
| 8.3|  8.5|  8.3| 12.1|
| 5.2|  8.3|  8.5|  8.3|
| 3.4|  5.2|  8.3|  8.5|
| 2.7|  3.4|  5.2|  8.3|
| 1.6|  2.7|  3.4|  5.2|
| 1.4|  1.6|  2.7|  3.4|
| 5.1|  1.4|  1.6|  2.7|
| 2.6|  5.1|  1.4|  1.6|
|-1.5|  2.6|  5.1|  1.4|
|-5.9| -1.5|  2.6|  5.1|
|-0.6| -5.9| -1.5|  2.6|
| 3.1| -0.6| -5.9| -1.5|
+----+-----+-----+-----+
only showing top 20 rows



In [15]:
val splits = dfTrain3.randomSplit(Array(0.8, 0.20), seed = 24L)
val training_data = splits(0)
val test_data = splits(1)

In [16]:
import com.ibm.spss.ml.classificationandregression.LinearRegression

val linearRegression = LinearRegression().
    setInputFieldList(Array("mean1", "mean2", "mean2")).
    setTargetField("mean")

val linearRegressionModel = linearRegression.fit(training_data) 

log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Retreiving an instance of org.apache.log4j.Logger.
log4j: Setting [org.apache.zookeeper] additivity to [true].
log4j: Level value for org.apache.zookeeper is  [off].
log4j: org.apache.zookeeper level set to OFF
log4j: Retreiving an instance of org.apache.log4j.Logger.
log4j: Setting [org.apache.spark] additivity to [true].
log4j: Level value for org.apache.spark is  [off].
log4j: org.apache.spark level set to OFF
log4j: Retreiving an instance of org.apache.log4j.Logger.
log4j: Setting [com.ibm.event] additivity to [true].
log4j: Level value for com.ibm.event is  [warn].
log4j: com.ibm.event level set to WARN
log4j: Retreiving an instance of org.apache.log4j.Logger.
log4j: Setting [com.ibm.event.rest] additivity to [true].
log4j: Level value for com.ibm.event.rest is  [debug].
log4j: com.ibm.event.rest level set to DEBUG
log4j: Level value for root is  [off].
log4j: root level set to OFF
log4j: Class name: [org.apache.log

In [17]:
val predictions = linearRegressionModel.transform(test_data)
predictions.select("mean", "prediction").show()

+----+--------------------+
|mean|          prediction|
+----+--------------------+
|-3.3|  -5.653836587694465|
| 9.5|   5.687148623653268|
| 1.4|   2.487573648313256|
| 5.1|  2.4724313872074934|
| 3.1|  2.1178894125917616|
| 1.2|   6.764123897435455|
|11.7|    7.49544742735073|
| 4.0|    5.28478920377357|
|-0.8|-0.35380177888684045|
| 0.3|   2.033875205803433|
| 2.1|   6.201294803722097|
| 6.2|   2.781569136626652|
| 4.0|   5.117098816560812|
| 8.7|  11.792955239895084|
| 7.6|   6.740866261676819|
|12.4|  13.747574092820033|
|11.9|    9.99893435664755|
|13.0|  11.726393614336985|
|15.1|  15.962792357838921|
|10.5|  10.338547696441415|
+----+--------------------+
only showing top 20 rows



### Save the Model in PMML

In [18]:
val pmml = linearRegressionModel.toPMML()
System.out.println(pmml.toString)

<?xml version="1.0" ?><PMML version="4.3" xmlns="http://www.dmg.org/PMML-4_3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_3 http://www.dmg.org/v4-3/pmml-4-3.xsd"><Header copyright="(c) Copyright IBM Corp. 2011, 2015" description="linear engine"><Application name="Analytic Framework" version="3.0"></Application><Timestamp>Sat Mar 3 20:35:13 2018</Timestamp></Header><DataDictionary numberOfFields="13"><DataField name="wsid" displayName="wsid" optype="categorical" dataType="string"><Value value="725030:14732" property="valid"></Value></DataField><DataField name="year" displayName="year" optype="continuous" dataType="integer"></DataField><DataField name="month" displayName="month" optype="continuous" dataType="integer"></DataField><DataField name="day" displayName="day" optype="continuous" dataType="integer"></DataField><DataField name="ts" displayName="ts" optype="continuous" dataType="integer"></DataField><DataField name="high" disp

### Deploy the Model to Lightbend

In [27]:
val service_path = "http://"
val online_path = service_path + "/v2/deployments"

val response_online = Http(online_path).postData(pmml.toString).header("Content-Type", "application/xml").option(HttpOptions.connTimeout(10000)).option(HttpOptions.readTimeout(50000)).asString

print (response_online)

<hr>
Copyright &copy; IBM Corp. 2017. Released as licensed Sample Materials.