# Scala Tech Talk:
##### 2017-08-22
<img src="http://www.scala-lang.org/files/archive/spec/2.11/public/images/scala-logo-red-spiral-dark.png">

---

---
## Install:

        > brew install scala
        > brew install hadoop
        > brew install apache-spark
        > brew install apache-zeppelin
        > pip3 install jupyter \
            && git clone https://github.com/jupyter-scala/jupyter-scala.git \
            && cd jupyter-scala \
            && ./jupyter-scala

---
## Motivation:

In [1]:
// Times the summation of the Longs 1..10^8 and returns the elapsed
// wall-clock time in seconds (as a Double).
def fun() = {
    val startNanos = System.nanoTime()
    val upperBound = Math.pow(10, 8).toLong
    // NOTE(review): the sum is discarded, and a numeric range may compute `sum`
    // in closed form instead of visiting all 10^8 elements — confirm before
    // reading the timing below as a real benchmark.
    (1L to upperBound).sum
    val elapsedNanos = System.nanoTime() - startNanos
    // Convert nanoseconds to seconds.
    elapsedNanos / Math.pow(10, 9)
}
// println takes one argument; the two values are adapted into a single tuple,
// which is why the recorded output has the shape "(<time>, seconds)".
println(fun(), " seconds")
println("Scala destroys Python.")

(0.001181502, seconds)
Scala destroys Python.


defined [32mfunction[39m [36mfun[39m

In [2]:
println("Horizontal scaling. (Apache Spark)")

Horizontal scaling. (Apache Spark)


---
## Intro:

In [3]:
import sys.process._

[32mimport [39m[36msys.process._[39m

In [4]:
"whoami"!

joel.bondurant


[36mres3[39m: [32mInt[39m = [32m0[39m

In [5]:
"pwd"!

/Users/joel.bondurant/work/scala_talk


[36mres4[39m: [32mInt[39m = [32m0[39m

In [6]:
"ls -lhS"!

total 423856
-rw-r--r--  1 joel.bondurant  staff   206M Aug 22 10:52 truven_1m_head.csv
-rw-r--r--  1 joel.bondurant  staff   401K Aug 22 12:18 derby.log
-rw-r--r--  1 joel.bondurant  staff    42K Aug 22 13:48 scala_talk.ipynb
-rw-r--r--  1 joel.bondurant  staff    41K Aug 22 11:24 scala_talk_backup.ipynb
-rw-r--r--  1 joel.bondurant  staff    31K Aug 22 13:38 python_demo.ipynb
-rw-r--r--  1 joel.bondurant  staff    18K Aug 21 17:37 truven_100_head.csv
-rw-r--r--  1 joel.bondurant  staff   2.5K Aug 22 13:38 HelloWorld$.class
-rw-r--r--  1 joel.bondurant  staff   791B Aug 22 13:38 HelloWorld$delayedInit$body.class
-rw-r--r--  1 joel.bondurant  staff   790B Aug 22 13:38 HelloWorld.class
drwxr-xr-x  9 joel.bondurant  staff   306B Aug 22 12:18 metastore_db
-rw-r--r--  1 joel.bondurant  staff   176B Aug 21 17:18 Dockerfile
-rwxr-xr-x  1 joel.bondurant  staff    89B Aug 21 17:09 build_java9.sh
drwxr-xr-x  2 joel.bondurant  staff    68B Aug 21 17:36 spark-warehouse
-rw-r--r--  1 joel.bonduran

[36mres5[39m: [32mInt[39m = [32m0[39m

---
## Hello World:

In [7]:
"cat HelloWorld.scala"!

object HelloWorld extends App {
	println("Hello World!")
}


[36mres6[39m: [32mInt[39m = [32m0[39m

In [8]:
"scalac HelloWorld.scala"!

[36mres7[39m: [32mInt[39m = [32m0[39m

In [9]:
"scala HelloWorld"!

Hello World!


[36mres8[39m: [32mInt[39m = [32m0[39m

In [10]:
println("HelloWorldFaster")

HelloWorldFaster


---
## Calculator:

In [11]:
8*9

[36mres10[39m: [32mInt[39m = [32m72[39m

In [12]:
val bi = "8888888888888888888888888888888888888888888888888888888888888888888888888"
val bd = "1.88888888888888888888888888888888888888888888888888888888888888888888888"

[36mbi[39m: [32mString[39m = [32m"8888888888888888888888888888888888888888888888888888888888888888888888888"[39m
[36mbd[39m: [32mString[39m = [32m"1.88888888888888888888888888888888888888888888888888888888888888888888888"[39m

In [13]:
BigInt(bi) + 1

[36mres12[39m: [32mBigInt[39m = 8888888888888888888888888888888888888888888888888888888888888888888888889

In [14]:
BigDecimal(bd) + 1

[36mres13[39m: [32mBigDecimal[39m = 2.88888888888888888888888888888888888888888888888888888888888888888888888

In [15]:
(new java.math.BigDecimal(bd)).add(new java.math.BigDecimal(1))

[36mres14[39m: [32mjava[39m.[32mmath[39m.[32mBigDecimal[39m = 2.88888888888888888888888888888888888888888888888888888888888888888888888

---
## Strings:

In [16]:
"[0-9]+".r.findAllIn("123 East Maple St Apt 444").foreach(println)

123
444


In [17]:
"[0-9]+".r.findFirstIn("ABC East Maple St Apt A").foreach(println)

In [18]:
val name = "Joel"
val someText = s"$name was here"
println(someText)

Joel was here


[36mname[39m: [32mString[39m = [32m"Joel"[39m
[36msomeText[39m: [32mString[39m = [32m"Joel was here"[39m

---
## Values & Variables:

In [19]:
var xvar = 4
val xval: Int = 5

[36mxvar[39m: [32mInt[39m = [32m4[39m
[36mxval[39m: [32mInt[39m = [32m5[39m

In [20]:
println(xvar)
xvar = 3
print(xvar)

4
3

In [20]:
yvar = 3

cmd20.sc:1: not found: value yvar
val res20 = yvar = 3
            ^

: 

---
## Type System:

<img src="http://docs.scala-lang.org/resources/images/tour/unified-types-diagram.svg" width="50%" height="50%">

---
## Collections:

#### scala.collection.immutable:
<img src="https://docs.scala-lang.org/resources/images/collections.immutable.png" width="40%" height="40%">

#### scala.collection.mutable:
<img src="https://docs.scala-lang.org/resources/images/collections.mutable.png" width="50%" height="50%">

In [21]:
val x1 = List(1,2,3)
val x2 = Vector(1,2,3)
val x3 = (1 to 3)
val x4 = (1,2,3)

[36mx1[39m: [32mList[39m[[32mInt[39m] = [33mList[39m([32m1[39m, [32m2[39m, [32m3[39m)
[36mx2[39m: [32mVector[39m[[32mInt[39m] = [33mVector[39m([32m1[39m, [32m2[39m, [32m3[39m)
[36mx3[39m: [32mRange[39m.[32mInclusive[39m = [33mRange[39m([32m1[39m, [32m2[39m, [32m3[39m)
[36mx4[39m: ([32mInt[39m, [32mInt[39m, [32mInt[39m) = ([32m1[39m, [32m2[39m, [32m3[39m)

In [22]:
println(x1.sum)
println(x2.sum)
println(x3.sum)

6
6
6


In [23]:
x4._3

[36mres22[39m: [32mInt[39m = [32m3[39m

In [24]:
val (cat, dog, cow) = x4

[36mcat[39m: [32mInt[39m = [32m1[39m
[36mdog[39m: [32mInt[39m = [32m2[39m
[36mcow[39m: [32mInt[39m = [32m3[39m

In [25]:
print(cow)

3

In [26]:
import scala.collection.mutable.ListBuffer
val lb = new ListBuffer[Int]()
println(lb)
lb ++= List(7,8,9)
println(lb)
lb += 10
println(lb)

ListBuffer()
ListBuffer(7, 8, 9)
ListBuffer(7, 8, 9, 10)


[32mimport [39m[36mscala.collection.mutable.ListBuffer
[39m
[36mlb[39m: [32mcollection[39m.[32mmutable[39m.[32mListBuffer[39m[[32mInt[39m] = [33mListBuffer[39m([32m7[39m, [32m8[39m, [32m9[39m, [32m10[39m)
[36mres25_3[39m: [32mcollection[39m.[32mmutable[39m.[32mListBuffer[39m[[32mInt[39m] = [33mListBuffer[39m([32m7[39m, [32m8[39m, [32m9[39m, [32m10[39m)
[36mres25_5[39m: [32mcollection[39m.[32mmutable[39m.[32mListBuffer[39m[[32mInt[39m] = [33mListBuffer[39m([32m7[39m, [32m8[39m, [32m9[39m, [32m10[39m)

In [27]:
lb.toVector

[36mres26[39m: [32mVector[39m[[32mInt[39m] = [33mVector[39m([32m7[39m, [32m8[39m, [32m9[39m, [32m10[39m)

In [28]:
val x5 = Array(7,8,9)
x5.map(q => 2*q)

[36mx5[39m: [32mArray[39m[[32mInt[39m] = [33mArray[39m([32m7[39m, [32m8[39m, [32m9[39m)
[36mres27_1[39m: [32mArray[39m[[32mInt[39m] = [33mArray[39m([32m14[39m, [32m16[39m, [32m18[39m)

---
## Flow Control:

In [29]:
if (1==1) {
    println("yep")
} else {
    println("nope")
}

yep


In [30]:
println(if (1==1) "yep" else "nope")

yep


In [31]:
for (i <- 0 to 6) {
    println(i)
}

0
1
2
3
4
5
6


In [32]:
for (i <- 0 until 6) {
    println(i)
}

0
1
2
3
4
5


In [33]:
(1 to 6).foreach(println)

1
2
3
4
5
6


In [34]:
import scala.util.Random;
// Random.nextInt(6) draws from 0 until 6 (upper bound exclusive), i.e. 0..5.
val randy = Random.nextInt(6)
println(randy)
// Pattern-match the drawn value to its English name. Only 0..5 can occur,
// so there is no `case 6`: it could never match.
println(randy match {
    case 0 => "zero"
    case 1 => "one"
    case 2 => "two"
    case 3 => "three"
    case 4 => "four"
    case 5 => "five"
})

0
zero


[32mimport [39m[36mscala.util.Random;
[39m
[36mrandy[39m: [32mInt[39m = [32m0[39m

---
## Functions:

In [35]:
// Returns six times the given integer.
def func1(x: Int): Int = 6 * x
println(func1(2))

12


defined [32mfunction[39m [36mfunc1[39m

In [36]:
val func2 = (x: Int) => 7*x
println(func2(2))

14


[36mfunc2[39m: [32mInt[39m => [32mInt[39m = <function1>

In [37]:
// Higher-order function: applies `f` to `x`, then multiplies the result by eight.
def func3(f: (Int => Int), x: Int): Int = {
    val applied = f(x)
    8 * applied
}
println(3*6*8)
println(func3(func1, 3))

144
144


defined [32mfunction[39m [36mfunc3[39m

In [38]:
// Side-effecting procedure: prints "hi" to standard output and returns Unit.
def func4(): Unit = println("hi")
println(func4().getClass)

hi
void


defined [32mfunction[39m [36mfunc4[39m

---
## OOP:

In [39]:
// A box described by three immutable integer dimensions, each exposed as a
// public field through the `val` constructor parameters.
class Box(val width: Int, val height: Int, val depth: Int) {
    // Volume as the product of the three dimensions.
    def volume(): Int = {
        val faceArea = width * height
        faceArea * depth
    }
}

defined [32mclass[39m [36mBox[39m

In [40]:
val abox = new Box(2,3,4)
println(abox.width)
println(abox.volume)

2
24


[36mabox[39m: [32mBox[39m = $sess.cmd38Wrapper$Helper$Box@19a527c3

In [41]:
case class Student(name: String, score: Int)

defined [32mclass[39m [36mStudent[39m

In [42]:
val stu = Student("Joel", 80)
println(stu.name, stu.score)

(Joel,80)


[36mstu[39m: [32mStudent[39m = [33mStudent[39m([32m"Joel"[39m, [32m80[39m)

---
## Apache Spark:
<img src="https://spark.apache.org/images/spark-logo-trademark.png">

---

In [43]:
import $exclude.`org.slf4j:slf4j-log4j12`, $ivy.`org.slf4j:slf4j-nop:1.7.21`
import $profile.`hadoop-2.8`
import $ivy.`org.apache.spark::spark-sql:2.1.1`
import $ivy.`org.apache.hadoop:hadoop-aws:2.8.1`
import $ivy.`org.jupyter-scala::spark:0.4.2`

import org.apache.spark._
import org.apache.spark.sql._
import jupyter.spark.session._

val spark = JupyterSparkSession.builder().jupyter().master("local").appName("notebook").getOrCreate()

log4j:WARN No appenders could be found for logger (io.netty.util.internal.logging.InternalLoggerFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.


[32mimport [39m[36m$exclude.$                        , $ivy.$                           
[39m
[32mimport [39m[36m$profile.$           
[39m
[32mimport [39m[36m$ivy.$                                  
[39m
[32mimport [39m[36m$ivy.$                                   
[39m
[32mimport [39m[36m$ivy.$                               

[39m
[32mimport [39m[36morg.apache.spark._
[39m
[32mimport [39m[36morg.apache.spark.sql._
[39m
[32mimport [39m[36mjupyter.spark.session._

[39m
[36mspark[39m: [32morg[39m.[32mapache[39m.[32mspark[39m.[32msql[39m.[32mSparkSession[39m = org.apache.spark.sql.SparkSession@14043e6e

In [44]:
val truven = spark.read.option("inferschema","true").option("header","true").csv("truven_100_head.csv")
truven.show(2)

+------+-------+------+-------+-------+----+---+-----+------+------+-------+---+---+--------+------+-------+-------+-------+-------+-------+-------+-------+-------+-------+--------+--------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+--------+--------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+--------+--------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+--------+--------+--------+-------+-------+-------+-------+--------+-----+------+--------+
|SEQNUM|VERSION|EFAMID|ENROLID|MEMDAYS|YEAR|AGE|DOBYR|AGEGRP|EMPREL|PHYFLAG| RX|SEX|HLTHPLAN|ENRMON|DATTYP1|DATTYP2|DATTYP3|DATTYP4|DATTYP5|DATTYP6|DATTYP7|DATTYP8|DATTYP9|DATTYP10|DATTYP11|DATTYP12|ENRIND1|ENRIND2|ENRIND3|ENRIND4|ENRIND5|ENRIND6|ENRIND7|ENRIND8|ENRIND9|ENRIND10|ENRIND11|ENRIND12|MEMDAY1|MEMDAY2|MEMDAY3|MEMDAY4|MEMDAY5|MEMDAY6|MEMDAY7|MEMDAY8|MEMDAY9|MEMDAY10|MEMDAY11|MEMDAY12|PLNTYP1|PLNTYP2|PLNTYP3|PLNTYP4|PLNTYP5|PLN

[36mtruven[39m: [32mDataFrame[39m = [SEQNUM: int, VERSION: int ... 69 more fields]

In [45]:
truven.printSchema

root
 |-- SEQNUM: integer (nullable = true)
 |-- VERSION: integer (nullable = true)
 |-- EFAMID: integer (nullable = true)
 |-- ENROLID: integer (nullable = true)
 |-- MEMDAYS: integer (nullable = true)
 |-- YEAR: integer (nullable = true)
 |-- AGE: integer (nullable = true)
 |-- DOBYR: integer (nullable = true)
 |-- AGEGRP: integer (nullable = true)
 |-- EMPREL: integer (nullable = true)
 |-- PHYFLAG: integer (nullable = true)
 |-- RX: integer (nullable = true)
 |-- SEX: integer (nullable = true)
 |-- HLTHPLAN: integer (nullable = true)
 |-- ENRMON: integer (nullable = true)
 |-- DATTYP1: integer (nullable = true)
 |-- DATTYP2: integer (nullable = true)
 |-- DATTYP3: integer (nullable = true)
 |-- DATTYP4: integer (nullable = true)
 |-- DATTYP5: integer (nullable = true)
 |-- DATTYP6: integer (nullable = true)
 |-- DATTYP7: integer (nullable = true)
 |-- DATTYP8: integer (nullable = true)
 |-- DATTYP9: integer (nullable = true)
 |-- DATTYP10: integer (nullable = true)
 |-- DATTYP11: i

In [46]:
truven.select("SEQNUM", "VERSION", "AGE", "DOBYR").show(6)

+------+-------+---+-----+
|SEQNUM|VERSION|AGE|DOBYR|
+------+-------+---+-----+
|319770|     10| 63| 1951|
|319771|     10| 60| 1954|
|319772|     10| 64| 1950|
|319773|     10| 64| 1950|
|319774|     10| 57| 1957|
|319775|     10| 64| 1950|
+------+-------+---+-----+
only showing top 6 rows



In [47]:
truven.createOrReplaceTempView("truven")
spark.sql("select mean(AGE) from truven").coalesce(1).collect()(0)(0)

[36mres46_1[39m: [32mAny[39m = 52.21212121212121

In [48]:
truven.agg(("AGE" -> "mean")).coalesce(1).collect()(0)(0)

[36mres47[39m: [32mAny[39m = 52.21212121212121

In [49]:
truven.stat.corr("AGE", "DOBYR")

[36mres48[39m: [32mDouble[39m = [32m-0.999908095601205[39m

---
## Plotly:

In [50]:
import $ivy.`org.plotly-scala::plotly-jupyter-scala:0.3.2`

import plotly._
import plotly.element._
import plotly.layout._
import plotly.JupyterScala._

plotly.JupyterScala.init()

[32mimport [39m[36m$ivy.$                                             

[39m
[32mimport [39m[36mplotly._
[39m
[32mimport [39m[36mplotly.element._
[39m
[32mimport [39m[36mplotly.layout._
[39m
[32mimport [39m[36mplotly.JupyterScala._

[39m

In [51]:
val age_hist = truven.select("AGE").rdd.collect().map(_(0)).map(_.toString.toInt).toSeq
Histogram(x = age_hist).plot()

[36mage_hist[39m: [32mSeq[39m[[32mInt[39m] = [33mArray[39m(
  [32m63[39m,
  [32m60[39m,
  [32m64[39m,
  [32m64[39m,
  [32m57[39m,
  [32m64[39m,
  [32m53[39m,
  [32m55[39m,
  [32m60[39m,
  [32m64[39m,
  [32m51[39m,
[33m...[39m
[36mres50_1[39m: [32mString[39m = [32m"plot-496259711"[39m