In [0]:
// Smoke test: confirms the kernel evaluates Scala and can write to standard output.
Console.println("Hello Scala")

In [1]:
import breeze.linalg._
import breeze.numerics._

In [2]:
// Three ways of building dense containers: an all-zero 3x5 matrix, a length-3 vector
// filled with a constant, and a 2x3 matrix written out row by row.
{
  val zeroMatrix = DenseMatrix.zeros[Double](3, 5)
  val constantVector = DenseVector.fill(3)(5.0)
  val literalMatrix = DenseMatrix((1.0, 0.5, 2.0), (4.1, 5.4, 0.1))

  println(zeroMatrix)
  println(constantVector)
  println(literalMatrix)
}

In [3]:
// Six evenly spaced sample points running from 2 to 7.
println(linspace(2.0, 7.0, 6))

In [4]:
// 3x2 matrix whose entry at (row, col) is row + col.
println(DenseMatrix.tabulate(3, 2)((row, col) => row + col))

In [5]:
// 2x3 matrix of random draws; the printed values differ on every run.
{
  val sample = DenseMatrix.rand(2, 3)
  println(sample)
}

In [6]:
// 3x3 integer matrix with entry (row, col) = row - col; reused by the slicing cells below.
val m: DenseMatrix[Int] = DenseMatrix.tabulate(3, 3)((row, col) => row - col)

In [7]:
// Column slice of m: every row, columns 1 and 2.
val cat: DenseMatrix[Int] = m(::, 1 to 2)

In [8]:
// Single element lookup in column 1 using a negative row index.
// NOTE(review): Breeze is expected to resolve -1 relative to the end (i.e. the last row) — confirm
// against the Breeze version in use.
val e: Int = m(-1, 1)

In [9]:
// Operands for the product examples that follow: a length-3 vector and a 2x3 matrix.
val v: DenseVector[Double] = DenseVector(1.0, 2.0, 3.0)
val A: DenseMatrix[Double] = DenseMatrix(
  (1.0, 2.0, 3.0),
  (4.0, 5.0, 6.0)
)

In [10]:
// Inner product of v with itself, using method-call syntax for `dot`.
println(v.dot(v))

In [11]:
// Same inner product written as (transposed v) times v.
{
  val transposed = v.t
  println(transposed * v)
}

In [12]:
// Matrix-vector product of the 2x3 matrix A with the length-3 vector v.
{
  val product = A * v
  println(product)
}

In [13]:
// Fresh copy of v; the cells below mutate it in place.
val v: DenseVector[Double] = DenseVector(1.0, 2.0, 3.0)

// Element-wise (Hadamard) product.
println(v *:* v)

// NOTE(review): plain `*` between two column vectors may have no Breeze operator instance —
// confirm this line compiles; `v * v.t` would be the outer product if that was intended.
println(v * v)

In [14]:
// In-place element-wise addition: v is overwritten with v + v.
v += v
println(v)

In [15]:
// In-place scaling of v by 3, then the index of its largest entry.
v *= 3.0
println(v)

{
  val largestAt = argmax(v)
  println(largestAt)
}

In [16]:
// Element-wise "less than" comparison of v against v + v.
{
  val doubled = v +:+ v
  println(v <:< doubled)
}

In [17]:
// Rebind A to a new 2x3 matrix for the reduction examples.
val A: DenseMatrix[Double] = DenseMatrix(
  (1.0, 2.0, 3.0),
  (1.0, 5.0, 2.0)
)

In [18]:
// Sum of all entries of A, then the trace of its leading 2x2 block (columns 0 and 1).
{
  val total = sum(A)
  println(total)

  val leadingSquare = A(::, 0 to 1)
  println(trace(leadingSquare))
}

In [19]:
// Flatten the matrix A into a single dense vector.
val f: DenseVector[Double] = A.toDenseVector

In [20]:
// Running (cumulative) sum over the entries of f. The value is not bound or printed here;
// it is left as the cell's result expression.
accumulate(f)

In [21]:
// Square 2x2 system matrix, and a right-hand side b built by multiplying A with a known
// column (3.0, 1.0), so that solving A \ b should give that column back.
val A: DenseMatrix[Double] = DenseMatrix(
  (1.0, 2.0),
  (-1.0, 2.0)
)
val b: DenseMatrix[Double] = A * DenseMatrix(3.0, 1.0)

In [22]:
// Solve the linear system A * x = b for x.
{
  val solution = A \ b
  println(solution)
}

In [23]:
// Determinant of A.
{
  val determinant = det(A)
  println(determinant)
}

In [24]:
// Inverse of A.
{
  val inverse = inv(A)
  println(inverse)
}

In [25]:
// Eigen-decomposition of A; prints whatever result object `eig` returns.
{
  val decomposition = eig(A)
  println(decomposition)
}

In [26]:
import breeze.optimize.{DiffFunction, LBFGS}

{
    // Synthetic, noise-free regression data: y is an exact linear function of the random
    // features, so the minimizer should recover the weights (0.5, -0.1, 0.2).
    val X = DenseMatrix.rand(2000, 3)
    val y = X * DenseVector(0.5, -0.1, 0.2)
    val sampleCount = X.rows.toDouble

    // Least-squares objective J(w) = sum((X*w - y)^2) / (2n), returned together with its gradient.
    val J = new DiffFunction[DenseVector[Double]] {
        def calculate(w: DenseVector[Double]): (Double, DenseVector[Double]) = {
            val error = X * w - y
            val loss = sum(error ^:^ 2.0) / (2.0 * sampleCount)

            // Differentiating the square produces a factor 2 that cancels the 1/2 in the loss,
            // so the gradient is (error^T * X) / n. Dividing by 2n here would hand the optimizer
            // a gradient that is half the true one and inconsistent with `loss`.
            val grad = (error.t * X) /:/ sampleCount
            (loss, grad.t)
        }
    }

    val optimizer = new LBFGS[DenseVector[Double]]()
    println(optimizer.minimize(J, DenseVector(0.0, 0.0, 0.0)))
}

In [27]:
// Regression data kept at top level for the Spark cells below:
// 2000x3 random features, fixed weights, and noise-free targets y = X * w.
val X: DenseMatrix[Double] = DenseMatrix.rand(2000, 3)
val w: DenseVector[Double] = DenseVector(0.5, -0.1, 0.2)
val y: DenseVector[Double] = X * w

In [28]:
// Append the targets to the features as one extra column (y reshaped into a single-column matrix).
val Data: DenseMatrix[Double] = {
  val targetColumn = y.asDenseMatrix.t
  DenseMatrix.horzcat(X, targetColumn)
}

In [29]:
// Report the (rows, cols) shape of Data. The label and the shape are formatted into a single
// string: passing them to println as two arguments makes Scala auto-tuple them, which prints
// "(Data shape: ,(rows,cols))" and relies on deprecated argument adaptation.
println(s"Data shape: ${(Data.rows, Data.cols)}")

In [30]:
// Turn each row of Data into a 4-tuple and load the tuples into a DataFrame with named columns.
// `toDF` relies on Spark's implicits being in scope — presumably supplied by the notebook kernel.
val df = Data(*, ::).iterator.map { row =>
  (row(0), row(1), row(2), row(3))
}.toSeq.toDF("x1", "x2", "x3", "y")

// Preview the first two rows.
df.show(2)

In [31]:
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}

In [32]:
// Two-stage ML pipeline: pack x1..x3 into a "features" vector column, then fit a linear
// regression whose label column is "y".
val pipeline = {
  val assembler = new VectorAssembler()
    .setInputCols(Array("x1", "x2", "x3"))
    .setOutputCol("features")
  val regression = new LinearRegression().setLabelCol("y")

  new Pipeline().setStages(Array(assembler, regression))
}

// Fit both stages on the DataFrame built above.
val model = pipeline.fit(df)

// Coefficients learned by the final pipeline stage. A pattern match replaces the unchecked
// downcast so that an unexpected stage type fails with a message naming what was found.
val w = model.stages.last match {
  case regressionModel: LinearRegressionModel => regressionModel.coefficients
  case other =>
    throw new IllegalStateException(
      s"Expected the last pipeline stage to be a LinearRegressionModel but found ${other.getClass.getName}"
    )
}

In [33]:
// Run the fitted pipeline over the training DataFrame and preview the first three rows.
val predictions = model.transform(df)
predictions.show(numRows = 3)

In [34]:
import org.apache.spark.ml.attribute.AttributeGroup

// Print the per-slot attribute metadata attached to the assembled "features" column.
// `attributes` is an Option: fold over it instead of calling `.get`, so a column without
// attribute metadata produces a message rather than a NoSuchElementException.
AttributeGroup.fromStructField(predictions.schema("features")).attributes.fold(
  println("features column has no attribute metadata")
)(_.foreach(println))