# [Clustering4Ever](https://github.com/Clustering4Ever/Clustering4Ever) on [SparkNotebook](http://spark-notebook.io/) by [LIPN](https://lipn.univ-paris13.fr/) [A3](https://lipn.univ-paris13.fr/accueil/equipe/a3/) team

# Scala Kernel Gradient Ascent

In [ ]:
import smile.plot._
import clustering4ever.scala.clustering.meanshift.GradientAscent
import clustering4ever.math.distances.scalar.{Euclidean, Cosine, Minkowski}
import scala.io.Source
import clustering4ever.spark.indexes.ExternalIndexes
import clustering4ever.scala.indexes.ExternalIndexes
import clustering4ever.scala.kernels.KernelNature._

import smile.plot._
import clustering4ever.scala.clustering.meanshift.GradientAscent
import clustering4ever.math.distances.scalar.{Euclidean, Cosine, Minkowski}
import scala.io.Source
import clustering4ever.spark.indexes.ExternalIndexes
import clustering4ever.scala.indexes.ExternalIndexes
import clustering4ever.scala.kernels.KernelNature._


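## Download the Aggregation dataset

> Note: the commands below fetch the Digits files (`digits.csv` and `labels`), whereas the import cell reads `/tmp/aggregation.csv`. Make sure the Aggregation CSV and its matching labels file are present in `/tmp/` before running the import cell.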

In [ ]:
:sh wget -P /tmp/ http://www.clustering4ever.org/Datasets/Digits/digits.csv
:sh wget -P /tmp/ http://www.clustering4ever.org/Datasets/Digits/labels

--2018-04-08 21:15:44--  http://www.clustering4ever.org/Datasets/Digits/digits.csv
Resolving www.clustering4ever.org (www.clustering4ever.org)... 62.210.16.62
Connecting to www.clustering4ever.org (www.clustering4ever.org)|62.210.16.62|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 959999 (937K) [text/csv]
Saving to: ‘/tmp/digits.csv.4’

     0K .......... .......... .......... .......... ..........  5% 2.78M 0s
    50K .......... .......... .......... .......... .......... 10% 4.46M 0s
   100K .......... .......... .......... .......... .......... 16% 4.25M 0s
   150K .......... .......... .......... .......... .......... 21% 4.25M 0s
   200K .......... .......... .......... .......... .......... 26% 4.33M 0s
   250K .......... .......... .......... .......... .......... 32% 4.42M 0s
   300K .......... .......... .......... .......... .......... 37% 4.26M 0s
   350K .......... .......... .......... .......... .......... 42% 4.26M 0s
   400K .......... .......

## Import data as Array[(Int, Array[Double])]

In [ ]:
// Locations of the dataset file and of its labels file on the local disk.
val path = "/tmp/aggregation.csv"
val labelsPath = "/tmp/labels"
// Every CSV line is split on "," and parsed into doubles, then paired with its
// zero-based line index, which comes first in the tuple. The file handle is
// closed once the lines are materialised, even if parsing throws.
val data: Array[(Int, Array[Double])] = {
  val source = Source.fromFile(path)
  try source.getLines().map(_.split(",").map(_.toDouble)).zipWithIndex.map(_.swap).toArray
  finally source.close()
}
// One integer per line of the labels file; the handle is closed the same way.
val labels: Array[Int] = {
  val source = Source.fromFile(labelsPath)
  try source.getLines().map(_.toInt).toArray
  finally source.close()
}

path: String = /tmp/aggregation.csv
labelsPath: String = /tmp/labels
data: Array[(Int, Array[Double])] = Array((0,Array(15.55, 28.65)), (1,Array(14.9, 27.55)), (2,Array(14.45, 28.35)), (3,Array(14.15, 28.8)), (4,Array(13.75, 28.05)), (5,Array(13.35, 28.45)), (6,Array(13.0, 29.15)), (7,Array(13.45, 27.5)), (8,Array(13.6, 26.5)), (9,Array(12.8, 27.35)), (10,Array(12.4, 27.85)), (11,Array(12.3, 28.4)), (12,Array(12.2, 28.65)), (13,Array(13.4, 25.1)), (14,Array(12.95, 25.95)), (15,Array(12.9, 26.5)), (16,Array(11.85, 27.0)), (17,Array(11.35, 28.0)), (18,Array(11.15, 28.7)), (19,Array(11.25, 27.4)), (20,Array(10.75, 27.7)), (21,Array(10.5, 28.35)), (22,Array(9.65, 28.45)), (23,Array(10.25, 27.25)), (24,Array(10.75, 26.55)), (25,Array(11.7, 26.35)), (26,Array(11.6, 25.9)), (27,Array(11.9, 25....

## Parameters 

In [ ]:
// Numeric settings handed to GradientAscent.run in the run cell
// (presumably the iteration cap and the convergence tolerance; confirm against the library).
val iterMax: Int = 25
val epsilon: Double = 1e-5
// Passing true selects the Euclidean distance with the square root applied;
// pass false to skip the square root.
val metric1 = new Euclidean(true)
// The four kernel natures tried in the run cell.
val (kernel1, kernel2, kernel3, kernel4) = (Gaussian, Flat, KNN, Sigmoid)
// String-encoded kernel arguments. `bandwitch` is used with the Flat and Gaussian
// kernels, `knnArg` with KNN and `sigmoidArgs` with Sigmoid. The spelling of
// `bandwitch` is kept as-is because the run cell refers to it by that name.
val bandwitch: Array[String] = Array("1")
val knnArg: Array[String] = Array("40")
val sigmoidArgs: Array[String] = Array("1", "0")

iterMax: Int = 25
epsilon: Double = 1.0E-5
metric1: clustering4ever.math.distances.scalar.Euclidean = Euclidean with root applied
kernel1: clustering4ever.scala.kernels.KernelNature.Value = Gaussian
kernel2: clustering4ever.scala.kernels.KernelNature.Value = Flat
kernel3: clustering4ever.scala.kernels.KernelNature.Value = KNN
kernel4: clustering4ever.scala.kernels.KernelNature.Value = Sigmoid
bandwitch: Array[String] = Array(1)
knnArg: Array[String] = Array(40)
sigmoidArgs: Array[String] = Array(1, 0)


## Run the algorithm

In [ ]:
// Every run shares the dataset, the metric and epsilon; only the kernel, its
// arguments and the fourth (iteration) argument vary, so they are the wrapper's parameters.
def ascend(kernel: clustering4ever.scala.kernels.KernelNature.Value, kernelArgs: Array[String], maxIterations: Int = iterMax) =
  GradientAscent.run(data, metric1, epsilon, maxIterations, kernel, kernelArgs)
val gradientAscentDataFlat = ascend(kernel2, bandwitch)
val gradientAscentDataGaussian = ascend(kernel1, bandwitch)
// The KNN run passes 100 as the iteration argument instead of iterMax.
val gradientAscentDataKNN = ascend(kernel3, knnArg, maxIterations = 100)
val gradientAscentDataSigmoid = ascend(kernel4, sigmoidArgs)

gradientAscentDataFlat: scala.collection.parallel.mutable.ParArray[(Int, Array[Double], Array[Double], Boolean)] = ParArray((0,[D@4bf5fbf0,[D@757ef1b2,true), (1,[D@14d28db0,[D@ab245e9,true), (2,[D@6e7c1534,[D@6c0d9a28,true), (3,[D@6cb22514,[D@74b7b866,true), (4,[D@7a6cc671,[D@7ab0dfce,true), (5,[D@7d7df09,[D@4eea231d,true), (6,[D@52293bff,[D@2e1bc0db,true), (7,[D@178d6d86,[D@86056a1,true), (8,[D@130d6291,[D@63607533,true), (9,[D@1ffdd584,[D@15224d84,true), (10,[D@6dc3ac5b,[D@dea853d,true), (11,[D@7cca30f1,[D@7b030c81,true), (12,[D@bd2d6ce,[D@52c879e4,true), (13,[D@7f2896f4,[D@1c2cfa94,true), (14,[D@1d53b79,[D@104375fe,true), (15,[D@2be7de34,[D@50647986,true), (16,[D@78428e00,[D@73a5e873,true), (17,[D@66a68e4d,[D@157d6356,true), (18,[D@7923dd77,[D@25994450,true), (19,[D@771f7248,[D@986c8...

## Plot results

In [ ]:
// Keep only the third component of each result tuple, i.e. the coordinates that
// get plotted, and collect them into a plain Array for the plotting call.
val toPlotFlat = gradientAscentDataFlat.map { case (_, _, coords, _) => coords }.toArray
val toPlotGaussian = gradientAscentDataGaussian.map { case (_, _, coords, _) => coords }.toArray
val toPlotKNN = gradientAscentDataKNN.map { case (_, _, coords, _) => coords }.toArray
val toPlotSigmoid = gradientAscentDataSigmoid.map { case (_, _, coords, _) => coords }.toArray
// One scatter plot per kernel, using '.' as the marker, with `labels` and the
// palette passed alongside the points.
plot(toPlotFlat, labels, '.', Palette.COLORS)
plot(toPlotGaussian, labels, '.', Palette.COLORS)
plot(toPlotKNN, labels, '.', Palette.COLORS)
plot(toPlotSigmoid, labels, '.', Palette.COLORS)

toPlotFlat: Array[Array[Double]] = Array(Array(15.55, 28.65), Array(13.830000000000002, 28.23), Array(13.830000000000002, 28.23), Array(13.925, 28.4125), Array(13.830000000000002, 28.23), Array(13.830000000000002, 28.23), Array(12.65, 28.5), Array(13.15, 27.839999999999996), Array(13.149999999999999, 26.316666666666666), Array(12.8875, 27.3), Array(12.425, 28.0625), Array(12.062500000000002, 28.225), Array(12.65, 28.5), Array(13.175, 25.525), Array(13.2125, 26.0125), Array(13.062499999999998, 26.575), Array(11.43, 26.640000000000004), Array(11.0, 28.03), Array(11.0, 28.03), Array(11.033333333333333, 27.316666666666674), Array(10.82, 27.74), Array(11.0, 28.03), Array(9.783333333333333, 28.2), Array(10.2625, 27.1875), Array(11.033333333333333, 27.316666666666674), Array(11.43, 26.64000000...