# [Clustering4Ever](https://github.com/Clustering4Ever/Clustering4Ever) by [LIPN](https://lipn.univ-paris13.fr/) [A3](https://lipn.univ-paris13.fr/accueil/equipe/a3/) team

In [None]:
%%classpath add mvn
org.clustering4ever clustering4ever_2.11 0.9.6

In [None]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.4.3

# Scala Gradient Ascent

In [None]:
import scala.io.Source
import org.clustering4ever.math.distances.scalar.Euclidean
import org.clustering4ever.clusterizables.EasyClusterizable
import org.clustering4ever.vectorizables.Vectorizable
import org.clustering4ever.vectors.ScalarVector
import org.clustering4ever.clustering.scala.meanshift.{GradientAscent, GradientAscentArgs}
import org.clustering4ever.enums.KernelNature
import scala.collection.{immutable, mutable}
import org.clustering4ever.kernels._
import org.clustering4ever.shapeless.VMapping

## Download dataset Aggregation

In [None]:
%%bash
# Fetch the Aggregation dataset and its labels file into /tmp/.
base_url="http://www.clustering4ever.org/Datasets/Aggregation"
for name in aggregation.csv labels; do
    wget -P /tmp/ "${base_url}/${name}"
done

## Import data as a ParArray of EasyClusterizable wrapping ScalarVector

In [None]:
// Location of the Aggregation dataset: one row per line, comma-separated doubles.
val path = "/tmp/aggregation.csv"
// Parallel array of clusterizables, one per row of the file.
val data = {
    val source = Source.fromFile(path)
    // Materialise every row eagerly (toVector) so the file handle can be released
    // immediately; a lazily evaluated sequence would be forced only after close().
    val rows =
        try source.getLines.map(_.split(",").map(_.toDouble)).toVector
        finally source.close()
    // Each row becomes an EasyClusterizable whose ID is the row's zero-based position.
    rows.toParArray.zipWithIndex.map { case (v, id) => EasyClusterizable(id, ScalarVector(v)) }
}
// Path of the labels file fetched alongside the dataset; it is not read in this cell.
val labelsPath = "/tmp/labels"

## Parameters 

In [None]:
// Euclidean metric shared by the kernel estimator arguments and the gradient ascent run.
// NOTE(review): the meaning of the `false` constructor flag is not visible here — confirm against the Euclidean class.
val classicEuclidean = new Euclidean(false)
// NOTE(review): `k` is not referenced by the cells visible here; the KNN estimator
// arguments hard-code their own values (50 and 100) — confirm whether this is intended.
val k = 40
// Passed as the first argument to GradientAscent; presumably the convergence threshold — confirm.
val epsilon = 0.00001
// Passed as the second argument to GradientAscent; presumably the iteration cap — confirm.
val maxNumberIter = 50

In [None]:
// Argument bundles for the kernel estimators. Every bundle except the sigmoid one
// receives the shared classicEuclidean metric.

// KNN-based kernels; note that k is given literally here (50 and 100).
val knnRealKernel = EstimatorArgsKnnScalar(k = 50, metric = classicEuclidean)
val knnEuclideanKernel = EstimatorArgsKnnEuclidean(k = 100, metric = classicEuclidean)
// Bandwidth-based kernels.
val gaussianArgs = EstimatorArgsGaussian(bandwidth = 0.001, metric = classicEuclidean)
val flatArgs = EstimatorArgsFlat(bandwidth = 0.75, metric = classicEuclidean, lambda = 1D)
// Sigmoid kernel: takes only the coefficients a and b, no metric.
val sigmoidArgs = EstimatorArgsSigmoid(a = 1D, b = 3D)

In [None]:
// Build one estimator per argument bundle. Only knnEuc is passed to GradientAscent
// in the run cell of this notebook; the others are alternatives that could be swapped in.
val gaussian = EstimatorGaussian(gaussianArgs)
val flat = EstimatorFlat(flatArgs)
val sigmoid = EstimatorSigmoid(sigmoidArgs)
val knnReal = EstimatorKnnScalar(knnRealKernel)
val knnEuc = EstimatorKnnEuclidean(knnEuclideanKernel)

## Run the algorithm

In [None]:
// Passed as the last argument to GradientAscent.
// NOTE(review): presumably the vectorization ID under which the converged vectors are stored — confirm against the GradientAscent API.
val idWhereKeepNewVectorization = 9

// Time the fit with wall-clock timestamps taken immediately before and after it.
val t1 = System.currentTimeMillis
// Run gradient ascent on `data` using the KNN Euclidean estimator and the shared metric.
val convergedDS = GradientAscent(epsilon, maxNumberIter, knnEuc, classicEuclidean, idWhereKeepNewVectorization).fit(data)
val t2 = System.currentTimeMillis

// Elapsed time in seconds (milliseconds divided by 1000); shown as the cell's result.
(t2 - t1) / 1000D