# Pure Scala _K_-Means example from Clustering4Ever

In [ ]:
import smile.plot._
import clustering4ever.scala.clustering.KMeans
import clustering4ever.math.distances.scalar.{Euclidean, Cosine, Minkowski}

import smile.plot._
import clustering4ever.scala.clustering.KMeans
import clustering4ever.math.distances.scalar.{Euclidean, Cosine, Minkowski}


## Import data as Array[(Int, Array[Double])]

In [ ]:
// Location of the input CSV: one observation per line, comma-separated numeric fields.
val path = "/home/KyBe/tmpDS/aggregation.csv"
// Parse every line into a vector of doubles and pair it with its zero-based line index (the ID).
val data = {
  val source = scala.io.Source.fromFile(path)
  // getLines() is lazy, so the result is materialised with toArray before the
  // underlying file handle is released in the finally clause.
  try source.getLines().map(_.split(",").map(_.toDouble)).zipWithIndex.map(_.swap).toArray
  finally source.close()
}

path: String = /home/KyBe/tmpDS/aggregation.csv
data: Array[(Int, Array[Double])] = Array((0,Array(15.55, 28.65)), (1,Array(14.9, 27.55)), (2,Array(14.45, 28.35)), (3,Array(14.15, 28.8)), (4,Array(13.75, 28.05)), (5,Array(13.35, 28.45)), (6,Array(13.0, 29.15)), (7,Array(13.45, 27.5)), (8,Array(13.6, 26.5)), (9,Array(12.8, 27.35)), (10,Array(12.4, 27.85)), (11,Array(12.3, 28.4)), (12,Array(12.2, 28.65)), (13,Array(13.4, 25.1)), (14,Array(12.95, 25.95)), (15,Array(12.9, 26.5)), (16,Array(11.85, 27.0)), (17,Array(11.35, 28.0)), (18,Array(11.15, 28.7)), (19,Array(11.25, 27.4)), (20,Array(10.75, 27.7)), (21,Array(10.5, 28.35)), (22,Array(9.65, 28.45)), (23,Array(10.25, 27.25)), (24,Array(10.75, 26.55)), (25,Array(11.7, 26.35)), (26,Array(11.6, 25.9)), (27,Array(11.9, 25.05)), (28,Array(12.6,...

## Parameters 

In [ ]:
// Settings handed to KMeans.run in the next cell.
// k: presumably the number of clusters to look for (K-Means convention) — confirm against KMeans.run.
val k = 6
// iterMax: presumably the upper bound on the number of iterations — confirm against KMeans.run.
val iterMax = 100
// epsilon: presumably the convergence threshold — confirm against KMeans.run.
val epsilon = 0.001
// `true` selects the true Euclidean distance with the square root applied;
// pass `false` to skip the square root.
val metric1 = new Euclidean(true)
// Cosine metric, built with its default constructor.
val metric2 = new Cosine
// Minkowski(p) where p is the Minkowski parameter
val metric3 = new Minkowski(4)

k: Int = 6
iterMax: Int = 100
epsilon: Double = 0.001
metric1: clustering4ever.math.distances.scalar.Euclidean = Euclidean with root applied
metric2: clustering4ever.math.distances.scalar.Cosine = clustering4ever.math.distances.scalar.Cosine@36965901
metric3: clustering4ever.math.distances.scalar.Minkowski = clustering4ever.math.distances.scalar.Minkowski@2aea7e41


## Run the algorithm

In [ ]:
// Cluster the data with the Euclidean metric defined in the parameters cell.
// The original call passed `metric`, which is not defined anywhere in this notebook;
// swap in metric2 (Cosine) or metric3 (Minkowski) to try another distance.
val clusterized = KMeans.run(data, k, epsilon, iterMax, metric1)

clusterized: clustering4ever.scala.clustering.KMeans.ClusterizedData = Array((0,(0,Array(15.55, 28.65))), (2,(1,Array(14.9, 27.55))), (2,(2,Array(14.45, 28.35))), (2,(3,Array(14.15, 28.8))), (2,(4,Array(13.75, 28.05))), (2,(5,Array(13.35, 28.45))), (2,(6,Array(13.0, 29.15))), (2,(7,Array(13.45, 27.5))), (2,(8,Array(13.6, 26.5))), (2,(9,Array(12.8, 27.35))), (2,(10,Array(12.4, 27.85))), (2,(11,Array(12.3, 28.4))), (2,(12,Array(12.2, 28.65))), (2,(13,Array(13.4, 25.1))), (2,(14,Array(12.95, 25.95))), (2,(15,Array(12.9, 26.5))), (2,(16,Array(11.85, 27.0))), (2,(17,Array(11.35, 28.0))), (2,(18,Array(11.15, 28.7))), (2,(19,Array(11.25, 27.4))), (2,(20,Array(10.75, 27.7))), (2,(21,Array(10.5, 28.35))), (2,(22,Array(9.65, 28.45))), (2,(23,Array(10.25, 27.25))), (2,(24,Array(10.75, 26.55))), (2...

In [ ]:
// Each element of `clusterized` has the shape (clusterID, (id, vector)).
// Keep only the coordinates for plotting...
val rawData = clusterized.map { case (_, (_, coordinates)) => coordinates }
// ...and the assigned cluster of every point, in the same order, to colour them.
val labels = clusterized.map { case (assignedCluster, _) => assignedCluster }

// Scatter plot of the points, one colour per cluster label.
plot(rawData, labels, '*', Palette.COLORS)

rawData: Array[clustering4ever.scala.clustering.KMeans.Vector] = Array(Array(15.55, 28.65), Array(14.9, 27.55), Array(14.45, 28.35), Array(14.15, 28.8), Array(13.75, 28.05), Array(13.35, 28.45), Array(13.0, 29.15), Array(13.45, 27.5), Array(13.6, 26.5), Array(12.8, 27.35), Array(12.4, 27.85), Array(12.3, 28.4), Array(12.2, 28.65), Array(13.4, 25.1), Array(12.95, 25.95), Array(12.9, 26.5), Array(11.85, 27.0), Array(11.35, 28.0), Array(11.15, 28.7), Array(11.25, 27.4), Array(10.75, 27.7), Array(10.5, 28.35), Array(9.65, 28.45), Array(10.25, 27.25), Array(10.75, 26.55), Array(11.7, 26.35), Array(11.6, 25.9), Array(11.9, 25.05), Array(12.6, 24.05), Array(11.9, 24.5), Array(11.1, 25.2), Array(10.55, 25.15), Array(10.05, 25.95), Array(9.35, 26.6), Array(9.3, 27.25), Array(9.2, 27.8), Array(7....

import clustering4ever.scala.clustering.KMeans
