# 1.0 Load Packages & Testing Data

In [1]:
# Show how many threads this Julia session was started with.
Threads.nthreads()

6

In [2]:
# Load Packages
using Clustering
using ParallelKMeans
using BenchmarkTools
using DelimitedFiles
using mlpack

## Read data as transposed matrices, since Julia is column-major

In [3]:
# Parse data_1m.csv as a comma-delimited Float64 matrix, then swap its two dimensions.
X_1m = DelimitedFiles.readdlm("data_1m.csv", ',', Float64) |> permutedims;

In [4]:
# Parse data_100k.csv as a comma-delimited Float64 matrix, then swap its two dimensions.
X_100k = DelimitedFiles.readdlm("data_100k.csv", ',', Float64) |> permutedims;

In [5]:
# Parse data_10k.csv as a comma-delimited Float64 matrix, then swap its two dimensions.
X_10k = DelimitedFiles.readdlm("data_10k.csv", ',', Float64) |> permutedims;

In [6]:
# Parse data_1k.csv as a comma-delimited Float64 matrix, then swap its two dimensions.
X_1k = DelimitedFiles.readdlm("data_1k.csv", ',', Float64) |> permutedims;

## Row-wise Data For mlpack

In [7]:
# Materialize a copy of X_1m with its two dimensions swapped; this is the layout handed to mlpack.
X_1m_mlp = permutedims(X_1m, (2, 1));

In [8]:
# Materialize a copy of X_100k with its two dimensions swapped; this is the layout handed to mlpack.
X_100k_mlp = permutedims(X_100k, (2, 1));

In [9]:
# Materialize a copy of X_10k with its two dimensions swapped; this is the layout handed to mlpack.
X_10k_mlp = permutedims(X_10k, (2, 1));

In [10]:
# Materialize a copy of X_1k with its two dimensions swapped; this is the layout handed to mlpack.
X_1k_mlp = permutedims(X_1k, (2, 1));

# Elbow Method mlpack

In [11]:
# Time nine mlpack k-means runs (algorithm "hamerly", cluster counts 2 through 10) on
# X_1m_mlp, keeping the first value returned by each call.
@btime [mlpack.kmeans(k, $X_1m_mlp; algorithm="hamerly", max_iterations=1000)[1] for k in 2:10];

  238.716 s (85 allocations: 2.08 GiB)


In [12]:
# Time nine mlpack k-means runs (algorithm "hamerly", cluster counts 2 through 10) on
# X_100k_mlp, keeping the first value returned by each call.
@btime [mlpack.kmeans(k, $X_100k_mlp; algorithm="hamerly", max_iterations=1000)[1] for k in 2:10];

  20.224 s (85 allocations: 212.88 MiB)


In [13]:
# Time nine mlpack k-means runs (algorithm "hamerly", cluster counts 2 through 10) on
# X_10k_mlp, keeping the first value returned by each call.
@btime [mlpack.kmeans(k, $X_10k_mlp; algorithm="hamerly", max_iterations=1000)[1] for k in 2:10];

  721.430 ms (85 allocations: 21.30 MiB)


In [14]:
# Time nine mlpack k-means runs (algorithm "hamerly", cluster counts 2 through 10) on
# X_1k_mlp, keeping the first value returned by each call.
@btime [mlpack.kmeans(k, $X_1k_mlp; algorithm="hamerly", max_iterations=1000)[1] for k in 2:10];

  24.581 ms (85 allocations: 2.14 MiB)


# 2.0 Elbow Method Clustering.jl

In [15]:
# Time nine Clustering.jl k-means runs (cluster counts 2 through 10) on X_1m,
# collecting the `totalcost` of each result.
@btime [Clustering.kmeans($X_1m, k; tol=1e-6, maxiter=1000).totalcost for k in 2:10];

  580.079 s (32485 allocations: 34.42 GiB)


In [16]:
# Time nine Clustering.jl k-means runs (cluster counts 2 through 10) on X_100k,
# collecting the `totalcost` of each result.
@btime [Clustering.kmeans($X_100k, k; tol=1e-6, maxiter=1000).totalcost for k in 2:10];

  47.804 s (27599 allocations: 2.90 GiB)


In [17]:
# Time nine Clustering.jl k-means runs (cluster counts 2 through 10) on X_10k,
# collecting the `totalcost` of each result.
@btime [Clustering.kmeans($X_10k, k; tol=1e-6, maxiter=1000).totalcost for k in 2:10];

  882.486 ms (8650 allocations: 93.42 MiB)


In [18]:
# Time nine Clustering.jl k-means runs (cluster counts 2 through 10) on X_1k,
# collecting the `totalcost` of each result.
@btime [Clustering.kmeans($X_1k, k; tol=1e-6, maxiter=1000).totalcost for k in 2:10];

  17.424 ms (1577 allocations: 2.20 MiB)


# 3.0 Elbow Method Speed ParallelKMeans.jl

## Lloyd

In [19]:
# Time nine ParallelKMeans runs with Lloyd() (cluster counts 2 through 10) on X_1m,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Lloyd(), $X_1m, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  22.946 s (43965 allocations: 210.36 MiB)


In [20]:
# Time nine ParallelKMeans runs with Lloyd() (cluster counts 2 through 10) on X_100k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Lloyd(), $X_100k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  2.844 s (54383 allocations: 26.01 MiB)


In [21]:
# Time nine ParallelKMeans runs with Lloyd() (cluster counts 2 through 10) on X_10k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Lloyd(), $X_10k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  177.329 ms (34604 allocations: 5.56 MiB)


In [22]:
# Time nine ParallelKMeans runs with Lloyd() (cluster counts 2 through 10) on X_1k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Lloyd(), $X_1k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  6.403 ms (10587 allocations: 1.37 MiB)


## Hamerly

In [23]:
# Time nine ParallelKMeans runs with Hamerly() (cluster counts 2 through 10) on X_1m,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Hamerly(), $X_1m, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  11.084 s (52379 allocations: 349.14 MiB)


In [24]:
# Time nine ParallelKMeans runs with Hamerly() (cluster counts 2 through 10) on X_100k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Hamerly(), $X_100k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  1.160 s (67677 allocations: 41.87 MiB)


In [25]:
# Time nine ParallelKMeans runs with Hamerly() (cluster counts 2 through 10) on X_10k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Hamerly(), $X_10k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  96.670 ms (58154 allocations: 9.93 MiB)


In [26]:
# Time nine ParallelKMeans runs with Hamerly() (cluster counts 2 through 10) on X_1k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Hamerly(), $X_1k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  6.459 ms (16734 allocations: 2.29 MiB)


## Elkan

In [27]:
# Time nine ParallelKMeans runs with Elkan() (cluster counts 2 through 10) on X_1m,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Elkan(), $X_1m, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  13.773 s (50855 allocations: 700.80 MiB)


In [28]:
# Time nine ParallelKMeans runs with Elkan() (cluster counts 2 through 10) on X_100k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Elkan(), $X_100k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  1.457 s (69447 allocations: 77.21 MiB)


In [29]:
# Time nine ParallelKMeans runs with Elkan() (cluster counts 2 through 10) on X_10k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Elkan(), $X_10k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  80.484 ms (46490 allocations: 12.13 MiB)


In [30]:
# Time nine ParallelKMeans runs with Elkan() (cluster counts 2 through 10) on X_1k,
# collecting the `totalcost` of each result.
@btime [ParallelKMeans.kmeans(Elkan(), $X_1k, k; tol=1e-6, max_iters=1000, verbose=false).totalcost for k in 2:10];

  6.854 ms (17482 allocations: 2.71 MiB)
