# 1.0 Load Packages & Testing Data

In [1]:
# Report how many threads this Julia session was started with.
Base.Threads.nthreads()

6

In [2]:
# Load Packages
using Clustering
using ParallelKMeans
using BenchmarkTools
using DelimitedFiles

## Read data as transposed matrices, since Julia is column-major

In [3]:
# Parse the CSV as Float64, then swap the two dimensions of the resulting matrix.
X_1m = DelimitedFiles.readdlm("data_1m.csv", ',', Float64) |> permutedims;

In [4]:
# Parse the CSV as Float64, then swap the two dimensions of the resulting matrix.
X_100k = DelimitedFiles.readdlm("data_100k.csv", ',', Float64) |> permutedims;

In [5]:
# Parse the CSV as Float64, then swap the two dimensions of the resulting matrix.
X_10k = DelimitedFiles.readdlm("data_10k.csv", ',', Float64) |> permutedims;

In [6]:
# Parse the CSV as Float64, then swap the two dimensions of the resulting matrix.
X_1k = DelimitedFiles.readdlm("data_1k.csv", ',', Float64) |> permutedims;

# 2.0 Elbow Method Speed Clustering.jl

In [7]:
# Elbow sweep: time Clustering.jl k-means for k = 2:10 on X_1m and collect each total cost.
# `$X_1m` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [Clustering.kmeans($X_1m, i; tol=1e-6, maxiter=1000).totalcost for i = 2:10]

  666.840 s (39552 allocations: 42.20 GiB)


9-element Array{Float64,1}:
 2.4375022036585887e6
 2.3995026350645283e6
 2.368956147241305e6
 2.3465189109189017e6
 2.3269519086732655e6
 2.309690084759929e6
 2.293811805984634e6
 2.280594080957371e6
 2.268225523325909e6

In [8]:
# Elbow sweep: time Clustering.jl k-means for k = 2:10 on X_100k and collect each total cost.
# `$X_100k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [Clustering.kmeans($X_100k, i; tol=1e-6, maxiter=1000).totalcost for i = 2:10]

  34.034 s (21478 allocations: 2.21 GiB)


9-element Array{Float64,1}:
 243594.45278279012
 239811.28569635848
 236703.8272640062
 234462.62502097985
 232489.97314096565
 230723.0485242891
 229157.1972914591
 227739.77975983976
 226472.3094850646

In [9]:
# Elbow sweep: time Clustering.jl k-means for k = 2:10 on X_10k and collect each total cost.
# `$X_10k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [Clustering.kmeans($X_10k, i; tol=1e-6, maxiter=1000).totalcost for i = 2:10]

  709.049 ms (7695 allocations: 81.68 MiB)


9-element Array{Float64,1}:
 24275.444286222566
 23875.557020184788
 23578.946955787447
 23318.473142828305
 23084.67069252044
 22875.085488870653
 22717.258454559385
 22550.670739108667
 22418.63818326847

In [10]:
# Elbow sweep: time Clustering.jl k-means for k = 2:10 on X_1k and collect each total cost.
# `$X_1k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [Clustering.kmeans($X_1k, i; tol=1e-6, maxiter=1000).totalcost for i = 2:10]

  17.686 ms (1738 allocations: 2.35 MiB)


9-element Array{Float64,1}:
 2418.380841440787
 2366.269549833395
 2324.782494422872
 2295.0028498146385
 2257.7141132498864
 2240.752627243752
 2217.2948134611097
 2191.3630881577565
 2179.0718331261965

# 3.0 Elbow Method Speed ParallelKMeans.jl

## Lloyd

In [11]:
# Elbow sweep: time ParallelKMeans Lloyd for k = 2:10 on X_1m and collect each total cost.
# `$X_1m` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Lloyd(), $X_1m, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  21.730 s (44716 allocations: 210.45 MiB)


9-element Array{Float64,1}:
 2.437523884802478e6
 2.3994183127988405e6
 2.369587418012976e6
 2.3465842810245752e6
 2.327195663893961e6
 2.309860383274498e6
 2.2935430149697494e6
 2.280894938360677e6
 2.26821335945493e6

In [12]:
# Elbow sweep: time ParallelKMeans Lloyd for k = 2:10 on X_100k and collect each total cost.
# `$X_100k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Lloyd(), $X_100k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  2.975 s (58760 allocations: 26.38 MiB)


9-element Array{Float64,1}:
 243631.78264891994
 239792.4639819173
 236755.72321950592
 234464.24202293635
 232529.77487873653
 230734.45570173766
 229165.76200897153
 227686.15963125622
 226438.54043162096

In [13]:
# Elbow sweep: time ParallelKMeans Lloyd for k = 2:10 on X_10k and collect each total cost.
# `$X_10k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Lloyd(), $X_10k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  163.771 ms (34026 allocations: 5.51 MiB)


9-element Array{Float64,1}:
 24271.648171204935
 23894.119149999144
 23573.616337801654
 23337.5657444241
 23078.763113354762
 22891.475248180508
 22707.85660316113
 22549.977948341566
 22401.95942618817

In [14]:
# Elbow sweep: time ParallelKMeans Lloyd for k = 2:10 on X_1k and collect each total cost.
# `$X_1k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Lloyd(), $X_1k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  6.444 ms (10352 allocations: 1.35 MiB)


9-element Array{Float64,1}:
 2417.0877173469385
 2365.8133587840457
 2324.6788690432386
 2291.5290419226367
 2269.5194919448163
 2239.343846441753
 2214.726735397414
 2194.282617457555
 2174.7872800729156

## Hamerly

In [15]:
# Elbow sweep: time ParallelKMeans Hamerly for k = 2:10 on X_1m and collect each total cost.
# `$X_1m` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Hamerly(), $X_1m, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  11.784 s (64206 allocations: 350.47 MiB)


9-element Array{Float64,1}:
 2.437341685966827e6
 2.399415126529132e6
 2.369254506628911e6
 2.3467951793523636e6
 2.3270840769749777e6
 2.313686253289302e6
 2.296110956193394e6
 2.2814086561771547e6
 2.27119959416904e6

In [16]:
# Elbow sweep: time ParallelKMeans Hamerly for k = 2:10 on X_100k and collect each total cost.
# `$X_100k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Hamerly(), $X_100k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  1.339 s (82368 allocations: 43.49 MiB)


9-element Array{Float64,1}:
 244373.19334730235
 240674.19263342157
 236911.93253428186
 234543.29649092205
 232822.1605010765
 230701.10885423486
 229256.8542916515
 227837.66572991127
 226529.47736081632

In [17]:
# Elbow sweep: time ParallelKMeans Hamerly for k = 2:10 on X_10k and collect each total cost.
# `$X_10k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Hamerly(), $X_10k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  94.233 ms (51797 allocations: 9.24 MiB)


9-element Array{Float64,1}:
 24261.872060369802
 23918.03508487201
 23554.980243149133
 23305.029416266672
 23088.523219687137
 22888.398250747316
 22701.768190004157
 22568.54473470184
 22417.85283075115

In [18]:
# Elbow sweep: time ParallelKMeans Hamerly for k = 2:10 on X_1k and collect each total cost.
# `$X_1k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Hamerly(), $X_1k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  6.600 ms (17113 allocations: 2.33 MiB)


9-element Array{Float64,1}:
 2417.8122639415556
 2367.295114693651
 2326.92270431841
 2301.4090836529717
 2262.592058921362
 2237.677144788601
 2216.6865080073426
 2192.842847353733
 2175.1435030668317

## Elkan

In [19]:
# Elbow sweep: time ParallelKMeans Elkan for k = 2:10 on X_1m and collect each total cost.
# `$X_1m` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Elkan(), $X_1m, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  17.591 s (68127 allocations: 702.71 MiB)


9-element Array{Float64,1}:
 2.4375136207882697e6
 2.3994863411646667e6
 2.3698159559913045e6
 2.34656618859055e6
 2.327164977038004e6
 2.3100652099671173e6
 2.2974686486714045e6
 2.2824355819614637e6
 2.268417948648371e6

In [20]:
# Elbow sweep: time ParallelKMeans Elkan for k = 2:10 on X_100k and collect each total cost.
# `$X_100k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Elkan(), $X_100k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  1.074 s (51543 allocations: 75.25 MiB)


9-element Array{Float64,1}:
 243613.14376647965
 239764.98541243855
 236718.47482885054
 234743.26100571707
 232797.09604246088
 230956.8431501608
 229331.63328603297
 228419.87861601743
 226697.66013825266

In [21]:
# Elbow sweep: time ParallelKMeans Elkan for k = 2:10 on X_10k and collect each total cost.
# `$X_10k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Elkan(), $X_10k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  81.995 ms (40317 allocations: 11.46 MiB)


9-element Array{Float64,1}:
 24315.498032064315
 23898.717729445918
 23555.821362137038
 23303.073244830943
 23090.24268729613
 22903.503875280156
 22723.694819363172
 22533.790826789904
 22413.09535077061

In [22]:
# Elbow sweep: time ParallelKMeans Elkan for k = 2:10 on X_1k and collect each total cost.
# `$X_1k` interpolates the non-constant global so the benchmark does not measure
# untyped-global access (BenchmarkTools recommendation).
@btime [ParallelKMeans.kmeans(Elkan(), $X_1k, i; tol=1e-6, max_iters=1000, verbose=false).totalcost for i = 2:10]

  6.953 ms (16616 allocations: 2.62 MiB)


9-element Array{Float64,1}:
 2416.521641930817
 2366.4516136696107
 2327.7402614908065
 2298.342819194774
 2262.073982754183
 2239.6565302511785
 2216.845951416227
 2196.929026408215
 2177.2843007941256