In [1]:
import warnings
# Install a blanket 'ignore' filter: every warning category is silenced for
# the rest of the session.
# NOTE(review): this also hides any numerical / deprecation warnings raised
# by the clustering modules imported below — confirm that is intended.
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from k_means import kMeans
from k_centers import kCenters
from single_swap import singleSwap
from spectral_clustering import spectralClustering
from EM import EM

%matplotlib inline

In [2]:
# Load the two datasets and keep only their underlying value arrays.
# header=None tells pandas not to treat the first CSV row as column names.
clustering, bigClustering = (
    pd.read_csv(csv_path, header=None).values
    for csv_path in ('./data/clustering.csv', './data/bigClusteringData.csv')
)

# Ten colour specs (single-letter codes plus hex strings); presumably used
# for plotting the clusters — the usage is not in this part of the notebook.
colors = [
    'b', 'g', 'r', 'c', 'm', 'y', 'k',
    '#7FFFD4', '#9ACD32', '#FFA500',
]

# I. K-Means Algorithm

In [3]:
# ------------------------------------------------------------------------
# K-Means on the small dataset (`clustering`, read from ./data/clustering.csv)
# ------------------------------------------------------------------------
X = clustering
tol = 1e-5

# Sweep K = 3..10; with verbose=True kMeans prints its own run-time and
# iteration count for each K.
for K in range(3, 11):
    print('K = ', K)
    Q, C, D = kMeans(X, K, tol=tol, random_state=None, verbose=True)

K =  3
K-Means finished in 0.2673s, 14 iters
K =  4
K-Means finished in 0.2543s, 13 iters
K =  5
K-Means finished in 0.3624s, 18 iters
K =  6
K-Means finished in 0.4325s, 22 iters
K =  7
K-Means finished in 0.2837s, 15 iters
K =  8
K-Means finished in 0.1923s, 11 iters
K =  9
K-Means finished in 0.3481s, 18 iters
K =  10
K-Means finished in 0.1989s, 10 iters


In [4]:
# ------------------------------------------------------------------------
# K-Means on the large dataset (`bigClustering`, read from
# ./data/bigClusteringData.csv)
# ------------------------------------------------------------------------
X = bigClustering
tol = 1e-5

# Sweep K = 3..10; with verbose=True kMeans prints its own run-time and
# iteration count for each K.
for K in range(3, 11):
    print('K = ', K)
    Q, C, D = kMeans(X, K, tol=tol, random_state=None, verbose=True)

K =  3
K-Means finished in 0.4725s, 12 iters
K =  4
K-Means finished in 0.8326s, 23 iters
K =  5
K-Means finished in 0.4722s, 13 iters
K =  6
K-Means finished in 1.258s, 38 iters
K =  7
K-Means finished in 0.9619s, 30 iters
K =  8
K-Means finished in 0.7767s, 24 iters
K =  9
K-Means finished in 0.7177s, 21 iters
K =  10
K-Means finished in 1.2369s, 37 iters


# II. Greedy K-Centers Algorithm

In [5]:
# ------------------------------------------------------------------------
# Greedy K-Centers on the small dataset (`clustering`, read from
# ./data/clustering.csv)
# ------------------------------------------------------------------------
X = clustering

# Sweep K = 3..10. kCenters returns four values; the fourth is not needed
# here and is discarded.
for K in range(3, 11):
    print('K = ', K)
    Q, C, D, _ = kCenters(X, K, random_state=None, verbose=True)

K =  3
K-Centers is finished in 0.0067s
K =  4
K-Centers is finished in 0.001s
K =  5
K-Centers is finished in 0.0012s
K =  6
K-Centers is finished in 0.0013s
K =  7
K-Centers is finished in 0.0015s
K =  8
K-Centers is finished in 0.0019s
K =  9
K-Centers is finished in 0.0024s
K =  10
K-Centers is finished in 0.0026s


In [6]:
# ------------------------------------------------------------------------
# Greedy K-Centers on the large dataset (`bigClustering`, read from
# ./data/bigClusteringData.csv)
# ------------------------------------------------------------------------
X = bigClustering

# Sweep K = 3..10. kCenters returns four values; the fourth is not needed
# here and is discarded.
for K in range(3, 11):
    print('K = ', K)
    Q, C, D, _ = kCenters(X, K, random_state=None, verbose=True)

K =  3
K-Centers is finished in 0.002s
K =  4
K-Centers is finished in 0.0019s
K =  5
K-Centers is finished in 0.0021s
K =  6
K-Centers is finished in 0.0024s
K =  7
K-Centers is finished in 0.0025s
K =  8
K-Centers is finished in 0.0034s
K =  9
K-Centers is finished in 0.0037s
K =  10
K-Centers is finished in 0.0041s


# III. Single-Swap Algorithm

In [7]:
# ------------------------------------------------------------------------
# Single-Swap on the small dataset (`clustering`, read from
# ./data/clustering.csv)
# ------------------------------------------------------------------------
X = clustering
tau = 0.05  # forwarded as-is to singleSwap; its meaning is defined in single_swap.py

# Sweep K = 3..10; verbose=True makes singleSwap report its run-time.
for K in range(3, 11):
    print('K = ', K)
    Q, C, D = singleSwap(X, K, tau=tau, random_state=None, verbose=True)

K =  3
Single-Swap is finished in 0.1582s
K =  4
Single-Swap is finished in 0.1971s
K =  5
Single-Swap is finished in 0.1895s
K =  6
Single-Swap is finished in 0.2668s
K =  7
Single-Swap is finished in 0.2693s
K =  8
Single-Swap is finished in 0.3306s
K =  9
Single-Swap is finished in 0.3012s
K =  10
Single-Swap is finished in 0.379s


In [8]:
# ------------------------------------------------------------------------
# Single-Swap on the large dataset (`bigClustering`, read from
# ./data/bigClusteringData.csv)
# ------------------------------------------------------------------------
X = bigClustering
tau = 0.05  # forwarded as-is to singleSwap; its meaning is defined in single_swap.py

# Sweep K = 3..10; verbose=True makes singleSwap report its run-time.
for K in range(3, 11):
    print('K = ', K)
    Q, C, D = singleSwap(X, K, tau=tau, random_state=None, verbose=True)

K =  3
Single-Swap is finished in 0.4511s
K =  4
Single-Swap is finished in 0.5692s
K =  5
Single-Swap is finished in 0.6445s
K =  6
Single-Swap is finished in 0.8809s
K =  7
Single-Swap is finished in 0.7638s
K =  8
Single-Swap is finished in 0.7815s
K =  9
Single-Swap is finished in 0.901s
K =  10
Single-Swap is finished in 1.0744s


# IV. Spectral Clustering Algorithm

In [9]:
# ------------------------------------------------------------------------
# Spectral Clustering on the small dataset (`clustering`, read from
# ./data/clustering.csv)
# ------------------------------------------------------------------------
X = clustering

# Sweep K = 3..10; spectralClustering returns five values (W, U, Q, C, D)
# and, with verbose=True, prints its run-time.
for K in range(3, 11):
    print('K = ', K)
    W, U, Q, C, D = spectralClustering(X, K, random_state=None, verbose=True)

K =  3
Spectral Clustering finished in 1.5716s
K =  4
Spectral Clustering finished in 1.4587s
K =  5
Spectral Clustering finished in 1.4132s
K =  6
Spectral Clustering finished in 1.484s
K =  7
Spectral Clustering finished in 1.4877s
K =  8
Spectral Clustering finished in 1.4708s
K =  9
Spectral Clustering finished in 1.4669s
K =  10
Spectral Clustering finished in 1.5053s


In [10]:
# ------------------------------------------------------------------------
# Spectral Clustering on the large dataset (`bigClustering`, read from
# ./data/bigClusteringData.csv)
# ------------------------------------------------------------------------
X = bigClustering

# Sweep K = 3..10; spectralClustering returns five values (W, U, Q, C, D)
# and, with verbose=True, prints its run-time.
for K in range(3, 11):
    print('K = ', K)
    W, U, Q, C, D = spectralClustering(X, K, random_state=None, verbose=True)

K =  3
Spectral Clustering finished in 11.186s
K =  4
Spectral Clustering finished in 10.2642s
K =  5
Spectral Clustering finished in 10.5166s
K =  6
Spectral Clustering finished in 10.2167s
K =  7
Spectral Clustering finished in 9.8986s
K =  8
Spectral Clustering finished in 9.4616s
K =  9
Spectral Clustering finished in 9.8733s
K =  10
Spectral Clustering finished in 9.8636s


# V. EM Algorithm

In [11]:
# ------------------------------------------------------------------------
# EM on the small dataset (`clustering`, read from ./data/clustering.csv)
# ------------------------------------------------------------------------
X = clustering
threshold = 1e-7

# Sweep K = 3..10, building a fresh EM model per K (m=K, at most 500
# iterations). With verbose=True, train() reports whether it stopped on the
# threshold or on the iteration cap.
for K in range(3, 11):
    print('K = ', K)
    em = EM(m=K, threshold=threshold, random_state=None, maxIter=500)
    em.train(X, verbose=True)

K =  3
Stopped, reach the maximum iteration 0.4412s
K =  4
Stopped, reach the maximum iteration 0.5078s
K =  5
Reach threshold at 4 th iters in 0.4881s
K =  6
Stopped, reach the maximum iteration 0.7462s
K =  7
Stopped, reach the maximum iteration 0.8227s
K =  8
Stopped, reach the maximum iteration 0.9216s
K =  9
Stopped, reach the maximum iteration 1.0106s
K =  10
Stopped, reach the maximum iteration 1.1277s


In [12]:
# ------------------------------------------------------------------------
# EM on the large dataset (`bigClustering`, read from
# ./data/bigClusteringData.csv)
# ------------------------------------------------------------------------
X = bigClustering
threshold = 1e-7

# Sweep K = 3..10, building a fresh EM model per K (m=K, at most 500
# iterations). With verbose=True, train() reports whether it stopped on the
# threshold or on the iteration cap.
for K in range(3, 11):
    print('K = ', K)
    em = EM(m=K, threshold=threshold, random_state=None, maxIter=500)
    em.train(X, verbose=True)

K =  3
Reach threshold at 2 th iters in 0.3412s
K =  4
Reach threshold at 3 th iters in 0.2553s
K =  5
Reach threshold at 4 th iters in 0.5763s
K =  6
Reach threshold at 5 th iters in 0.6196s
K =  7
Stopped, reach the maximum iteration 1.1591s
K =  8
Stopped, reach the maximum iteration 1.3177s
K =  9
Stopped, reach the maximum iteration 1.4977s
K =  10
Stopped, reach the maximum iteration 1.756s
