In [1]:
from time import time
import numpy as np
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Layer, InputSpec, Dense, Input
from tensorflow.python.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.python.keras import callbacks
from tensorflow.python.keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
import metrics
from custom_layers import autoencoder as auto_encoder
from custom_layers import ClusteringLayer
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
import cv2
import os, glob, shutil

In [2]:
# computing an auxiliary target distribution
def target_distribution(q):
    weight = q ** 2 / q.sum(0)
    return (weight.T / weight.sum(1)).T

In [3]:
x_train = [cv2.resize(cv2.imread(file), (224, 224)) for file in glob.glob("input/train/images/*.png")]
x_test = [cv2.resize(cv2.imread(file), (224, 224)) for file in glob.glob("input/test/images/*.png")]
print("Images loaded")

Images loaded


In [4]:
n_clusters = 5  # No. of clusters
x = np.concatenate((x_train, x_test))
x = x.reshape((x.shape[0], -1))
x = np.divide(x, 255.)

In [5]:
# dim[0] = og dimensions (w * h) dim[1:] = layers of auto encoder. 
# Numbers equal the shape the encoder forces the data into in the layer.
dims = [x.shape[-1], 500, 500, 2000, 10]
init = VarianceScaling(scale=1. / 3., mode='fan_in', distribution='uniform')
pretrain_optimizer = SGD(learning_rate=1, momentum=0.9)
pretrain_epochs = 50
batch_size = 800
save_dir = './weights'

In [6]:
autoencoder, encoder = auto_encoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs)  # , callbacks=cb)
autoencoder.save_weights(save_dir + '/ae_weights.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [7]:
autoencoder.save_weights(save_dir + '/ae_weights.h5')

In [8]:
autoencoder.load_weights(save_dir + '/ae_weights.h5')

In [9]:
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=SGD(0.01, 0.9), loss='kld')

In [10]:
kmeans = KMeans(n_clusters=n_clusters, n_init=20, verbose=True)
y_pred = kmeans.fit_predict(encoder.predict(x))
y_pred_last = np.copy(y_pred)
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

Initialization complete
start iteration
done sorting
end inner loop
Iteration 0, inertia 38.513264
start iteration
done sorting
end inner loop
Iteration 1, inertia 37.109695
start iteration
done sorting
end inner loop
Iteration 2, inertia 36.854797
start iteration
done sorting
end inner loop
Iteration 3, inertia 36.806725
start iteration
done sorting
end inner loop
Iteration 4, inertia 36.802864
center shift 3.998117e-06 within tolerance 8.005982e-06
Initialization complete
start iteration
done sorting
end inner loop
Iteration 0, inertia 58.556786
start iteration
done sorting
end inner loop
Iteration 1, inertia 56.123913
start iteration
done sorting
end inner loop
Iteration 2, inertia 55.45099
start iteration
done sorting
end inner loop
Iteration 3, inertia 55.283646
start iteration
done sorting
end inner loop
Iteration 4, inertia 55.228653
start iteration
done sorting
end inner loop
Iteration 5, inertia 55.209385
start iteration
done sorting
end inner loop
Iteration 6, inertia 55.2055

Iteration 9, inertia 36.83713
start iteration
done sorting
end inner loop
Iteration 10, inertia 36.826477
center shift 3.929385e-06 within tolerance 8.005982e-06
Initialization complete
start iteration
done sorting
end inner loop
Iteration 0, inertia 41.779846
start iteration
done sorting
end inner loop
Iteration 1, inertia 38.664898
start iteration
done sorting
end inner loop
Iteration 2, inertia 37.821346
start iteration
done sorting
end inner loop
Iteration 3, inertia 37.02935
start iteration
done sorting
end inner loop
Iteration 4, inertia 36.84849
start iteration
done sorting
end inner loop
Iteration 5, inertia 36.82211
start iteration
done sorting
end inner loop
Iteration 6, inertia 36.805622
start iteration
done sorting
end inner loop
Iteration 7, inertia 36.804268
center shift 2.535639e-06 within tolerance 8.005982e-06
Initialization complete
start iteration
done sorting
end inner loop
Iteration 0, inertia 47.449253
start iteration
done sorting
end inner loop
Iteration 1, inert

In [11]:
loss = 0
index = 0
maxiter = 8000
update_interval = 140
index_array = np.arange(x.shape[0])
tol = 0.001
y = None

In [12]:

for ite in range(int(maxiter)):
    if ite % update_interval == 0:
        q = model.predict(x, verbose=0)
        p = target_distribution(q)  # update the auxiliary target distribution p

        # evaluate the clustering performance
        y_pred = q.argmax(1)
        if y is not None:
            acc = np.round(metrics.acc(y, y_pred), 5)
            nmi = np.round(metrics.nmi(y, y_pred), 5)
            ari = np.round(metrics.ari(y, y_pred), 5)
            loss = np.round(loss, 5)
            print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)

        # check stop criterion - model convergence
        delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
        y_pred_last = np.copy(y_pred)
        if ite > 0 and delta_label < tol:
            print('delta_label ', delta_label, '< tol ', tol)
            print('Reached tolerance threshold. Stopping training.')
            break
    idx = index_array[index * batch_size: min((index + 1) * batch_size, x.shape[0])]
    loss = model.train_on_batch(x=x[idx], y=p[idx])
    index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

model.save_weights(save_dir + '/DEC_model_final.h5')

KeyboardInterrupt: 

In [16]:
paths = glob.glob("input/train/images/*.png")
print("paths found")
for i in range(n_clusters):
    shutil.rmtree("output\cluster" + str(i))
    os.makedirs("output\cluster" + str(i))
    print("output\cluster" + str(i))
print("Moving Images")
for i in range(len(paths)):
    shutil.copy2(paths[i], "output/cluster"+str(y_pred_last[i]))
    print(str(i) + "/" + str(len(paths)) + " Images Copied")

paths found
output\cluster0
output\cluster1
output\cluster2
output\cluster3
output\cluster4
Moving Images
0/800 Images Copied
1/800 Images Copied
2/800 Images Copied
3/800 Images Copied
4/800 Images Copied
5/800 Images Copied
6/800 Images Copied
7/800 Images Copied
8/800 Images Copied
9/800 Images Copied
10/800 Images Copied
11/800 Images Copied
12/800 Images Copied
13/800 Images Copied
14/800 Images Copied
15/800 Images Copied
16/800 Images Copied
17/800 Images Copied
18/800 Images Copied
19/800 Images Copied
20/800 Images Copied
21/800 Images Copied
22/800 Images Copied
23/800 Images Copied
24/800 Images Copied
25/800 Images Copied
26/800 Images Copied
27/800 Images Copied
28/800 Images Copied
29/800 Images Copied
30/800 Images Copied
31/800 Images Copied
32/800 Images Copied
33/800 Images Copied
34/800 Images Copied
35/800 Images Copied
36/800 Images Copied
37/800 Images Copied
38/800 Images Copied
39/800 Images Copied
40/800 Images Copied
41/800 Images Copied
42/800 Images Copied
4

383/800 Images Copied
384/800 Images Copied
385/800 Images Copied
386/800 Images Copied
387/800 Images Copied
388/800 Images Copied
389/800 Images Copied
390/800 Images Copied
391/800 Images Copied
392/800 Images Copied
393/800 Images Copied
394/800 Images Copied
395/800 Images Copied
396/800 Images Copied
397/800 Images Copied
398/800 Images Copied
399/800 Images Copied
400/800 Images Copied
401/800 Images Copied
402/800 Images Copied
403/800 Images Copied
404/800 Images Copied
405/800 Images Copied
406/800 Images Copied
407/800 Images Copied
408/800 Images Copied
409/800 Images Copied
410/800 Images Copied
411/800 Images Copied
412/800 Images Copied
413/800 Images Copied
414/800 Images Copied
415/800 Images Copied
416/800 Images Copied
417/800 Images Copied
418/800 Images Copied
419/800 Images Copied
420/800 Images Copied
421/800 Images Copied
422/800 Images Copied
423/800 Images Copied
424/800 Images Copied
425/800 Images Copied
426/800 Images Copied
427/800 Images Copied
428/800 Im

762/800 Images Copied
763/800 Images Copied
764/800 Images Copied
765/800 Images Copied
766/800 Images Copied
767/800 Images Copied
768/800 Images Copied
769/800 Images Copied
770/800 Images Copied
771/800 Images Copied
772/800 Images Copied
773/800 Images Copied
774/800 Images Copied
775/800 Images Copied
776/800 Images Copied
777/800 Images Copied
778/800 Images Copied
779/800 Images Copied
780/800 Images Copied
781/800 Images Copied
782/800 Images Copied
783/800 Images Copied
784/800 Images Copied
785/800 Images Copied
786/800 Images Copied
787/800 Images Copied
788/800 Images Copied
789/800 Images Copied
790/800 Images Copied
791/800 Images Copied
792/800 Images Copied
793/800 Images Copied
794/800 Images Copied
795/800 Images Copied
796/800 Images Copied
797/800 Images Copied
798/800 Images Copied
799/800 Images Copied
