In [1]:
import numpy as np
from tensorflow.python.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.python.keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
from custom_layers import autoencoder as auto_encoder
from custom_layers import ClusteringLayer
import cv2
import os, glob, shutil

In [2]:
def target_distribution(q):
    """Compute the auxiliary target distribution from the assignments `q`.

    Every entry of `q` is squared and divided by the total of its column
    (`q.sum(0)`); each row of the result is then rescaled to sum to one.

    Args:
        q: 2-D NumPy array. Presumably one row per sample and one column
            per cluster -- confirm against the caller.

    Returns:
        Array with the same shape as `q` whose rows each sum to 1.
    """
    column_totals = q.sum(axis=0)
    sharpened = q ** 2 / column_totals
    # keepdims keeps the row totals as a column so they broadcast per row.
    row_totals = sharpened.sum(axis=1, keepdims=True)
    return sharpened / row_totals

In [3]:
def _load_resized(pattern, size=(224, 224)):
    """Read every file matching `pattern` with OpenCV and resize it to `size`."""
    return [cv2.resize(cv2.imread(path), size) for path in glob.glob(pattern)]


x_train = _load_resized("input/train/images/*.png")
# NOTE(review): the test pattern is rooted at "images/" while the training
# pattern is rooted at "input/" -- confirm this is the intended folder.
x_test = _load_resized("images/test/images/*.png")
print("Images loaded")

Images loaded


In [4]:
n_clusters = 5  # Number of clusters to partition the images into.

# Stack every loaded image into a single array. The previous version
# concatenated `x_train` with itself, which only duplicated the training
# images and left the loaded `x_test` unused. Training images stay first, so
# row i of `x` still corresponds to the i-th training image. Empty sets are
# skipped because np.concatenate cannot join an empty list with image arrays.
image_sets = [np.asarray(images) for images in (x_train, x_test) if len(images) > 0]
if not image_sets:
    raise ValueError("No images were loaded; check the input glob patterns.")
x = np.concatenate(image_sets)

# Flatten each image into one feature vector: (n_images, values_per_image).
x = x.reshape((x.shape[0], -1))

# Divide by 255 (assumes 8-bit pixel data -- TODO confirm). float32 is Keras'
# default float type and uses half the memory of the float64 array that
# np.divide(x, 255.) would produce from integer input.
x = x.astype(np.float32) / 255.0

In [5]:
# Autoencoder layout.
# dims[0] is the flattened size of one input sample (x.shape[-1]); the
# remaining entries are handed to auto_encoder(), presumably as the widths
# of its successive layers -- confirm in custom_layers.
dims = [x.shape[-1]] + [500, 500, 2000, 10]
init = VarianceScaling(distribution='uniform', mode='fan_in', scale=1.0 / 3.0)

# Pre-training settings.
pretrain_epochs = 20
batch_size = 500
pretrain_optimizer = SGD(momentum=0.9, learning_rate=1)

# Folder that the weight files are written to and read from.
save_dir = './weights'

In [None]:
# Create the weights folder before training: writing an .h5 file does not
# create missing parent directories, and discovering that only after the
# final epoch would throw the whole training run away.
os.makedirs(save_dir, exist_ok=True)

# Build the autoencoder together with its encoder part.
autoencoder, encoder = auto_encoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')

print("Starting training")
# The input is also the target: the network is trained to reconstruct x.
autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs)
autoencoder.save_weights(os.path.join(save_dir, 'ae_weights.h5'))

Starting training
Epoch 1/20


In [None]:
# Write the autoencoder weights to disk. The folder is created first because
# saving to an .h5 path fails when the parent directory does not exist.
os.makedirs(save_dir, exist_ok=True)
autoencoder.save_weights(os.path.join(save_dir, 'ae_weights.h5'))

In [None]:
# Restore the autoencoder weights from the file stored under save_dir.
ae_weights_path = "{}/ae_weights.h5".format(save_dir)
autoencoder.load_weights(ae_weights_path)

In [None]:
# Build the clustering model: the encoder's output is fed through a
# ClusteringLayer configured with n_clusters, and the resulting model is
# compiled with the 'kld' (KL divergence) loss.
# Note: `clustering_layer` holds the layer's output tensor, not the layer.
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(
    loss='kld',
    optimizer=SGD(learning_rate=0.01, momentum=0.9),
)

In [None]:
# Run k-means on the encoder's output for x and use the resulting centroids
# as the initial weights of the 'clustering' layer.
embedded = encoder.predict(x)
kmeans = KMeans(n_clusters=n_clusters, n_init=100, max_iter=1000, verbose=True)
y_pred = kmeans.fit_predict(embedded)
# Independent copy of the labels, so later changes to y_pred do not alter it.
y_pred_last = y_pred.copy()
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

In [None]:
# Write the clustering model's weights to disk. The folder is created first
# because saving to an .h5 path fails when the parent directory is missing.
os.makedirs(save_dir, exist_ok=True)
model.save_weights(os.path.join(save_dir, 'DEC_model_final.h5'))

In [None]:
# Restore the clustering model's weights from the file stored under save_dir.
dec_weights_path = "{}/DEC_model_final.h5".format(save_dir)
model.load_weights(dec_weights_path)

In [None]:
# Copy every training image into the output folder of its predicted cluster.
# NOTE(review): labels are matched to files by position, which assumes this
# glob lists the files in the same order as the glob used to load x_train.
paths = glob.glob("input/train/images/*.png")
print("paths found")

# Start from an empty folder for each cluster: drop any previous one first.
for cluster_id in range(n_clusters):
    cluster_dir = "output/cluster{}".format(cluster_id)
    if os.path.exists(cluster_dir):
        shutil.rmtree(cluster_dir)
    os.makedirs(cluster_dir)
    print(cluster_dir)

print("Moving Images")
for index, source_path in enumerate(paths):
    # copy2 copies the file together with its metadata; the source stays put.
    shutil.copy2(source_path, "output/cluster{}".format(y_pred_last[index]))
    print("{}/{} Images Copied".format(index + 1, len(paths)))

In [None]:
# TODO: unfinished cell. It held only a bare `for`, which is a SyntaxError and
# stops "Run All"; the dangling statement is dropped until the loop is written.
# The commented-out line below is kept as the author's starting point
# (presumably for reloading the images of a single cluster -- confirm intent).
#x_train = [cv2.resize(cv2.imread(file), (224, 224)) for file in glob.glob("output/cluster0/*.png")]