In [None]:
#K-Means
import sklearn
import keras
import sys
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix, normalized_mutual_info_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, UpSampling2D, Activation
from keras import backend as K


# Load Fashion-MNIST as (train images, train labels), (test images, test labels).
(x_train, y_train),(x_test, y_test)=tf.keras.datasets.fashion_mnist.load_data()
# Stack train and test. NOTE(review): `x` is rebound two statements below, so
# the combined images are only ever used for this shape print, and `y` is not
# read again in the visible cells -- the clustering below uses x_train only.
x=np.concatenate((x_train, x_test))
y=np.concatenate((y_train, y_test))
print(x.shape)
# Flatten every training image to a single row, then divide by 255 as floats.
x=x_train.reshape((x_train.shape[0],-1))
x=np.divide(x.astype(float),255)
# Fixed seed so centroid initialisation -- and every cluster-to-label number
# derived from it -- is identical on each Restart & Run All. 123 matches the
# seed already used for train_test_split in this notebook.
kmeans = KMeans(n_clusters = 10, random_state = 123)
kmeans.fit(x)

def infer_cluster_labels(kmeans, actual_labels):
    """Map each ground-truth label to the clusters in which it is the majority.

    Args:
        kmeans: Fitted clustering estimator exposing ``n_clusters`` and
            ``labels_`` (one cluster id per sample).
        actual_labels: Non-negative integer class labels aligned
            index-for-index with ``kmeans.labels_``. Anything accepted by
            ``np.asarray`` works, including ``(n, 1)`` column vectors.

    Returns:
        dict: ``{label: [cluster_id, ...]}`` with plain ``int`` keys. Clusters
        that received no samples are left out, because they have no majority
        label to vote for.
    """
    # ravel() lets list input and (n, 1) label arrays go through the same path.
    actual_labels = np.asarray(actual_labels).ravel()
    assignments = np.asarray(kmeans.labels_)

    inferred_labels = {}
    for cluster in range(kmeans.n_clusters):
        members = actual_labels[assignments == cluster]
        if members.size == 0:
            # bincount of nothing is empty and argmax would raise ValueError;
            # an empty cluster simply has no label to contribute.
            continue
        # argmax returns the first maximum, so ties go to the smallest label.
        majority = int(np.argmax(np.bincount(members)))
        inferred_labels.setdefault(majority, []).append(cluster)
    return inferred_labels

def infer_data_labels(X_labels, cluster_labels):
    """Translate per-sample cluster ids into class labels.

    Args:
        X_labels: 1-D sequence of cluster ids, one per sample.
        cluster_labels: ``{label: [cluster_id, ...]}`` mapping, in the form
            produced by ``infer_cluster_labels``.

    Returns:
        np.ndarray: ``uint8`` array with one label per entry of ``X_labels``.
        Samples whose cluster is not listed under any label keep the value 0.
    """
    X_labels = np.asarray(X_labels)
    predicted_labels = np.zeros(len(X_labels), dtype=np.uint8)
    # One vectorised assignment per label instead of a Python loop over every
    # sample. Later dict entries overwrite earlier ones, as before.
    for label, clusters in cluster_labels.items():
        predicted_labels[np.isin(X_labels, clusters)] = label
    return predicted_labels
    
# Number of clusters. NOTE(review): nothing in this cell reads the name before
# the `for n_clusters in clusters` loop further down rebinds it.
n_clusters=10
# Majority-vote mapping {label: [cluster ids]} taken from the fitted KMeans model.
cluster_labels = infer_cluster_labels(kmeans, y_train)
# Cluster id for every row of the flattened, scaled training matrix `x`.
X_clusters = kmeans.predict(x)
# Class label implied for each sample by the cluster it landed in.
predicted_labels = infer_data_labels(X_clusters, cluster_labels)
# Eyeball check: first 20 inferred labels next to the first 20 true labels.
print(predicted_labels[:20])
print(y_train[:20])
from sklearn import metrics

def calculate_metrics(estimator, data, labels):
    """Print homogeneity, inertia and cluster count for a fitted estimator.

    Args:
        estimator: Fitted clustering model exposing ``labels_``, ``inertia_``
            and ``n_clusters``.
        data: Not used by this function; kept in the signature for callers.
        labels: Ground-truth labels scored against ``estimator.labels_``.
    """
    homogeneity = metrics.homogeneity_score(labels, estimator.labels_)
    print('Homogeneity: {}'.format(homogeneity))

    inertia = estimator.inertia_
    print('Inertia: {}'.format(inertia))

    cluster_count = estimator.n_clusters
    print('Number of Clusters: {}'.format(cluster_count))
    
    
# Cluster counts to evaluate; currently only the 10-cluster case.
clusters = [10]
for n_clusters in clusters:
    # Fixed seed so mini-batch sampling and centroid initialisation -- and
    # therefore the metrics printed below -- are reproducible across runs.
    # 123 matches the seed used for train_test_split in this notebook.
    estimator = MiniBatchKMeans(n_clusters = n_clusters, random_state = 123)
    estimator.fit(x)
    calculate_metrics(estimator, x, y_train)
    # Majority-vote each cluster to a class label, then score the per-sample
    # labels implied by that mapping against the true training labels.
    cluster_labels = infer_cluster_labels(estimator, y_train)
    predicted_Y = infer_data_labels(estimator.labels_, cluster_labels)
    print('KMeans Accuracy: {}\n'.format(metrics.accuracy_score(y_train, predicted_Y)))

In [None]:
# Normalizing: reshape images to (N, 28, 28, 1) and divide by 255, matching the
# input_shape=(28,28,1) expected by the first Conv2D layer of the autoencoder.
# NOTE(review): this cell rebinds x_train / x_test / y_train in place, so
# running it a second time without re-loading the data divides by 255 again and
# re-splits the already-reduced training set.
x_train=x_train.reshape(-1,28,28,1)/255
x_test=x_test.reshape(-1,28,28,1)/255
# Hold out 20% of the training images for validation, stratified on the class
# labels, with a fixed random_state so the split is repeatable.
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.2, stratify=y_train, random_state=123)

In [None]:
#Auto Encoder using K-Means clustering
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import SGD
# Convolutional autoencoder assembled from a single layer list.
# Every conv uses 'same' padding, so only pooling / upsampling change the
# spatial size: 28x28 -> 14x14 -> 7x7 on the way down, then back up to 28x28.
model = Sequential([
    # --- encoder ---
    Conv2D(14, kernel_size=3, padding='same', activation='relu', input_shape=(28,28,1)),
    MaxPool2D((2,2), padding='same'),
    Dropout(0.2),
    Conv2D(7, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D((2,2), padding='same'),
    Dropout(0.2),
    # --- decoder ---
    Conv2D(7, kernel_size=3, padding='same', activation='relu'),
    UpSampling2D((2,2)),
    Dropout(0.2),
    Conv2D(14, kernel_size=3, padding='same', activation='relu'),
    UpSampling2D((2,2)),
    Dropout(0.2),
    # Single-channel output so the reconstruction has the input's shape.
    Conv2D(1, kernel_size=3, padding='same', activation='relu'),
])

# SGD's two positional arguments are presumably (learning rate, momentum) --
# confirm against the installed Keras version.
# NOTE(review): 'accuracy' is tracked although the target is a continuous
# image and the loss is MSE; treat that metric with caution.
model.compile(
    optimizer=SGD(0.01,0.9),
    loss="mse",
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train the autoencoder to reproduce its own input: the images serve as both
# features and targets, for the training and the validation set alike.
history_AE = model.fit(
    x=x_train,
    y=x_train,
    batch_size=256,
    epochs=10,
    verbose=1,
    validation_data=(x_validate, x_validate),
)

In [None]:
def lossPlot(history=None):
    """Plot training and validation loss against the epoch index.

    Args:
        history: Object whose ``.history`` dict holds ``'loss'`` and
            ``'val_loss'`` sequences, such as the value ``model.fit`` returns.
            Defaults to the notebook-level ``history_AE``, so the existing
            zero-argument call keeps working.
    """
    if history is None:
        # Resolved at call time, so the latest training run is plotted.
        history = history_AE
    losses = history.history

    # Labelled lines let legend() pick up handles and text by itself.
    plt.plot(losses['loss'], "r--", label="Training Loss")
    plt.plot(losses['val_loss'], "b--", label="Validation Loss")
    plt.legend()
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training Loss & Validation Loss vs Number of Epochs Graph")
    plt.show()
lossPlot()