In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn import preprocessing
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn import metrics
from numpy import load
from keras.utils import to_categorical
from sklearn.metrics import classification_report
from keras.callbacks import EarlyStopping
from sklearn.cluster import Birch

In [None]:
import os

# Folder that holds the .npy files. File names are appended directly to this string,
# so it must end with a path separator (os.path.join(..., '') guarantees that).
# Set the COGWEAR_DATA_DIR environment variable to run the notebook on another
# machine without editing the code; the original location stays the default.
url = os.path.join(os.environ.get('COGWEAR_DATA_DIR', 'C:/Users/iqbal/Desktop/CogWear/'), '')

In [None]:
# File names of the training feature arrays; each entry is appended to `url`
# and read with numpy's load in the cells below.
# NOTE(review): 'load npy files' looks like a placeholder — replace with the real .npy names.
Xtrain_list = ['load npy files']

# File names of the training label arrays, consumed the same way.
Ytrain_list = ['load npy files']

In [None]:
# File names of the test feature arrays; each entry is appended to `url`
# and read with numpy's load in the cells below.
# NOTE(review): 'load npy files' looks like a placeholder — replace with the real .npy names.
Xtest_list = ['load npy files']

# File names of the test label arrays, consumed the same way.
Ytest_list = ['load npy files']

In [None]:
# Read every training-feature file and stack them along the sample axis (axis 0).
# A single concatenate replaces np.append in a loop, which re-copied the whole
# accumulated array for every additional file.
X_train = np.concatenate([load(url + fname) for fname in Xtrain_list], axis = 0)

In [None]:
# Read every test-feature file and stack them along the sample axis (axis 0).
# A single concatenate replaces np.append in a loop, which re-copied the whole
# accumulated array for every additional file.
X_test = np.concatenate([load(url + fname) for fname in Xtest_list], axis = 0)

In [None]:
# Read every training-label file and join them in the same order as the feature files,
# using one concatenate instead of repeatedly growing the array with np.append.
y_train = np.concatenate([load(url + fname) for fname in Ytrain_list], axis = 0)

In [None]:
# Read every test-label file and join them in the same order as the feature files,
# using one concatenate instead of repeatedly growing the array with np.append.
y_test = np.concatenate([load(url + fname) for fname in Ytest_list], axis = 0)

In [None]:
# Recode the string labels: 'baseline' -> 0, 'cognitive_load' -> 1. Any other value is
# kept as-is because the original array is passed as the default.
# NOTE(review): the output dtype is decided by NumPy promoting the int choices with the
# label array — confirm downstream code receives the type it expects.
y_train = np.select([y_train == 'baseline', y_train == 'cognitive_load'], [0, 1], y_train)

In [None]:
# Recode the string labels: 'baseline' -> 0, 'cognitive_load' -> 1. Any other value is
# kept as-is because the original array is passed as the default.
# NOTE(review): the output dtype is decided by NumPy promoting the int choices with the
# label array — confirm downstream code receives the type it expects.
y_test = np.select([y_test == 'baseline', y_test == 'cognitive_load'], [0, 1], y_test)

In [None]:
# One-hot encode the recoded training labels for the softmax / categorical_crossentropy network.
Y_train = to_categorical(y_train)

In [None]:
# One-hot encode the recoded test labels; used as validation targets when fitting the network.
Y_test = to_categorical(y_test)

In [None]:
# Encoder that maps the recoded labels to integer class ids for the clustering metrics.
le = preprocessing.LabelEncoder()

In [None]:
# Learn the label -> integer mapping from the training labels and apply it to them.
yt_train = le.fit_transform(y_train)

In [None]:
# Reuse the mapping learned from the training labels instead of re-fitting on the test
# labels: this guarantees both splits share the same class ids, and an unseen test
# label raises an error rather than being silently assigned a new id.
yt_test = le.transform(y_test)

# Parameters to test
### Number of Features: 64, 128, 256
### Number of Epochs: 10, 20
### Batch Size: 30, 60
### Number of Clusters: 2, 4, 6

In [None]:
from datetime import datetime

# Give every run its own timestamped TensorBoard log directory under logs/fit/.
logdir = f"logs/fit/{datetime.now():%Y%m%d-%H%M%S}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
# 1-D convolutional classifier: eight Conv1D + MaxPool1D stages, then dropout and a dense head.
conv_filters = (16, 32, 64, 128, 256, 512, 256, 128)
input_shape = (X_train.shape[1], X_train.shape[2])

model = tf.keras.Sequential()
for stage, n_filters in enumerate(conv_filters):
    # Only the first layer declares the input shape; later layers infer theirs.
    first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
    model.add(layers.Conv1D(n_filters, 3, padding='same', activation=tf.keras.layers.LeakyReLU(), **first_layer_kwargs))
    model.add(layers.MaxPool1D(3, strides=2, padding='same'))
model.add(layers.Dropout(0.8))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation=tf.keras.layers.LeakyReLU()))
model.add(layers.Dense(2, activation='softmax'))

adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

# The test split doubles as validation data, so its curves are logged after every epoch.
history = model.fit(
    X_train,
    Y_train,
    epochs = 30,
    batch_size = 900,
    validation_data= (X_test, Y_test),
    callbacks=[tensorboard_callback],
)

# Reuse the trained network up to its 128-unit dense layer (second-to-last layer)
# as a feature extractor for the clustering experiments.
feature_extractor = tf.keras.Model(inputs=model.inputs, outputs=model.layers[-2].output)
features_train_f = feature_extractor.predict(X_train)
features_test_f = feature_extractor.predict(X_test)

In [None]:
%tensorboard --logdir logs

In [None]:
from tensorflow.keras.utils import plot_model
# Render the layer graph of the network inline (default options).
plot_model(model)

In [None]:
import tensorboard

In [None]:

import os

dot_img_file = 'tmp/model_1.png'
# The target folder may not exist on a fresh checkout; create it so writing the image cannot fail on that.
os.makedirs(os.path.dirname(dot_img_file), exist_ok=True)
# Save the layer graph, including tensor shapes, to the PNG file.
tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
import visualkeras
from PIL import ImageFont

In [None]:
# Arial is not installed on every platform; fall back to Pillow's built-in font
# instead of failing with an OSError when the file cannot be found.
try:
    legend_font = ImageFont.truetype("arial.ttf", 12)
except OSError:
    legend_font = ImageFont.load_default()

# Layered architecture diagram with a legend; the image is the cell's output.
visualkeras.layered_view(model, legend=True, font=legend_font, draw_volume=True,spacing=30,)

In [None]:
# Training vs. validation accuracy per epoch, taken from the History returned by model.fit.
# The legend and title are needed to tell the two curves apart when skimming the notebook.
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch, taken from the History returned by model.fit.
# The legend and title are needed to tell the two curves apart when skimming the notebook.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend()
plt.show()

In [None]:
# Scale each extracted feature vector with sklearn's normalize (default settings).
# normalize is stateless, so the train and test features are scaled independently.
scaled_features_train = preprocessing.normalize(features_train_f)
scaled_features_test = preprocessing.normalize(features_test_f)

In [None]:
# Two-cluster K-means on the training features. The seed is fixed so centroid
# initialisation, and therefore the cluster numbering, is repeatable across runs
# (the KMeans fitted later in this notebook is seeded the same way).
kmeans = KMeans(n_clusters = 2, random_state = 0)
# Note: fit() returns the estimator itself, so `clusters_kmean` is the fitted model, not labels.
clusters_kmean = kmeans.fit(scaled_features_train)

In [None]:
# Assign every test sample to one of the clusters learned on the training features.
y_pred = kmeans.predict(scaled_features_test)

In [None]:
# Cluster ids are arbitrary, so 0/1 may be swapped relative to the class ids.
# Keep whichever orientation agrees better with the true test labels. Unlike an
# unconditional flip, re-running this cell cannot undo the alignment.
y_pred_t = 1 - y_pred
if np.mean(y_pred_t == yt_test) > np.mean(y_pred == yt_test):
    y_pred = y_pred_t

In [None]:
# External metrics: compare the predicted cluster ids with the true test class ids.
print(f"Homogeneity: {metrics.homogeneity_score(yt_test, y_pred):.3f}")
print(f"Completeness: {metrics.completeness_score(yt_test, y_pred):.3f}")
print(f"V-measure: {metrics.v_measure_score(yt_test, y_pred):.3f}")
print(f"Adjusted Rand Index: {metrics.adjusted_rand_score(yt_test, y_pred):.3f}")
print(f"Adjusted Mutual Information: {metrics.adjusted_mutual_info_score(yt_test, y_pred):.3f}")
# Internal metric: computed from the test features and the predicted ids only.
print(f"Silhouette Coefficient: {metrics.silhouette_score(scaled_features_test, y_pred):.3f}")
# Inertia of the KMeans model, i.e. of its fit on the training features.
print(f"Inertia: {kmeans.inertia_:.3f}")

In [None]:
# Per-class precision / recall / F1, treating the cluster ids as class predictions.
print(classification_report(yt_test, y_pred))

In [None]:
from scipy.spatial.distance import cdist
import numpy as np

class ClusterSimilarityMatrix():
    """Accumulate a co-association matrix over successive clusterings.

    Entry (i, j) of ``similarity`` counts how many of the clusterings passed to
    ``fit`` so far put samples i and j in the same cluster.
    """

    def __init__(self) -> None:
        # ``similarity`` is only created on the first call to ``fit``.
        self._is_fitted = False

    def fit(self, y_clusters):
        """Add one clustering to the running co-association counts.

        Parameters
        ----------
        y_clusters : 1-D array-like of cluster labels, one per sample. Every call
            must describe the same samples in the same order.

        Returns
        -------
        self, on every call (previously only the first call returned ``self``
        and later calls returned ``None``).
        """
        co_membership = self.to_binary_matrix(y_clusters)
        if not self._is_fitted:
            self._is_fitted = True
            self.similarity = co_membership
        else:
            self.similarity += co_membership
        return self

    def to_binary_matrix(self, y_clusters):
        """Return an (n, n) int matrix with 1 where two samples share a label, else 0."""
        labels = np.asarray(y_clusters)
        # Broadcast equality gives the same result as a zero pairwise distance
        # between labels, without computing a floating-point distance matrix.
        return (labels[:, None] == labels[None, :]).astype(int)


class EnsembleCustering():
    """Consensus clustering built from the co-association of several base clusterings.

    Parameters
    ----------
    base_estimators : iterable of clusterers exposing ``fit_predict(X=...)``.
    aggregator : clusterer exposing ``fit_predict``; it receives the normalised
        co-association matrix (or its log-distance form) instead of raw features.
    distances : if True, turn the similarities into distances with ``|log(s + 1e-8)|``
        before they are handed to the aggregator.
    """

    def __init__(self, base_estimators, aggregator, distances=False):
        self.base_estimators = base_estimators
        self.aggregator = aggregator
        self.distances = distances

    def fit(self, X):
        """Run every base estimator on ``X`` and build ``self.cluster_matrix``.

        Returns ``self`` so calls can be chained.

        Raises
        ------
        ValueError
            If ``base_estimators`` is empty (there would be nothing to aggregate).
        """
        estimators = list(self.base_estimators)
        if not estimators:
            raise ValueError("base_estimators must contain at least one estimator")

        # Work on a copy so base estimators cannot modify the caller's data.
        X_ = X.copy()

        clt_sim_matrix = ClusterSimilarityMatrix()
        for estimator in estimators:
            clt_sim_matrix.fit(estimator.fit_predict(X=X_))

        sim_matrix = clt_sim_matrix.similarity
        # Every sample co-occurs with itself in every clustering, so the diagonal holds the
        # number of clusterings; dividing by it rescales the counts to the range [0, 1].
        self.cluster_matrix = sim_matrix/sim_matrix.diagonal()

        if self.distances:
            self.cluster_matrix = np.abs(np.log(self.cluster_matrix + 1e-8)) # Avoid log(0)

        return self

    def fit_predict(self, X):
        """Build the co-association matrix for ``X`` and return the aggregator's labels for it."""
        self.fit(X)
        y = self.aggregator.fit_predict(self.cluster_matrix)
        return y

In [None]:
from sklearn.cluster import MiniBatchKMeans, KMeans, SpectralClustering, Birch, BisectingKMeans, AgglomerativeClustering, DBSCAN, MeanShift, OPTICS
from scipy.sparse.csgraph import connected_components
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# Number of base clusterings that vote in the ensemble.
NUM_KMEANS = 100

# Build one estimator object per ensemble member. `N * [estimator]` would put the
# same instance in the list N times, so every "member" would share one fitted state.
clustering_models = [
    MiniBatchKMeans(n_clusters=16, n_init=1, max_iter=100)
    for _ in range(NUM_KMEANS)
]
# The aggregator clusters the co-association matrix directly, hence affinity="precomputed".
aggregator_clt = SpectralClustering(n_clusters=2, affinity="precomputed")

ens_clt=EnsembleCustering(clustering_models, aggregator_clt)
# The ensemble has no separate predict step: it is fitted on, and labels, the test features.
y_ensemble = ens_clt.fit_predict(scaled_features_test)

In [None]:
# Cluster ids are arbitrary, so 0/1 may be swapped relative to the class ids.
# Keep whichever orientation agrees better with the true test labels. Unlike an
# unconditional flip, re-running this cell cannot undo the alignment.
y_ensemble_flipped = 1 - y_ensemble
if np.mean(y_ensemble_flipped == yt_test) > np.mean(y_ensemble == yt_test):
    y_ensemble = y_ensemble_flipped

In [None]:
# External metrics: compare the ensemble's cluster ids with the true test class ids.
print(f"Homogeneity: {metrics.homogeneity_score(yt_test, y_ensemble):.3f}")
print(f"Completeness: {metrics.completeness_score(yt_test, y_ensemble):.3f}")
print(f"V-measure: {metrics.v_measure_score(yt_test, y_ensemble):.3f}")
print(f"Adjusted Rand Index: {metrics.adjusted_rand_score(yt_test, y_ensemble):.3f}")
print(f"Adjusted Mutual Information: {metrics.adjusted_mutual_info_score(yt_test, y_ensemble):.3f}")
# Internal metric: computed from the test features and the ensemble's ids only.
print(f"Silhouette Coefficient: {metrics.silhouette_score(scaled_features_test, y_ensemble):.3f}")
# No inertia line here: the value printed before came from the separate KMeans model
# fitted earlier and said nothing about the ensemble result.

In [None]:
# Per-class precision / recall / F1, treating the ensemble's cluster ids as class predictions.
print(classification_report(yt_test, y_ensemble))

In [None]:
from sklearn.cluster import AgglomerativeClustering
# Final clustering step handed to Birch: complete-linkage agglomerative clustering
# with Manhattan distance, producing 2 clusters.
f = AgglomerativeClustering(n_clusters=2, metric='manhattan', linkage='complete')
from sklearn.cluster import Birch
# Birch with the given threshold / branching factor; passing an estimator as
# n_clusters makes Birch use it for the final grouping of its subclusters.
brc = Birch(threshold=0.6, branching_factor=128, n_clusters=f)

In [None]:
# Fit Birch on the training features, then label the test features with it.
# Note: this overwrites the `y_pred` produced by the K-means cells.
brc.fit(scaled_features_train)
y_pred = brc.predict(scaled_features_test)

In [None]:
# Cluster ids are arbitrary, so 0/1 may be swapped relative to the class ids.
# Keep whichever orientation agrees better with the true test labels. Unlike an
# unconditional flip, re-running this cell cannot undo the alignment.
y_pred_t = 1 - y_pred
if np.mean(y_pred_t == yt_test) > np.mean(y_pred == yt_test):
    y_pred = y_pred_t

In [None]:
# External metrics: compare Birch's cluster ids with the true test class ids.
print(f"Homogeneity: {metrics.homogeneity_score(yt_test, y_pred):.3f}")
print(f"Completeness: {metrics.completeness_score(yt_test, y_pred):.3f}")
print(f"V-measure: {metrics.v_measure_score(yt_test, y_pred):.3f}")
print(f"Adjusted Rand Index: {metrics.adjusted_rand_score(yt_test, y_pred):.3f}")
print(f"Adjusted Mutual Information: {metrics.adjusted_mutual_info_score(yt_test, y_pred):.3f}")
# Internal metric: computed from the test features and Birch's ids only.
print(f"Silhouette Coefficient: {metrics.silhouette_score(scaled_features_test, y_pred):.3f}")
# No inertia line here: the value printed before came from the separate KMeans model
# fitted earlier and said nothing about the Birch result.

In [None]:
# Per-class precision / recall / F1, treating Birch's cluster ids as class predictions.
print(classification_report(yt_test, y_pred))

In [None]:
# Re-create K-means with a fixed seed (replacing the earlier `kmeans` object) and
# get the cluster id of every training sample in one step.
kmeans = KMeans(n_clusters = 2, random_state = 0)
clusters_kmean = kmeans.fit_predict(scaled_features_train)

In [None]:
# Cluster ids of the training samples from the fit above (a reference to the
# estimator's own array, not a copy).
labels = kmeans.labels_

In [None]:
# Project the training features onto their first two principal components for plotting.
reduced_data = PCA(n_components = 2).fit_transform(scaled_features_train)

results = pd.DataFrame(reduced_data, columns=['pca1', 'pca2'])
results['label'] = labels

# Pass the DataFrame as `data` and refer to its columns by name. The raw NumPy array
# previously given as `data` is not a valid data source for seaborn.
sns.scatterplot(data = results, x = 'pca1', y = 'pca2', hue = 'label')
plt.show()

In [None]:
# Project the test features onto their first two principal components for plotting.
reduced_data = PCA(n_components = 2).fit_transform(scaled_features_test)

results = pd.DataFrame(reduced_data, columns=['pca1', 'pca2'])
# Colour by the most recent test-set predictions held in `y_pred`.
results['label'] = y_pred

# Pass the DataFrame as `data` and refer to its columns by name. The raw NumPy array
# previously given as `data` is not a valid data source for seaborn.
sns.scatterplot(data = results, x = 'pca1', y = 'pca2', hue = 'label')
plt.show()

In [None]:
# `labels` holds the K-means ids of the training samples, so compare against the
# training class ids (`yt` was never defined in this notebook).
correct_labels = np.sum(yt_train == labels)
print("Result: %d out of %d samples were correctly labeled." % (correct_labels, yt_train.size))

In [None]:
# Collapse every non-zero cluster id to 1 so the ids can be read as binary classes.
# np.where builds a new array, so `labels` (and the estimator's labels_ array that it
# references) is no longer modified in place.
y_pred = np.where(labels != 0, 1, 0)

In [None]:
# `y_pred` was derived from the training-set cluster ids, so compare against the
# training class ids (`yt` was never defined in this notebook).
correct_labels = np.sum(yt_train == y_pred)
print("Result: %d out of %d samples were correctly labeled." % (correct_labels, yt_train.size))

In [None]:
# `y_pred` now holds one id per TRAINING sample, so it is scored against the training
# labels and features; pairing it with the test split would mismatch the sample counts.
print(f"Homogeneity: {metrics.homogeneity_score(yt_train, y_pred):.3f}")
print(f"Completeness: {metrics.completeness_score(yt_train, y_pred):.3f}")
print(f"V-measure: {metrics.v_measure_score(yt_train, y_pred):.3f}")
print(f"Adjusted Rand Index: {metrics.adjusted_rand_score(yt_train, y_pred):.3f}")
print(f"Adjusted Mutual Information: {metrics.adjusted_mutual_info_score(yt_train, y_pred):.3f}")
print(f"Silhouette Coefficient: {metrics.silhouette_score(scaled_features_train, y_pred):.3f}")
# Inertia of the seeded KMeans model fitted on the training features.
print(f"Inertia: {kmeans.inertia_:.3f}")

In [None]:
# `y_pred` holds one id per training sample here, so report against the training class ids.
print(classification_report(yt_train, y_pred))