In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
from pathlib import Path
from scipy.spatial.distance import pdist
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN, KMeans, MeanShift
from social_dynamics import autoencoder_utils
import tensorflow as tf
import utils

# Expose no CUDA devices to this process, so the notebook is meant to run on CPU.
# NOTE(review): this is assigned after `import tensorflow`; it only takes effect
# if TensorFlow has not initialised its devices yet -- consider setting it
# before the import.
os.environ.update({'CUDA_VISIBLE_DEVICES': '-1'})

# Base directory from which every project-relative path below is derived.
root_path = utils.determine_root_path()

In [None]:
# Locations of the autoencoder exploration outputs, relative to the project root.
autoencoder_clustering_path = root_path.joinpath("large_autoencoder_clustering")
autoencoder_results_path = autoencoder_clustering_path.joinpath("autoencoders_results")
model_input_types = ["cnn-complete", "cnn-cut", "dnn-complete", "dnn-cut"]

# Exploration results for every model input type; consumed later when a model is selected.
results = utils.load_autoencoder_exploration_results(path=autoencoder_clustering_path,
                                                     model_input_types=model_input_types)

# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this directory exists. Consider deriving it from `root_path` or a config value.
series_dir_path = Path("/home/htc/fmalerba/experiments_results/2_opt-h_luzie-alpha_beta_gamma_delta_expl-0.0001t")
datasets, n_agents, n_options = autoencoder_utils.load_all_datasets(series_dir=series_dir_path,
                                                                    downsampling=4)

# Keep only component 1 of every dataset element (presumably the reconstruction
# target -- TODO confirm), keyed like `datasets`. Extracting it per element
# avoids stacking component 0 into the same array just to discard it, which
# doubled peak memory and required both components to be stackable.
y_trues = {key: np.array([element[1] for element in datasets[key].as_numpy_iterator()])
           for key in datasets}
# The datasets themselves are no longer needed once the targets are extracted.
del datasets

In [None]:
# Choose the model to analyse from the exploration results.
selected_model_name = utils.select_autoencoder_model(model_input_type='dnn-cut',
                                                     results=results,
                                                     mode='best',
                                                     start=6,
                                                     end=7)
model_path = autoencoder_results_path.joinpath(selected_model_name)
# To inspect a specific run instead, point `model_path` at it directly, e.g.:
#model_path = autoencoder_results_path.joinpath('cnn-cut-1')

# The first two dash-separated tokens of the directory name form the input type
# (e.g. "dnn-cut"); it is used below as the key into `y_trues`.
name_tokens = model_path.name.split('-')
model_input_type = "-".join(name_tokens[:2])
batched_flag = "batched" in model_input_type

# Embeddings are flattened to one row vector per sample for the clustering cells.
embeddings = np.load(model_path.joinpath("embeddings.npy"))
embeddings = embeddings.reshape(embeddings.shape[0], -1)
y_pred = np.load(model_path.joinpath("predictions.npy"))

In [None]:
# Histogram of all pairwise Euclidean distances between embeddings; useful for
# judging distance-based parameters such as the ones used by the clustering cells.
pairwise_distances = pdist(embeddings, metric='euclidean')

fig, ax = plt.subplots(figsize=(20, 8))
ax.hist(pairwise_distances, bins=200)
ax.set_title(f'Distribution of embedding distances for {model_path.name}')
plt.show()

# K-Means Clustering

In [None]:
# Cluster the flattened embeddings with K-Means (fixed seed) and show the
# assignment in the 3D PCA plot.
n_clusters = 4
kmeans = KMeans(n_clusters=n_clusters, random_state=0)
kmeans.fit(embeddings)
labels = kmeans.labels_

# One column per cluster, as passed to the plotter together with the class names.
one_hot_encodings = tf.one_hot(indices=labels, depth=n_clusters).numpy()
cluster_names = [str(cluster_id) for cluster_id in range(n_clusters)]
pca_plotter = utils.PCAPlotter(X=embeddings, y=one_hot_encodings, classes=cluster_names)
pca_plotter.plotPCA_3D()

# Mean Shift Clustering

In [None]:
# Mean shift determines the number of clusters itself, so `n_clusters` is
# derived from the distinct labels it produced.
mean_shift = MeanShift(bandwidth=1, n_jobs=10)
mean_shift.fit(embeddings)
labels = mean_shift.labels_

unique_labels, counts = np.unique(labels, return_counts=True)
n_clusters = unique_labels.size

# Summary: number of clusters, their ids, and the fraction of samples in each.
cluster_fractions = counts / counts.sum()
print(n_clusters)
print(unique_labels)
print(cluster_fractions)

one_hot_encodings = tf.one_hot(indices=labels, depth=n_clusters).numpy()
# Only plot when the number of clusters is small (at most 5).
if n_clusters <= 5:
    cluster_names = [str(cluster_id) for cluster_id in range(n_clusters)]
    pca_plotter = utils.PCAPlotter(X=embeddings, y=one_hot_encodings, classes=cluster_names)
    pca_plotter.plotPCA_3D()

# DBSCAN Clustering

In [None]:
# Density-based clustering of the flattened embeddings.
dbscan = DBSCAN(eps=6, n_jobs=10).fit(embeddings)
labels = dbscan.labels_

# DBSCAN assigns the label -1 to noise samples, so the raw labels are not valid
# one-hot indices. `label_indices` maps every sample to the position of its
# label inside the sorted `unique_labels` (0 .. n_clusters-1), which keeps the
# one-hot columns aligned with the class names passed to the plotter.
unique_labels, label_indices, counts = np.unique(labels, return_inverse=True, return_counts=True)
# Number of distinct labels; includes the noise label (-1) when noise is present.
n_clusters = unique_labels.size
print(n_clusters)
print(unique_labels)
print(counts/np.sum(counts))

one_hot_encodings = tf.one_hot(indices=label_indices, depth=n_clusters).numpy()
# Only plot when the number of distinct labels is small (at most 5).
if n_clusters <= 5:
    # Name the classes after the real DBSCAN labels so that noise shows up as "-1".
    pca_plotter = utils.PCAPlotter(X=embeddings, y=one_hot_encodings,
                                   classes=[str(label) for label in unique_labels])
    pca_plotter.plotPCA_3D()

# PCA + K-Means Clustering

In [None]:
# Project the embeddings onto their first three principal components and run
# K-Means in that reduced space.
n_clusters = 4
# Seeded like K-Means below: PCA may pick a randomized SVD solver, which would
# otherwise make the projection (and thus the clustering) vary between runs.
pca = PCA(n_components=3, random_state=0)
reduced_embeddings = pca.fit_transform(embeddings)

kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(reduced_embeddings)
labels = kmeans.labels_
# One column per cluster, as passed to the plotter together with the class names.
one_hot_encodings = tf.one_hot(indices=labels, depth=n_clusters).numpy()
pca_plotter = utils.PCAPlotter(X=reduced_embeddings, y=one_hot_encodings, classes=[str(i) for i in range(n_clusters)])
pca_plotter.plotPCA_3D()

# Plot predictions from all clusters

In [None]:
# For every cluster in the current `labels`, sample predictions and draw them
# in that cluster's own pair of adjacent subplot columns.
n_to_sample = 10

fig, axes = plt.subplots(nrows=n_to_sample*n_options,
                         ncols=2*n_clusters,
                         figsize=(40, 60),
                         dpi=150)
for cluster_id in range(n_clusters):
    trues, preds = autoencoder_utils.select_predictions(mode='clusters',
                                                        n_to_sample=n_to_sample,
                                                        y_true=y_trues[model_input_type],
                                                        y_pred=y_pred,
                                                        clusters=labels,
                                                        selected_cluster=cluster_id)
    if batched_flag:
        # Batched models get their last two axes swapped before plotting.
        preds = np.transpose(preds, axes=(0,2,1))
        trues = np.transpose(trues, axes=(0,2,1))

    first_column = 2*cluster_id
    cluster_axes = axes[:, first_column:first_column + 2]
    autoencoder_utils.plot_preds(axes=cluster_axes,
                                 y_true=trues,
                                 y_pred=preds,
                                 n_agents=n_agents,
                                 n_options=n_options)

plt.show()

# Plot predictions from single cluster

In [None]:
# Cluster to inspect. It must be one of the ids present in `labels`; K-Means
# with n_clusters=4 produces the ids 0..3.
selected_cluster = 3
available_clusters = np.unique(labels)
if selected_cluster not in available_clusters:
    raise ValueError(f"Cluster {selected_cluster} does not occur in `labels`; "
                     f"choose one of {available_clusters.tolist()}")

trues, preds = autoencoder_utils.select_predictions(mode='clusters', n_to_sample=5, y_true=y_trues[model_input_type],
                                                    y_pred=y_pred, clusters=labels, selected_cluster=selected_cluster)
if batched_flag:
    # Same axis swap as in the all-clusters plot, so batched models are drawn consistently.
    preds = np.transpose(preds, axes=(0,2,1))
    trues = np.transpose(trues, axes=(0,2,1))

# One row per (sampled item, option) pair and two columns, as expected by `plot_preds`.
fig, axes = plt.subplots(trues.shape[0]*n_options, 2, figsize=(10, 15))
# Figure-level title: `plt.title` would only title the last subplot created.
fig.suptitle(model_path.name)
autoencoder_utils.plot_preds(axes=axes,
                             y_true=trues,
                             y_pred=preds,
                             n_agents=n_agents,
                             n_options=n_options)
plt.tight_layout()
plt.show()