In [None]:
import pathlib
import networkx
import pandas as pd
import drawing
import matplotlib.pyplot as plt
import seaborn as sb

from ODEs import ODEs
from clustering.ClusterSets.HierarchicalClusterSets import HierarchicalClusterSets
from clustering.ClusterSets.KMeansClusterSets import KMeansClusterSets
from clustering.Snapshots import Snapshots
from drawing.utils import display_name
from networks.TemporalNetwork import TemporalNetwork

## Declare parameters to use throughout

In [None]:
# --- Clustering configuration ---
distance_metric = 'euclidean'
cluster_method = 'ward'  # 'k_means' selects KMeansClusterSets; anything else is hierarchical
cluster_limit_type = 'maxclust'
# Cluster limits to sweep: 2, 3, ..., 11.
cluster_limit_range = list(range(2, 12))

# --- Plot annotations ---
# One dashed marker every 12 time units from 0 to 48 inclusive.
# presumably (time, label, line style) tuples; verify against ODEs.plot_events
events = [(hour, '', 'dashed') for hour in range(0, 12 * 4 + 1, 12)]
# Consecutive 12-unit windows paired with their display labels.
# presumably (start, end, label) tuples; verify against ODEs.plot_phases
phases = [
    (start, start + 12, label)
    for start, label in zip(range(0, 48, 12), ('Day 1', 'Night 1', 'Day 2', 'Night 2'))
]

# Figures are only written to disk when this is set to something other than None.
output_directory = None

## Load our temporal network

The network can be built in a number of ways; see the `TemporalNetwork` constructors for the full list of options.

In [None]:
# Input files: the node table and the static edge list.
node_table_filepath = '../data/temporal_data/circadian/circadian_temporal_node_data_mean_48.csv'
static_network_filepath = '../data/static_networks/circadian_net.edgelist'

# The network is named after the edge-list file (its stem, i.e. without directory or extension).
temporal_network_name = pathlib.Path(static_network_filepath).stem

# NOTE(review): the node table has a .csv extension but is parsed as tab-separated; confirm.
node_table = pd.read_csv(
    node_table_filepath,
    sep='\t',
    index_col=0,
)
static_network = networkx.read_edgelist(static_network_filepath)

# Combine the static topology with the node table; the weights of two nodes
# are combined by multiplication.
temporal_network = TemporalNetwork.from_static_network_and_node_table_dataframe(
    static_network, node_table,
    combine_node_weights=lambda x, y: x * y,
    threshold=0.5, binary=False, normalise=True,
)

## Compute cluster and silhouette data over a range of clusters

In [None]:
# Derive snapshots from the temporal network using the configured method and metric.
snapshots = Snapshots.from_temporal_network(temporal_network, cluster_method, distance_metric)

# k-means has its own cluster-set class; every other method uses the hierarchical one.
if cluster_method == 'k_means':
    constructor = KMeansClusterSets
else:
    constructor = HierarchicalClusterSets

# One cluster set per value in cluster_limit_range.
cluster_sets = constructor(snapshots, cluster_limit_type, cluster_limit_range)

## Plot silhouette scores across range of clusters

In [None]:
# Plot
# Three side-by-side panels sharing one y-axis, with width ratios 3:1:2.
gridspec_kw = {"width_ratios": [3, 1, 2]}
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(9, 3), gridspec_kw=gridspec_kw, sharey=True)

cluster_sets.plot_with_average_silhouettes((ax1, ax2, ax3))
# Reserve extra room at the bottom of the main panel only when phases are drawn.
drawing.utils.adjust_margin(ax=ax1, bottom=0.2 if phases else 0.05, top=0.05)
ODEs.plot_events(events, ax=ax1)
# The margin above already treats `phases` as optional, so only draw them when present.
if phases:
    ODEs.plot_phases(phases, ax=ax1, y_pos=0.075, ymax=0.15)

# Format
ax1.set_xlabel("Time")
ax1.set_xticks([6*i for i in range(9)])  # ticks every 6 time units, 0 to 48
ax1.set_axisbelow(True)
ax1.set_ylabel(display_name(cluster_sets.limit_type))

ax2.set_xlabel("Average silhouette")
ax2.set_xlim((-0.1, 1.1))
# With sharey=True only the first panel keeps y tick labels; turn them back on here.
ax2.yaxis.set_tick_params(labelleft=True)

ax3.set_xlabel("Actual # clusters")
ax3.yaxis.set_tick_params(labelleft=True)

fig.subplots_adjust(wspace=0.4, top=0.8)
# NOTE(review): the title says "Hier. clust." even when cluster_method is 'k_means';
# consider a method-neutral wording.
fig.suptitle(f"Hier. clust. method: '{cluster_method}' ({temporal_network_name})", weight='bold')

# Save
if output_directory is not None:
    # savefig does not create missing directories, so make sure the target exists first.
    output_path = pathlib.Path(output_directory)
    output_path.mkdir(parents=True, exist_ok=True)
    filename = output_path / f"phase_clusters_all_method_{cluster_method}_{temporal_network_name}"
    # Save through the figure object rather than relying on pyplot's "current figure".
    for extension in ("png", "pdf"):
        fig.savefig(f"{filename}.{extension}", dpi=250, bbox_inches="tight")


## Plot silhouette samples across range of clusters

In [None]:
# Lay the cluster sets out on a grid of four columns, one subplot per cluster set.
columns = 4
total_subplots = len(cluster_sets)
# Ceiling division: the last row may be only partially filled.
rows = (total_subplots + columns - 1) // columns
fig, axs = plt.subplots(nrows=rows, ncols=columns, sharex=True, sharey=True, figsize=(10, 2 * rows))

# Plot
flat_axs = axs.flatten()
cluster_sets.plot_silhouette_samples(axs=axs)

# Format
# The y-axis is shared, so limits and margins set on the first subplot apply to all of them.
flat_axs[0].set_ylim((snapshots.times[0], snapshots.times[-1]))
drawing.utils.adjust_margin(ax=flat_axs[0], top=0.05, bottom=0.05)

# Title each used subplot with its cluster limit and cluster count.
for ax, cluster_set in zip(flat_axs, cluster_sets):
    title = f'{display_name(cluster_set.limit_type)} = {cluster_set.limit}\n({int(cluster_set.size)} clusters)'
    ax.set_title(title)
    sb.despine(ax=ax)

xlabel, ylabel = 'Silhouette score', 'Ordered time points'
drawing.utils.label_subplot_grid_with_shared_axes(rows, columns, total_subplots, xlabel, ylabel, fig, axs)
fig.suptitle(f"Sample silhouette, method: '{cluster_method}' ({temporal_network_name})", y=1.00, weight='bold')
fig.subplots_adjust(hspace=0.5)

# Save
if output_directory is not None:
    # savefig does not create missing directories, so make sure the target exists first.
    output_path = pathlib.Path(output_directory)
    output_path.mkdir(parents=True, exist_ok=True)
    filename = output_path / f"phase_clusters_silhouette_sample_method_{cluster_method}_{temporal_network_name}"
    # Save through the figure object rather than relying on pyplot's "current figure".
    for extension in ("png", "pdf"):
        fig.savefig(f"{filename}.{extension}", dpi=250, bbox_inches="tight")