In [None]:
import pathlib
import networkx
import matplotlib.pyplot as plt
import seaborn as sb
import pandas as pd
import matplotlib.cm as cm

from utils import drawing, paths
from temporal.TemporalData import TemporalData
from clustering.Snapshots import Snapshots
from clustering.ClusterSet.HierarchicalClusterSet import HierarchicalClusterSet
from clustering.ClusterSet.KMeansClusterSet import KMeansClusterSet
from networks.TemporalNetwork import TemporalNetwork
from utils.plotting import plot_events, plot_phases

## Declare parameters to use throughout

In [None]:
# Clustering settings shared by the cells below
distance_metric = 'euclidean'
cluster_method = 'ward'
cluster_limit_type = 'maxclust'

# Directory figures are saved into; set to None to skip saving
output_directory = '../data/output'

# Circadian rhythms parameters
cluster_limit = 6
# Unlabelled dashed event markers every 12 time units: 24, 36, 48, 60
events = [(event_time, '', 'dashed') for event_time in range(24, 24 + 12 * 4, 12)]
# (start, end, label) triples for the annotated phases
phases = [
    (18, 24, 'N'),
    (24, 36, 'Day'),
    (36, 48, 'Night'),
    (48, 60, 'Day'),
    (60, 66, 'N'),
]
# x-axis ticks every 6 time units from 18 to 66 inclusive
time_ticks = list(range(18, 18 + 6 * 8 + 1, 6))
# Placeholder; the circadian network cell replaces this with the node table columns
variables_to_plot = []

# # Cell cycle parameters
# cluster_limit = 5
# events = [
#     (5, 'START', 'dashed'),
#     (33, 'bud', 'solid'),
#     (36, 'ori', 'solid'),
#     (70, 'E3', 'dashed'),
#     (84, 'spn', 'solid'),
#     (100, 'mass', 'solid')]
# phases = [
#     (0, 35, 'G1'),
#     (35, 70, 'S'),
#     (70, 78, 'G2'),
#     (78, 100, 'M')]
# variables_to_plot = ['cln3', 'cln2', 'clb5', 'clb2','mass']
# time_ticks = None

## Load temporal network

The temporal network can be loaded in a number of ways; see the TemporalNetwork constructors for the full list of options.

In [None]:
# Circadian rhythms network
node_table_filepath = '../data/temporal_data/circadian/circadian_temporal_node_data_24_hour_genes_mean_normalised_full.csv'
static_network_filepath = '../data/static_networks/circadian_full.edgelist'

# Options forwarded to the TemporalNetwork constructor (also shown in the heatmap title)
threshold = 0.0
binary = False
normalise = None

# Node table is read as tab-separated with its first column as the index;
# the static edge list uses ', ' between fields
node_table = pd.read_csv(node_table_filepath, index_col=0, sep='\t')
static_network = networkx.read_edgelist(static_network_filepath, delimiter=', ')

# Plot every column of the node table
variables_to_plot = node_table.columns.values.tolist()

# Node weights are combined by multiplication
temporal_network = TemporalNetwork.from_static_network_and_node_table_dataframe(
    static_network,
    node_table,
    combine_node_weights=lambda x, y: x*y,
    threshold=threshold,
    binary=binary,
    normalise=normalise,
    static_edges_default=None,
)
# File name without extension; used in plot titles and output file names
temporal_network_name = pathlib.Path(node_table_filepath).stem

# # Cell cycle network
# temporal_network_filepath = '../data/temporal_networks/cell_cycle/tedges_combined_weighted_binary_method_percentage_p_0.5_clean2.tedges'
# temporal_network_separator = '\\s*\\t\\s*'
#
# edges = pd.read_csv(temporal_network_filepath, sep=temporal_network_separator, engine='python')
# temporal_network = TemporalNetwork.from_edge_list_dataframe(edges)
# temporal_network_name = pathlib.Path(temporal_network_filepath).stem

## Load temporal data

Temporal data can be obtained, for example, by solving a system of ODEs or by loading it directly; see the TemporalData constructors.

In [None]:
# Circadian temporal data
# Sort into a new frame instead of sorting `node_table` in place: the frame was
# created (and already consumed) by the network-loading cell, and mutating it
# here would silently change what that cell's variable holds.
node_table_sorted = node_table.sort_index()
if len(node_table_sorted.index) == 0:
    raise ValueError("node_table has no rows; cannot build temporal data")

series_names = node_table_sorted.columns.values.tolist()
all_series = node_table_sorted.to_numpy(copy=True)
true_times = node_table_sorted.index.values
start_time = true_times[0]
# `times` is `true_times` shifted so that the first sample is at 0
times = true_times if start_time == 0 else true_times - start_time
temporal_data = TemporalData(all_series, series_names, times, true_times)

# # Cell cycle temporal data
# ode_filepath = '../data/ODEs/bychen04_xpp.ode'
# xpp_alias = 'xppmac64'
# start_time = int(temporal_network.true_times[0])
# end_time = 1 + int(temporal_network.true_times[-1])
# temporal_data = TemporalData.from_ODEs(ode_filepath, start_time, end_time, xpp_alias)

## Compute single set of clusters

In [None]:
# Build the snapshots from the temporal network, passing the configured
# clustering method and distance metric
snapshots = Snapshots.from_temporal_network(temporal_network, cluster_method, distance_metric)

# 'k_means' has its own cluster-set class; every other method uses the hierarchical one
if cluster_method == 'k_means':
    constructor = KMeansClusterSet
else:
    constructor = HierarchicalClusterSet

cluster_set = constructor(snapshots, cluster_limit_type, cluster_limit)

## Plot dendrogram, scatter graph and ODE variables

In [None]:
# Three stacked panels: dendrogram (hierarchical methods only), extracted
# phases with event/phase annotations, and the temporal data series.

# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was deprecated
# in matplotlib 3.7 and removed in 3.9.
drawing.configure_sch_color_map(plt.get_cmap('tab10'))
sb.set_palette('Dark2', n_colors=8)
norm = True
# The dendrogram panel only exists for non-k-means clustering
has_dendrogram = cluster_method != 'k_means'
fig = plt.figure(figsize=(8, 6))

# Plot
if has_dendrogram:
    ax1 = fig.add_subplot(3, 1, 1)
    cluster_set.plot_dendrogram(ax=ax1)

ax2 = fig.add_subplot(3, 1, 2)
cluster_set.plot(ax=ax2)
plot_events(events, ax=ax2, text_y_pos=0.005, text_x_offset=1)
plot_phases(phases, ax=ax2, y_pos=0.15, ymax=0.3)

# Share the x axis with the phases panel at creation time. The previous
# `ax2.get_shared_x_axes().join(ax2, ax3)` call was deprecated in matplotlib 3.6
# and removed in 3.8.
ax3 = fig.add_subplot(3, 1, 3, sharex=ax2)
temporal_data.plot_series(variables_to_plot, ax=ax3, norm=norm, add_labels=False)

# Format
if has_dendrogram:
    ax1.set_ylabel('Distance threshold')
    ax1.set_xlabel("Times")
    ax1.set_title('Dendrogram: hierarchical clustering of snapshots', weight="bold")

title = f"Phases extracted by '{cluster_method}' clustering with '{cluster_limit_type}' = {cluster_limit}"
ax2.set_title(title, weight="bold")
ax2.set_yticks([])
sb.despine(ax=ax2, left=True)
ax2.grid(axis='x')
if time_ticks:
    ax2.set_xticks(time_ticks)

ax3.set_xlabel('Time')
ax3.set_ylabel('Concentration (normed)' if norm else 'Concentration')
if time_ticks:
    ax3.set_xticks(time_ticks)
sb.despine(ax=ax3)
ax3.autoscale()

fig.tight_layout()

# Save
if output_directory is not None:
    # Create the output directory if needed so savefig does not fail on a fresh checkout
    pathlib.Path(output_directory).mkdir(parents=True, exist_ok=True)
    filename = f"{output_directory}/phases_from_clustering_{paths.slugify(temporal_network_name)}"
    fig.savefig(f"{filename}.png", dpi=250, bbox_inches="tight")

## Plot distance matrix heatmap

In [None]:
# Heatmap of the full (non-triangular) snapshot distance matrix
fig, ax = plt.subplots(figsize=(9, 9))
snapshots.distance_matrix.plot_heatmap(ax=ax, triangular=False)

# Three-line title: description, network name, then the construction options.
# `threshold` and `binary` come from the circadian network-loading cell.
title = '\n'.join([
    'Snapshots distance matrix heatmap',
    f'{temporal_network_name}',
    f'Threshold={threshold}, Binary={binary}',
])
ax.set_title(title, weight='bold')

# Save
if output_directory is not None:
    # Create the output directory if needed so savefig does not fail on a fresh checkout
    pathlib.Path(output_directory).mkdir(parents=True, exist_ok=True)
    filename = f"{output_directory}/distance_matrix_heatmap_{paths.slugify(temporal_network_name)}"
    fig.savefig(f"{filename}.png", dpi=250, bbox_inches="tight")