# Tutorial for AFMpy.REC.HierarchicalDSC

## Imports

In [None]:
# Standard library imports
import json
import logging

# Third party imports
import matplotlib.pyplot as plt

# AFMpy imports
from AFMpy import Stack, DL, Plotting, REC

## Configure Logging

Each module in AFMpy contains logging for debugging purposes via the standard Python logging library. Logging for the modules should always be configured at the application level. Included in these tutorials are example logging configuration files that can be loaded with the following code. You may adjust these logging configuration files as you see fit.

In [None]:
# `import logging` alone does not load the `logging.config` submodule, so it is
# imported explicitly here. Without it, `logging.config.dictConfig` can raise an
# AttributeError unless some other package happened to import it first.
import logging.config

# Load the preconfigured logging settings
with open('logs/HierarchicalDSC_Tutorial_LoggingConfig.json', 'r') as f:
    LOGGING_CONFIG = json.load(f)

# Set up the logging configuration
logging.config.dictConfig(LOGGING_CONFIG)

## Matplotlib Config

Included within the ```Plotting``` module are functions for creating high-quality figures. A default configuration that matches the figures in the publication is activated by running the following function.

In [None]:
# Activate the default figure formatting provided by the Plotting module.
Plotting.configure_formatting()

## Check if GPU is accessible by Tensorflow

This tutorial uses Tensorflow and Keras in its deep learning algorithms. The performance, especially for large image stacks, is substantially degraded when not using the GPU, so it is highly recommended to use the GPU if available. The helper function ```DL.is_gpu_available``` will check to see if Tensorflow has GPU access.

In [None]:
# Report whether Tensorflow has GPU access before any training is started.
gpu_status_message = (
    'GPU is accessible by tensorflow.'
    if DL.is_gpu_available()
    else 'GPU is NOT accessible by tensorflow. If you want to use GPU, please check your AFMpy version and tensorflow installation.'
)
print(gpu_status_message)

## Load the Stacks

Here we load the compressed pickle file of our stack. A comprehensive explanation of the loading functions is available in the ```LAFM``` tutorial.

In [None]:
# Public key used to verify the integrity of the stacks when they are loaded.
PUBLIC_KEY_FILEPATH = '../common/keys/Tutorial_Public.pub'

# Compressed pickle files holding the cytoplasmic and periplasmic stacks.
cytoplasmic_pickle_filepath = '../common/stacks/Example_AC-20-4.xz'
periplasmic_pickle_filepath = '../common/stacks/Example_AP-20-4.xz'

# Load each stack, passing the public key along with its pickle file.
cytoplasmic_stack = Stack.Stack.load_compressed_pickle(
    pickle_filepath = cytoplasmic_pickle_filepath,
    public_key_filepath = PUBLIC_KEY_FILEPATH,
)

periplasmic_stack = Stack.Stack.load_compressed_pickle(
    pickle_filepath = periplasmic_pickle_filepath,
    public_key_filepath = PUBLIC_KEY_FILEPATH,
)

## Prepare the Convolutional Autoencoders

In [None]:
# The CAE input shape is the per-image shape of the stack followed by a
# trailing channel entry of 1, i.e. (width,height,channels). In our case (64,64,1).

# Cytoplasmic stack: input shape and the Convolutional Autoencoder to train on it.
cyto_input_shape = (*cytoplasmic_stack.images.shape[1:], 1)
cytoplasmic_CAE = DL.DefaultCAE(input_shape = cyto_input_shape)

# Periplasmic stack: input shape and the Convolutional Autoencoder to train on it.
peri_input_shape = (*periplasmic_stack.images.shape[1:], 1)
periplasmic_CAE = DL.DefaultCAE(input_shape = peri_input_shape)

## Apply Hierarchical Deep Spectral Clustering

In [None]:
# LAFM settings used both during clustering and for the per-cluster LAFM
# images computed afterwards, so the two always agree.
lafm_target_resolution = (96, 96)
lafm_sigma = 2.25

# Randomize the image order first so that it doesn't bias the training.
cytoplasmic_stack.shuffle()

# Run Hierarchical DSC on the cytoplasmic stack with its autoencoder.
cyto_clusters = REC.hierarchical_DSC(
    cytoplasmic_stack,
    cytoplasmic_CAE,
    k_neighbors = 7,
    max_iterations = 5,
    lafm_target_resolution = lafm_target_resolution,
    lafm_sigma = lafm_sigma,
    stability_threshold = 0.85,
    distinct_cluster_threshold = 0.65,
    min_cluster_size = 750,
)

# Calculate the mean image and the LAFM image for each cluster
for cyto_cluster in cyto_clusters:
    cyto_cluster.calc_mean_image()
    cyto_cluster.calc_LAFM_image(target_resolution = lafm_target_resolution,
                                 sigma = lafm_sigma)

In [None]:
# One row per cluster: mean image on the left, LAFM image on the right.
# squeeze = False keeps `ax` two-dimensional even when only a single cluster
# is returned, so the ax[row, column] indexing below always works.
n_cyto_clusters = len(cyto_clusters)
fig, ax = plt.subplots(n_cyto_clusters, 2,
                       figsize = (4, n_cyto_clusters * 2),
                       squeeze = False)
for i, cluster in enumerate(cyto_clusters):
    ax[i, 0].imshow(cluster.mean_image, cmap = Plotting.LAFMcmap)
    ax[i, 1].imshow(cluster.LAFM_image, cmap = Plotting.LAFMcmap)

    # Label each row with its cluster index.
    ax[i, 0].set_ylabel(f'Cluster {i}', fontsize = 14)

# Hide the tick marks on every panel; the axes only display images.
for axis in ax.ravel():
    axis.set_xticks([])
    axis.set_yticks([])

# Column titles go on the first row only.
ax[0, 0].set_title('Mean Image', fontsize = 14)
ax[0, 1].set_title('LAFM Image', fontsize = 14)

In [None]:
# LAFM settings used both during clustering and for the per-cluster LAFM
# images computed afterwards, so the two always agree.
lafm_target_resolution = (96, 96)
lafm_sigma = 2.25

# Randomize the image order first so that it doesn't bias the training.
periplasmic_stack.shuffle()

# Run Hierarchical DSC on the periplasmic stack with its autoencoder.
peri_clusters = REC.hierarchical_DSC(
    periplasmic_stack,
    periplasmic_CAE,
    k_neighbors = 7,
    max_iterations = 5,
    lafm_target_resolution = lafm_target_resolution,
    lafm_sigma = lafm_sigma,
    stability_threshold = 0.85,
    distinct_cluster_threshold = 0.65,
    min_cluster_size = 750,
)

# Calculate the mean image and the LAFM image for each cluster
for peri_cluster in peri_clusters:
    peri_cluster.calc_mean_image()
    peri_cluster.calc_LAFM_image(target_resolution = lafm_target_resolution,
                                 sigma = lafm_sigma)

In [None]:
# One row per cluster: mean image on the left, LAFM image on the right.
# squeeze = False keeps `ax` two-dimensional even when only a single cluster
# is returned, so the ax[row, column] indexing below always works.
n_peri_clusters = len(peri_clusters)
fig, ax = plt.subplots(n_peri_clusters, 2,
                       figsize = (4, n_peri_clusters * 2),
                       squeeze = False)
for i, cluster in enumerate(peri_clusters):
    ax[i, 0].imshow(cluster.mean_image, cmap = Plotting.LAFMcmap)
    ax[i, 1].imshow(cluster.LAFM_image, cmap = Plotting.LAFMcmap)

    # Label each row with its cluster index.
    ax[i, 0].set_ylabel(f'Cluster {i}', fontsize = 14)

# Hide the tick marks on every panel; the axes only display images.
for axis in ax.ravel():
    axis.set_xticks([])
    axis.set_yticks([])

# Column titles go on the first row only.
ax[0, 0].set_title('Mean Image', fontsize = 14)
ax[0, 1].set_title('LAFM Image', fontsize = 14)