# CellPLATO | Cell Plasticity Analysis Tool (Trackmate version)

Step 1: Fill in the config file!

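Also, make sure your data is set up in the following two-tiered format (note: the loading cell below calls `cp.load_and_populate` with the patterns `.*spots.*\.csv` and `.*tracks.*\.csv`, so check that your TrackMate CSV exports are named accordingly):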

        Master
            ├── Condition 1
            │   ├── Replicate 1
            │   │       └── tracks.h5
            │   ├── Replicate 2
            │   │       └── tracks.h5
            │   └── Replicate 3
            │           └── tracks.h5
            │
            └── Condition 2
                ├── Replicate 1
                │       └── tracks.h5
                ├── Replicate 2
                │       └── tracks.h5
                └── Replicate 3
                        └── tracks.h5

<div class="alert alert-block alert-danger">
Set your kernel to 'cellPLATO' before continuing
</div>

<div class="alert alert-block alert-success">
<h2>1. Start by importing packages for cellPLATO</h2>
</div>

This includes cellPLATO itself, and all of the modules you will need

* Import these packages, checking that you have them
* We're also importing a lot of the modules in cellPLATO. If this cell runs successfully, you are good to go!

In [None]:
import cellPLATO as cp

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import imageio

import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
import matplotlib.cm as cm
import plotly.graph_objects as go
import plotly.express as px
import re
import glob
import pandas as pd
from tqdm.notebook import tqdm
import numpy as np
import requests
import zipfile
import ipywidgets as widgets
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
import itertools
from matplotlib.gridspec import GridSpec
import requests


# Flag checked by the save cells before they write dataframes to CSV.
# NOTE(review): each save cell in this notebook sets the flag again before reading it.
OVERWRITE_DATAFRAMES = True



# Import your experiment list

Check that the list generated in the next cell contains your conditions and replicates

In [None]:
# Get the experiment list from the experiments listed in the config
exp_list = cp.populate_experiment_list()
# Show the list so the conditions and replicates can be checked by eye
display(exp_list)
# Print the folder that the save cells in this notebook write their CSV files to
print(cp.SAVED_DATA_PATH)

### Format the trackmate dataframe

In [None]:
# cp.load_and_populate is given a filename regex and returns a merged dataframe plus its metadata.
# Spots table: this is the input converted to the cellPLATO format in the next cell.
merged_spots_df, spots_metadata = cp.load_and_populate(r'.*spots.*\.csv')

# Tracks table: loaded here but not read again in the visible part of this notebook.
merged_tracks_df, tracks_metadata = cp.load_and_populate(r'.*tracks.*\.csv')

### Change the trackmate dataframe to the cellPLATO format

In [None]:
# Convert the merged TrackMate spots table; comb_df is the input to the measurement pipeline below.
comb_df = cp.trackmate_to_cellPLATO(merged_spots_df)

<div class="alert alert-block alert-success">
<h2>2. Measurements of morphology and migration</h2>
</div>

### This cell does migration and morphology measurements for all of the cells at each timepoint 

In [None]:
# Run the measurement pipeline on comb_df using the scaling flag and factor list from the cellPLATO config.
comb_df, new_factors = cp.measurement_pipeline(comb_df, mixed=cp.MIXED_SCALING, factors_to_timeaverage = cp.ALL_FACTORS) 
display(new_factors)

# Returns a filtered dataframe, while also adding included column to comb_df
# NOTE(review): the first return value is bound to comb_df here, whereas the optional
# filtering cell further down binds it to filt_df - confirm which one downstream cells should see.
comb_df, filt_counts = cp.apply_filters(comb_df)

# Process a time-averaged DataFrame
tavg_df = cp.time_average_trackmate(comb_df)

In [None]:
# Set to False to keep any comb_df.csv / tavg_df.csv already present in the save folder.
OVERWRITE_DATAFRAMES = True

if OVERWRITE_DATAFRAMES:
    # Paths are built by plain string concatenation onto cp.SAVED_DATA_PATH.
    comb_df.to_csv(cp.SAVED_DATA_PATH + 'comb_df.csv', index=False)
    tavg_df.to_csv(cp.SAVED_DATA_PATH + 'tavg_df.csv', index=False)

In [None]:
# Reload comb_df and tavg_df from the CSV files written by the save cell above,
# replacing the in-memory dataframes.
comb_df = pd.read_csv(cp.SAVED_DATA_PATH + 'comb_df.csv')
tavg_df = pd.read_csv(cp.SAVED_DATA_PATH + 'tavg_df.csv')

### Make a preview plot of any of these factors

In [None]:
# TrackMate feature column names. This list is passed to the comparative
# visualization pipeline and is assigned to DR_FACTORS for the UMAP step below.
# Commented-out entries are excluded from both.
trackmate_factors = ['RADIUS',
                    #  'VISIBILITY',                   
                    # 'MANUAL_SPOT_COLOR',
                    'MEAN_INTENSITY_CH1',
                    'MEDIAN_INTENSITY_CH1',
                    'MIN_INTENSITY_CH1',
                    'MAX_INTENSITY_CH1',
                    'TOTAL_INTENSITY_CH1',
                    'STD_INTENSITY_CH1',
                    'CONTRAST_CH1',
                    'SNR_CH1',
                    'ELLIPSE_X0',
                    'ELLIPSE_Y0',
                    'ELLIPSE_MAJOR',
                    'ELLIPSE_MINOR',
                    'ELLIPSE_THETA',
                    'ELLIPSE_ASPECTRATIO',
                    'AREA',
                    'PERIMETER',
                    'CIRCULARITY',
                    'SOLIDITY',
                    'SHAPE_INDEX',
                     ]

In [None]:
# Preview one factor on the time-averaged dataframe; change `factor` to any column of interest.
f=cp.plots_of_differences_sns(tavg_df,factor='SHAPE_INDEX')
f.show()

In [None]:
# Use filt_df or comb_df depending on what you want to see
# (filt_df only exists after the optional filtering cell below has been run)
f=cp.multi_condition_timeplot(comb_df, factor='CONTRAST_CH1')
f.show()

### Optional: do filtering on the data (on top of what has been stated in the config file)

In [None]:
# User-defined filters in dict {factor:(min, max)}
# Uncomment an entry to activate it; only "area" is active here.

data_filters = {
#   "speed": (10, 100),
  "area": (50, 10000),
#    "frame": (0, 450), # Warning: range will change if self-normalized
#   "ntpts": (12,1800)
}

# Returns a filtered dataframe, while also adding included column to comb_df
filt_df, filt_counts = cp.apply_filters(comb_df,how='any', filter_dict=data_filters)

# Visual check of the filtering result before moving on
fig = cp.visualize_filtering(filt_df, filt_counts)

# Plot all metrics

This cell makes comparative plots for every single metric and saves them in your output folder

* Plots of difference
* Timeplots of difference
* Marginal xy plots
* Simple bar plots
* Superplots - useful for comparing between replicates

<div class="alert alert-block alert-danger">
Check that you are happy with your extra filtering before continuing.
Once you are ready, run the next cell on either the filtered dataframe or the unfiltered dataframe.
</div>

In [None]:
# Outputs plots of all metrics for all factors
# Runs on comb_df; pass filt_df instead to plot only the data kept by the optional filtering cell.
cp.comparative_visualization_pipeline(comb_df, num_factors=trackmate_factors) 

<div class="alert alert-block alert-success">
<h2>3. Definition of single timepoint behavioural clusters using UMAP and HDBSCAN</h2>
</div>

### Optional: for new datasets perform correlation analysis to understand which factors correlate to one another

This may aid in choosing the most important factors, aiding clustering

In [None]:
# Correlation heatmap over the factor list taken from the cellPLATO config (cp.ALL_FACTORS).
df_in = comb_df
cp.correlation_matrix_heatmap(df_in, factors = cp.ALL_FACTORS)

### Optional: use variance thresholder for further insight

In [None]:
# Result is displayed for inspection only; it is not fed into DR_FACTORS automatically.
chosen_dr_factors = cp.variance_threshold(comb_df, threshold_value=0.03)
chosen_dr_factors

### Define the list of dr_factors to use for UMAP (the UMAP cell below reads `DR_FACTORS`, so run this cell)

In [None]:
# Factors fed to the UMAP step below. This binds the same list object as trackmate_factors (no copy).
DR_FACTORS = trackmate_factors

## Perform UMAP and cluster analysis

### First, do UMAP, save the new df and plot the UMAP

Well separated clusters depend mostly on 1. the input factors and 2. the umap_nn setting

You can change both, depending on the nature of your data, in order to achieve a reasonable level of separation of clusters

In [None]:
###### User alterable parameters ######
tsne_perp=150  # NOTE(review): not passed to the pipeline call in this cell
umap_nn = 30 #umap nearest neighbours
min_dist = 0.0 #umap minimum distance (usually keep this at 0 or very low)
n_components = 3 # number of umap dimensions to calculate
#######################################



# Dimensionality reduction on the per-timepoint dataframe using the factors in DR_FACTORS
dr_df = cp.dr_pipeline_multiUMAPandTSNE(comb_df, 
                    dr_factors=DR_FACTORS,
                    n_components = n_components,
                    umap_nn=umap_nn,
                    min_dist= min_dist,
                    scalingmethod = 'minmax',) # A number of scaling methods are available: 'choice', 'minmax', 'standard', 'robust', 'normalize', 'quantile', 'maxabs', 'yeo-johnson', 'box-cox'

dr_df.to_csv(cp.SAVED_DATA_PATH + 'dr_df.csv', index=False) # Saves the df

# 3D scatter of the first three UMAP axes, coloured by condition
cp.plot_3D_scatter(dr_df, 'UMAP1', 'UMAP2', 'UMAP3', colorby='condition', ticks=False, identifier='dr_df' + '_byCONDITION_',dotsize = 0.01, alpha=0.1, markerscale = 100) #color = label or condition  

### Second, identify clusters and exemplar cells using HDBSCAN

In [None]:
##### User adjustable parameters #####
min_cluster_size = 1000
min_samples = 500
cluster_by = 'UMAPNDIM' # UMAPNDIM = default, clusters on UMAPs. NDIM = alternate, clusters on all dimensions
metric = 'euclidean' # See https://hdbscan.readthedocs.io/en/latest/api.html#hdbscan.HDBSCAN for options
#######################################

# Cluster the UMAP output; returns the labelled dataframe and the exemplar rows.
lab_dr_df, exemplar_df = cp.hdbscan_clustering(
    dr_df,
    min_cluster_size=min_cluster_size,
    min_samples=min_samples,
    cluster_by=cluster_by,
    metric=metric,
)

# Tag the dataframe with a name that is reused as the plot identifier below.
lab_dr_df.name = 'lab_dr_df'
name = lab_dr_df.name

# Persist both outputs so later sections can reload them.
lab_dr_df.to_csv(cp.SAVED_DATA_PATH + 'lab_dr_df.csv', index=False)
exemplar_df.to_csv(cp.SAVED_DATA_PATH + 'exemplar_df.csv', index=False)

# Plot the dataframe labelled in this cell, coloured by cluster label.
# (`lab_dr_df_30` is not defined anywhere in this notebook and raised a NameError.)
cp.plot_3D_scatter(lab_dr_df, 'UMAP1', 'UMAP2', 'UMAP3', colorby='label', ticks=False, identifier=name + '_byCLUSTERID___', dotsize=0.01, alpha=0.1, markerscale=100)  # color = label or condition


In [None]:
# cp.plot_3D_scatter_dev(exemplar_cell_tracks_df_500_gold, 'UMAP1', 'UMAP2', 'UMAP3', colorby='uniq_id', ticks=False, identifier='exemplar_cell_tracks_df_500_gold' + '_byCONDITION___',dotsize = 20, alpha=0.1, markerscale = 5) #color = label or condition  2
# cp.plot_3D_scatter_dev(tptlabel_dr_df, 'UMAP1', 'UMAP2', 'UMAP3', colorby='uniq_id', ticks=False, identifier='exemplar_cell_tracks_df_500_gold' + '_byCONDITION___',dotsize = 0.01, alpha=0.1, markerscale = 5) #color = label or condition  2

## Then plot the 'fingerprint' plot of percentage in each cluster per condition

In [None]:
# This is the new combo
# Table of points per cluster per condition, then the 'fingerprint' plot built from it.
cluster_purity_df = cp.purity_pointsinclusterspercondition(lab_dr_df) 
display(cluster_purity_df)
f = cp.purityplot_percentcluspercondition(lab_dr_df, cluster_purity_df) 

### Optional: explore the clusters with interactive 3D plot

In [None]:
# Interactive 3D UMAP of the labelled dataframe; colour and symbol both follow the condition short label.
cp.interactive_plot_3D_UMAP(df=lab_dr_df,colorby = 'Condition_shortlabel', symbolby = 'Condition_shortlabel', what = ' AllTimeUMAPwithclusters') # TavgUMAPwithclusters

### Optional: all other conditions colored grey, chosen condition in color

In [None]:
df=lab_dr_df

condlist = df['Condition_shortlabel'].unique().tolist() #get unique list of conditions from df
print(condlist) # show the condition list
# chosen_condition = '' #specify a chosen condition from the list
chosen_condition = condlist[0] # or choose the first one
print(chosen_condition)

# Highlight the chosen condition in the interactive plot
cp.interactive_plot_3D_UMAP_chosen_condition(df, chosen_condition, opacity_grey=0.01, marker_size_all=2,) #change opacity and marker size to suit the data

### Optional: make UMAP plots colored by metric contributors - the more intense the color, the higher the contribution of the metric to a cluster

In [None]:
# First one colors per metric
# One subplot per entry of cp.ALL_FACTORS, laid out five per row.
cp.plot_UMAP_subplots_coloredbymetricsorconditions(df_in=lab_dr_df, x= 'UMAP1', y= 'UMAP2', z = 'UMAP3', n_cols = 5, ticks=False, metrics = cp.ALL_FACTORS, scalingmethod='choice',
                                                   identifier='inferno', colormap='inferno', coloredbycondition = False, samplethedf = False)
#second one colors per condition
# NOTE(review): the commented call below uses tptlabel_dr_df, which is only created later in this notebook.
# cp.plot_UMAP_subplots_coloredbymetricsorconditions(df_in=tptlabel_dr_df, x= 'UMAP1', y= 'UMAP2', z = 'UMAP3', n_cols = 5, ticks=False, metrics = cp.ALL_FACTORS, scalingmethod='choice',
#                                                    identifier='inferno', colormap='inferno', coloredbycondition = True, samplethedf = False)

### Perform UMAP then HDBSCAN on the tavg_df

### At the moment, just run this step as-is; it is needed for compatibility later on

In [None]:
tsne_perp = 150  # NOTE(review): not passed to the pipeline call in this cell
umap_nn = 20  # 4 # 60
min_dist = 0.0  # 0.15
n_components = 3

# UMAP on the time-averaged dataframe.
# `new_DR_FACTORS` is never defined in this notebook, so the factor list defined
# in the DR_FACTORS cell above is used instead.
# NOTE(review): confirm that these columns exist in tavg_df; other candidates the
# original author listed were only_tmeans and cp.DR_FACTORS.
tavg_dr_df = cp.dr_pipeline_multiUMAPandTSNE(tavg_df,
                    dr_factors=DR_FACTORS,
                    n_components=n_components,
                    umap_nn=umap_nn,
                    min_dist=min_dist,
                    scalingmethod='choice',)  # log2minmax # powertransformer #minmax

# Cluster the time-averaged UMAP; plotting is switched off for this compatibility step.
lab_tavg_dr_df, exemplar_tavg_df = cp.hdbscan_clustering(tavg_dr_df, min_cluster_size=50, min_samples=50, cluster_by='UMAPNDIM', metric='euclidean', plot=False)
 

<div class="alert alert-block alert-danger">
Save your dataframes so you can come back to this step if necessary
</div>

In [None]:
# Set to False to keep any previously saved copies of these three CSV files.
OVERWRITE_DATAFRAMES = True

if OVERWRITE_DATAFRAMES:
    # Outputs of the time-averaged UMAP/HDBSCAN cell above
    tavg_dr_df.to_csv(cp.SAVED_DATA_PATH + 'tavg_dr_df.csv', index=False)
    lab_tavg_dr_df.to_csv(cp.SAVED_DATA_PATH + 'lab_tavg_dr_df.csv', index=False)
    exemplar_tavg_df.to_csv(cp.SAVED_DATA_PATH + 'exemplar_tavg_df.csv', index=False)

In [None]:
# Run this function to put the time-averaged labels into lab_tavg_lab_dr_df.
# Slow function (original author's note: could be updated to search by uniq_id alone).

lab_tavg_lab_dr_df=cp.add_tavglabel_todf(lab_dr_df, lab_tavg_dr_df)
lab_tavg_lab_dr_df.to_csv(cp.SAVED_DATA_PATH + 'lab_tavg_lab_dr_df.csv', index=False)

## Quantify the plasticity 

In [None]:
# Produces tptlabel_dr_df, the dataframe used by the plasticity plots and every later section; saved for reloading.
tptlabel_dr_df = cp.count_cluster_changes_with_tavg(lab_tavg_lab_dr_df)
tptlabel_dr_df.to_csv(cp.SAVED_DATA_PATH + 'tptlabel_dr_df.csv', index=False)

### Plots of plasticity

In [None]:
# Reload the dataframe written by the previous cell (entry point when resuming a session).
tptlabel_dr_df = pd.read_csv(cp.SAVED_DATA_PATH + 'tptlabel_dr_df.csv')

In [None]:
df = tptlabel_dr_df
# The identifier keeps its leading backslash. A raw string yields the same
# characters as before without relying on the unrecognised escape sequence '\_'.
# Original author's note: problem with NaNs in the data.
cp.plot_plasticity_changes(df, identifier=r'\_allcells', maxy=4)

In [None]:
df=tptlabel_dr_df
# NOTE(review): this identifier has no leading backslash, unlike the neighbouring plasticity plot cells - confirm which form is intended.
cp.plot_plasticity_countplots(df, identifier='_allcells')

In [None]:
df = tptlabel_dr_df
# Raw string: same identifier characters as before (leading backslash kept),
# without the unrecognised escape sequence '\_'.
# The time window multiplier comes from the cellPLATO config.
cp.plot_cumulative_plasticity_changes_main(df, identifier=r'\_allcells', miny=None, maxy=None, t_window_multiplier=cp.T_WINDOW_MULTIPLIER, plotallcells=False)

# Disambiguate the clusters

### First, choose a number of exemplar cells to pick out from the exemplar cell list to display

In [None]:
# Number of exemplar rows to keep per cluster label (fewer when a cluster has fewer rows).
# Re-running this cell samples again from the already reduced exemplar_df.
n = 2
exemplar_df = (
    exemplar_df
    .groupby('label')
    .apply(lambda cluster_rows: cluster_rows.sample(min(n, len(cluster_rows))))
    .reset_index(drop=True)
)

In [None]:
size=220 # passed as XYRange to disambiguate_timepoint below

df= tptlabel_dr_df #from the all analysis part
exemp_df=exemplar_df #from the cluster analysis part.

# NOTE(review): newnew_DR_FACTORS is only defined in the "Useful things" cell further down
# this notebook; that cell must have been run first or this line raises a NameError.
top_dictionary, contributions_df_singletpoints, scaled_df=cp.contribution_to_clusters(df_in=tptlabel_dr_df,  howmanyfactors=3, dr_factors= newnew_DR_FACTORS) #BEFORE disambiguate_tavg(), then: lab_tavg_dr_df BEFORE disambiguate_timepoint(), then: #tptlabel_dr_df 
cp.plot_cluster_averages(top_dictionary, df, scaled_df)
result_df = cp.create_cluster_averages_table(top_dictionary, df, scaled_df)
cp.disambiguate_timepoint(df, exemp_df, scaled_df, top_dictionary=top_dictionary, XYRange=size,boxoff=True, trajectory = False) 

In [None]:

# Reload both dataframes from disk. This replaces the exemplar_df sub-sampled above
# with the full one saved by the HDBSCAN cell.
tptlabel_dr_df = pd.read_csv(cp.SAVED_DATA_PATH + 'tptlabel_dr_df.csv')
exemplar_df = pd.read_csv(cp.SAVED_DATA_PATH + 'exemplar_df.csv')

## Then, to visualize single cells with many timepoints, select cells with lots of timepoints

In [None]:
#### User inputs ####
whole_df = tptlabel_dr_df
exemplar_df = exemplar_df  # no-op self-assignment, kept as a placeholder for substituting another dataframe
# Default: the mean of the 'ntpts' column, truncated to an integer
numberofdesiredtimepoints = int(whole_df['ntpts'].mean())
# numberofdesiredtimepoints = 200
numberofcellspercluster = 40
num_clusters_whole_dataset = len(whole_df['label'].unique())

# 70% of (cells per cluster x number of distinct labels), truncated to an integer
override = int((numberofcellspercluster*num_clusters_whole_dataset)*0.7)
#####################

exemplar_df_filt, exemplar_cell_tracks_df = cp.filter_exemplars(whole_df=whole_df, exemplar_df = exemplar_df, numberofdesiredtimepoints = numberofdesiredtimepoints, 
                                                                    numberofcellspercluster = numberofcellspercluster, override = override)

In [None]:
df = exemplar_cell_tracks_df
# Deprecated alternative kept for reference (use the small multiples version instead):
# cp.plot_cumulative_plasticity_changes_test2(df, identifier=r'\_exemplars_only_3_df__', miny=None, maxy=None, t_window_multiplier = 1, plotallcells = True)
# Raw string: same identifier characters as before (leading backslash kept),
# without the unrecognised escape sequence '\_'.
cp.plot_cumulative_plasticity_changes_main(df, identifier=r'\_exemplars_only_3_df__', miny=None, maxy=None, t_window_multiplier=1, plotallcells=False)

### Plot any factor as small multiples from the exemplars

In [None]:
df = exemplar_cell_tracks_df
# Column to draw in each small-multiple panel; change to any column of df
whichcolumntoplot = 'label'

cp.plot_small_multiples(df, whichcolumntoplot)

### Re-disambiguate the new exemplar df!

In [None]:
size=300 # passed as XYRange to disambiguate_timepoint_dev below

df= tptlabel_dr_df #from the all analysis part
exemp_df=exemplar_df #from the cluster analysis part.

# NOTE(review): newnew_DR_FACTORS is only defined in the "Useful things" cell below; run that cell first.
# NOTE(review): this cell calls the *_topdictionary and *_dev variants, unlike the earlier disambiguation cell - confirm intended.
top_dictionary, contributions_df_singletpoints, scaled_df=cp.contribution_to_clusters_topdictionary(df_in=tptlabel_dr_df,  howmanyfactors=10, dr_factors= newnew_DR_FACTORS) #BEFORE disambiguate_tavg(), then: lab_tavg_dr_df BEFORE disambiguate_timepoint(), then: #tptlabel_dr_df 
cp.plot_cluster_averages(top_dictionary, df, scaled_df)
cp.disambiguate_timepoint_dev(df, exemp_df, scaled_df, top_dictionary=top_dictionary, XYRange=size,boxoff=True) 

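# Useful things

The next cell defines `newnew_DR_FACTORS`, which the "Disambiguate the clusters" cells above and the trajectory disambiguation cell below pass as `dr_factors`. Run it before those cells.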

In [None]:
# Factor name lists; commented-out entries are excluded.
# Shape-related factor names
REGIONPROPS_LIST = ['area',
                    # 'bbox_area',
                    'eccentricity',
                    'equivalent_diameter',
                    # 'extent',
                    'filled_area',
                    'major_axis_length',
                    'minor_axis_length',
                    # 'orientation',
                    'perimeter',
                    #  'solidity'
                     ]

# Migration-related factor names
MIG_FACTORS = ['euclidean_dist',     
                'cumulative_length', 
                'speed',
                # 'orientedness', 
                # 'directedness',
                # 'turn_angle',
                'endpoint_dir_ratio',
                # 'dir_autocorr',
                'outreach_ratio',
                'MSD',                
                'max_dist',           
                # 'glob_turn_deg',
                'arrest_coefficient']

ADDITIONAL_FACTORS = ['aspect', 'rip_L'] # 'rip_p', 'rip_K', 


# Concatenation of the three lists; passed as dr_factors by the disambiguation cells.
# NOTE(review): these names differ from the TrackMate columns in trackmate_factors -
# confirm they exist in the dataframes they are applied to.
newnew_DR_FACTORS = REGIONPROPS_LIST + MIG_FACTORS + ADDITIONAL_FACTORS

<div class="alert alert-block alert-success">
<h2>4. Trajectory measurement: Damerau-Levenshtein</h2>
</div>

In [None]:
# Reload the per-timepoint labelled dataframe saved in the plasticity section.
tptlabel_dr_df = pd.read_csv(cp.SAVED_DATA_PATH + 'tptlabel_dr_df.csv')

### First filter the tptlabel_dr_df to include only a subset of data of similar timescale

In [None]:
# Keep only rows whose 'ntpts' value lies in [low, high] (Series.between is inclusive by default).
low = 200
high = 220

tptlabel_dr_df_filt = tptlabel_dr_df[tptlabel_dr_df['ntpts'].between(low, high)]

### Verify that the filtered data reflects the total data

In [None]:
# Column plotted by the verification cells below
factorchoice = 'speed'

In [None]:
# Makes timeplots of the unfiltered and filtered data

# Unfiltered
f=cp.multi_condition_timeplot(tptlabel_dr_df, factorchoice)
f.show()
# Filtered by ntpts
f=cp.multi_condition_timeplot(tptlabel_dr_df_filt, factorchoice)
f.show()

In [None]:
# Plot of difference of the unfiltered and filtered data
# This cell plots the time-averaged dataframe from section 2 (tavg_df).
f = cp.plots_of_differences_sns(tavg_df, factorchoice)
f.show()

In [None]:
# Time-average the ntpts-filtered dataframe so this plot can be compared with the
# unfiltered plot in the previous cell. (Averaging the unfiltered tptlabel_dr_df here
# would not show the effect of the filter.)
# NOTE(review): section 2 uses cp.time_average_trackmate - confirm cp.time_average is the intended function here.
tavg_trajectory_df = cp.time_average(tptlabel_dr_df_filt)
f = cp.plots_of_differences_sns(tavg_trajectory_df, factorchoice)
f.show()

### Perform Damerau-Levenshtein analysis

In [None]:
df = tptlabel_dr_df_filt
# Distance matrix over the filtered dataframe; alternative metric names are listed in the trailing comment.
distance_matrix_dameraulev = cp.calculate_edit_distances(df,distancemetric = 'dameraulev', print_interval=10000) #fastdtw # dameraulev # mongeelkan
print(distance_matrix_dameraulev.shape)

In [None]:
# Save the distance matrix
# np.save(cp.SAVED_DATA_PATH + 'distance_matrix_dameraulev.npy', distance_matrix_dameraulev)

### Perform a UMAP/HDBSCAN parameter sweep, and select plots

In [None]:
'''Sweep'''

# Try every combination of UMAP/HDBSCAN settings. Each run overwrites the
# result variable, so the sweep is only useful for the output each call produces.
df = tptlabel_dr_df_filt
for n_neighbors, min_samples, min_cluster_size in itertools.product(
        [8, 10, 12],
        [5,8,10, 15, 30, 40],
        [5,8,10, 15, 30, 40]):
    print(f'min_samples = {min_samples}')
    print(f'min_cluster_size = {min_cluster_size}')
    print(f'n_neighbors = {n_neighbors}')
    tptlabel_dr_df_filt_clusteredtrajectories = cp.cluster_sequences(
        df,
        distance_matrix_dameraulev,
        do_umap=True,
        eps=0.1,
        min_samples=min_samples,
        min_cluster_size=min_cluster_size,
        n_neighbors=n_neighbors,
    )

In [None]:
'''Chosen UMAP and HDBSCAN parameters'''

# Final settings, picked after inspecting the sweep above
min_samples = 30
min_cluster_size = 20
n_neighbors = 12

df = tptlabel_dr_df_filt

print(f'min_samples = {min_samples}')
print(f'min_cluster_size = {min_cluster_size}')
print(f'n_neighbors = {n_neighbors}')
# This result is the dataframe used by the remaining trajectory cells
tptlabel_dr_df_filt_clusteredtrajectories = cp.cluster_sequences(df, distance_matrix_dameraulev,
 do_umap=True, eps=0.1, min_samples=min_samples, min_cluster_size=min_cluster_size, n_neighbors=n_neighbors)

Get the fingerprint plot of trajectories

In [None]:
df = tptlabel_dr_df_filt_clusteredtrajectories

# Same purity table and fingerprint plot as in section 3, but grouped by 'trajectory_id'
cluster_purity_df = cp.purity_pointsinclusterspercondition(df, cluster_label='trajectory_id') 
f = cp.purityplot_percentcluspercondition(df, cluster_purity_df, cluster_label='trajectory_id', dotsize = 30) 

### Disambiguate the trajectory clustered cells:
 1) Make an exemplar_df_trajectories containing example rows
 2) Get the full tracks from those rows and make exemplar_df_trajectories_fulltrack
 3) Disambiguate with exemplar_df_trajectories
 4) Plot multiples with exemplar_df_trajectories_fulltrack

In [None]:
df = tptlabel_dr_df_filt_clusteredtrajectories
# Returns the exemplar rows and the matching full tracks; cells_per_traj sets how many cells are requested per trajectory.
exemplar_df_trajectories, exemplar_df_trajectories_fulltrack  = cp.make_exemplar_df_basedon_trajectories(df, cells_per_traj=6)

In [None]:
# full_tracks_df = pd.read_csv(cp.SAVED_DATA_PATH + 'full_tracks_df.csv')
df = exemplar_df_trajectories_fulltrack
# The call passes the full-track dataframe by name; df above refers to the same object.
cp.plot_trajectories(df=exemplar_df_trajectories_fulltrack, global_y=True, global_x=True)

In [None]:
size=300 # passed as XYRange to disambiguate_timepoint below


df= tptlabel_dr_df_filt_clusteredtrajectories 
exemp_df=exemplar_df_trajectories 

# NOTE(review): contributions are computed on the unfiltered tptlabel_dr_df, while the plots
# below use the filtered, trajectory-clustered df - confirm this is intended.
# NOTE(review): newnew_DR_FACTORS is defined in the "Useful things" cell; run that cell first.
top_dictionary, contributions_df_singletpoints, scaled_df=cp.contribution_to_clusters(df_in=tptlabel_dr_df,  howmanyfactors=2, dr_factors= newnew_DR_FACTORS) #BEFORE disambiguate_tavg(), then: lab_tavg_dr_df BEFORE disambiguate_timepoint(), then: #tptlabel_dr_df 
cp.plot_cluster_averages(top_dictionary, df, scaled_df)
result_df = cp.create_cluster_averages_table(top_dictionary, df, scaled_df)
cp.disambiguate_timepoint(df, exemp_df, scaled_df, top_dictionary=top_dictionary, XYRange=size,boxoff=True, trajectory = True) 

# Percent fingerprint plot for cluster IDs per TRAJECTORY

In [None]:
# Optional: reload a previously saved result instead of using the in-memory dataframe
# tptlabel_dr_df_filt_clusteredtrajectories = pd.read_csv(cp.SAVED_DATA_PATH + 'tptlabel_dr_df_filt_clusteredtrajectories_FINAL_10-12-2023.csv')

df = tptlabel_dr_df_filt_clusteredtrajectories
cp.fingerprintplot_clusters_per_trajectory(df)

# Plasticity of cells per trajectory

In [None]:
# NOTE(review): this replaces the in-memory dataframe with a dated CSV that no cell in this
# notebook writes; skip this cell (or change the filename) unless that file exists.
tptlabel_dr_df_filt_clusteredtrajectories = pd.read_csv(cp.SAVED_DATA_PATH + 'tptlabel_dr_df_filt_clusteredtrajectories_FINAL_10-17-2023.csv')

In [None]:
# Plasticity summary on the trajectory-clustered dataframe
df=tptlabel_dr_df_filt_clusteredtrajectories
cp.plasticity_per_trajectory(df)

In [None]:
df = tptlabel_dr_df_filt_clusteredtrajectories
# Raw string: same identifier characters as before (leading backslash kept),
# without the unrecognised escape sequence '\_'.
# Original author's note: problem with NaNs in the data.
cp.plot_plasticity_changes_trajectories(df, identifier=r'\_allcells', maxy=9, t_window_multiplier=1)

# Animations of trajectories

In [None]:
df = tptlabel_dr_df_filt_clusteredtrajectories
# Uses the exemplar rows produced by make_exemplar_df_basedon_trajectories above.
# colormode options noted by the original author: singlecluster, cluster, trajectory
cp.make_trajectory_animations(df, exemplar_df_trajectories, number_of_trajectories=2, colormode='cluster') # singlecluster, cluster, trajectory

Select a number of example cells from each trajectory ID to map back on to the data and display as stacks of PNGs

In [None]:
number_of_trajectories = 10  # Maximum number of distinct cells to pick per trajectory ID

df = tptlabel_dr_df_filt_clusteredtrajectories

trajectory_ids = df['trajectory_id'].unique()

uniq_id_choices_list = []

for trajectory_id_choice in trajectory_ids:
    # For each trajectory_id, collect the distinct uniq_ids that belong to it
    uniq_id_choices = np.unique(
        df.loc[df['trajectory_id'] == trajectory_id_choice, 'uniq_id'].values
    )
    # Sample WITHOUT replacement so that no cell is picked twice, and never ask
    # for more cells than the trajectory actually contains
    n_to_pick = min(number_of_trajectories, len(uniq_id_choices))
    uniq_id_choices = np.random.choice(uniq_id_choices, size=n_to_pick, replace=False)
    uniq_id_choices_list.append(uniq_id_choices)

# Flatten the per-trajectory selections into one list
chosen_uniq_ids = [item for sublist in uniq_id_choices_list for item in sublist]

print(chosen_uniq_ids)

In [None]:
df = tptlabel_dr_df_filt_clusteredtrajectories
# Uses the uniq_ids selected in the previous cell
cp.make_png_behaviour_trajectories(df,chosen_uniq_ids,XYRange = 300, follow_cell = False, invert=False)

In [None]:
df = tptlabel_dr_df_filt_clusteredtrajectories
# Same selection of uniq_ids as above. NOTE(review): XYRange is 220 here but 300 in the previous cell - confirm intended.
cp.make_raw_cell_pngstacks(df,chosen_uniq_ids,XYRange = 220, follow_cell=False, invert=False)