# Plotting Cycle Based Features
- This notebook plots the cycle-based features that were extracted in the Processing notebook and stored as `df_gen`.

- Note that `df_gen`, as saved, has not been filtered at all. All incomplete cycles are filtered explicitly here, and will be filtered again in other notebooks when combining with other dataframes, so that it is easy to keep track of what has been removed.

- Lengths here are in µm (though always check what you're pulling from Processing), times are in minutes, and rates are in µm/min (but will most likely be plotted as µm/hr).

In [None]:
from pathlib import Path
import seaborn as sns
from matplotlib import pyplot as plt
import xarray as xr
import numpy as np
import pandas as pd
import os
import re
from collections import defaultdict
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

# Notebook-wide plot styling: seaborn "talk" context with the "ticks" style
# (makes figures look better in Jupyter).
sns.set_context('talk')
sns.set_style("ticks")

# Font type 42 selects TrueType output for the PDF and PostScript backends
# (matplotlib's default is Type 3).
import matplotlib as mpl
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

In [None]:
# Directory that holds CellCycle_Stats.csv; the plotting cells below also save
# their PDFs relative to it. Left empty here - set it before running.
plot_output = ''

# Load the per-cycle feature table.
stats_csv = os.path.join(plot_output, 'CellCycle_Stats.csv')
df_gen = pd.read_csv(stats_csv)
df_gen

In [None]:
# Filtering incomplete cycles.
# Every intermediate frame is kept under its own name so the row counts printed
# at the end show how many cycles each step removed.
EXPT_END_TIME = 1080  # last time point of the experiment (times are in minutes per the notebook header)
MIN_CYCLE_DURATION = 10  # shortest cycle_duration kept; NOTE(review): rationale for this cutoff is not stated - confirm

# 1) Remove incomplete cycles at the end of the experiment.
df_gen_no_end_cycle = df_gen[df_gen['end_time'] != EXPT_END_TIME]

# 2) Remove cycles where cells fall out of the end of the channel - defined as
#    cycles that both don't have a following cycle and don't have valid cell
#    IDs through the last time point.
df_gen_no_end_cycle = df_gen_no_end_cycle.sort_values(by=['unique_ID', 'cell_id', 'cycle'])
# shift(-1) within each (unique_ID, cell_id) group yields the cycle number of
# the following row, or NaN for the last row of the group.
df_gen_no_end_cycle['next_cycle'] = df_gen_no_end_cycle.groupby(['unique_ID', 'cell_id'])['cycle'].shift(-1)
has_next_cycle = df_gen_no_end_cycle['next_cycle'] == df_gen_no_end_cycle['cycle'] + 1
tracked_to_end = df_gen_no_end_cycle['last_valid_cell_time'] == EXPT_END_TIME
df_gen_no_lost_cells = df_gen_no_end_cycle[has_next_cycle | tracked_to_end].copy()

# 3) Remove the first cycle (start_time == 0) as that's also incomplete, and
#    drop cycles shorter than MIN_CYCLE_DURATION.
starts_after_t0 = df_gen_no_lost_cells['start_time'] != 0
long_enough = df_gen_no_lost_cells['cycle_duration'] >= MIN_CYCLE_DURATION
# .copy() makes df_filtered an independent frame, so later cells can add
# columns to it without pandas raising SettingWithCopyWarning.
df_filtered = df_gen_no_lost_cells[starts_after_t0 & long_enough].copy()

# Row counts after each filtering stage, in order.
print(len(df_gen))
print(len(df_gen_no_end_cycle))
print(len(df_gen_no_lost_cells))
print(len(df_filtered))

In [None]:
# Overlaid distributions of cell length at the start (gray) and end (steel blue)
# of each filtered cycle, annotated with skewness and a solid line at each mean.
fig, ax = plt.subplots()
start_lengths = df_filtered['start_length']
end_lengths = df_filtered['end_length']

sns.histplot(data=start_lengths, kde=True, bins=40, ax=ax, color='gray', stat='percent')
start_length_skew = start_lengths.skew()
sns.histplot(data=end_lengths, kde=True, bins=40, ax=ax, color='steelblue', stat='percent')
end_length_skew = end_lengths.skew()

ax.annotate(f"Skewness = {start_length_skew:.2f}", xy=(0.5, 0.9), xycoords="axes fraction", color='grey')
ax.annotate(f"Skewness = {end_length_skew:.2f}", xy=(0.5, 0.8), xycoords="axes fraction", color='steelblue')
ax.axvline(x=end_lengths.mean(), ls='-', color='darkblue')
ax.axvline(x=start_lengths.mean(), color='black', ls='-')

# Optional mean +/- 1 SD guide lines (disabled):
#ax.axvline(x=df_filtered['end_length'].std()+df_filtered['end_length'].mean(), color = 'steelblue', ls='--')
#ax.axvline(x=df_filtered['end_length'].mean()- df_filtered['end_length'].std(), color = 'steelblue', ls='--')
#ax.axvline(x=df_filtered['start_length'].std()+df_filtered['start_length'].mean(), color = 'grey', ls='--')
#ax.axvline(x=df_filtered['start_length'].mean()- df_filtered['start_length'].std(), color = 'grey', ls='--')

# os.path.join matches how the CSV is loaded; plot_output + '/...' would point
# at the filesystem root whenever plot_output is an empty string.
fig.savefig(os.path.join(plot_output, 'start_end_length_distribution.pdf'))

In [None]:
# Distribution of cycle_duration for the filtered cycles, annotated with its
# skewness, a solid line at the mean and dashed lines at mean +/- 1 SD.
fig, ax = plt.subplots()
durations = df_filtered['cycle_duration']
sns.histplot(data=durations, kde=True, bins=40, ax=ax, color='gray', stat='percent')
dur_skew = durations.skew()
ax.annotate(f"Skewness = {dur_skew:.2f}", xy=(0.5, 0.9), xycoords="axes fraction", color='grey')

dur_mean = durations.mean()
dur_std = durations.std()
ax.axvline(x=dur_mean, color='black', ls='-')
ax.axvline(x=dur_mean + dur_std, color='grey', ls='--')
ax.axvline(x=dur_mean - dur_std, color='grey', ls='--')

# Ticks every 120 units (2 h if durations are in minutes, as the notebook header states).
ax.set_xticks([0, 120, 240, 360, 480, 600, 720, 840])

# os.path.join matches how the CSV is loaded; plot_output + '/...' would point
# at the filesystem root whenever plot_output is an empty string.
fig.savefig(os.path.join(plot_output, 'cycle_duration.pdf'))

In [None]:
# Distribution of total_growth for the filtered cycles, annotated with its
# skewness, a solid line at the mean and dashed lines at mean +/- 1 SD.
fig, ax = plt.subplots()
growth = df_filtered['total_growth']
sns.histplot(data=growth, kde=True, bins=40, ax=ax, color='gray', stat='percent')
# NOTE: the name dur_skew is kept from the original cell, but here it holds the
# skewness of total_growth, not of cycle_duration.
dur_skew = growth.skew()
ax.annotate(f"Skewness = {dur_skew:.2f}", xy=(0.5, 0.9), xycoords="axes fraction", color='grey')

growth_mean = growth.mean()
growth_std = growth.std()
ax.axvline(x=growth_mean, color='black', ls='-')
ax.axvline(x=growth_mean + growth_std, color='grey', ls='--')
ax.axvline(x=growth_mean - growth_std, color='grey', ls='--')

# os.path.join matches how the CSV is loaded; plot_output + '/...' would point
# at the filesystem root whenever plot_output is an empty string.
fig.savefig(os.path.join(plot_output, 'total_growth.pdf'))

In [None]:
# Distribution of the average elongation rate, rescaled from per-minute to
# per-hour (x60), annotated with skewness, mean (solid) and mean +/- 1 SD (dashed).
fig, ax = plt.subplots()

# assign() returns a new frame with the extra column. df_filtered comes from
# boolean indexing, so writing a column into it in place makes pandas raise
# SettingWithCopyWarning; rebinding the name avoids that and keeps the
# 'elong_hours' column available to later cells.
df_filtered = df_filtered.assign(elong_hours=df_filtered['avg_elong_rate'] * 60)
elong_hours = df_filtered['elong_hours']

sns.histplot(data=elong_hours, kde=True, bins=40, ax=ax, color='gray', stat='percent')
# NOTE: the name dur_skew is kept from the original cell, but here it holds the
# skewness of elong_hours, not of cycle_duration.
dur_skew = elong_hours.skew()
ax.annotate(f"Skewness = {dur_skew:.2f}", xy=(0.5, 0.9), xycoords="axes fraction", color='grey')

elong_mean = elong_hours.mean()
elong_std = elong_hours.std()
ax.axvline(x=elong_mean, color='black', ls='-')
ax.axvline(x=elong_mean + elong_std, color='grey', ls='--')
ax.axvline(x=elong_mean - elong_std, color='grey', ls='--')

# os.path.join matches how the CSV is loaded; plot_output + '/...' would point
# at the filesystem root whenever plot_output is an empty string.
fig.savefig(os.path.join(plot_output, 'elong_rate.pdf'))

In [None]:
# Subset of the unfiltered df_gen whose 'parent' value is 0
# (presumably the mother cells, going by the variable name - confirm against Processing).
df_gen_mothers = df_gen[df_gen['parent'].eq(0)]
df_gen_mothers

In [None]:
# Number of df_gen_mothers rows per unique_ID, stored in a 'div_count' column.
# NOTE(review): df_gen_mothers comes from the unfiltered df_gen, so incomplete
# cycles are counted too - confirm that is intended for a division count.
df_count = (
    df_gen_mothers
    .groupby('unique_ID')
    .size()
    .rename('div_count')
    .reset_index()
)
df_count

In [None]:
# Histogram of div_count per unique_ID, one bin per integer count.
# An explicit figure/axes pair (as in the other plotting cells) guarantees the
# histogram is not drawn onto whatever axes happened to be current.
fig, ax = plt.subplots()
sns.histplot(data=df_count, x='div_count', color='gray', binwidth=1, stat='percent', ax=ax)

# os.path.join matches how the CSV is loaded; plot_output + '/...' would point
# at the filesystem root whenever plot_output is an empty string.
fig.savefig(os.path.join(plot_output, 'mother_divisions.pdf'))

In [None]:
# Number of distinct cell_id values per unique_ID in the unfiltered df_gen.
cell_count_df = (
    df_gen
    .groupby('unique_ID')['cell_id']
    .nunique()
    .rename('cell_count')
    .reset_index()
)

# Same table sorted by ascending cell_count, plus each unique_ID's share of
# the summed cell_count.
cell_count_df_sorted = (
    cell_count_df
    .sort_values(by='cell_count')
    .assign(fraction_total=lambda counts: counts['cell_count'] / counts['cell_count'].sum())
)

In [None]:
# Histogram (percent of unique_IDs) of the number of distinct cells per unique_ID.
fig, ax = plt.subplots()
sns.histplot(data=cell_count_df_sorted, x='cell_count', color='gray', stat='percent', ax=ax)
ax.set_xticks([0, 10, 20, 30, 40, 50, 60])

# Optional mean marker (disabled):
#ax.axvline(x=cell_count_df_sorted['cell_count'].mean(), color = 'black', ls='-')

# os.path.join matches how the CSV is loaded; plot_output + '/...' would point
# at the filesystem root whenever plot_output is an empty string.
fig.savefig(os.path.join(plot_output, 'total_cell_count.pdf'))