## Packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import NMF
from numpy import asarray
from numpy import savetxt
import matplotlib.colors as mcolors
import matplotlib.cm
import matplotlib.patches as mpatches
import umap
import matplotlib as mpl
import scanpy as sc
import scipy
import kmapper as km
from kmapper import jupyter
from kmapper.plotlyviz import plotlyviz
from kmapper.plotlyviz import *
import plotly.graph_objs as go
import sys
from sklearn import ensemble
import tqdm
from ripser import Rips
from ripser import ripser
from persim import plot_diagrams
from sklearn.metrics.pairwise import euclidean_distances
import umap
from numpy import random
from sklearn.decomposition import PCA
from mpl_toolkits import mplot3d

In [None]:
# ----------------------- Palettes ------------------------
# Each palette lists three matplotlib named colors ordered dark -> medium -> light.

blue = ["blue", "cornflowerblue", "lightskyblue"]
red = ["maroon", "indianred", "salmon"]
orange = ["orangered", "coral", "orange"]
purple = ["indigo", "rebeccapurple", "mediumpurple"]
green = ["darkgreen", "limegreen", "lightgreen"]
# "goldenrod" was misspelled "goldenrob", which is not a valid color name.
yellow = ["darkgoldenrod", "goldenrod", "gold"]
teal = ["darkcyan", "lightseagreen", "paleturquoise"]

# Rows follow the order of the index labels passed to the DataFrame below.
pal_array = np.array([blue, red, orange, purple, green, yellow, teal])

# Lookup table: custom_palette.loc[<palette name>][<"dark" | "medium" | "light">]
custom_palette = pd.DataFrame(
    pal_array,
    index=["blue", "red", "orange", "purple", "green", "yellow", "teal"],
    columns=["dark", "medium", "light"],
)

In [None]:
def persistance_diagram(expression_matrix, color, title_plt):
    """Compute and plot the H_0 / H_1 persistence diagram of an expression matrix.

    Parameters
    ----------
    expression_matrix : array-like
        Matrix handed to ``ripser`` after transposition (``expression_matrix.T``),
        so every column of the input is one point of the point cloud.
        NOTE(review): callers pass genes x cells matrices, i.e. cells are the
        points - confirm.
    color : str
        Row label of ``custom_palette`` ("blue", "red", "orange", "purple",
        "green", "yellow", "teal"). H_0 is drawn in the "medium" shade and
        H_1 in the "dark" shade.
    title_plt : str
        Title of the figure.

    Returns
    -------
    tuple
        ``(bd, color)``: ``bd`` is the list of diagrams returned by ripser
        (``bd[0]`` is H_0, ``bd[1]`` is H_1, each an array of birth/death
        pairs) and ``color`` is the argument passed in, unchanged.
    """
    bd = ripser(expression_matrix.T, maxdim=1)['dgms']

    xy_H_0 = bd[0]
    xy_H_1 = bd[1]

    # Largest H_0 death (kept from the original notebook output).
    print(np.max(xy_H_0[:, 1]))

    # Axis limit = largest *finite* death over both diagrams. Filtering on
    # finiteness replaces the previous sorted(...)[-2] / sorted(...)[-1]
    # indexing, which raised IndexError when H_0 had a single row or H_1 was
    # empty.
    deaths = np.concatenate([xy_H_0[:, 1], xy_H_1[:, 1]])
    deaths = deaths[np.isfinite(deaths)]
    either_max = deaths.max() if deaths.size else 1.0

    buffer = either_max*0.1

    x = np.linspace(-1*buffer, either_max, 500)

    plt.figure(figsize=(5, 5))

    # x / y must be passed by keyword: positional x / y was deprecated in
    # seaborn 0.11 and is rejected by seaborn >= 0.12.
    for diagram, shade, label in ((xy_H_0, "medium", "H_0"), (xy_H_1, "dark", "H_1")):
        if diagram.shape[0] == 0:
            # Nothing to draw for an empty diagram.
            continue
        sns.scatterplot(x=diagram[:, 0], y=diagram[:, 1], hue=np.ones(diagram.shape[0]), s=5, linewidth=0, palette=[custom_palette.loc[color][shade]], label=label)

    plt.plot(x, x, ':k', label='Birth = Death')
    plt.plot(x, either_max*np.ones(x.shape), ':k', label='Fully Connected Component')
    plt.ylabel('Death')
    plt.xlabel('Birth')
    plt.ylim((-1*buffer, either_max*1.1))
    plt.xlim((-1*buffer, either_max*1.1))
    plt.title(title_plt)
    plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
    #plt.savefig('.png')
    plt.show()

    return bd, color

In [None]:
def birth_lifetime(bd, data_type, color):
    """Scatter-plot lifetime (death - birth) against birth for the H_1 features.

    Parameters
    ----------
    bd : sequence of array-like
        Persistence diagrams as returned by ``persistance_diagram``; only
        ``bd[1]`` (H_1 birth/death pairs) is used.
    data_type : int
        0 -> "All Genes", 1 -> "Cell Cycle Genes", anything else ->
        "Random Genes". It is also the position (0 = dark, 1 = medium,
        2 = light) of the shade taken from the palette row.
    color : str
        Row label of ``custom_palette``.
    """
    titles = {0: "All Genes", 1: "Cell Cycle Genes"}
    title_plt = titles.get(data_type, "Random Genes")

    H_1 = np.array(bd[1])
    # Lifetime of every feature, computed in one vectorized step.
    lifetime = H_1[:, 1] - H_1[:, 0]

    # .iloc makes the positional lookup explicit; Series[int] on a
    # string-labelled Series is deprecated in pandas 2.x.
    shade = custom_palette.loc[color].iloc[data_type]

    plt.figure(figsize=(5, 5))

    plt.scatter(H_1[:, 0], lifetime, s=5, linewidth=0, c=shade, label="H_1")
    plt.ylabel('Lifetime')
    plt.xlabel('Birth')
    plt.title(title_plt)
    plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
    #plt.savefig('.png')
    plt.show()

In [None]:
def violin_lifetime(bd, data_type, color, sublabel):
    """Draw a violin plot of the H_1 lifetimes and mark the longest one.

    Parameters
    ----------
    bd : sequence of array-like
        Persistence diagrams as returned by ``persistance_diagram``; only
        ``bd[1]`` (H_1 birth/death pairs) is used.
    data_type : int
        0: all genes (y-axis capped at 100), 1: cell cycle genes, anything
        else: random genes (both capped at 20). It is also the position of
        the violin's shade in the palette row.
    color : str
        Row label of ``custom_palette``.
    sublabel : str
        Text shown as the x-axis label (the notebook passes the gene names).

    Raises
    ------
    ValueError
        If the diagram contains no H_1 features.
    """
    if data_type == 0:
        y_label = "All Genes"
        y_top = 100
    elif data_type == 1:
        y_label = "Cell Cycle Genes"
        y_top = 20
    else:
        y_label = "Random Genes"
        y_top = 20

    H_1 = np.array(bd[1])
    if H_1.shape[0] == 0:
        raise ValueError("no H_1 features: cannot plot a lifetime distribution")

    # 1-D vector of lifetimes (death - birth) of each feature.
    lifetime = H_1[:, 1] - H_1[:, 0]

    # .iloc makes the positional lookups explicit; Series[int] on a
    # string-labelled Series is deprecated in pandas 2.x.
    palette_row = custom_palette.loc[color]

    plt.figure(figsize=(2, 5))
    sns.violinplot(y=lifetime, palette=[palette_row.iloc[data_type]])
    # A single marker at the maximum lifetime, drawn in the dark shade; the
    # categorical x value also supplies the tick label.
    sns.swarmplot(x=np.array([y_label]), y=np.array([np.amax(lifetime)]), color=palette_row.iloc[0], s=10)
    plt.ylabel("Lifetime of H1 features")
    plt.xlabel(sublabel, fontsize=8)
    plt.ylim((0, y_top))
    plt.show()
    


In [None]:
def visualize(data, pca_n, plot_label, hue):
    """Show a 2-D UMAP and three rotations of a 3-D UMAP of PCA-reduced data.

    Parameters
    ----------
    data : array-like
        Matrix whose transpose is fed to PCA, so its columns are the samples
        that get embedded.
    pca_n : int
        Number of principal components kept before running UMAP.
    plot_label : str
        Title of the 2-D plot.
    hue : int
        Position of the shade to use in the module-level ``color_selection``
        palette row, which must be assigned before this function is called.
    """
    pca = PCA(n_components=pca_n)
    pca.fit(data.T)
    X = pca.transform(data.T)
    print(X.shape)

    reducer = umap.UMAP(random_state=42, n_components=2)
    embedding = reducer.fit_transform(X)
    print(embedding.shape)

    reducer_3 = umap.UMAP(random_state=42, n_components=3)
    embedding_3 = reducer_3.fit_transform(X)
    print(embedding_3.shape)

    # .iloc makes the positional lookup explicit; Series[int] on a
    # string-labelled Series is deprecated in pandas 2.x.
    point_color = color_selection.iloc[hue]

    plt.figure(figsize=(8, 6))

    # x / y must be passed by keyword: positional x / y was deprecated in
    # seaborn 0.11 and is rejected by seaborn >= 0.12.
    sns.scatterplot(x=embedding[:, 0], y=embedding[:, 1], hue=np.ones(embedding.shape[0]), s=20, linewidth=0, palette=[point_color])

    plt.ylabel('UMAP2')
    plt.xlabel('UMAP1')
    plt.title(plot_label)
    plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
    #plt.savefig('.png')
    plt.show()

    # The same 3-D embedding drawn three times with the axes cyclically
    # permuted, one figure per permutation.
    for first, second, third in ((0, 1, 2), (1, 2, 0), (2, 0, 1)):
        plt.figure()
        ax = plt.axes(projection='3d')
        ax.scatter3D(embedding_3[:, first], embedding_3[:, second], embedding_3[:, third], c=point_color)
        plt.show()

In [None]:
def find_random(size, seed):
    """Sample one expression-matched random gene per reference cell-cycle gene.

    Reads the module-level ``gene_expression`` DataFrame. Eleven bin edges
    are built: for each column of ``gene_expression`` the 5 %, 15 %, ...,
    95 % quantiles over its rows are taken and averaged across columns; the
    last edge is the across-column average of the column maxima plus 1. Each
    of the first ``size`` reference genes (CDK1, UBE2C, TOP2A, H4C3) is
    assigned to a bin by its mean expression, and one gene whose mean
    expression falls strictly inside that bin is drawn at random. The
    reference genes themselves are not excluded from the draw.

    Parameters
    ----------
    size : int
        Number of reference genes to match (at most 4).
    seed : int
        Seed passed to ``np.random.seed``; this resets NumPy's global RNG.

    Returns
    -------
    list
        Index labels of ``gene_expression``, one per matched reference gene.

    Raises
    ------
    IndexError
        If ``size`` exceeds the number of reference genes, or a reference
        gene's mean expression lies below the lowest bin edge.
    ValueError
        If a bin contains no candidate gene.
    """
    np.random.seed(seed)
    CDK1 = "ENSG00000170312"
    UBE2C = "ENSG00000175063"
    TOP2A = "ENSG00000131747"
    #H4C5 = "ENSG00000276966"
    H4C3 = "ENSG00000197061"

    # Cell cycle genes for comparison
    cell_cycle = [CDK1, UBE2C, TOP2A, H4C3]
    if size > len(cell_cycle):
        raise IndexError(
            f"size={size} exceeds the {len(cell_cycle)} reference cell-cycle genes"
        )

    # Bin edges. The quantile level is accumulated exactly as before
    # (0.05, then += 0.1) so the edges, and hence the genes drawn for a
    # given seed, are unchanged.
    transposed = gene_expression.values.T
    q = 0.05
    edges = []
    for _ in range(10):
        edges.append(np.quantile(transposed, q, axis=1))
        q += 0.1
    edges.append(np.quantile(transposed, 1, axis=1) + 1)
    quantiles = np.mean(np.array(edges), axis=1)

    # Bin of each reference gene = index of the highest edge below its mean.
    cell_cycle_bin = []
    for i in range(size):
        gene = cell_cycle[i]
        below = np.where(np.mean(gene_expression.loc[gene]) > quantiles)[0]
        if below.size == 0:
            raise IndexError(
                f"mean expression of {gene} is not above the lowest bin edge"
            )
        cell_cycle_bin.append(below[-1])

    # Take mean expression for finding other genes in bin
    gene_expression_mean = pd.DataFrame(np.mean(gene_expression.values, axis=1), index=gene_expression.index, columns=["Mean_exp"])
    mean_exp = gene_expression_mean["Mean_exp"]

    # For each cell cycle gene - sample a random gene from that same bin
    rand_gene_list = []
    for i in cell_cycle_bin:
        range_min = quantiles[i]
        range_max = quantiles[i + 1]
        candidates = gene_expression_mean[(mean_exp > range_min) & (mean_exp < range_max)].index
        if candidates.shape[0] == 0:
            raise ValueError(
                f"no gene has mean expression strictly between {range_min} and {range_max}"
            )

        rand_gene_index = np.random.randint(0, candidates.shape[0], size=1)
        rand_gene_list.append(candidates[rand_gene_index][0])

    return rand_gene_list

## Read in data

In [None]:
# Load both tables from comma-separated files; the first row holds the column
# names and the first column becomes the row index.
gene_expression = pd.read_csv("final_geneexpression.csv", index_col=0)
labels = pd.read_csv("final_labels.csv", index_col=0)

In [None]:
gene_expression

In [None]:
labels

In [None]:
# Reduce the full expression matrix to 20 principal components. The matrix is
# transposed, so the columns of `gene_expression` are the PCA samples.
pca = PCA(n_components=20)
pca.fit(gene_expression.values.T)
X = pca.transform(gene_expression.values.T)
print(X.shape)
    
# Embed the PCA scores in two dimensions with UMAP (fixed random_state).
reducer = umap.UMAP(random_state=42, n_components=2)
embedding = reducer.fit_transform(X)
print(embedding.shape)

In [None]:
# All-genes UMAP colored by the `theta` column of `labels`.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding[:, 0], y=embedding[:, 1], hue=labels["theta"], s=20, linewidth=0, palette="twilight", hue_norm=(0, 2*3.1415))

plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - Theta')
plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# All-genes UMAP colored by the `rfp.median.log10sum.adjust` column of `labels`.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding[:, 0], y=embedding[:, 1], hue=labels["rfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))

plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - RFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# All-genes UMAP colored by the `gfp.median.log10sum.adjust` column of `labels`.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding[:, 0], y=embedding[:, 1], hue=labels["gfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))

plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - GFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
color_selection = custom_palette.loc["blue"]

In [None]:
visualize(gene_expression.values, 20, 'UMAP - All Cells, All Genes', 0)

In [None]:
# All-genes UMAP drawn in the first (dark) shade of the current palette row.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword (required by seaborn >= 0.12) and the shade is
# looked up with .iloc, since Series[int] on a string-labelled Series is
# deprecated in pandas 2.x.
sns.scatterplot(x=embedding[:, 0], y=embedding[:, 1], hue=np.ones(embedding.shape[0]), s=20, linewidth=0, palette=[color_selection.iloc[0]])

plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - All Cells, All Genes')
plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

## Persistence Diagram

In [None]:
bd, color = persistance_diagram(gene_expression.values, "blue", "All Cells, All Genes")

In [None]:
birth_lifetime(bd, 0, color)

In [None]:
sublabel = ""

violin_lifetime(bd, 0, color, sublabel)

## Reduce data to cyclic genes

In [None]:
CDK1 = "ENSG00000170312"
UBE2C = "ENSG00000175063"
TOP2A = "ENSG00000131747"
#H4C5 = "ENSG00000276966"
H4C3 = "ENSG00000197061"

sublabel_cycle = "CDK1, UBE2C, TOP2A, H4C3"

In [None]:
gene_sub = gene_expression.loc[[CDK1, UBE2C, TOP2A, H4C3]]

In [None]:
# PCA with 4 components on the 4-gene subset `gene_sub`; the matrix is
# transposed, so the columns of `gene_sub` are the PCA samples.
pca = PCA(n_components=4)
pca.fit(gene_sub.values.T)
X = pca.transform(gene_sub.values.T)
print(X.shape)
    
# 2-D UMAP of the PCA scores; `embedding_sub` is the embedding that the later
# "Cut cycle from UMAP" cells threshold to select cells.
reducer = umap.UMAP(random_state=42, n_components=2)
embedding_sub = reducer.fit_transform(X)
print(embedding_sub.shape)

In [None]:
visualize(gene_sub.values, 4, 'UMAP - All Cells, Cell Cycle Genes', 1)

In [None]:
# Cell-cycle-gene UMAP drawn in the second (medium) shade of the current palette row.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword (required by seaborn >= 0.12) and the shade is
# looked up with .iloc, since Series[int] on a string-labelled Series is
# deprecated in pandas 2.x.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=np.ones(embedding_sub.shape[0]), s=20, linewidth=0, palette=[color_selection.iloc[1]])
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - All Cells, Cell Cycle Genes')
plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Cell-cycle-gene UMAP colored by the `theta` column of `labels`.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["theta"], s=20, linewidth=0, palette="twilight", hue_norm=(0, 2*3.1415))

plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - Theta')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Cell-cycle-gene UMAP colored by the `rfp.median.log10sum.adjust` column of `labels`.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["rfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))

plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - RFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Cell-cycle-gene UMAP colored by the `gfp.median.log10sum.adjust` column of `labels`.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["gfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))

plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - GFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
bd, color = persistance_diagram(gene_sub.values, "blue", "All Cells, Cell Cycle Genes")

In [None]:
birth_lifetime(bd, 1, color)

In [None]:
violin_lifetime(bd, 1, color, sublabel_cycle)

## Select 4 random genes

In [None]:
rand_gene = find_random(4, 0)
print(rand_gene)

In [None]:
sublabel_1 = "POLR3K, PRDX5, HSP90B1, MDH1"

gene_sub_rand1 = gene_expression.loc[rand_gene]

In [None]:
visualize(gene_sub_rand1.values, 4, 'UMAP - All Cells, Random Genes_1', 2)

In [None]:
bd, color = persistance_diagram(gene_sub_rand1.values, "blue", "All Cells, Random Genes_1")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_1)

## Select 4 random genes_2

In [None]:
rand_gene = find_random(4, 10)
print(rand_gene)

In [None]:
sublabel_2 = "ARPC3, CUTA, EEF1A1, TUBA1A"

gene_sub_rand2 = gene_expression.loc[rand_gene]

In [None]:
visualize(gene_sub_rand2.values, 4, 'UMAP - All Cells, Random Genes_2', 2)

In [None]:
bd, color = persistance_diagram(gene_sub_rand2.values, "blue", "All Cells, Random Genes_2")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_2)

## Select 4 random genes_3

In [None]:
rand_gene = find_random(4, 20)
print(rand_gene)

In [None]:
sublabel_3 = "CCND2, OSTC, CNIH4, SRRM1"

gene_sub_rand3 = gene_expression.loc[rand_gene]

In [None]:
visualize(gene_sub_rand3.values, 4, 'UMAP - All Cells, Random Genes_3', 2)

In [None]:
bd, color = persistance_diagram(gene_sub_rand3.values, "blue", "All Cells, Random Genes_3")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_3)

## Select 4 random genes_4

In [None]:
rand_gene = find_random(4, 30)
print(rand_gene)

In [None]:
sublabel_4 = "BSG, EIF3K, MTRNR2L1, RPS15"

gene_sub_rand4 = gene_expression.loc[rand_gene]

In [None]:
visualize(gene_sub_rand4.values, 4, 'UMAP - All Cells, Random Genes_4', 2)

In [None]:
bd, color = persistance_diagram(gene_sub_rand4.values, "blue", "All Cells, Random Genes_4")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_4)

## Select 4 random genes_5

In [None]:
rand_gene = find_random(4, 40)
print(rand_gene)

In [None]:
sublabel_5 = "FARSB, EIF5A, RPS20, ACAT2"

gene_sub_rand5 = gene_expression.loc[rand_gene]

In [None]:
visualize(gene_sub_rand5.values, 4, 'UMAP - All Cells, Random Genes_5', 2)

In [None]:
bd, color = persistance_diagram(gene_sub_rand5.values, "blue", "All Cells, Random Genes_5")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_5)

## Cut cycle from UMAP

In [None]:
color_selection = custom_palette.loc["green"]

In [None]:
# Cell-cycle-gene UMAP colored by GFP, with the cut at UMAP1 = -1 drawn as a dotted line.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["gfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))
plt.vlines(-1, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - GFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Positions of the cells whose first UMAP coordinate lies right of the cut at -1.
location_include = np.where(embedding_sub[:, 0] > -1)[0]
print(location_include.shape)

# Row i of `embedding_sub` corresponds to column i of `gene_expression`, so
# the selected positions index the columns directly (no per-element loop).
name_include = list(gene_expression.columns[location_include])
include_gene_expression = gene_expression[name_include]

# (k, 2) array of the selected embedding coordinates, plus the two (k, 1)
# column vectors the original cell also left defined.
embedding_include = embedding_sub[location_include, :2]
embedding_include_x = embedding_include[:, [0]]
embedding_include_y = embedding_include[:, [1]]

In [None]:
# All cells in light grey with the selected cells drawn on top in the palette's second shade.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword (required by seaborn >= 0.12) and the shade is
# looked up with .iloc, since Series[int] on a string-labelled Series is
# deprecated in pandas 2.x.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=np.ones(embedding_sub.shape[0]), s=20, linewidth=0, palette=["lightgrey"])
sns.scatterplot(x=embedding_include[:, 0], y=embedding_include[:, 1], hue=np.ones(embedding_include.shape[0]), s=20, linewidth=0, palette=[color_selection.iloc[1]])
plt.vlines(-1, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
visualize(include_gene_expression.values, 20, 'UMAP - Large Loop Cells, All Genes', 0)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "green", "Large Loop Cells, All Genes")

In [None]:
birth_lifetime(bd, 0, color)

In [None]:
violin_lifetime(bd, 0, color, sublabel)

In [None]:
include_gene_expression = gene_sub[name_include]

In [None]:
# The data here is the cell-cycle gene subset (gene_sub), so the title says so
# (it previously read "All Genes").
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Cell Cycle Genes', 1)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "green", "Large Loop Cells, Cell Cycle Genes")

In [None]:
birth_lifetime(bd, 1, color)

In [None]:
violin_lifetime(bd, 1, color, sublabel_cycle)

In [None]:
include_gene_expression = gene_sub_rand1[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_1', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "green", "Large Loop Cells, Random Genes_1")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_1)

In [None]:
include_gene_expression = gene_sub_rand2[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_2', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "green", "Large Loop Cells, Random Genes_2")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_2)

In [None]:
include_gene_expression = gene_sub_rand3[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_3', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "green", "Large Loop Cells, Random Genes_3")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_3)

In [None]:
include_gene_expression = gene_sub_rand4[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_4', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "green", "Large Loop Cells, Random Genes_4")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_4)

In [None]:
include_gene_expression = gene_sub_rand5[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_5', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "green", "Large Loop Cells, Random Genes_5")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_5)

## Cut cycle from UMAP

In [None]:
color_selection = custom_palette.loc["orange"]

In [None]:
# Cell-cycle-gene UMAP colored by GFP, with the cut at UMAP1 = 6 drawn as a dotted line.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["gfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))
plt.vlines(6, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - GFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Positions of the cells whose first UMAP coordinate lies left of the cut at 6.
location_include = np.where(embedding_sub[:, 0] < 6)[0]
print(location_include.shape)

# Row i of `embedding_sub` corresponds to column i of `gene_expression`, so
# the selected positions index the columns directly (no per-element loop).
name_include = list(gene_expression.columns[location_include])
include_gene_expression = gene_expression[name_include]

# (k, 2) array of the selected embedding coordinates, plus the two (k, 1)
# column vectors the original cell also left defined.
embedding_include = embedding_sub[location_include, :2]
embedding_include_x = embedding_include[:, [0]]
embedding_include_y = embedding_include[:, [1]]

In [None]:
# All cells in light grey with the selected cells drawn on top in the palette's second shade.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword (required by seaborn >= 0.12) and the shade is
# looked up with .iloc, since Series[int] on a string-labelled Series is
# deprecated in pandas 2.x.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=np.ones(embedding_sub.shape[0]), s=20, linewidth=0, palette=["lightgrey"])
sns.scatterplot(x=embedding_include[:, 0], y=embedding_include[:, 1], hue=np.ones(embedding_include.shape[0]), s=20, linewidth=0, palette=[color_selection.iloc[1]])
plt.vlines(6, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')

plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# All-genes matrix: keep 20 principal components like every other all-genes
# call in this notebook (4 is the setting used for the 4-gene subsets).
visualize(include_gene_expression.values, 20, 'UMAP - Cut Cells, All Genes', 0)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "orange", "Cut Cells, All Genes")

In [None]:
birth_lifetime(bd, 0, color)

In [None]:
violin_lifetime(bd, 0, color, sublabel)

In [None]:
include_gene_expression = gene_sub[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Cut Cells, Cell Cycle Genes', 1)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "orange", "Cut Cells, Cell Cycle Genes")

In [None]:
birth_lifetime(bd, 1, color)

In [None]:
violin_lifetime(bd, 1, color, sublabel_cycle)

In [None]:
include_gene_expression = gene_sub_rand1[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Cut Cells, Random Genes_1', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "orange", "Cut Cells, Random Genes_1")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_1)

In [None]:
include_gene_expression = gene_sub_rand2[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Cut Cells, Random Genes_2', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "orange", "Cut Cells, Random Genes_2")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_2)

In [None]:
include_gene_expression = gene_sub_rand3[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Cut Cells, Random Genes_3', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "orange", "Cut Cells, Random Genes_3")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_3)

In [None]:
include_gene_expression = gene_sub_rand4[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Cut Cells, Random Genes_4', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "orange", "Cut Cells, Random Genes_4")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_4)

In [None]:
include_gene_expression = gene_sub_rand5[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Cut Cells, Random Genes_5', 2)

In [None]:
bd, color = persistance_diagram(include_gene_expression.values, "orange", "Cut Cells, Random Genes_5")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_5)

## Cut cycle from UMAP

In [None]:
color_selection = custom_palette.loc["red"]

In [None]:
# Cell-cycle-gene UMAP colored by GFP, with the cuts at UMAP1 = -0.8 and 10 drawn as dotted lines.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["gfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))
plt.vlines(-0.8, -2, 9, linestyles="dotted", colors="k")
plt.vlines(10, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - GFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Positions of the cells whose first UMAP coordinate lies strictly between -0.8 and 10.
location_include = np.where((embedding_sub[:, 0] < 10) & (embedding_sub[:, 0] > -0.8))[0]
print(location_include.shape)

# Row i of `embedding_sub` corresponds to column i of `gene_expression`, so
# the selected positions index the columns directly (no per-element loop).
name_include = list(gene_expression.columns[location_include])
include_gene_expression = gene_expression[name_include]

# (k, 2) array of the selected embedding coordinates, plus the two (k, 1)
# column vectors the original cell also left defined.
embedding_include = embedding_sub[location_include, :2]
embedding_include_x = embedding_include[:, [0]]
embedding_include_y = embedding_include[:, [1]]

In [None]:
# All cells in light grey with the selected cells drawn on top in the palette's second shade.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword (required by seaborn >= 0.12) and the shade is
# looked up with .iloc, since Series[int] on a string-labelled Series is
# deprecated in pandas 2.x.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=np.ones(embedding_sub.shape[0]), s=20, linewidth=0, palette=["lightgrey"])
sns.scatterplot(x=embedding_include[:, 0], y=embedding_include[:, 1], hue=np.ones(embedding_include.shape[0]), s=20, linewidth=0, palette=[color_selection.iloc[1]])
plt.vlines(-0.8, -2, 9, linestyles="dotted", colors="k")
plt.vlines(10, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')

plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
visualize(include_gene_expression.values, 20, 'UMAP - Small Loop Cells, All Genes', 0)

In [None]:
# Title matches the "Small Loop Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "red", "Small Loop Cells, All Genes")

In [None]:
birth_lifetime(bd, 0, color)

In [None]:
violin_lifetime(bd, 0, color, sublabel)

In [None]:
include_gene_expression = gene_sub[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Small Loop Cells, Cell Cycle Genes', 1)

In [None]:
# Title matches the "Small Loop Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "red", "Small Loop Cells, Cell Cycle Genes")

In [None]:
birth_lifetime(bd, 1, color)

In [None]:
violin_lifetime(bd, 1, color, sublabel_cycle)

In [None]:
include_gene_expression = gene_sub_rand1[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Small Loop Cells, Random Genes_1', 2)

In [None]:
# Title matches the "Small Loop Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "red", "Small Loop Cells, Random Genes_1")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_1)

In [None]:
include_gene_expression = gene_sub_rand2[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Small Loop Cells, Random Genes_2', 2)

In [None]:
# Title matches the "Small Loop Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "red", "Small Loop Cells, Random Genes_2")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_2)

In [None]:
include_gene_expression = gene_sub_rand3[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Small Loop Cells, Random Genes_3', 2)

In [None]:
# Title matches the "Small Loop Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "red", "Small Loop Cells, Random Genes_3")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_3)

In [None]:
include_gene_expression = gene_sub_rand4[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Small Loop Cells, Random Genes_4', 2)

In [None]:
# Title matches the "Small Loop Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "red", "Small Loop Cells, Random Genes_4")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_4)

In [None]:
include_gene_expression = gene_sub_rand5[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Small Loop Cells, Random Genes_5', 2)

In [None]:
# Title matches the "Small Loop Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "red", "Small Loop Cells, Random Genes_5")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_5)

## Cut cycle from UMAP

In [None]:
color_selection = custom_palette.loc["purple"]

In [None]:
# Cell-cycle-gene UMAP colored by GFP, with the cut at UMAP1 = 10 drawn as a dotted line.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["gfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))
plt.vlines(10, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - GFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Positions of the cells with UMAP1 > 10 and UMAP2 > 4.
location_include = np.where((embedding_sub[:, 0] > 10) & (embedding_sub[:, 1] > 4))[0]
print(location_include.shape)

# Row i of `embedding_sub` corresponds to column i of `gene_expression`, so
# the selected positions index the columns directly (no per-element loop).
name_include = list(gene_expression.columns[location_include])
include_gene_expression = gene_expression[name_include]

# (k, 2) array of the selected embedding coordinates, plus the two (k, 1)
# column vectors the original cell also left defined.
embedding_include = embedding_sub[location_include, :2]
embedding_include_x = embedding_include[:, [0]]
embedding_include_y = embedding_include[:, [1]]

In [None]:
# All cells in light grey with the selected cells drawn on top in the palette's second shade.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword (required by seaborn >= 0.12) and the shade is
# looked up with .iloc, since Series[int] on a string-labelled Series is
# deprecated in pandas 2.x.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=np.ones(embedding_sub.shape[0]), s=20, linewidth=0, palette=["lightgrey"])
sns.scatterplot(x=embedding_include[:, 0], y=embedding_include[:, 1], hue=np.ones(embedding_include.shape[0]), s=20, linewidth=0, palette=[color_selection.iloc[1]])
plt.vlines(10, -2, 9, linestyles="dotted", colors="k")
plt.hlines(4, -6, 13.5, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')

plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
visualize(include_gene_expression.values, 20, 'UMAP - Right Cluster Cells, All Genes', 0)

In [None]:
# Title matches the "Right Cluster Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "purple", "Right Cluster Cells, All Genes")

In [None]:
birth_lifetime(bd, 0, color)

In [None]:
violin_lifetime(bd, 0, color, sublabel)

In [None]:
include_gene_expression = gene_sub[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Right Cluster Cells, Cell Cycle Genes', 1)

In [None]:
# Title matches the "Right Cluster Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "purple", "Right Cluster Cells, Cell Cycle Genes")

In [None]:
birth_lifetime(bd, 1, color)

In [None]:
violin_lifetime(bd, 1, color, sublabel_cycle)

In [None]:
include_gene_expression = gene_sub_rand1[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Right Cluster Cells, Random Genes_1', 2)

In [None]:
# Title matches the "Right Cluster Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "purple", "Right Cluster Cells, Random Genes_1")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_1)

In [None]:
include_gene_expression = gene_sub_rand2[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Right Cluster Cells, Random Genes_2', 2)

In [None]:
# Title matches the "Right Cluster Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "purple", "Right Cluster Cells, Random Genes_2")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_2)

In [None]:
include_gene_expression = gene_sub_rand3[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Right Cluster Cells, Random Genes_3', 2)

In [None]:
# Title matches the "Right Cluster Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "purple", "Right Cluster Cells, Random Genes_3")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_3)

In [None]:
include_gene_expression = gene_sub_rand4[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Right Cluster Cells, Random Genes_4', 2)

In [None]:
# Title matches the "Right Cluster Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "purple", "Right Cluster Cells, Random Genes_4")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_4)

In [None]:
include_gene_expression = gene_sub_rand5[name_include]

In [None]:
visualize(include_gene_expression.values, 4, 'UMAP - Right Cluster Cells, Random Genes_5', 2)

In [None]:
# Title matches the "Right Cluster Cells" naming of the UMAP plot for this selection.
bd, color = persistance_diagram(include_gene_expression.values, "purple", "Right Cluster Cells, Random Genes_5")

In [None]:
birth_lifetime(bd, 2, color)

In [None]:
violin_lifetime(bd, 2, color, sublabel_5)

## Cut cycle from UMAP

In [None]:
color_selection = custom_palette.loc["teal"]

In [None]:
# Cell-cycle-gene UMAP colored by GFP, with the cut at UMAP1 = -1 drawn as a dotted line.
plt.figure(figsize=(8, 6))

# x / y are passed by keyword: positional x / y was deprecated in seaborn 0.11
# and is rejected by seaborn >= 0.12.
sns.scatterplot(x=embedding_sub[:, 0], y=embedding_sub[:, 1], hue=labels["gfp.median.log10sum.adjust"], s=20, linewidth=0, palette="coolwarm", hue_norm=(-1, 1))
plt.vlines(-1, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.title('UMAP - GFP')
plt.legend(bbox_to_anchor=(1.1, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Select the cells whose first UMAP coordinate lies left of the cut (UMAP1 < -1)
# and gather their labels, expression columns and 2-D embedding coordinates.
location_include = np.where(embedding_sub[:, 0] < -1)[0]
print(location_include.shape)

# Column labels of the selected cells.
# NOTE(review): assumes row i of embedding_sub corresponds to column i of gene_expression — confirm.
name_include = list(gene_expression.columns[location_include])

include_gene_expression = gene_expression[name_include]

# Vectorised replacement for the per-row append loop: take the selected rows and
# keep the first two embedding components.
embedding_include = embedding_sub[location_include][:, :2]

# Column vectors of shape (n_selected, 1), kept under their original names in
# case later cells still read them.
embedding_include_x = embedding_include[:, [0]]
embedding_include_y = embedding_include[:, [1]]

In [None]:
# Highlight the selected cells on top of the full embedding.
plt.figure(figsize=(8, 6))

# Background layer: every cell in light grey. x/y are passed by keyword because
# seaborn >= 0.12 rejects positional data vectors.
sns.scatterplot(
    x=embedding_sub[:, 0],
    y=embedding_sub[:, 1],
    hue=np.ones(embedding_sub.shape[0]),
    s=20,
    linewidth=0,
    palette=["lightgrey"],
)
# Foreground layer: the selected cells in the second shade of the chosen palette row.
# .iloc makes the positional access explicit; an integer key on a label-indexed
# Series relies on a deprecated pandas fallback.
sns.scatterplot(
    x=embedding_include[:, 0],
    y=embedding_include[:, 1],
    hue=np.ones(embedding_include.shape[0]),
    s=20,
    linewidth=0,
    palette=[color_selection.iloc[1]],
)
# Dotted vertical line at UMAP1 = -1, the selection threshold.
plt.vlines(-1, -2, 9, linestyles="dotted", colors="k")
plt.ylabel('UMAP2')
plt.xlabel('UMAP1')
plt.legend(bbox_to_anchor=(1.2, 1), borderaxespad=0)
#plt.savefig('.png')
plt.show()

In [None]:
# Plot the selected cells (all of gene_expression's rows) with the UMAP title given below.
# NOTE(review): the numeric arguments (20 and 0) are interpreted by visualize, defined elsewhere;
# 0 is the flag used at the all-genes call sites in this section.
visualize(include_gene_expression.values, 20, 'UMAP - Large Loop Cells, All Genes', 0)

In [None]:
# Persistence diagram of the subset: the helper runs ripser (maxdim=1) on the transposed matrix.
# The call is unpacked into the birth-death data `bd` and a `color` reused by the next cells.
bd, color = persistance_diagram(include_gene_expression.values, "teal", "Large Loop Cells, All Genes")

In [None]:
# Presumably a birth-vs-lifetime view of the diagram just computed (helper defined elsewhere — confirm).
# 0 is the flag used at the all-genes call sites in this section.
birth_lifetime(bd, 0, color)

In [None]:
# Lifetime summary for `bd`, labelled with sublabel (helper and label defined elsewhere in the notebook).
violin_lifetime(bd, 0, color, sublabel)

In [None]:
# Subset gene_sub to the labels in name_include, which the selection cell fills from gene_expression.columns.
# NOTE(review): assumes gene_sub carries the same column labels as gene_expression — confirm.
include_gene_expression = gene_sub[name_include]

In [None]:
# Plot the gene_sub subset of the selected cells.
# Title corrected from the copy-pasted 'All Genes': this cell plots gene_sub with
# flag 1, and the persistence diagram for the same data is titled 'Cell Cycle Genes'.
# NOTE(review): the numeric arguments (4 and 1) are interpreted by visualize, defined elsewhere.
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Cell Cycle Genes', 1)

In [None]:
# Persistence diagram of the subset: the helper runs ripser (maxdim=1) on the transposed matrix.
# The call is unpacked into the birth-death data `bd` and a `color` reused by the next cells.
bd, color = persistance_diagram(include_gene_expression.values, "teal", "Large Loop Cells, Cell Cycle Genes")

In [None]:
# Presumably a birth-vs-lifetime view of the diagram just computed (helper defined elsewhere — confirm).
# 1 is the flag used at the cell-cycle call sites in this section.
birth_lifetime(bd, 1, color)

In [None]:
# Lifetime summary for `bd`, labelled with sublabel_cycle (helper and label defined elsewhere in the notebook).
violin_lifetime(bd, 1, color, sublabel_cycle)

In [None]:
# Subset gene_sub_rand1 to the labels in name_include, which the selection cell fills from gene_expression.columns.
# NOTE(review): assumes gene_sub_rand1 carries the same column labels as gene_expression — confirm.
include_gene_expression = gene_sub_rand1[name_include]

In [None]:
# Plot the current subset with the UMAP title given below.
# NOTE(review): the numeric arguments (4 and 2) are interpreted by visualize, defined elsewhere.
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_1', 2)

In [None]:
# Persistence diagram of the subset: the helper runs ripser (maxdim=1) on the transposed matrix.
# The call is unpacked into the birth-death data `bd` and a `color` reused by the next cells.
bd, color = persistance_diagram(include_gene_expression.values, "teal", "Large Loop Cells, Random Genes_1")

In [None]:
# Presumably a birth-vs-lifetime view of the diagram just computed (helper defined elsewhere — confirm).
# 2 is the flag used at the random-gene call sites in this section.
birth_lifetime(bd, 2, color)

In [None]:
# Lifetime summary for `bd`, labelled with sublabel_1 (helper and label defined elsewhere in the notebook).
violin_lifetime(bd, 2, color, sublabel_1)

In [None]:
# Subset gene_sub_rand2 to the labels in name_include, which the selection cell fills from gene_expression.columns.
# NOTE(review): assumes gene_sub_rand2 carries the same column labels as gene_expression — confirm.
include_gene_expression = gene_sub_rand2[name_include]

In [None]:
# Plot the current subset with the UMAP title given below.
# NOTE(review): the numeric arguments (4 and 2) are interpreted by visualize, defined elsewhere.
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_2', 2)

In [None]:
# Persistence diagram of the subset: the helper runs ripser (maxdim=1) on the transposed matrix.
# The call is unpacked into the birth-death data `bd` and a `color` reused by the next cells.
bd, color = persistance_diagram(include_gene_expression.values, "teal", "Large Loop Cells, Random Genes_2")

In [None]:
# Presumably a birth-vs-lifetime view of the diagram just computed (helper defined elsewhere — confirm).
# 2 is the flag used at the random-gene call sites in this section.
birth_lifetime(bd, 2, color)

In [None]:
# Lifetime summary for `bd`, labelled with sublabel_2 (helper and label defined elsewhere in the notebook).
violin_lifetime(bd, 2, color, sublabel_2)

In [None]:
# Subset gene_sub_rand3 to the labels in name_include, which the selection cell fills from gene_expression.columns.
# NOTE(review): assumes gene_sub_rand3 carries the same column labels as gene_expression — confirm.
include_gene_expression = gene_sub_rand3[name_include]

In [None]:
# Plot the current subset with the UMAP title given below.
# NOTE(review): the numeric arguments (4 and 2) are interpreted by visualize, defined elsewhere.
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_3', 2)

In [None]:
# Persistence diagram of the subset: the helper runs ripser (maxdim=1) on the transposed matrix.
# The call is unpacked into the birth-death data `bd` and a `color` reused by the next cells.
bd, color = persistance_diagram(include_gene_expression.values, "teal", "Large Loop Cells, Random Genes_3")

In [None]:
# Presumably a birth-vs-lifetime view of the diagram just computed (helper defined elsewhere — confirm).
# 2 is the flag used at the random-gene call sites in this section.
birth_lifetime(bd, 2, color)

In [None]:
# Lifetime summary for `bd`, labelled with sublabel_3 (helper and label defined elsewhere in the notebook).
violin_lifetime(bd, 2, color, sublabel_3)

In [None]:
# Subset gene_sub_rand4 to the labels in name_include, which the selection cell fills from gene_expression.columns.
# NOTE(review): assumes gene_sub_rand4 carries the same column labels as gene_expression — confirm.
include_gene_expression = gene_sub_rand4[name_include]

In [None]:
# Plot the current subset with the UMAP title given below.
# NOTE(review): the numeric arguments (4 and 2) are interpreted by visualize, defined elsewhere.
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_4', 2)

In [None]:
# Persistence diagram of the subset: the helper runs ripser (maxdim=1) on the transposed matrix.
# The call is unpacked into the birth-death data `bd` and a `color` reused by the next cells.
bd, color = persistance_diagram(include_gene_expression.values, "teal", "Large Loop Cells, Random Genes_4")

In [None]:
# Presumably a birth-vs-lifetime view of the diagram just computed (helper defined elsewhere — confirm).
# 2 is the flag used at the random-gene call sites in this section.
birth_lifetime(bd, 2, color)

In [None]:
# Lifetime summary for `bd`, labelled with sublabel_4 (helper and label defined elsewhere in the notebook).
violin_lifetime(bd, 2, color, sublabel_4)

In [None]:
# Subset gene_sub_rand5 to the labels in name_include, which the selection cell fills from gene_expression.columns.
# NOTE(review): assumes gene_sub_rand5 carries the same column labels as gene_expression — confirm.
include_gene_expression = gene_sub_rand5[name_include]

In [None]:
# Plot the current subset with the UMAP title given below.
# NOTE(review): the numeric arguments (4 and 2) are interpreted by visualize, defined elsewhere.
visualize(include_gene_expression.values, 4, 'UMAP - Large Loop Cells, Random Genes_5', 2)

In [None]:
# Persistence diagram of the subset: the helper runs ripser (maxdim=1) on the transposed matrix.
# The call is unpacked into the birth-death data `bd` and a `color` reused by the next cells.
bd, color = persistance_diagram(include_gene_expression.values, "teal", "Large Loop Cells, Random Genes_5")

In [None]:
# Presumably a birth-vs-lifetime view of the diagram just computed (helper defined elsewhere — confirm).
# 2 is the flag used at the random-gene call sites in this section.
birth_lifetime(bd, 2, color)

In [None]:
# Lifetime summary for `bd`, labelled with sublabel_5 (helper and label defined elsewhere in the notebook).
violin_lifetime(bd, 2, color, sublabel_5)