In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import phate

from sklearn.manifold import TSNE
import umap
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


import scFates as scf
import anndata

In [None]:
# CellProfiler measurement tables exported as CSV: the full set and the 'td' set
# (presumably one row per segmented cell — confirm against the export).
exif_df = pd.read_csv(filepath_or_buffer='./Cellprofiler outputs/full_exif_cells.csv')
td_exif_df = pd.read_csv(filepath_or_buffer='./Cellprofiler outputs/td_exif_cells.csv')

In [2]:
def embed(df, reducer, features):
    """Project the selected feature columns of ``df`` to 2-D with ``reducer``.

    The features are standardised, reduced with PCA (``n_components='mle'``),
    and the PCA scores are then passed to ``reducer.fit_transform``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table; must contain every column named in ``features`` as well
        as a ``condition`` column.
    reducer : object
        Any estimator exposing ``fit_transform`` whose output has two columns
        (they are labelled ``x`` and ``y``).
    features : list of str
        Column names of ``df`` used to build the embedding.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` and ``y`` followed by the columns of ``df.reset_index()``,
        with ``condition`` cast to ``str``.
    """
    standardised = StandardScaler().fit_transform(df[features])
    pca_scores = PCA(n_components='mle').fit_transform(standardised)

    coords = pd.DataFrame(reducer.fit_transform(pca_scores), columns=['x', 'y'])
    # Index-aligned join: both frames carry a fresh 0..n-1 index, so rows pair
    # up by position.
    embedding = coords.join(df.reset_index())
    embedding['condition'] = embedding.condition.astype(str)

    return embedding

In [4]:
# Split the Intensity / RadialDistribution / Texture measurement columns into
# those naming one of the three common markers (DAPI, Bcatenin, Phalloidin)
# and all the remaining ones.
general_marker_features = []
variable_predicted_features = []

for column in exif_df.columns:
    if not column.startswith(('Intensity', 'RadialDistribution', 'Texture')):
        continue
    is_general = any(marker in column for marker in ('DAPI', 'Bcatenin', 'Phalloidin'))
    target = general_marker_features if is_general else variable_predicted_features
    target.append(column)

In [None]:
# Embed every cell using only the common-marker features, coloured by condition.
# Alternative reducers tried previously:
#   TSNE(n_components=2, learning_rate='auto', init='random', perplexity=10)
#   umap.UMAP()
# random_state pins PHATE's random initialisation so a re-run reproduces the
# same layout (42 matches the seed passed to scf.tl.pseudotime).
reducer = phate.PHATE(n_components=2, knn=5, decay=40, random_state=42)
embedded_common = embed(exif_df, reducer, general_marker_features)

px.scatter(
    embedded_common,
    x='x',
    y='y',
    color='condition',
    title='PHATE embedding: common-marker features',
)

In [None]:
# Feature selection for the 'td' table, restricted to Intensity /
# RadialDistribution / Texture measurement columns:
#   td_var_features    - columns mentioning none of DAPI, Bcatenin, Phalloidin
#   td_common_features - columns whose name contains 'pred'
td_common_features = []
td_var_features = []

for x in td_exif_df.columns:
    if x.startswith(('Intensity', 'RadialDistribution', 'Texture')):
        if ('DAPI' not in x) and ('Bcatenin' not in x) and ('Phalloidin' not in x):
            td_var_features.append(x)

        if 'pred' in x:
            td_common_features.append(x)

# The two rules above are not mutually exclusive, so a column can land in both
# lists. Drop repeats (keeping first-seen order) so no column is handed to
# embed() twice, which would silently duplicate that feature in the scaler/PCA
# input.
td_features = list(dict.fromkeys(td_common_features + td_var_features))

# Alternative reducers tried previously:
#   TSNE(n_components=2, learning_rate='auto', init='random', perplexity=10)
#   umap.UMAP()
# random_state pins PHATE's random initialisation so a re-run reproduces the
# same layout (42 matches the seed passed to scf.tl.pseudotime).
reducer = phate.PHATE(n_components=2, knn=5, decay=40, random_state=42)
embedded_td_exif = embed(td_exif_df, reducer, td_features)

px.scatter(
    embedded_td_exif,
    x='x',
    y='y',
    color='condition',
    title='PHATE embedding: td table features',
)

In [None]:
# Embed every cell using the common-marker and the remaining measurement
# features together, coloured by condition.
# Alternative reducers tried previously:
#   TSNE(n_components=2, learning_rate='auto', init='random', perplexity=10)
#   umap.UMAP()
# random_state pins PHATE's random initialisation so a re-run reproduces the
# same layout (42 matches the seed passed to scf.tl.pseudotime).
reducer = phate.PHATE(n_components=2, knn=5, decay=40, random_state=42)
embedded_exif = embed(exif_df, reducer, general_marker_features + variable_predicted_features)

px.scatter(
    embedded_exif,
    x='x',
    y='y',
    color='condition',
    title='PHATE embedding: all measurement features',
)

In [10]:
def run_pseudotime(df):
    """Fit an scFates curve on a 2-D embedding and store pseudotime in ``df``.

    ``df`` is modified in place: its ``condition`` column is cast to ``uint8``
    and a ``ptime`` column is added. Two scFates plots (graph and trajectory)
    are drawn as a side effect. Nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``x``, ``y`` and ``condition`` columns; ``condition`` must
        be castable to ``uint8``.
    """
    # Assign pre-calculated manifold to annotated dataframe
    df['condition'] = df['condition'].astype('uint8')
    adata = anndata.AnnData(df[['x', 'y', 'condition']])
    adata.raw=adata

    # Expose the x/y coordinates under the representation name used below.
    adata.obsm['X_phate'] = np.array(df[['x', 'y']])
    # adata.obsm['X_phatecommon'] = np.array(phate_common[['x', 'y']])
    # adata.obsm['X_tdphate'] = np.array(td_phate_virtual[['x', 'y']])

    # Fit a 25-node curve on the 2-D representation, convert it to a soft
    # assignment, pick the root from "condition", then compute pseudotime.
    scf.tl.curve(adata,Nodes=25,use_rep="X_phate",ndims_rep=2)
    scf.tl.convert_to_soft(adata,0.001,1000, n_steps=1)
    scf.pl.graph(adata,basis="phate")
    scf.tl.root(adata,"condition")
    # NOTE(review): n_jobs=20 is hard-coded — confirm it suits the host machine.
    scf.tl.pseudotime(adata,n_jobs=20,n_map=100,seed=42)

    scf.pl.trajectory(adata,basis="phate",arrows=True,arrow_offset=3)

    # Only the entry keyed '1' of the pseudotime list is written back.
    # NOTE(review): presumably one of the n_map mappings — confirm this is the
    # intended one rather than a summary over all mappings.
    df['ptime'] = np.array(adata.uns['pseudotime_list']['1']['t'])
    

In [None]:
# Fit a trajectory on each embedding in turn; run_pseudotime writes the
# resulting 'ptime' column into the frame it is given.
for embedding_df in (embedded_common, embedded_td_exif, embedded_exif):
    run_pseudotime(embedding_df)


In [18]:
def melt_expression(df, feature='Intensity_MeanIntensity', suffix='', markers=None):
    """Z-score per-marker measurements and reshape them to long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``x``, ``y``, ``condition``, ``ptime`` and one column named
        ``f'{feature}_{marker}{suffix}'`` for every marker. It is not modified.
    feature : str
        Measurement prefix of the columns to extract.
    suffix : str
        Optional text appended after the marker name in the column names.
    markers : list of str, optional
        Marker names to extract. Defaults to the eleven markers used in this
        notebook.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``x``, ``y``, ``condition``, ``ptime``,
        ``variable`` (source column name), ``value`` (z-scored measurement) and
        ``pseudotime`` (``ptime`` rounded to 3 decimals).

    Raises
    ------
    KeyError
        If any required column is missing from ``df``.
    """
    if markers is None:
        markers = ['Bcatenin', 'DAPI', 'Phalloidin', 'CD44', 'CD44std', 'CD44v9',
                   'Ecadherin', 'EpCAM', 'Ncadherin', 'PTEN', 'Vimentin']

    id_columns = ['x', 'y', 'condition', 'ptime']
    value_columns = [f'{feature}_{marker}{suffix}' for marker in markers]

    # Explicit copy: the columns are overwritten below, and assigning into a
    # frame selected from ``df`` triggers pandas' SettingWithCopyWarning.
    expression = df[id_columns + value_columns].copy()

    # Standardise each marker column independently (sample std, ddof=1).
    # A constant column has std 0 and therefore becomes NaN.
    for column in value_columns:
        values = expression[column]
        expression[column] = (values - values.mean()) / values.std()

    melted = expression.melt(id_vars=id_columns)

    # Rounded pseudotime is used as a grouping key by the plotting cells.
    melted['pseudotime'] = melted['ptime'].round(decimals=3)
    return melted

In [None]:
# Median z-scored intensity of each marker per rounded-pseudotime value, for the
# common-marker embedding.
# The string 'median' selects the built-in groupby median; passing the
# np.median callable to agg() raises a FutureWarning in pandas >= 2.1.
melt_embedded_common = (
    melt_expression(embedded_common)
    .groupby(['pseudotime', 'variable'])
    .agg('median')
    .reset_index()
)

fig = px.line(
    melt_embedded_common,
    x='pseudotime',
    y='value',
    color='variable',
    title='Marker intensity along pseudotime (common-marker embedding)',
)

fig.show()

In [None]:
# All cells / labels (the 3 common markers here are purely experimentally labelled).
# Median z-scored intensity of each marker per rounded-pseudotime value.
# The string 'median' selects the built-in groupby median; passing the
# np.median callable to agg() raises a FutureWarning in pandas >= 2.1.
melt_embedded_exif = (
    melt_expression(embedded_exif)
    .groupby(['pseudotime', 'variable'])
    .agg('median')
    .reset_index()
)

fig = px.line(
    melt_embedded_exif,
    x='pseudotime',
    y='value',
    color='variable',
    title='Marker intensity along pseudotime (all-feature embedding)',
)

fig.show()

In [None]:
# Only experimental labels (variable markers).

# Import the dataset with experimental labelling (available for a subset of cells).
datapath = r'./Cellprofiler outputs/test_set_features.csv'
cell_df = pd.read_csv(datapath)

# Join to the exif embedding, using a real-valued measurement column present in
# both tables as the key.
# NOTE(review): merging on a float column relies on the value being written
# identically in both CSVs and being unique per cell — confirm.
real_labels = cell_df[(cell_df['Metadata_inputs'] == 'TD_DAPI_phalloidin_Bcatenin')]
virtual_with_reals = embedded_exif.merge(
    real_labels[['Texture_Variance_Phalloidin_3_00_256', 'Intensity_MeanIntensity_real']],
    on=['Texture_Variance_Phalloidin_3_00_256'],
)

# .copy() so the column assignments below write to an independent frame instead
# of a selection of the merge result (avoids SettingWithCopyWarning).
virtual_with_reals = virtual_with_reals[['Metadata_marker', 'ptime', 'Intensity_MeanIntensity_real']].copy()
virtual_with_reals['pseudotime'] = virtual_with_reals['ptime'].round(decimals=3)

# Z-score the real intensity within each marker. Only that column is
# transformed, rather than transforming every column and discarding the rest.
virtual_with_reals['Intensity_MeanIntensity_real'] = (
    virtual_with_reals
    .groupby('Metadata_marker')['Intensity_MeanIntensity_real']
    .transform(lambda values: (values - values.mean()) / values.std())
)

# The string 'median' selects the built-in groupby median; passing the
# np.median callable to agg() raises a FutureWarning in pandas >= 2.1.
virtual_with_reals = (
    virtual_with_reals
    .groupby(['pseudotime', 'Metadata_marker'])
    .agg('median')
    .reset_index()
)

fig = px.line(
    virtual_with_reals,
    x='pseudotime',
    y='Intensity_MeanIntensity_real',
    color='Metadata_marker',
    title='Experimentally labelled marker intensity along pseudotime',
)

fig.show()
