In [None]:
# example-landau-fit-for-bool-ode-data.ipynb
#
# Bryan Daniels
# 2023/10/25
#
# Example of running the "Landau" data analysis code (found 
# here: https://github.com/Collective-Logic-Lab/landau )
# on data produced by BoolODE.
#
# Much of this code was forked from what I wrote for the
# Spring 2023 grant proposal, found in 
# Control-of-Synthetic-Boolean-Networks/code/BoolODE_data.ipynb
#

# load BoolODE data

In [13]:
import pandas as pd
import numpy as np

In [11]:
# Load the BoolODE expression table.  The first CSV column becomes the
# index, and the table is then transposed before being stored
# (when displayed, rows are cells and columns are genes).
dataDir = '../Bool ODE outputs/pRogress_4/'
expression = pd.read_csv(f'{dataDir}/ExpressionData.csv', index_col=0).transpose()

In [10]:
# look at the DataFrame to see what we're working with:
# one row per sampled cell, one column per gene
# (here 1000 cells with 5 gene expressions each)
expression

Unnamed: 0,g1,g2,g3,g4,g5
E0_237,2.364032,1.818055,0.000512,0.035562,0.003712
E1_233,1.916502,1.499494,0.000728,0.003854,0.072445
E2_186,1.467022,2.211753,0.038807,0.018330,0.016627
E3_438,2.222070,2.299621,0.012180,0.015372,0.001413
E4_268,1.980141,1.762307,0.014171,0.021510,0.007766
...,...,...,...,...,...
E995_60,1.688257,0.018741,0.015908,0.109149,0.039773
E996_206,2.288173,1.770626,0.002179,0.057767,0.015837
E997_234,2.071044,1.774852,0.001091,0.003413,0.012631
E998_11,0.561608,0.001382,0.519313,0.848467,0.466121


In the BoolODE data, we know the timepoint at which each cell was sampled.  (In real data, we won't know this, but we'll deal with this fact later using RNA velocity.)  In BoolODE, the relevant time is known as the "pseudotime" (I think).  We load these times here from the file "PseudoTime.csv":

In [19]:
# Read the pseudotime table written by BoolODE; the first CSV column
# is used as the index.
times_raw = pd.read_csv(f'{dataDir}/PseudoTime.csv', index_col=0)
# Pull out the 'PseudoTime' value for each sampled cell, i.e. for each
# label in expression.index (and in that same order).
times = times_raw.loc[expression.index, 'PseudoTime']

In [20]:
# show the pseudotime assigned to each sampled cell (indexed by cell name)
times

E0_237      0.473896
E1_233      0.465863
E2_186      0.371486
E3_438      0.877510
E4_268      0.536145
              ...   
E995_60     0.118474
E996_206    0.411647
E997_234    0.467871
E998_11     0.020080
E999_499    1.000000
Name: PseudoTime, Length: 1000, dtype: float64

# do landau analysis to highlight potential control nodes

In [1]:
from landau import landauAnalysis

For the analysis, we want a population of cells that all have similar pseudotime.  So we will pick a time window that includes enough cells — let's say roughly 100 cells.

In [None]:
# do landau analysis on a series of sliding pseudotime windows

delta_t = 0.05   # width of each pseudotime window
Nwindows = 100   # number of window start times
landauDataList = []
cells_to_use_list = []

# Window start times.  Windows are laid out over the pseudotime range
# [0, t_end], with the last one starting at t_end - delta_t so that
# every window spans a full delta_t.  (A window starting at t_min = t_end
# would hold only the cells sampled at exactly t_end, and windows
# starting just before it would be similarly under-populated.)
t_end = 1
t_min_list = np.linspace(0, t_end - delta_t, Nwindows)

for t_min in t_min_list:
    # restrict to varying time window [t_min, t_max)
    t_max = t_min + delta_t
    if np.isclose(t_max, t_end):
        # The window that reaches t_end is closed on the right, so that
        # cells sampled at exactly t_end are not left out of every window.
        # (isclose guards against round-off in t_min + delta_t.)
        cells_to_use = (times >= t_min) & (times <= t_end)
    else:
        cells_to_use = (times >= t_min) & (times < t_max)
    cells_to_use_list.append(cells_to_use)

    # .loc makes it explicit that the boolean mask selects rows (cells)
    landauData = landauAnalysis.landauAnalysis(expression.loc[cells_to_use])
    landauDataList.append(landauData)