In [1]:
# Import packages
import os, sys, glob, re, math, pickle
import phate, scprep, magic, meld
import graphtools as gt
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import time,random,datetime
import networkx as nx
import scvelo as scv
from sklearn import metrics
from sklearn import model_selection
from scipy import sparse
from scipy.stats import mannwhitneyu, tiecorrect, rankdata
from statsmodels.stats.multitest import multipletests
import scanpy as sc
from sklearn.dummy import DummyClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import SpectralClustering, OPTICS, cluster_optics_dbscan, AgglomerativeClustering
from bbknn import bbknn
import warnings
%matplotlib inline
%load_ext memory_profiler
import rpy2.rinterface_lib.callbacks
import logging

from rpy2.robjects import pandas2ri
import anndata2ri

# Ignore R warning messages: the rpy2 callback logger is restricted to ERROR level
# Note: comment the next line out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Automatically convert rpy2 outputs to pandas dataframes
pandas2ri.activate()
# NOTE(review): presumably enables the matching automatic conversion for AnnData objects — confirm against the anndata2ri docs
anndata2ri.activate()
# Load the rpy2 IPython extension (presumably for R cell magics — none are used in the cells visible here)
%load_ext rpy2.ipython
# settings
# Matplotlib defaults: small sans-serif text, no legend frame, no grid
plt.rc('font', size = 8)
plt.rc('font', family='sans serif')
# fonttype 42 = TrueType, so text in exported PDF/PS stays editable
plt.rcParams['pdf.fonttype']=42
plt.rcParams['ps.fonttype']=42
plt.rcParams['text.usetex']=False
plt.rcParams['legend.frameon']=False
plt.rcParams['axes.grid']=False
plt.rcParams['legend.markerscale']=0.5
sc.set_figure_params(dpi=300,dpi_save=600,
                     frameon=False,
                     fontsize=8)
# set after sc.set_figure_params so this value is the one in effect
plt.rcParams['savefig.dpi']=600
sc.settings.verbosity=2
# Use the public settings instance; assigning to sc._settings.ScanpyConfig.n_jobs
# replaced the class-level property and bypassed its setter.
sc.settings.n_jobs=-1

# reproducibility
# np.random.seed() returns None, so `rs = np.random.seed(42)` left rs as None.
# Keep the seed value itself in rs so it can be passed as a random_state.
rs = 42
np.random.seed(rs)
random.seed(rs)

# utils
def mwu(X,Y,gene_names,correction=None,debug=False) :
    '''
    Per-gene Mann-Whitney U test between two groups, with multiple-testing correction.

    X, Y (2-D arrays, cells x genes): expression of the two groups; column i of
        both is tested under the name gene_names[i].
        If X,Y are single gene expression arrays, input x.reshape(-1,1), y.reshape(-1,1)
    gene_names (list-like): one name per column
    correction (str or None): `method` handed to statsmodels `multipletests`.
        None (default) keeps Benjamini-Hochberg ('fdr_bh'); e.g. 'bonferroni'.
    debug (bool): print every gene whose p-value could not be calculated

    Returns a DataFrame with columns Gene, pval, pval_corrected whose index is
    the gene's column position. Genes whose pooled X+Y values are all identical
    (tie correction factor of 0) get no p-value and are dropped before the
    correction, so they do not count as tests.
    NOTE: get zeros sometimes because difference (p-value is so small)
    '''
    method = 'fdr_bh' if correction is None else correction
    if correction is None :
        print('Mann-Whitney U w/Benjamini/Hochberg correction\n')
    else :
        print('Mann-Whitney U w/{} correction\n'.format(method))
    start = time.time()

    # Progress milestones (gene position -> percent), computed once instead of
    # re-evaluating three quantiles on every iteration. setdefault keeps the
    # lowest percentage when several milestones land on the same position.
    n_genes = len(gene_names)
    milestones = {}
    if n_genes :
        for q in (0.25,0.5,0.75) :
            milestones.setdefault(int(np.round(q*(n_genes-1))),int(q*100))

    # Collect results in plain lists; growing a DataFrame one cell at a time
    # with .loc is quadratic in the number of genes.
    genes,pvals = [],[]
    for i,g in enumerate(gene_names) :
        if i in milestones :
            print('... {}% completed in {:.2f}-s'.format(milestones[i],time.time()-start))
        # ravel so that column slices of np.matrix inputs are 1-D as well
        x = np.asarray(X[:,i]).ravel()
        y = np.asarray(Y[:,i]).ravel()
        genes.append(g)
        if tiecorrect(rankdata(np.concatenate((x,y))))==0 :
            # all pooled values identical -> the test is undefined
            if debug :
                print('P-value not calculable for {}'.format(g))
            pvals.append(np.nan)
        else :
            # continuity correction is True (scipy default)
            # NOTE(review): the default `alternative` differs between SciPy versions — confirm which one is installed
            pvals.append(mannwhitneyu(x,y)[1])
    print('... mwu computed in {:.2f}-s\n'.format(time.time() - start))

    p = pd.DataFrame({'Gene':genes,'pval':np.asarray(pvals,dtype=float)})
    # ignore NaNs, since can't do a comparison on these (change numbers for correction)
    # .copy() so the new column is not written into a slice of p
    p_corrected = p.loc[p['pval'].notna(),:].copy()
    if p_corrected.empty :
        # nothing testable: return the expected columns without calling multipletests
        p_corrected['pval_corrected'] = np.array([],dtype=float)
        return p_corrected
    p_corrected['pval_corrected'] = multipletests(p_corrected['pval'],method=method)[1]
    return p_corrected

def log2aveFC(X,Y,gene_names,AnnData=None) :
    '''log2 fold change of the per-gene mean, X relative to Y.

    Computed per column as log2(mean(X)) - log2(mean(Y)), so a positive value
    means the mean is higher in X. A mean of 0 gives +/-inf and a negative mean
    gives NaN.

    X, Y (2-D arrays, cells x genes): full arrays, subsetting performed here
    `gene_names` (list): without `AnnData`, one name per column of X/Y;
        with `AnnData`, the reduced list of genes to calc
    `AnnData` (object with `.var_names`, e.g. sc.AnnData): when given, only the
        columns whose var_name is in `gene_names` are used, in var_names order.
        NOTE: assumes the columns of X,Y follow AnnData.var_names

    Returns a DataFrame with columns Gene and log2FC.
    '''
    if AnnData is not None :
        # set lookup: `g in list` made this O(n_vars * n_genes)
        wanted = set(gene_names)
        g_idx = [i for i,g in enumerate(AnnData.var_names) if g in wanted]
        genes = AnnData.var_names[g_idx]
        X,Y = X[:,g_idx],Y[:,g_idx]
    else :
        genes = gene_names
    # asarray+ravel: np.matrix / sparse means come back as 1 x n, not 1-D
    mean_x = np.asarray(X.mean(axis=0)).ravel()
    mean_y = np.asarray(Y.mean(axis=0)).ravel()
    fc = pd.DataFrame({'Gene':genes,
                       'log2FC':np.log2(mean_x) - np.log2(mean_y)})
    return fc


# fps
# Absolute, machine-specific locations; adjust before running elsewhere.
dfp = '/home/cl2292/project/SBMA/26wk/data/' # data directory
pfp = '/home/cl2292/project/SBMA/26wk/results/' # results directory
pdfp = '/home/cl2292/project/SBMA/26wk/data/processed/' # processed-data subdirectory of dfp
sc.settings.figdir = pfp # scanpy's figure directory is pointed at the results directory

In [2]:
# Load Data
# Reads the sub-clustered .h5ad from pdfp into `adata`; %memit reports the
# memory used by the read and the prints report a timestamp and the wall time.

if True :
    start = time.time()
    backed=None # None if not
    fname='220223_subcluster.h5ad' # for full, can maybe get away with ~300G
    # `backed` is forwarded unchanged to sc.read_h5ad
    %memit adata = sc.read_h5ad(os.path.join(pdfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))

peak memory: 16016.39 MiB, increment: 15716.36 MiB
loaded @220224.13:01:40
took 36.13-s to load data


In [36]:
## # of cell types in each timepoint&genotype
# One `sub3` count table per genotype x timepoint combination
for geno in ['WT','AR'] :
    for tp in ['26wk','52wk'] :
        in_group = (adata.obs['genotype']==geno)&(adata.obs['timepoint']==tp)
        print('{} {}\n\n'.format(geno,tp),adata[in_group,:].obs['sub3'].value_counts(), '\n\n')

WT 26wk

 OL     3050
NU     2512
AS2     927
MG      305
AS1     251
OPC     227
END     161
EP       48
MN       38
CN       20
PER      16
Name: sub3, dtype: int64 


WT 52wk

 OL     8140
NU     7778
AS2    3325
AS1     920
OPC     748
MG      738
END     358
EP       94
CN       75
PER      23
MN       18
Name: sub3, dtype: int64 


AR 26wk

 OL     2942
NU     2391
AS2     980
MG      251
OPC     224
AS1     217
END     118
EP       42
MN       24
CN       24
PER      17
Name: sub3, dtype: int64 


AR 52wk

 OL     9621
NU     8038
AS2    3607
AS1    1156
MG      968
OPC     777
END     377
EP      187
CN       80
MN       50
PER      21
Name: sub3, dtype: int64 




In [37]:
# Number of cells per timepoint within the WT genotype
wt_rows = adata.obs['genotype']=='WT'
adata[wt_rows,:].obs['timepoint'].value_counts()

52wk    22217
26wk     7555
Name: timepoint, dtype: int64

In [38]:
# Number of cells per timepoint within the AR genotype
ar_rows = adata.obs['genotype']=='AR'
adata[ar_rows,:].obs['timepoint'].value_counts()

52wk    24882
26wk     7230
Name: timepoint, dtype: int64

In [3]:
# Load Data
# Reads the two per-genotype .h5ad files from pdfp: `wt` from the WT file and
# `mut` from the SBMA file. Each read is wrapped in %memit and timed.

if True :
    start = time.time()
    backed=None # None if not
    fname='220224_WT_imp.h5ad' # for full, can maybe get away with ~300G
    # `backed` is forwarded unchanged to sc.read_h5ad
    %memit wt = sc.read_h5ad(os.path.join(pdfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))
    
if True :
    start = time.time()
    backed=None # None if not
    fname='220224_SBMA_imp.h5ad' # for full, can maybe get away with ~300G
    # `backed` is forwarded unchanged to sc.read_h5ad
    %memit mut = sc.read_h5ad(os.path.join(pdfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))

peak memory: 29748.84 MiB, increment: 13742.96 MiB
loaded @220224.13:02:15
took 34.65-s to load data
peak memory: 44572.57 MiB, increment: 14823.74 MiB
loaded @220224.13:02:51
took 35.65-s to load data


In [40]:
## EMD; SUB3, IMP
# Differential expression, WT (X) vs SBMA (Y), per timepoint and per `sub3`
# cell type, on the 'imputed' layer. For every gene: signed EMD (scprep),
# Mann-Whitney U p-value with BH correction (mwu) and log2 fold change of the
# means (log2aveFC). Rows are split by EMD sign and written as two CSVs.

if True :
    dge_grandtotal = time.time()
    group='sub3'
    fname = 'imp'
    # X/Y columns come from wt/mut, so label them with wt's own gene names
    # (assumes mut has the same genes in the same order — TODO confirm)
    gene_names = wt.var_names
    dge_parts = [] # one frame per (timepoint, cell type), concatenated once below
    for t in ['26wk','52wk'] :
        print('Evaluating {}'.format(t))
        start_t=time.time()

        # up down dichotomy
        print('\n--------')
        print('...')
        print('--------\n')
        for i in wt.obs[group].unique() :
            start = time.time()
            print('\n{}, WT vs SBMA'.format(i))
            print('----')
            X = wt[((wt.obs[group]==i) & (wt.obs['timepoint']==t)), :].layers['imputed']
            Y = mut[((mut.obs[group]==i) & (mut.obs['timepoint']==t)), :].layers['imputed']

            # densify first: np.asarray on a scipy sparse matrix does not give a numeric 2-D array
            if sparse.issparse(X):
                X = X.toarray()
            if sparse.issparse(Y):
                Y = Y.toarray()
            X = np.asarray(X)
            Y = np.asarray(Y)

            print('    Ncells in X:{}'.format(X.shape[0]))
            print('    Ncells in Y:{}\n'.format(Y.shape[0]))

            # nothing to compare when the cell type is absent from one genotype at this timepoint
            if X.shape[0]==0 or Y.shape[0]==0 :
                warnings.warn('Skipping {} at {}: no cells in WT or SBMA.'.format(i,t))
                continue

            emd = scprep.stats.differential_expression(X,Y,
                                                       measure = 'emd',
                                                       direction='both',
                                                       gene_names=gene_names,
                                                       n_jobs=-1)

            # mann-whitney u, corrected p-values
            p = mwu(X,Y,gene_names)
            emd['Gene']=emd.index
            emd=emd.drop(columns='rank')
            fc = log2aveFC(X,Y,gene_names.to_list())
            # keep only genes that received a p-value; warn only when some were really dropped
            # (the old `if mask.any()` test fired on every run, even with 0 genes dropped)
            has_pval = fc['Gene'].isin(p['Gene'])
            n_dropped = int((~has_pval).sum())
            if n_dropped :
                fc = fc.loc[has_pval,:]
                warnings.warn('Warning: {} genes dropped due to p-val NA.'.format(n_dropped))
            dt = pd.merge(p,fc,how='left',on="Gene")
            emd = emd.loc[emd['Gene'].isin(p['Gene']),:]
            dt = pd.merge(dt,emd,how='left',on='Gene')
            dt['Cell type']=[i]*dt.shape[0]
            dt['timepoint']=[str(t)]*dt.shape[0]
            dt['nlog10pvalcorrected']=(-1)*np.log10(dt['pval_corrected'])
            dge_parts.append(dt)
            print('... computed in {:.2f}-s'.format(time.time()-start))
        print('\nFinished timepoint {} in {:.2f}-min'.format(t,(time.time()-start_t)/60))
    # DataFrame.append was removed in pandas 2.0; one concat is also linear instead of quadratic
    dge = pd.concat(dge_parts, ignore_index=True)
    # X is WT and Y is SBMA: emd>0 rows are written as "SBMA down", emd<0 rows as "SBMA up"
    # NOTE(review): relies on scprep's signed EMD being positive when expression is higher in X — confirm
    dgeup = dge.loc[dge['emd']>0,:] # 'up' in WT relative to SBMA
    dgedown = dge.loc[dge['emd']<0,:] # 'down' in WT relative to SBMA
    dgeup.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_down.csv'),index=False)
    dgedown.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_up.csv'),index=False)

    print('DGE finished in {:.2f}-min'.format((time.time()-dge_grandtotal)/60))


Evaluating 26wk

--------
...
--------


OL, WT vs SBMA
----
    Ncells in X:3050
    Ncells in Y:2942

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 28.81-s
... 50% completed in 60.39-s
... 75% completed in 95.94-s
... mwu computed in 135.56-s

... computed in 156.29-s

AS2, WT vs SBMA
----
    Ncells in X:927
    Ncells in Y:980

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 20.04-s
... 50% completed in 43.16-s
... 75% completed in 70.35-s
... mwu computed in 101.53-s

... computed in 108.35-s

NU, WT vs SBMA
----
    Ncells in X:2512
    Ncells in Y:2391

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 27.74-s
... 50% completed in 58.60-s
... 75% completed in 93.40-s
... mwu computed in 132.43-s

... computed in 145.71-s

OPC, WT vs SBMA
----
    Ncells in X:227
    Ncells in Y:224

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 16.80-s
... 50% completed in 36.84-s
... 75% completed in 61.19-s
.

In [4]:
## EMD; SUB3, No IMP
# Differential expression, WT (X) vs SBMA (Y), per timepoint and per `sub3`
# cell type, on `.X` (no imputed layer). For every gene: signed EMD (scprep),
# Mann-Whitney U p-value with BH correction (mwu) and log2 fold change of the
# means (log2aveFC). Rows are split by EMD sign and written as two CSVs.

if True :
    dge_grandtotal = time.time()
    group='sub3'
    fname = 'no_imp'
    # X/Y columns come from wt/mut, so label them with wt's own gene names
    # (assumes mut has the same genes in the same order — TODO confirm)
    gene_names = wt.var_names
    dge_parts = [] # one frame per (timepoint, cell type), concatenated once below
    for t in ['26wk','52wk'] :
        print('Evaluating {}'.format(t))
        start_t=time.time()

        # up down dichotomy
        print('\n--------')
        print('...')
        print('--------\n')
        for i in wt.obs[group].unique() :
            start = time.time()
            print('\n{}, WT vs SBMA'.format(i))
            print('----')
            X = wt.X[((wt.obs[group]==i) & (wt.obs['timepoint']==t)), :]
            Y = mut.X[((mut.obs[group]==i) & (mut.obs['timepoint']==t)), :]

            # densify sparse storage before handing it to numpy
            if sparse.issparse(X):
                X = X.toarray()
            if sparse.issparse(Y):
                Y = Y.toarray()
            X = np.asarray(X)
            Y = np.asarray(Y)

            print('    Ncells in X:{}'.format(X.shape[0]))
            print('    Ncells in Y:{}\n'.format(Y.shape[0]))

            # nothing to compare when the cell type is absent from one genotype at this timepoint
            if X.shape[0]==0 or Y.shape[0]==0 :
                warnings.warn('Skipping {} at {}: no cells in WT or SBMA.'.format(i,t))
                continue

            emd = scprep.stats.differential_expression(X,Y,
                                                       measure = 'emd',
                                                       direction='both',
                                                       gene_names=gene_names,
                                                       n_jobs=-1)

            # mann-whitney u, corrected p-values
            p = mwu(X,Y,gene_names)
            emd['Gene']=emd.index
            emd=emd.drop(columns='rank')
            fc = log2aveFC(X,Y,gene_names.to_list())
            # keep only genes that received a p-value; warn only when some were really dropped
            # (the old `if mask.any()` test fired on every run, even with 0 genes dropped)
            has_pval = fc['Gene'].isin(p['Gene'])
            n_dropped = int((~has_pval).sum())
            if n_dropped :
                fc = fc.loc[has_pval,:]
                warnings.warn('Warning: {} genes dropped due to p-val NA.'.format(n_dropped))
            dt = pd.merge(p,fc,how='left',on="Gene")
            emd = emd.loc[emd['Gene'].isin(p['Gene']),:]
            dt = pd.merge(dt,emd,how='left',on='Gene')
            dt['Cell type']=[i]*dt.shape[0]
            dt['timepoint']=[str(t)]*dt.shape[0]
            dt['nlog10pvalcorrected']=(-1)*np.log10(dt['pval_corrected'])
            dge_parts.append(dt)
            print('... computed in {:.2f}-s'.format(time.time()-start))
        print('\nFinished timepoint {} in {:.2f}-min'.format(t,(time.time()-start_t)/60))
    # DataFrame.append was removed in pandas 2.0; one concat is also linear instead of quadratic
    dge = pd.concat(dge_parts, ignore_index=True)
    # X is WT and Y is SBMA: emd>0 rows are written as "SBMA down", emd<0 rows as "SBMA up"
    # NOTE(review): relies on scprep's signed EMD being positive when expression is higher in X — confirm
    dgeup = dge.loc[dge['emd']>0,:] # 'up' in WT relative to SBMA
    dgedown = dge.loc[dge['emd']<0,:] # 'down' in WT relative to SBMA
    dgeup.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_down.csv'),index=False)
    dgedown.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_up.csv'),index=False)

    print('DGE finished in {:.2f}-min'.format((time.time()-dge_grandtotal)/60))


Evaluating 26wk

--------
...
--------


OL, WT vs SBMA
----
    Ncells in X:3050
    Ncells in Y:2942

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 21.38-s
... 50% completed in 45.54-s
... 75% completed in 73.86-s
... mwu computed in 106.32-s

... computed in 121.73-s

AS2, WT vs SBMA
----
    Ncells in X:927
    Ncells in Y:980

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 17.72-s
... 50% completed in 38.53-s
... 75% completed in 63.42-s
... mwu computed in 92.36-s

... computed in 97.92-s

NU, WT vs SBMA
----
    Ncells in X:2512
    Ncells in Y:2391

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 21.89-s
... 50% completed in 46.76-s
... 75% completed in 75.88-s
... mwu computed in 108.92-s

... computed in 118.20-s

OPC, WT vs SBMA
----
    Ncells in X:227
    Ncells in Y:224

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 15.86-s
... 50% completed in 34.81-s
... 75% completed in 58.02-s
...

In [5]:
## Sampling
# Draw the same number of cells, without replacement, from each timepoint of
# each genotype, then keep the drawn cells as a subset of the original object.
# Draw order (wt 26wk, wt 52wk, mut 26wk, mut 52wk) fixes how the RNG is consumed.
n_draw = 7230
wt_obs = wt.obs
mut_obs = mut.obs

wt26 = list(wt_obs[wt_obs['timepoint']=='26wk'].sample(n=n_draw, replace=False).index)
wt52 = list(wt_obs[wt_obs['timepoint']=='52wk'].sample(n=n_draw, replace=False).index)
sample_wt = wt[wt_obs.index.isin(wt26 + wt52), :]

mut26 = list(mut_obs[mut_obs['timepoint']=='26wk'].sample(n=n_draw, replace=False).index)
mut52 = list(mut_obs[mut_obs['timepoint']=='52wk'].sample(n=n_draw, replace=False).index)
sample_mut = mut[mut_obs.index.isin(mut26 + mut52), :]

# save data objects
for sampled, out_name in ((sample_wt,'220224_WT_sampling.h5ad'),
                          (sample_mut,'220224_SBMA_sampling.h5ad')) :
    sampled.write(os.path.join(dfp,out_name))
print('saved @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))

saved @220224.13:45:19


In [6]:
# `sub3` composition of the sampled WT cells, one table per timepoint
for tp in sample_wt.obs['timepoint'].unique():
    wtt = sample_wt[sample_wt.obs['timepoint']==tp,:]
    print("WT " + tp, wtt.obs['sub3'].value_counts(), " ", sep="\n")

WT 26wk
OL     2924
NU     2406
AS2     877
MG      293
AS1     239
OPC     220
END     156
EP       47
MN       35
CN       17
PER      16
Name: sub3, dtype: int64
 
WT 52wk
OL     2707
NU     2486
AS2    1054
AS1     325
OPC     251
MG      232
END     120
EP       28
CN       22
MN        3
PER       2
Name: sub3, dtype: int64
 


In [7]:
# `sub3` composition of the sampled SBMA cells, one table per timepoint
for tp in sample_mut.obs['timepoint'].unique():
    mutt = sample_mut[sample_mut.obs['timepoint']==tp,:]
    print("SBMA " + tp, mutt.obs['sub3'].value_counts(), " ", sep="\n")

SBMA 26wk
OL     2942
NU     2391
AS2     980
MG      251
OPC     224
AS1     217
END     118
EP       42
MN       24
CN       24
PER      17
Name: sub3, dtype: int64
 
SBMA 52wk
OL     2767
NU     2341
AS2    1109
AS1     351
MG      267
OPC     199
END     110
EP       53
CN       18
MN        9
PER       6
Name: sub3, dtype: int64
 


In [None]:
## EMD; SUB3, IMP, Sample
# Differential expression, WT (X) vs SBMA (Y), per timepoint and per `sub3`
# cell type, on the 'imputed' layer of the down-sampled objects. For every
# gene: signed EMD (scprep), Mann-Whitney U p-value with BH correction (mwu)
# and log2 fold change of the means (log2aveFC). Rows are split by EMD sign
# and written as two CSVs.
# NOTE: from here on `wt` and `mut` refer to the sampled objects, not the full ones.
wt = sample_wt
mut = sample_mut
if True :
    dge_grandtotal = time.time()
    group='sub3'
    fname = 'imp_samp'
    # X/Y columns come from wt/mut, so label them with wt's own gene names
    # (assumes mut has the same genes in the same order — TODO confirm)
    gene_names = wt.var_names
    dge_parts = [] # one frame per (timepoint, cell type), concatenated once below
    for t in ['26wk','52wk'] :
        print('Evaluating {}'.format(t))
        start_t=time.time()

        # up down dichotomy
        print('\n--------')
        print('...')
        print('--------\n')
        for i in wt.obs[group].unique() :
            start = time.time()
            print('\n{}, WT vs SBMA'.format(i))
            print('----')
            X = wt[((wt.obs[group]==i) & (wt.obs['timepoint']==t)), :].layers['imputed']
            Y = mut[((mut.obs[group]==i) & (mut.obs['timepoint']==t)), :].layers['imputed']

            # densify first: np.asarray on a scipy sparse matrix does not give a numeric 2-D array
            if sparse.issparse(X):
                X = X.toarray()
            if sparse.issparse(Y):
                Y = Y.toarray()
            X = np.asarray(X)
            Y = np.asarray(Y)

            print('    Ncells in X:{}'.format(X.shape[0]))
            print('    Ncells in Y:{}\n'.format(Y.shape[0]))

            # after down-sampling a rare cell type can be absent from one genotype at a timepoint
            if X.shape[0]==0 or Y.shape[0]==0 :
                warnings.warn('Skipping {} at {}: no cells in WT or SBMA.'.format(i,t))
                continue

            emd = scprep.stats.differential_expression(X,Y,
                                                       measure = 'emd',
                                                       direction='both',
                                                       gene_names=gene_names,
                                                       n_jobs=-1)

            # mann-whitney u, corrected p-values
            p = mwu(X,Y,gene_names)
            emd['Gene']=emd.index
            emd=emd.drop(columns='rank')
            fc = log2aveFC(X,Y,gene_names.to_list())
            # keep only genes that received a p-value; warn only when some were really dropped
            # (the old `if mask.any()` test fired on every run, even with 0 genes dropped)
            has_pval = fc['Gene'].isin(p['Gene'])
            n_dropped = int((~has_pval).sum())
            if n_dropped :
                fc = fc.loc[has_pval,:]
                warnings.warn('Warning: {} genes dropped due to p-val NA.'.format(n_dropped))
            dt = pd.merge(p,fc,how='left',on="Gene")
            emd = emd.loc[emd['Gene'].isin(p['Gene']),:]
            dt = pd.merge(dt,emd,how='left',on='Gene')
            dt['Cell type']=[i]*dt.shape[0]
            dt['timepoint']=[str(t)]*dt.shape[0]
            dt['nlog10pvalcorrected']=(-1)*np.log10(dt['pval_corrected'])
            dge_parts.append(dt)
            print('... computed in {:.2f}-s'.format(time.time()-start))
        print('\nFinished timepoint {} in {:.2f}-min'.format(t,(time.time()-start_t)/60))
    # DataFrame.append was removed in pandas 2.0; one concat is also linear instead of quadratic
    dge = pd.concat(dge_parts, ignore_index=True)
    # X is WT and Y is SBMA: emd>0 rows are written as "SBMA down", emd<0 rows as "SBMA up"
    # NOTE(review): relies on scprep's signed EMD being positive when expression is higher in X — confirm
    dgeup = dge.loc[dge['emd']>0,:] # 'up' in WT relative to SBMA
    dgedown = dge.loc[dge['emd']<0,:] # 'down' in WT relative to SBMA
    dgeup.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_down.csv'),index=False)
    dgedown.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_up.csv'),index=False)

    print('DGE finished in {:.2f}-min'.format((time.time()-dge_grandtotal)/60))


Evaluating 26wk

--------
...
--------


OL, WT vs SBMA
----
    Ncells in X:2924
    Ncells in Y:2942

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 28.83-s
... 50% completed in 60.96-s
... 75% completed in 97.55-s
... mwu computed in 138.76-s

... computed in 158.62-s

AS2, WT vs SBMA
----
    Ncells in X:877
    Ncells in Y:980

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 20.25-s
... 50% completed in 43.69-s
... 75% completed in 71.53-s
... mwu computed in 103.65-s

... computed in 110.84-s

NU, WT vs SBMA
----
    Ncells in X:2406
    Ncells in Y:2391

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 27.68-s
... 50% completed in 58.52-s
... 75% completed in 93.60-s
... mwu computed in 132.81-s

... computed in 146.39-s

OPC, WT vs SBMA
----
    Ncells in X:220
    Ncells in Y:224

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 16.92-s
... 50% completed in 37.03-s
... 75% completed in 61.30-s
.

In [None]:
## EMD; SUB3, No IMP, Sample
# Differential expression, WT (X) vs SBMA (Y), per timepoint and per `sub3`
# cell type, on `.X` of the down-sampled objects (no imputed layer). For every
# gene: signed EMD (scprep), Mann-Whitney U p-value with BH correction (mwu)
# and log2 fold change of the means (log2aveFC). Rows are split by EMD sign
# and written as two CSVs.
# NOTE: from here on `wt` and `mut` refer to the sampled objects, not the full ones.
wt = sample_wt
mut = sample_mut

if True :
    dge_grandtotal = time.time()
    group='sub3'
    fname = 'no_imp_samp'
    # X/Y columns come from wt/mut, so label them with wt's own gene names
    # (assumes mut has the same genes in the same order — TODO confirm)
    gene_names = wt.var_names
    dge_parts = [] # one frame per (timepoint, cell type), concatenated once below
    for t in ['26wk','52wk'] :
        print('Evaluating {}'.format(t))
        start_t=time.time()

        # up down dichotomy
        print('\n--------')
        print('...')
        print('--------\n')
        for i in wt.obs[group].unique() :
            start = time.time()
            print('\n{}, WT vs SBMA'.format(i))
            print('----')
            X = wt.X[((wt.obs[group]==i) & (wt.obs['timepoint']==t)), :]
            Y = mut.X[((mut.obs[group]==i) & (mut.obs['timepoint']==t)), :]

            # densify sparse storage before handing it to numpy
            if sparse.issparse(X):
                X = X.toarray()
            if sparse.issparse(Y):
                Y = Y.toarray()
            X = np.asarray(X)
            Y = np.asarray(Y)

            print('    Ncells in X:{}'.format(X.shape[0]))
            print('    Ncells in Y:{}\n'.format(Y.shape[0]))

            # after down-sampling a rare cell type can be absent from one genotype at a timepoint
            if X.shape[0]==0 or Y.shape[0]==0 :
                warnings.warn('Skipping {} at {}: no cells in WT or SBMA.'.format(i,t))
                continue

            emd = scprep.stats.differential_expression(X,Y,
                                                       measure = 'emd',
                                                       direction='both',
                                                       gene_names=gene_names,
                                                       n_jobs=-1)

            # mann-whitney u, corrected p-values
            p = mwu(X,Y,gene_names)
            emd['Gene']=emd.index
            emd=emd.drop(columns='rank')
            fc = log2aveFC(X,Y,gene_names.to_list())
            # keep only genes that received a p-value; warn only when some were really dropped
            # (the old `if mask.any()` test fired on every run, even with 0 genes dropped)
            has_pval = fc['Gene'].isin(p['Gene'])
            n_dropped = int((~has_pval).sum())
            if n_dropped :
                fc = fc.loc[has_pval,:]
                warnings.warn('Warning: {} genes dropped due to p-val NA.'.format(n_dropped))
            dt = pd.merge(p,fc,how='left',on="Gene")
            emd = emd.loc[emd['Gene'].isin(p['Gene']),:]
            dt = pd.merge(dt,emd,how='left',on='Gene')
            dt['Cell type']=[i]*dt.shape[0]
            dt['timepoint']=[str(t)]*dt.shape[0]
            dt['nlog10pvalcorrected']=(-1)*np.log10(dt['pval_corrected'])
            dge_parts.append(dt)
            print('... computed in {:.2f}-s'.format(time.time()-start))
        print('\nFinished timepoint {} in {:.2f}-min'.format(t,(time.time()-start_t)/60))
    # DataFrame.append was removed in pandas 2.0; one concat is also linear instead of quadratic
    dge = pd.concat(dge_parts, ignore_index=True)
    # X is WT and Y is SBMA: emd>0 rows are written as "SBMA down", emd<0 rows as "SBMA up"
    # NOTE(review): relies on scprep's signed EMD being positive when expression is higher in X — confirm
    dgeup = dge.loc[dge['emd']>0,:] # 'up' in WT relative to SBMA
    dgedown = dge.loc[dge['emd']<0,:] # 'down' in WT relative to SBMA
    dgeup.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_down.csv'),index=False)
    dgedown.to_csv(os.path.join(pfp,'dge_'+fname+'_SBMA_up.csv'),index=False)

    print('DGE finished in {:.2f}-min'.format((time.time()-dge_grandtotal)/60))
