In [2]:
# Import packages
import os, sys, glob, re, math, pickle
import phate, scprep, magic
import graphtools as gt
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import time,random,datetime
import networkx as nx
#import scvelo as scv
from sklearn import metrics
from sklearn import model_selection
from scipy import sparse
from scipy.stats import mannwhitneyu, tiecorrect, rankdata
from statsmodels.stats.multitest import multipletests
import scanpy as sc
from sklearn.dummy import DummyClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import SpectralClustering, OPTICS, cluster_optics_dbscan, AgglomerativeClustering
from bbknn import bbknn
import warnings
%matplotlib inline
%load_ext memory_profiler

# settings
plt.rc('font', size = 8)
plt.rc('font', family='sans serif')
plt.rcParams['pdf.fonttype']=42
plt.rcParams['ps.fonttype']=42
plt.rcParams['text.usetex']=False
plt.rcParams['legend.frameon']=False
plt.rcParams['axes.grid']=False
plt.rcParams['legend.markerscale']=0.5
sc.set_figure_params(dpi=300,dpi_save=600,
                     frameon=False,
                     fontsize=8)
plt.rcParams['savefig.dpi']=600
sc.settings.verbosity=2
sc._settings.ScanpyConfig.n_jobs=-1

# reproducibility
rs = np.random.seed(42)

# utils
def mwu(X,Y,gene_names,correction=None,debug=False) :
    '''
    Benjamini-Hochberg correction implemented. Can change to Bonferonni
    gene_names (list)
    if X,Y single gene expression array, input x.reshape(-1,1), y.reshape(-1,1)
    NOTE: get zeros sometimes because difference (p-value is so small)
    '''
    p=pd.DataFrame()
    print('Mann-Whitney U w/Benjamini/Hochberg correction\n')
    start = time.time()
    for i,g in enumerate(gene_names) :
        if i==np.round(np.quantile(np.arange(len(gene_names)),0.25)) :
            print('... 25% completed in {:.2f}-s'.format(time.time()-start))
        elif i==np.round(np.quantile(np.arange(len(gene_names)),0.5)) :
            print('... 50% completed in {:.2f}-s'.format(time.time()-start))
        elif i==np.round(np.quantile(np.arange(len(gene_names)),0.75)) :
            print('... 75% completed in {:.2f}-s'.format(time.time()-start))
        p.loc[i,'Gene']=g
        if (tiecorrect(rankdata(np.concatenate((np.asarray(X[:,i]),np.asarray(Y[:,i])))))==0) :
            if debug :
                print('P-value not calculable for {}'.format(g))
            p.loc[i,'pval']=np.nan
        else :
            _,p.loc[i,'pval']=mannwhitneyu(X[:,i],Y[:,i]) # continuity correction is True
    print('... mwu computed in {:.2f}-s\n'.format(time.time() - start))
    # ignore NaNs, since can't do a comparison on these (change numbers for correction)
    p_corrected = p.loc[p['pval'].notna(),:]
    new_pvals = multipletests(p_corrected['pval'],method='fdr_bh')
    p_corrected['pval_corrected'] = new_pvals[1]
    return p_corrected

def log2aveFC(X,Y,gene_names,AnnData=None) :
    '''not sensitivity to directionality due to subtraction
    X and Y full arrays, subsetting performed here
    `gene_names` (list): reduced list of genes to calc
    `adata` (sc.AnnData): to calculate reduced list. NOTE: assumes X,Y drawn from adata.var_names
    '''
    if not AnnData is None :
        g_idx = [i for i,g in enumerate(AnnData.var_names) if g in gene_names]
        fc=pd.DataFrame({'Gene':AnnData.var_names[g_idx],
                         'log2FC':np.log2(X[:,g_idx].mean(axis=0)) - np.log2(Y[:,g_idx].mean(axis=0))}) # returns NaN if negative value 
    else :
        fc=pd.DataFrame({'Gene':gene_names,
                         'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
    return fc


# fps
dfp = '/home/cl2292/project/SBMA/data/'
pfp = '/home/cl2292/project/SBMA/results/'
pdfp = '/home/cl2292/project/SBMA/data/processed/'
sc.settings.figdir = pfp

In [3]:
# Load Data

if True :
    start = time.time()
    backed=None # None if not
    fname='230201_ctype.h5ad' # for full, can maybe get away with ~300G
    %memit adata = sc.read_h5ad(os.path.join(pdfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))

peak memory: 2627.67 MiB, increment: 2347.10 MiB
loaded @230201.10:45:50
took 4.32-s to load data


In [4]:
# Load Data

if True :
    start = time.time()
    backed=None # None if not
    fname='230201_WT_imp.h5ad' # for full, can maybe get away with ~300G
    %memit wt = sc.read_h5ad(os.path.join(pdfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))
    
if True :
    start = time.time()
    backed=None # None if not
    fname='230201_SBMA_imp.h5ad' # for full, can maybe get away with ~300G
    %memit mut = sc.read_h5ad(os.path.join(pdfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))

peak memory: 9628.80 MiB, increment: 7001.12 MiB
loaded @230201.10:45:58
took 8.05-s to load data
peak memory: 17075.84 MiB, increment: 7447.03 MiB
loaded @230201.10:46:06
took 8.59-s to load data


In [5]:
## EMD; cell type, IMP

if True :
    dge_grandtotal = time.time()
    group='ctype'
    fname = 'ctype_imp' 
    dge = pd.DataFrame()
    for t in ['26wk','52wk'] :
        print('Evaluating {}'.format(t))
        t_total = time.time()
        dge_total = time.time()
        start_t=time.time()
        
        # up down dichotomy
        print('\n--------')
        print('...')
        print('--------\n')
#        dge = pd.DataFrame()
        for i in wt.obs[group].unique() :
            start = time.time()
            print('\n{}, WT vs SBMA'.format(i))
            print('----')
            X = wt[((wt.obs[group]==i) & (wt.obs['timepoint']==t)), :].layers['imputed']
            Y = mut[((mut.obs[group]==i) & (mut.obs['timepoint']==t)), :].layers['imputed']
            

            X = np.asarray(X)
            Y = np.asarray(Y)
        
            print('    Ncells in X:{}'.format(X.shape[0]))
            print('    Ncells in Y:{}\n'.format(Y.shape[0]))            
            
            emd = scprep.stats.differential_expression(X,Y,
                                                       measure = 'emd',
                                                       direction='both',
                                                       gene_names=adata.var_names,
                                                       n_jobs=-1)
            
            # mann-whitney u, corrected p-values
            p = mwu(X,Y,wt.var_names)
            emd['Gene']=emd.index
            emd=emd.drop(columns='rank')
            fc = log2aveFC(X,Y,adata.var_names.to_list())
            gene_mismatch = fc['Gene'].isin(p['Gene'])
            if gene_mismatch.any():
                fc = fc.loc[gene_mismatch,:]
                warnings.warn('Warning: {} genes dropped due to p-val NA.'.format((gene_mismatch==False).sum()))
            dt = pd.merge(p,fc,how='left',on="Gene")
            gene_mismatch = emd['Gene'].isin(p['Gene'])
            if gene_mismatch.any():
                emd = emd.loc[gene_mismatch,:]
            dt = pd.merge(dt,emd,how='left',on='Gene')
            dt['Cell type']=[i]*dt.shape[0]
            dt['timepoint']=[str(t)]*dt.shape[0]
            dt['nlog10pvalcorrected']=(-1)*np.log10(dt['pval_corrected'])
            dge = dge.append(dt, ignore_index=True)
            print('... computed in {:.2f}-s'.format(time.time()-start))
        print('\nFinished timepoint {} in {:.2f}-min'.format(t,(time.time()-start_t)/60))  
    dgeup = dge.loc[dge['emd']>0,:] # take only 'up' (switch for down)
    dgedown = dge.loc[dge['emd']<0,:] # take only 'down'
    dgeup.to_csv(os.path.join(pfp,'230201_dge_'+fname+'_SBMA_down.csv'),index=False)
    dgedown.to_csv(os.path.join(pfp,'230201_dge_'+fname+'_SBMA_up.csv'),index=False)
        

    print('DGE finished in {:.2f}-min'.format((time.time()-dge_grandtotal)/60))

Evaluating 26wk

--------
...
--------


MN_chat, WT vs SBMA
----
    Ncells in X:19
    Ncells in Y:23

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 19.86-s
... 50% completed in 42.22-s
... 75% completed in 67.86-s
... mwu computed in 96.32-s

... computed in 102.12-s

NU, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:1947
    Ncells in Y:1972

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 43.28-s
... 50% completed in 89.42-s
... 75% completed in 139.05-s
... mwu computed in 191.67-s



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  result = getattr(ufunc, method)(*inputs, **kwargs)


... computed in 202.26-s

EP, WT vs SBMA
----
    Ncells in X:41
    Ncells in Y:38

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 19.17-s
... 50% completed in 40.99-s
... 75% completed in 66.44-s
... mwu computed in 95.09-s

... computed in 98.44-s

AS, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:1032
    Ncells in Y:1035

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 29.72-s
... 50% completed in 62.15-s
... 75% completed in 98.07-s
... mwu computed in 136.99-s

... computed in 143.14-s

OPC, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  result = getattr(ufunc, method)(*inputs, **kwargs)


    Ncells in X:214
    Ncells in Y:222

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 21.32-s
... 50% completed in 45.25-s
... 75% completed in 72.68-s
... mwu computed in 103.03-s

... computed in 106.53-s

MEN, WT vs SBMA
----
    Ncells in X:58
    Ncells in Y:49



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 19.16-s
... 50% completed in 41.18-s
... 75% completed in 66.82-s
... mwu computed in 95.37-s

... computed in 98.58-s

UNID, WT vs SBMA
----
    Ncells in X:130
    Ncells in Y:125



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 20.15-s
... 50% completed in 42.86-s
... 75% completed in 69.33-s
... mwu computed in 98.69-s

... computed in 101.91-s

OL, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:2922
    Ncells in Y:2938

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 51.70-s
... 50% completed in 105.37-s
... 75% completed in 162.47-s
... mwu computed in 222.68-s



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  result = getattr(ufunc, method)(*inputs, **kwargs)


... computed in 235.97-s

PER, WT vs SBMA
----
    Ncells in X:77
    Ncells in Y:66

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 19.62-s
... 50% completed in 41.86-s
... 75% completed in 67.76-s
... mwu computed in 96.43-s

... computed in 99.85-s

MG, WT vs SBMA
----
    Ncells in X:271
    Ncells in Y:227



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 20.21-s
... 50% completed in 43.03-s
... 75% completed in 69.58-s
... mwu computed in 99.00-s

... computed in 102.66-s

Finished timepoint 26wk in 21.53-min
Evaluating 52wk

--------
...
--------


MN_chat, WT vs SBMA
----
    Ncells in X:77
    Ncells in Y:81



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 20.77-s
... 50% completed in 44.19-s
... 75% completed in 71.22-s
... mwu computed in 101.02-s

... computed in 104.17-s

NU, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:7436
    Ncells in Y:7799

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 121.54-s
... 50% completed in 244.94-s
... 75% completed in 372.38-s
... mwu computed in 503.20-s



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  result = getattr(ufunc, method)(*inputs, **kwargs)


... computed in 539.07-s

EP, WT vs SBMA
----
    Ncells in X:138
    Ncells in Y:161

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 20.19-s
... 50% completed in 42.94-s
... 75% completed in 69.35-s
... mwu computed in 98.72-s

... computed in 103.95-s

AS, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:4024
    Ncells in Y:4165

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 67.82-s
... 50% completed in 138.20-s
... 75% completed in 212.01-s
... mwu computed in 289.20-s



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  result = getattr(ufunc, method)(*inputs, **kwargs)


... computed in 307.97-s

OPC, WT vs SBMA
----
    Ncells in X:746
    Ncells in Y:747

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 27.26-s
... 50% completed in 57.17-s
... 75% completed in 90.91-s
... mwu computed in 127.40-s

... computed in 133.32-s

MEN, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:37
    Ncells in Y:94

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 19.81-s
... 50% completed in 42.24-s
... 75% completed in 68.38-s
... mwu computed in 97.30-s

... computed in 100.42-s

UNID, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:310
    Ncells in Y:233

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 21.72-s
... 50% completed in 45.90-s
... 75% completed in 74.02-s
... mwu computed in 104.98-s

... computed in 108.69-s

OL, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:8080
    Ncells in Y:9111

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 118.80-s
... 50% completed in 238.78-s
... 75% completed in 362.62-s
... mwu computed in 489.55-s



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  result = getattr(ufunc, method)(*inputs, **kwargs)


... computed in 524.08-s

PER, WT vs SBMA
----
    Ncells in X:215
    Ncells in Y:185

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 21.50-s
... 50% completed in 45.68-s
... 75% completed in 73.69-s
... mwu computed in 104.54-s

... computed in 109.97-s

MG, WT vs SBMA
----


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})


    Ncells in X:727
    Ncells in Y:951

Mann-Whitney U w/Benjamini/Hochberg correction

... 25% completed in 24.82-s
... 50% completed in 52.10-s
... 75% completed in 83.45-s
... mwu computed in 117.56-s



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_corrected['pval_corrected'] = new_pvals[1]
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  'log2FC':np.log2(X.mean(axis=0)) - np.log2(Y.mean(axis=0))})
  result = getattr(ufunc, method)(*inputs, **kwargs)


... computed in 123.44-s

Finished timepoint 52wk in 35.92-min
DGE finished in 57.51-min
