In [4]:
import scanpy as sc
import pandas as pd
import random
import numpy as np
import matplotlib as plt
import gc 
import ctypes
import scvi



In [6]:
adata = sc.read_h5ad('../data/luca_query_reannotated.h5ad')

In [7]:
adata.obs

Unnamed: 0,sample,uicc_stage,ever_smoker,age,donor_id,origin,dataset,ann_fine,cell_type_predicted,doublet_status,...,IFN_TAMs,Reg_TAMs,Inflam_TAMs,LA_TAMs,Angio_TAMs,RTM_TAMs,Prolif_TAMs,Subtype,Projection_CellType,ident
001C_AAACCTGTCAACACCA-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage alveolar,Macrophage,singlet,...,0.353933,0.571664,0.298638,0.541069,0.427279,0.565822,0.401695,Reg_TAMs,RTM_TAMs,local
001C_AAACGGGAGGCTCATT-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage,Macrophage,singlet,...,0.477315,0.451494,0.370148,0.492239,0.537751,0.511095,0.348617,Angio_TAMs,LA_TAMs,local
001C_AAACGGGGTATAATGG-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage alveolar,Macrophage,singlet,...,0.387389,0.497444,0.282365,0.403790,0.443018,0.548354,0.328568,RTM_TAMs,RTM_TAMs,local
001C_AAACGGGTCACAAACC-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage,Macrophage,singlet,...,0.333177,0.592321,0.262999,0.594631,0.444907,0.450925,0.406192,LA_TAMs,LA_TAMs,local
001C_AAAGATGAGTGCTGCC-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage alveolar,Macrophage,singlet,...,0.358754,0.648105,0.282604,0.482257,0.408499,0.454457,0.291259,Reg_TAMs,RTM_TAMs,local
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAAGCTGTCTA-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.551482,0.640531,0.489101,0.623875,0.525708,0.514441,0.527691,Reg_TAMs,IFN_TAMs,local
TTTGTCAAGGATATAC-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.492014,0.699073,0.539889,0.612688,0.608443,0.551273,0.460248,Reg_TAMs,Reg_TAMs,local
TTTGTCAAGTACGCGA-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.470449,0.588994,0.611164,0.427515,0.471975,0.431026,0.455491,Inflam_TAMs,Inflam_TAMs,local
TTTGTCACATCTATGG-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.515349,0.589936,0.633147,0.577088,0.550079,0.487074,0.425062,Inflam_TAMs,LA_TAMs,local


In [8]:
adata.obs['Projection_CellType'].value_counts()


Projection_CellType
RTM_TAMs       94074
LA_TAMs        32428
Reg_TAMs       17933
IFN_TAMs       12215
Inflam_TAMs     6484
Angio_TAMs      5741
Int.Node.3      4221
Prolif_TAMs     3771
Int.Node.4       922
Int.Node.5       150
Name: count, dtype: int64

In [9]:
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

In [10]:
import gc 
import ctypes

In [6]:
def make_pseudobulk(adata, sample_colname, cell_type_colname, metacols):
    '''
    To create an an data with pseudo-bulk gene expression profiles for the samples and cell_types given.
    The function also carries metadata columns from the obs. metacols: as a list of col names.
    This is how to use:
        ps_adata = make_pseudobulk(hlca, sample_colname = 'sample', cell_type_colname = 'cell_type', metacols= ['patient', 'disease', 'tissue', 'study', 'sex'])
    '''
    ps_list = []
    for s in adata.obs[sample_colname].unique():
        adata_sub = adata[adata.obs[sample_colname] == s]
        
        for ct in adata_sub.obs[cell_type_colname].unique():
            adata_sub_ct = adata_sub[adata_sub.obs[cell_type_colname] == ct]
            
            adata_rep = sc.AnnData(X = adata_sub_ct.X.sum(axis = 0),
                                   var = adata_sub_ct.var[[]])
            samp_name=adata_sub.obs[sample_colname].iloc[0]
            cell= adata_sub_ct.obs[cell_type_colname].iloc[0]
            adata_rep.obs_names = [f"{samp_name}{cell}"]
            adata_rep.obs[cell_type_colname] = adata_sub_ct.obs[cell_type_colname].iloc[0]
            adata_rep.obs[metacols] = adata_sub_ct.obs[metacols].iloc[0]
            ps_list.append(adata_rep)

    ps_adata = sc.concat(ps_list)
    
    return ps_adata


In [None]:
def make_pseudobulk2(adata, sample_colname, cell_type_colname, metacols):
    '''
    Creates an AnnData object with pseudo-bulk gene expression profiles 
    for the samples and cell types specified. Also adds the number of cells 
    used per pseudobulk as "n_cells" in .obs.

    Parameters:
        adata: AnnData
        sample_colname: str
        cell_type_colname: str
        metacols: list of str
    
    Returns:
        AnnData with pseudobulked profiles and metadata.
    '''
    ps_list = []
    for s in adata.obs[sample_colname].unique():
        adata_sub = adata[adata.obs[sample_colname] == s]
        
        for ct in adata_sub.obs[cell_type_colname].unique():
            adata_sub_ct = adata_sub[adata_sub.obs[cell_type_colname] == ct]
            
            # Count how many cells are in this group
            n_cells = adata_sub_ct.shape[0]
            
            adata_rep = sc.AnnData(X=adata_sub_ct.X.sum(axis=0),
                                   var=adata_sub_ct.var[[]])
            
            samp_name = adata_sub.obs[sample_colname].iloc[0]
            cell = adata_sub_ct.obs[cell_type_colname].iloc[0]
            adata_rep.obs_names = [f"{samp_name}_{cell}"]
            
            # Fill in metadata and number of cells
            adata_rep.obs[cell_type_colname] = cell
            adata_rep.obs[metacols] = adata_sub_ct.obs[metacols].iloc[0]
            adata_rep.obs["n_cells"] = n_cells
            
            ps_list.append(adata_rep)

    ps_adata = sc.concat(ps_list)
    ps_adata.obs["n_cells"] = ps_adata.obs["n_cells"].astype(int)
    ps_adata.obs['log2_n_cells'] = np.log2(ps_adata.obs['n_cells'])

    return ps_adata


In [2]:
import scanpy as sc
import numpy as np
import random

def make_pseudobulk3(adata, sample_colname, cell_type_colname, metacols, min_cells=40, max_cells=50, random_state=0):
    '''
    Creates an AnnData object with pseudo-bulk gene expression profiles 
    for the samples and cell types specified. Also adds the number of cells 
    used per pseudobulk as "n_cells" in .obs.

    Parameters:
        adata: AnnData
        sample_colname: str
        cell_type_colname: str
        metacols: list of str
        min_cells: int, minimum number of cells required to include a group
        max_cells: int, maximum number of cells to include per group (randomly sampled if exceeded)
        random_state: int, for reproducibility of random sampling

    Returns:
        AnnData with pseudobulked profiles and metadata.
    '''
    ps_list = []
    rng = np.random.default_rng(random_state)

    for s in adata.obs[sample_colname].unique():
        adata_sub = adata[adata.obs[sample_colname] == s]
        
        for ct in adata_sub.obs[cell_type_colname].unique():
            adata_sub_ct = adata_sub[adata_sub.obs[cell_type_colname] == ct]
            n_cells_total = adata_sub_ct.shape[0]

            # Skip if too few cells
            if n_cells_total < min_cells:
                continue

            # Downsample if too many
            if n_cells_total > max_cells:
                selected_idx = rng.choice(adata_sub_ct.shape[0], size=max_cells, replace=False)
                adata_sub_ct = adata_sub_ct[selected_idx]
            else:
                selected_idx = slice(None)  # Use all

            n_cells = adata_sub_ct.shape[0]

            adata_rep = sc.AnnData(X=adata_sub_ct.X.sum(axis=0),
                                   var=adata_sub_ct.var[[]])
            
            samp_name = adata_sub.obs[sample_colname].iloc[0]
            cell = adata_sub_ct.obs[cell_type_colname].iloc[0]
            adata_rep.obs_names = [f"{samp_name}_{cell}"]
            
            # Fill in metadata and number of cells
            adata_rep.obs[cell_type_colname] = cell
            adata_rep.obs[metacols] = adata_sub_ct.obs[metacols].iloc[0]
            adata_rep.obs["n_cells"] = n_cells
            
            ps_list.append(adata_rep)

    ps_adata = sc.concat(ps_list)
    ps_adata.obs["n_cells"] = ps_adata.obs["n_cells"].astype(int)
    ps_adata.obs['log2_n_cells'] = np.log2(ps_adata.obs['n_cells']).astype(float)

    return ps_adata


In [11]:
import pandas as pd
import scipy.sparse

def savePBdata2(adata, filename):
    # Convert counts to dense matrix if it's sparse
    if scipy.sparse.issparse(adata.X):
        counts = adata.X.toarray()
    else:
        counts = adata.X

    # Convert to DataFrame
    counts_df = pd.DataFrame(counts, index=adata.obs_names, columns=adata.var_names)

    # (Optional) Convert to integer
    counts_df = counts_df.astype(int, errors='ignore')

    # Save AnnData object
    adata.write(filename)
    
    # Save counts as CSV for inspection (optional)
    counts_df.to_csv(filename.replace(".h5ad", "_counts.csv"))

    print(f"Saved AnnData to {filename} and count matrix to CSV.")


In [8]:
ps_adata_macs = make_pseudobulk(adata, sample_colname = 'sample', cell_type_colname = 'Projection_CellType', metacols= ['assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage'])



In [25]:
ps_adata_macs = make_pseudobulk2(adata, sample_colname = 'sample', cell_type_colname = 'Projection_CellType', metacols= ['assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage'])



In [12]:
#Pseudo-bulking with random down sampling once. min_cells=40, max_cells=50
ps_adata_macs = make_pseudobulk3(adata, sample_colname = 'sample', cell_type_colname = 'Projection_CellType', metacols= ['assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage'])



In [13]:
ps_adata_macs.obs.shape

(613, 12)

In [14]:
ps_adata_macs.obs

Unnamed: 0,Projection_CellType,assay,donor_id,disease,tissue,study,sex,age,uicc_stage,tumor_stage,n_cells,log2_n_cells
Adams_Kaminski_2020_001C_RTM_TAMs,RTM_TAMs,10x 3' v2,Adams_Kaminski_2020_001C,normal,lung,Adams_Kaminski_2020,male,22.0,non-cancer,non-cancer,50,5.643856
Adams_Kaminski_2020_001C_LA_TAMs,LA_TAMs,10x 3' v2,Adams_Kaminski_2020_001C,normal,lung,Adams_Kaminski_2020,male,22.0,non-cancer,non-cancer,50,5.643856
Adams_Kaminski_2020_001C_Int.Node.3,Int.Node.3,10x 3' v2,Adams_Kaminski_2020_001C,normal,lung,Adams_Kaminski_2020,male,22.0,non-cancer,non-cancer,45,5.491853
Adams_Kaminski_2020_002C_RTM_TAMs,RTM_TAMs,10x 3' v2,Adams_Kaminski_2020_002C,normal,lung,Adams_Kaminski_2020,female,25.0,non-cancer,non-cancer,50,5.643856
Adams_Kaminski_2020_002C_LA_TAMs,LA_TAMs,10x 3' v2,Adams_Kaminski_2020_002C,normal,lung,Adams_Kaminski_2020,female,25.0,non-cancer,non-cancer,50,5.643856
...,...,...,...,...,...,...,...,...,...,...,...,...
Leader_Merad_2021_414_RTM_TAMs,RTM_TAMs,10x 3' v2,Leader_Merad_2021_729,lung adenocarcinoma,lung,Leader_Merad_2021,female,64.0,II,early,50,5.643856
Leader_Merad_2021_414_Angio_TAMs,Angio_TAMs,10x 3' v2,Leader_Merad_2021_729,lung adenocarcinoma,lung,Leader_Merad_2021,female,64.0,II,early,50,5.643856
Leader_Merad_2021_414_IFN_TAMs,IFN_TAMs,10x 3' v2,Leader_Merad_2021_729,lung adenocarcinoma,lung,Leader_Merad_2021,female,64.0,II,early,50,5.643856
Leader_Merad_2021_414_Reg_TAMs,Reg_TAMs,10x 3' v2,Leader_Merad_2021_729,lung adenocarcinoma,lung,Leader_Merad_2021,female,64.0,II,early,50,5.643856


In [15]:
savePBdata2(ps_adata_macs, filename="../data/ps_adata_macs.h5ad")


Saved AnnData to ../data/ps_adata_macs.h5ad and count matrix to CSV.


In [16]:
with open('../data/Homo_sapiens.GRCh38.104.gtf') as f:
    gtf = list(f)

#prep the gtf file
gtf = [x for x in gtf if not x.startswith('#')]
gtf = [x for x in gtf if 'gene_id "' in x and 'gene_name "' in x]
gtf = list(map(lambda x: (x.split('gene_id "')[1].split('"')[0], x.split('gene_name "')[1].split('"')[0]), gtf))

In [17]:
ctyps = ps_adata_macs.obs['Projection_CellType'][~ps_adata_macs.obs['Projection_CellType'].isin(['Int.Node.3', 'Int.Node.4', 'Int.Node.5'])].unique()
ctyps

['RTM_TAMs', 'LA_TAMs', 'Reg_TAMs', 'IFN_TAMs', 'Angio_TAMs', 'Inflam_TAMs', 'Prolif_TAMs']
Categories (9, object): ['Angio_TAMs', 'IFN_TAMs', 'Inflam_TAMs', 'Int.Node.3', ..., 'LA_TAMs', 'Prolif_TAMs', 'RTM_TAMs', 'Reg_TAMs']

In [18]:
#sub_canc = ps_adata_macs[ps_adata_macs.obs['cell_type'] == 'malignant cell' ]
df = pd.DataFrame()

for c in ctyps:
    sub_c = ps_adata_macs[ps_adata_macs.obs['Projection_CellType'] == c ]
    sub_c.obs['Contrast'] = c
    pbs = [sub_c]
    print(c)
    sub_ct = ps_adata_macs[ps_adata_macs.obs['Projection_CellType'] != c ]
    sub_ct.obs['Contrast'] = 'others'
    pbs.append(sub_ct)
    pb = sc.concat(pbs)
    print(pb)
    counts= pd.DataFrame(pb.X, columns = pb.var_names)
    counts = counts.astype(int, errors='ignore')
    #To get rid off all the 0s in count matrix (to prevent problems)
    counts=counts+1
    dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
    sc.pp.filter_genes(dds, min_cells = 10)
    dds.deseq2()
    stat_res = DeseqStats(dds, contrast=('Contrast', c, 'others'))
    stat_res.summary()
    de  = stat_res.results_df
    de = de.sort_values('log2FoldChange', ascending = False)
        
    #get rownames aka gene symbols
    row_names = de.index.tolist()
    #row_names
    #add rownames as a new column 
    de['gene_symbols'] = row_names
    gtf=dict(gtf)
    de['gene_name'] = de['gene_symbols'].map(gtf)
    de = de[(de['padj'] < 0.05) & (de['log2FoldChange'] > 1.0) ]
    de['SubType'] = c
    de['inContrastTo'] = 'others'
    df = pd.concat([df, de])
    #save deg list
    df.to_csv('../results/macs_PB-DEGs.csv', index=True)

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


RTM_TAMs
AnnData object with n_obs × n_vars = 613 × 17811
    obs: 'Projection_CellType', 'assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage', 'n_cells', 'log2_n_cells', 'Contrast'


  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 0.55 seconds.

  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
 

Log2 fold change & Wald test p-value: Contrast RTM_TAMs vs others
                  baseMean  log2FoldChange     lfcSE       stat        pvalue  \
ENSG00000121410   8.899818        0.008443  0.074765   0.112923  9.100918e-01   
ENSG00000268895   2.051674        0.125820  0.096400   1.305189  1.918284e-01   
ENSG00000175899  42.494688       -2.087932  0.131767 -15.845585  1.508115e-56   
ENSG00000245105   1.312772       -0.016022  0.122799  -0.130470  8.961948e-01   
ENSG00000166535   1.299172       -0.114902  0.125016  -0.919098  3.580442e-01   
...                    ...             ...       ...        ...           ...   
ENSG00000070476   4.463204        0.369784  0.106934   3.458053  5.440938e-04   
ENSG00000203995   1.254236       -0.100240  0.125890  -0.796254  4.258846e-01   
ENSG00000162378   8.232734        0.658906  0.062900  10.475521  1.119197e-25   
ENSG00000159840  37.619983        0.497643  0.063200   7.874045  3.433551e-15   
ENSG00000074755   8.015019        0.208859 

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


LA_TAMs
AnnData object with n_obs × n_vars = 613 × 17811
    obs: 'Projection_CellType', 'assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage', 'n_cells', 'log2_n_cells', 'Contrast'


  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 0.56 seconds.

Fitting dispersions...
... done in 5.14 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.27 seconds.

Fitting MAP dispersions...
... done in 4.74 seconds.

Fitting LFCs...
... done in 5.86 seconds.

Calculating cook's distance...
... done in 0.94 seconds.

Replacing 17 outlier genes.

Fitting dispersions...
... done in 0.07 seconds.

Fitting MAP dispersions...
... done in 0.03 seconds.

Fitting LFCs...
... done in 0.03 seconds.

Running Wald tests...
... done in 4.88 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast LA_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat        pvalue  \
ENSG00000121410   8.899818        0.272048  0.077084  3.529264  4.167172e-04   
ENSG00000268895   2.051674       -0.106182  0.102824 -1.032651  3.017674e-01   
ENSG00000175899  42.494688        0.857696  0.139569  6.145306  7.980942e-10   
ENSG00000245105   1.312772        0.013001  0.125083  0.103942  9.172151e-01   
ENSG00000166535   1.299172        0.023827  0.123691  0.192632  8.472470e-01   
...                    ...             ...       ...       ...           ...   
ENSG00000070476   4.463204       -0.029312  0.112871 -0.259697  7.950974e-01   
ENSG00000203995   1.254236        0.016742  0.125596  0.133303  8.939538e-01   
ENSG00000162378   8.232734       -0.061495  0.072585 -0.847209  3.968785e-01   
ENSG00000159840  37.619983        0.073926  0.066669  1.108841  2.674990e-01   
ENSG00000074755   8.015019        0.072485  0.098270  0

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 0.55 seconds.

Fitting dispersions...
... done in 4.75 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.27 seconds.

Fitting MAP dispersions...
... done in 5.05 seconds.

Fitting LFCs...
... done in 5.98 seconds.

Calculating cook's distance...
... done in 0.96 seconds.

Replacing 16 outlier genes.

Fitting dispersions...
... done in 0.03 seconds.

Fitting MAP dispersions...
... done in 0.03 seconds.

Fitting LFCs...
... done in 0.04 seconds.

Running Wald tests...
... done in 4.87 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast Reg_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat        pvalue  \
ENSG00000121410   8.899818       -0.031434  0.081043 -0.387873  6.981102e-01   
ENSG00000268895   2.051674        0.144226  0.101759  1.417335  1.563850e-01   
ENSG00000175899  42.494688        1.131906  0.138613  8.165973  3.188537e-16   
ENSG00000245105   1.312772        0.064588  0.127735  0.505637  6.131117e-01   
ENSG00000166535   1.299172       -0.065587  0.129757 -0.505458  6.132372e-01   
...                    ...             ...       ...       ...           ...   
ENSG00000070476   4.463204       -0.288935  0.116783 -2.474119  1.335653e-02   
ENSG00000203995   1.254236       -0.123394  0.133255 -0.926001  3.544452e-01   
ENSG00000162378   8.232734       -0.370068  0.075217 -4.920014  8.653823e-07   
ENSG00000159840  37.619983       -0.356773  0.067272 -5.303405  1.136622e-07   
ENSG00000074755   8.015019       -0.247818  0.101308 -

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 0.55 seconds.

Fitting dispersions...
... done in 4.71 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.27 seconds.

Fitting MAP dispersions...
... done in 4.74 seconds.

Fitting LFCs...
... done in 6.10 seconds.

Calculating cook's distance...
... done in 0.96 seconds.

Replacing 23 outlier genes.

Fitting dispersions...
... done in 0.05 seconds.

Fitting MAP dispersions...
... done in 0.05 seconds.

Fitting LFCs...
... done in 0.06 seconds.

Running Wald tests...
... done in 4.97 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast IFN_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat    pvalue  \
ENSG00000121410   8.899818       -0.205220  0.108082 -1.898749  0.057598   
ENSG00000268895   2.051674        0.057733  0.135752  0.425284  0.670630   
ENSG00000175899  42.494688       -0.233446  0.191738 -1.217526  0.223404   
ENSG00000245105   1.312772        0.013542  0.170169  0.079582  0.936570   
ENSG00000166535   1.299172       -0.075338  0.177046 -0.425527  0.670452   
...                    ...             ...       ...       ...       ...   
ENSG00000070476   4.463204       -0.390637  0.157514 -2.480008  0.013138   
ENSG00000203995   1.254236       -0.115303  0.180158 -0.640010  0.522166   
ENSG00000162378   8.232734       -0.438157  0.102312 -4.282549  0.000018   
ENSG00000159840  37.619983       -0.002833  0.089387 -0.031695  0.974715   
ENSG00000074755   8.015019       -0.452625  0.136981 -3.304290  0.000952   

                     

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 0.53 seconds.

Fitting dispersions...
... done in 5.07 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.27 seconds.

Fitting MAP dispersions...
... done in 4.74 seconds.

Fitting LFCs...
... done in 5.74 seconds.

Calculating cook's distance...
... done in 0.96 seconds.

Replacing 16 outlier genes.

Fitting dispersions...
... done in 0.04 seconds.

Fitting MAP dispersions...
... done in 0.04 seconds.

Fitting LFCs...
... done in 0.03 seconds.

Running Wald tests...
... done in 4.79 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast Angio_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat        pvalue  \
ENSG00000121410   8.899818       -0.014996  0.185931 -0.080655  9.357161e-01   
ENSG00000268895   2.051674       -0.373952  0.277992 -1.345191  1.785635e-01   
ENSG00000175899  42.494688       -2.119796  0.360039 -5.887679  3.916565e-09   
ENSG00000245105   1.312772       -0.114169  0.324557 -0.351770  7.250111e-01   
ENSG00000166535   1.299172        0.170287  0.304473  0.559285  5.759669e-01   
...                    ...             ...       ...       ...           ...   
ENSG00000070476   4.463204       -0.074680  0.277009 -0.269593  7.874736e-01   
ENSG00000203995   1.254236        0.310252  0.293438  1.057301  2.903743e-01   
ENSG00000162378   8.232734       -0.204744  0.182612 -1.121197  2.622039e-01   
ENSG00000159840  37.619983       -0.203860  0.164357 -1.240344  2.148481e-01   
ENSG00000074755   8.015019        0.060182  0.239265

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 0.55 seconds.

Fitting dispersions...
... done in 4.96 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.27 seconds.

Fitting MAP dispersions...
... done in 4.74 seconds.

Fitting LFCs...
... done in 5.91 seconds.

Calculating cook's distance...
... done in 0.96 seconds.

Replacing 15 outlier genes.

Fitting dispersions...
... done in 0.05 seconds.

Fitting MAP dispersions...
... done in 0.03 seconds.

Fitting LFCs...
... done in 0.03 seconds.

Running Wald tests...
... done in 5.01 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast Inflam_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat    pvalue  \
ENSG00000121410   8.899818       -0.005922  0.136795 -0.043294  0.965467   
ENSG00000268895   2.051674        0.104013  0.175980  0.591047  0.554489   
ENSG00000175899  42.494688       -0.299017  0.251738 -1.187813  0.234907   
ENSG00000245105   1.312772       -0.154469  0.237573 -0.650195  0.515566   
ENSG00000166535   1.299172       -0.099833  0.237490 -0.420369  0.674216   
...                    ...             ...       ...       ...       ...   
ENSG00000070476   4.463204       -0.563720  0.211865 -2.660751  0.007797   
ENSG00000203995   1.254236       -0.156244  0.243236 -0.642355  0.520643   
ENSG00000162378   8.232734       -0.396156  0.135582 -2.921899  0.003479   
ENSG00000159840  37.619983        0.037311  0.116963  0.319003  0.749724   
ENSG00000074755   8.015019       -0.480585  0.181025 -2.654804  0.007935   

                  

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["assay", "Contrast"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 0.55 seconds.

Fitting dispersions...
... done in 5.02 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.27 seconds.

Fitting MAP dispersions...
... done in 5.04 seconds.

Fitting LFCs...
... done in 6.28 seconds.

Calculating cook's distance...
... done in 0.97 seconds.

Replacing 17 outlier genes.

Fitting dispersions...
... done in 0.03 seconds.

Fitting MAP dispersions...
... done in 0.05 seconds.

Fitting LFCs...
... done in 0.05 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: Contrast Prolif_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat    pvalue  \
ENSG00000121410   8.899818       -0.503848  0.205633 -2.450224  0.014277   
ENSG00000268895   2.051674       -0.205029  0.271770 -0.754420  0.450597   
ENSG00000175899  42.494688       -1.719012  0.370766 -4.636382  0.000004   
ENSG00000245105   1.312772        0.103912  0.319890  0.324836  0.745305   
ENSG00000166535   1.299172       -0.051154  0.350714 -0.145856  0.884035   
...                    ...             ...       ...       ...       ...   
ENSG00000070476   4.463204       -0.608320  0.313775 -1.938715  0.052536   
ENSG00000203995   1.254236        0.047520  0.342024  0.138937  0.889500   
ENSG00000162378   8.232734       -0.141577  0.190036 -0.745001  0.456271   
ENSG00000159840  37.619983       -0.562453  0.171196 -3.285425  0.001018   
ENSG00000074755   8.015019       -0.667045  0.268499 -2.484347  0.012979   

                  

... done in 5.17 seconds.

