# LIANA tumor vs normal core atlas v1

## 00. Libraries

In [4]:
import numpy as np
import pandas as pd
import scanpy as sc

#import plotnine as p9

#import liana as li
import decoupler as dc
#import omnipath as op

# Import DESeq2
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

In [5]:
adata = sc.read_h5ad(
    "/data/projects/2022/CRCA/results/v1/final/h5ads/paired_tumor-adata.h5ad"
)

In [6]:
sample_key = 'sample_id'
groupby = 'cell_type_fine'
condition_key = 'sample_type'

## 01. Pseudobulk

In [7]:
pdata = dc.get_pseudobulk(
    adata,
    sample_col=sample_key,
    groups_col=groupby,
    layer='counts',
    mode='sum',
    min_cells=10,
    min_counts=10000
)
pdata

AnnData object with n_obs × n_vars = 6907 × 28135
    obs: 'medical_condition', 'cancer_type', 'sample_id', 'sample_type', 'sample_tissue', 'anatomic_region', 'anatomic_location', 'tumor_stage', 'tumor_stage_TNM', 'tumor_stage_TNM_T', 'tumor_stage_TNM_N', 'tumor_stage_TNM_M', 'tumor_size', 'tumor_dimensions', 'tumor_grade', 'histological_type', 'microsatellite_status', 'mismatch_repair_deficiency_status', 'MLH1_promoter_methylation_status', 'MLH1_status', 'PIK3CA_status', 'SMAD4_status', 'NRAS_status', 'MSH6_status', 'FBXW7_status', 'NOTCH1_status', 'MSH2_status', 'PMS2_status', 'POLE_status', 'ERBB2_status', 'STK11_status', 'HER2_status', 'CTNNB1_status', 'BRAS_status', 'patient_id', 'sex', 'age', 'ethnicity', 'treatment_status_before_resection', 'treatment_drug', 'treatment_response', 'RECIST', 'matrix_type', 'tissue_dissociation', 'tissue_processing_lab', 'hospital_location', 'country', 'NCBI_BioProject_accession', 'SOLO_doublet_status', 'cell_type_coarse', 'cell_type_fine', 'psbulk

## 02. Subset the adata to remove the cell types that do not have both tumor and normal 

In [None]:
# First, group the data by 'cell_type_fine' and check which cell types have both 'tumor' and 'normal'
valid_cell_types = []

# Iterate over unique cell types
for cell_type in adata.obs['cell_type_fine'].unique():
    # Subset the data for the current cell type
    subset = adata[adata.obs['cell_type_fine'] == cell_type]
    
    # Check if both 'tumor' and 'normal' are present
    if all(x in subset.obs['sample_type'].values for x in ['tumor', 'normal']):
        valid_cell_types.append(cell_type)

# Subset the AnnData object to keep only the valid cell types
adata_subset = adata[adata.obs['cell_type_fine'].isin(valid_cell_types)].copy()

## 03. PYDESEQ2

In [42]:
adata = adata_subset

In [49]:
dea_results = {}
quiet = True

for cell_group in pdata.obs[groupby].unique():
    try:
        # Select cell profiles
        ctdata = pdata[pdata.obs[groupby] == cell_group].copy()

        # Obtain genes that pass the edgeR-like thresholds
        # NOTE: QC thresholds might differ between cell types, consider applying them by cell type
        genes = dc.filter_by_expr(ctdata,
                                  group=condition_key,
                                  min_count=5, # a minimum number of counts in a number of samples
                                  min_total_count=10 # a minimum total number of reads across samples
                                  )

        # Filter by these genes
        ctdata = ctdata[:, genes].copy()

        # Build DESeq2 object
        # NOTE: this data is actually paired, so one could consider fitting the patient label as a confounder
        dds = DeseqDataSet(
            adata=ctdata,
            counts=pdata.X + 1,
            metadata=pdata.obs,
            design_factors="sample_type",
            ref_level=["sample_type", 'normal'], # set control as reference
            refit_cooks=True,
            quiet=quiet
        )

        # Compute LFCs
        dds.deseq2()
        # Contrast between stim and ctrl
        stat_res = DeseqStats(dds, contrast=["sample-type", 'tumor', 'normal'], quiet=quiet)
        stat_res.quiet = quiet
        # Compute Wald test
        stat_res.summary()
        # Shrink LFCs
        stat_res.lfc_shrink(coeff='sample-type_tumor_vs_normal') # {condition_key}_cond_vs_ref

        dea_results[cell_group] = stat_res.results_df

    except Exception as e:
        print(f"An error occurred for cell_type_fine '{cell_group}': {e}")
        continue


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG    10.115952       -0.397364  0.129769 -3.494330  0.000475  0.002824
AAGAB    5.653157        0.307447  0.120828  3.014928  0.002570  0.011026
AAMP    10.228345        0.142425  0.073479  2.129223  0.033236  0.082092
AARS1    3.602263        0.404288  0.122620  3.702105  0.000214  0.001484
AARSD1   6.179178        0.526740  0.182165  3.463734  0.000533  0.003069
...           ...             ...       ...       ...       ...       ...
ZXDC     4.214497        0.176858  0.162822  1.619661  0.105305  0.198635
ZYG11B   4.173594        0.227674  0.129534  2.204152  0.027514  0.070634
ZYX      8.133982       -0.126846  0.127308 -1.312831  0.189240  0.306178
ZZEF1    7.972100        0.186147  0.120949  1.931891  0.053373  0.118138
ZZZ3     7.253166        0.078211  0.135322  0.793821  0.427300  0.556253

[6585 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
AATF      4.193323        0.092994  0.099528  1.610583  0.107271  0.406204
ABCF1     6.055411        0.118523  0.189233  2.096653  0.036024  0.254859
ABHD14B  10.021402       -0.113541  0.143275 -1.837997  0.066063  0.338427
ABI1      6.827472        0.060646  0.093546  1.164917  0.244053  0.589294
ABI3      6.009743       -0.066815  0.122861 -1.349917  0.177043  0.523650
...            ...             ...       ...       ...       ...       ...
ZNF791    6.951296        0.023291  0.099741  0.517074  0.605104  0.852961
ZNF800    4.032408       -0.052592  0.105976 -1.055790  0.291064  0.637815
ZNHIT1    6.338959        0.024398  0.074259  0.506133  0.612763  0.858670
ZNHIT3    4.652724       -0.136912  0.112078 -2.050716  0.040295  0.271355
ZRANB2    9.219034        0.021414  0.088843  0.451157  0.651876  0.880016

[2357 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat        pvalue  \
AAMP      5.565646        0.232225  0.097713  2.533372  1.129709e-02   
AATF      6.089680        0.521171  0.122282  4.532162  5.838304e-06   
ABCB4     6.029107       -0.165513  0.129575 -1.415630  1.568837e-01   
ABCE1     6.658915        0.010913  0.095388  0.120352  9.042046e-01   
ABCF1    11.566777        1.159514  0.170346  7.050489  1.782902e-12   
...            ...             ...       ...       ...           ...   
ZNHIT3    6.717982       -0.165991  0.126606 -1.461071  1.439960e-01   
ZPR1      5.814931        0.399271  0.124524  3.460538  5.390970e-04   
ZRANB2   15.733565        0.127158  0.107547  1.277738  2.013417e-01   
ZRSR2     6.396979        0.790647  0.149064  5.614567  1.970548e-08   
ZSCAN18   4.998955        0.327918  0.115650  3.047279  2.309234e-03   

                 padj  
AAMP     2.522054e-02  
AATF     3.339

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
           baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG      23.619584       -0.421721  0.130795 -3.614538  0.000301  0.001635
A1BG-AS1   4.179555       -0.130336  0.151689 -1.189921  0.234077  0.359625
AAAS       8.668737       -0.195549  0.094806 -2.289665  0.022041  0.056894
AACS       5.368224       -0.037794  0.103989 -0.432461  0.665407  0.765978
AAGAB     18.671321        0.084296  0.090372  1.147974  0.250979  0.378259
...             ...             ...       ...       ...       ...       ...
ZXDC      10.878822        0.073479  0.140813  0.697434  0.485531  0.615162
ZYG11B    11.249994       -0.262868  0.114634 -2.655371  0.007922  0.024968
ZYX       41.423200       -0.019070  0.103285 -0.124702  0.900759  0.936945
ZZEF1     27.234138        0.137214  0.095925  1.616513  0.105983  0.195937
ZZZ3      19.607347       -0.210861  0.113794 -2.111061  0.034767  0.081411

[9192 rows x 6

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.06 seconds.

Fitting MAP dispersions...
... done in 0.05 seconds.

Fitting LFCs...
... done in 0.07 seconds.

  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))


Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue     padj
A1BG     4.535857   -6.832085e-07  0.003468 -0.446687  0.655101  0.99898
AAAS     6.278274    1.445992e-06  0.004438  0.710205  0.477577  0.99898
AAGAB    9.011990    6.556205e-07  0.004431  0.421971  0.673046  0.99898
AAK1    47.174157   -9.994939e-07  0.003815 -0.337818  0.735501  0.99898
AAMDC    5.851753    4.219353e-07  0.004162  0.301496  0.763036  0.99898
...           ...             ...       ...       ...       ...      ...
ZXDC     2.876576    8.202610e-07  0.005356  1.052768  0.292447  0.99898
ZYG11B   3.753308   -8.188299e-07  0.003737 -0.608614  0.542780  0.99898
ZYX     29.889419    2.700025e-06  0.005035  1.545292  0.122276  0.99898
ZZEF1    7.237230   -1.007673e-06  0.003952 -0.623604  0.532888  0.99898
ZZZ3     5.266747    9.488074e-07  0.004657  0.733290  0.463381  0.99898

[9296 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.03 seconds.

Fitting MAP dispersions...
... done in 0.03 seconds.

Fitting LFCs...
... done in 0.11 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
           baseMean  log2FoldChange     lfcSE      stat        pvalue  \
A1BG      41.818845       -0.691642  0.134558 -5.481015  4.228915e-08   
A1BG-AS1   8.224383       -0.488564  0.141914 -3.834362  1.258907e-04   
A2M        8.823571        0.390706  0.229568  2.140494  3.231488e-02   
A2M-AS1    8.949980        0.334497  0.197691  2.117937  3.418037e-02   
AAAS      13.586782        0.052667  0.090888  0.565587  5.716745e-01   
...             ...             ...       ...       ...           ...   
ZXDC      16.001711       -0.009842  0.133925 -0.081681  9.349004e-01   
ZYG11B     9.536412       -0.175674  0.133867 -1.515977  1.295253e-01   
ZYX       89.054732       -0.216390  0.098271 -2.499083  1.245153e-02   
ZZEF1     34.874862        0.107308  0.089159  1.212944  2.251511e-01   
ZZZ3      26.397629       -0.185749  0.111203 -1.540645  1.234033e-01   

                  padj  
A1BG      4.752517e-07  


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
              baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
AAK1         13.994181       -0.043511  0.170929 -0.424000  0.671566  0.872989
AAMP          6.889637        0.062125  0.163284  0.625313  0.531766  0.805467
AATF          7.349466        0.004439  0.212424  0.046521  0.962895  0.990462
ABCE1         4.657733       -0.061625  0.185665 -0.634564  0.525713  0.803409
ABCF1         9.339632       -0.044152  0.226654 -0.509317  0.610530  0.845961
...                ...             ...       ...       ...       ...       ...
ZRANB2       10.264655       -0.102968  0.170574 -1.018627  0.308380  0.640808
ZSCAN16-AS1   4.092581       -0.085982  0.250824 -0.983436  0.325393  0.659017
ZWILCH        4.100983       -0.118528  0.232235 -1.171133  0.241545  0.581971
ZWINT        18.622170       -0.090583  0.182260 -0.874016  0.382110  0.708483
ZYX          14.639871        0.004795  0.218967  0.000507

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.05 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     7.333697        0.033259  0.567511  0.684194  0.493853  0.895973
A2M      2.129151       -1.394512  0.635644 -2.872128  0.004077  0.455115
A4GALT   0.762575       -0.003692  0.801334 -0.168483  0.866203  0.972150
AACS     0.750616        0.022820  1.028408  0.691842  0.489037  0.892541
AAGAB    2.437998        0.024802  0.662699  0.624668  0.532189  0.908599
...           ...             ...       ...       ...       ...       ...
ZSCAN2   1.819107        0.026087  0.688492  0.691761  0.489087  0.892541
ZSWIM6   2.610716        0.030840  0.832722  1.041069  0.297843  0.859198
ZSWIM8   2.275447        0.038967  0.676157  0.977202  0.328469  0.859198
ZYX     15.114903       -0.046224  0.448954 -0.772359  0.439902  0.884399
ZZEF1    2.186041       -0.013492  0.635967 -0.358888  0.719679  0.946172

[6564 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     5.248252       -0.014235  0.269037 -0.315304  0.752531  0.974720
A2M     19.503697        0.003085  0.303391  0.031116  0.975177  0.995714
AAAS     3.363143        0.011020  0.287842  0.252328  0.800787       NaN
AAK1     5.217744        0.007830  0.265924  0.163720  0.869952  0.986536
AAMP     8.826078        0.036894  0.237573  0.647649  0.517212  0.909678
...           ...             ...       ...       ...       ...       ...
ZRANB2   7.574791       -0.059287  0.269964 -1.187387  0.235075  0.767894
ZSWIM6   5.547406        0.040702  0.287653  0.877465  0.380234  0.853157
ZSWIM7   6.567832       -0.111529  0.258954 -1.606495  0.108165  0.600366
ZWINT    3.565633        0.856119  0.772626  2.616345  0.008888       NaN
ZYX     31.902204       -0.029391  0.257815 -0.626986  0.530668  0.909725

[5262 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.08 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat        pvalue  \
A1BG      6.156174        0.270461  0.208089  1.532120  1.254929e-01   
A2M     756.133496        0.118896  0.069123  0.755951  4.496783e-01   
A4GALT   27.217600       -0.548728  0.082871 -6.736613  1.621213e-11   
AAAS      6.229563        0.007991  0.120459  0.073299  9.415685e-01   
AAGAB    10.035727        0.261421  0.102114  2.693987  7.060290e-03   
...            ...             ...       ...       ...           ...   
ZXDC      5.859225       -0.185179  0.172597 -1.271359  2.036009e-01   
ZYG11B   14.674599        0.296958  0.106207  2.964369  3.033041e-03   
ZYX      50.490876        0.424230  0.097858  4.536315  5.724573e-06   
ZZEF1    10.948067        0.048202  0.102012  0.500812  6.165037e-01   
ZZZ3     10.538489       -0.093715  0.138640 -0.748647  4.540702e-01   

                padj  
A1BG    1.892999e-01  
A2M     5.389594

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
           baseMean  log2FoldChange     lfcSE      stat        pvalue  \
A2M      119.151296        0.791574  0.165996  5.067097  4.039279e-07   
A4GALT    10.859400       -0.476007  0.119161 -4.214043  2.508401e-05   
AAK1       9.507699        0.281382  0.174526  1.868406  6.170547e-02   
AAMDC     12.824456       -0.488358  0.110551 -4.618702  3.861479e-06   
AAMP       9.079109        0.068792  0.114658  0.639725  5.223513e-01   
...             ...             ...       ...       ...           ...   
ZNRD2      6.132733       -0.164607  0.176258 -1.066621  2.861430e-01   
ZRANB2    13.197107        0.258938  0.126894  2.207048  2.731068e-02   
ZSCAN18    6.522799       -0.215869  0.140617 -1.690711  9.089201e-02   
ZSWIM7     5.776133        0.036283  0.142580  0.277458  7.814285e-01   
ZYX       11.186940        0.938186  0.155567  6.302895  2.921377e-10   

                 padj  
A2M      3.462129e-06  
A4

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat        pvalue  \
A2M     369.094383        0.535465  0.084173  6.537862  6.240459e-11   
A4GALT   16.926022       -0.372445  0.088853 -4.335624  1.453469e-05   
AAGAB     7.900357        0.247394  0.114487  2.351227  1.871160e-02   
AAK1     18.745680        0.008815  0.114650  0.078133  9.377225e-01   
AAMDC    22.809150       -0.055669  0.092634 -0.643463  5.199239e-01   
...            ...             ...       ...       ...           ...   
ZXDC      4.345639       -0.057120  0.175471 -0.411473  6.807258e-01   
ZYG11B    8.905302        0.111318  0.119501  1.021977  3.067916e-01   
ZYX      29.362274        0.514644  0.123192  4.382352  1.174048e-05   
ZZEF1     7.962101        0.275074  0.118290  2.494687  1.260683e-02   
ZZZ3      7.933271       -0.052945  0.140899 -0.425081  6.707774e-01   

                padj  
A2M     8.130132e-10  
A4GALT  6.905510

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.03 seconds.

Fitting MAP dispersions...
... done in 0.03 seconds.

Fitting LFCs...
... done in 0.06 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat        pvalue  \
A1CF    15.324505       -2.781472  0.355644 -7.976912  1.500394e-15   
AAAS     6.308697       -0.348159  0.306056 -1.397129  1.623748e-01   
AACS     7.357860       -1.044126  0.280716 -4.070759  4.686023e-05   
AAGAB    9.309315       -0.540095  0.298086 -2.138504  3.247589e-02   
AAK1    17.125703       -2.679404  0.324170 -8.480870  2.235163e-17   
...           ...             ...       ...       ...           ...   
ZXDC     6.094636       -0.794107  0.374768 -2.534947  1.124642e-02   
ZYG11B   6.828911       -1.417748  0.326871 -4.649693  3.324299e-06   
ZYX     17.978192        1.162855  0.313092  4.059231  4.923449e-05   
ZZEF1    8.694799       -1.434632  0.330509 -4.702865  2.565360e-06   
ZZZ3     8.829649       -1.231576  0.335405 -3.964347  7.359706e-05   

                padj  
A1CF    4.970809e-13  
AAAS    2.968889e-01  
AACS 

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.31 seconds.

Fitting MAP dispersions...
... done in 0.30 seconds.

Fitting LFCs...
... done in 0.60 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE       stat        pvalue  \
A1BG      8.495918        0.040791  0.261469   0.168553  8.661482e-01   
A1CF    174.511623       -1.313590  0.120084 -11.131844  8.780038e-29   
A2M      21.554292        1.588918  0.247015   5.116586  3.111148e-07   
AAAS     78.874927        0.302120  0.077738   3.874477  1.068541e-04   
AACS     90.525085        0.153469  0.072330   2.103540  3.541859e-02   
...            ...             ...       ...        ...           ...   
ZXDC     71.055593       -0.271008  0.129806  -2.316644  2.052314e-02   
ZYG11B  100.602307       -0.605827  0.109189  -5.722947  1.046921e-08   
ZYX     192.482309        1.047915  0.094079  11.585295  4.892775e-31   
ZZEF1   215.990237       -1.131455  0.123374  -9.439674  3.739427e-21   
ZZZ3    100.680081       -0.168838  0.114748  -1.485369  1.374460e-01   

                padj  
A1BG    8.928721e-01  
A1CF

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     11.176723       -0.354108  0.177144 -2.337807  0.019397  0.047542
A2M     530.399315        0.099310  0.130870  0.776457  0.437479  0.556418
A4GALT   15.421773       -0.213011  0.111209 -2.075053  0.037982  0.081639
AAAS      4.449148       -0.162886  0.135839 -1.347821  0.177716  0.276217
AAGAB     5.534790        0.259373  0.159242  1.925040  0.054224  0.108514
...            ...             ...       ...       ...       ...       ...
ZSWIM8    5.169881        0.122269  0.120185  1.139058  0.254679  0.366716
ZYG11B    7.884937        0.062562  0.151799  0.492237  0.622552  0.720588
ZYX      14.525373        0.099574  0.136649  0.827030  0.408220  0.527077
ZZEF1     5.656695       -0.029968  0.166147 -0.229463  0.818509  0.871110
ZZZ3      6.475336       -0.309444  0.211948 -1.876177  0.060631  0.118833

[6674 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat        pvalue      padj
A1BG      9.354417       -0.104123  0.193958 -0.622488  5.336207e-01  0.617812
A2M     270.428245        0.290496  0.154981  1.935082  5.298027e-02  0.088280
A4GALT   10.538560       -0.110016  0.134071 -0.877730  3.800904e-01  0.469272
AAAS      4.507892       -0.240043  0.132397 -1.921660  5.464861e-02  0.090715
AAGAB     5.729318        0.742400  0.152804  5.163508  2.423642e-07  0.000001
...            ...             ...       ...       ...           ...       ...
ZSWIM8    4.566298        0.288758  0.128631  2.398653  1.645550e-02  0.031642
ZYG11B    9.013077        0.232060  0.142902  1.776834  7.559560e-02  0.120425
ZYX      19.504660        0.343774  0.143478  2.570029  1.016899e-02  0.020733
ZZEF1     5.506907        0.140527  0.171840  0.936262  3.491383e-01  0.437512
ZZZ3      7.494115       -0.309891  0.188624 -1.904945  5.

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat        pvalue  \
A1BG     10.388300        1.288611  0.214322  6.318796  2.636093e-10   
A2M     350.974703        0.357629  0.174707  2.261936  2.370138e-02   
A4GALT   36.304393       -0.624828  0.086370 -7.341392  2.113833e-13   
AAAS      8.829524       -0.308636  0.091069 -3.434332  5.940162e-04   
AACS      5.709833       -0.096924  0.108888 -0.922021  3.565174e-01   
...            ...             ...       ...       ...           ...   
ZXDC      5.590442       -0.510622  0.200264 -2.792238  5.234486e-03   
ZYG11B   17.737810       -0.221536  0.127871 -1.863217  6.243168e-02   
ZYX      48.225056        0.136990  0.107997  1.388071  1.651155e-01   
ZZEF1     9.512959       -0.052838  0.123016 -0.454335  6.495880e-01   
ZZZ3     10.864401       -0.324149  0.140163 -2.445269  1.447441e-02   

                padj  
A1BG    1.574965e-09  
A2M     4.076949

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
              baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
AAMP          7.975838        0.105960  0.097205  1.281301  0.200088  0.372029
AASDHPPT      5.272838       -0.129557  0.115538 -1.364159  0.172517  0.339819
AATF          8.287190        0.357600  0.139417  3.064520  0.002180  0.014228
ABCE1         6.829952       -0.019667  0.123241 -0.213244  0.831136  0.908368
ABCF1        13.540549        0.579207  0.159519  4.150758  0.000033  0.000502
...                ...             ...       ...       ...       ...       ...
ZNRD2         5.571530        0.014643  0.134906  0.161616  0.871609  0.930425
ZRANB2       16.181996        0.054521  0.114234  0.605960  0.544542  0.708701
ZSCAN16-AS1   6.581030        0.022172  0.145300  0.233584  0.815308  0.900467
ZWINT        11.432035        0.116238  0.136946  1.129413  0.258724  0.439806
ZYX           6.868021       -0.054344  0.144570 -0.539584

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(


An error occurred for cell_type_fine 'Goblet': Factors should take at least two values, but sample-type takes the single value '['normal']'.


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
AAK1      8.408270       -0.033137  0.316203 -0.803131  0.421899  0.819421
ABCC1     2.695502        0.013539  0.428803  0.550032  0.582297  0.878839
ABRACL    3.266191       -0.028590  0.324962 -0.765726  0.443839  0.827527
ACAP1     7.540736        0.006889  0.351075  0.156131  0.875930  0.966204
ACTB     51.944295       -0.014348  0.327652 -0.381994  0.702466  0.908499
...            ...             ...       ...       ...       ...       ...
ZFP36    35.204800       -0.051918  0.304490 -1.079479  0.280374  0.739960
ZFP36L1  14.831141       -0.026011  0.294855 -0.411347  0.680818  0.906267
ZFP36L2  33.673758       -0.064078  0.306453 -1.195072  0.232059  0.734451
ZNF207    3.482586        0.017626  0.357048  0.376233  0.706744  0.908499
ZNF331   13.414785       -0.011511  0.337647 -0.302342  0.762392  0.922279

[965 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.03 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     16.583014       -0.398068  0.148573 -2.990521  0.002785  0.009064
A2M     203.733924       -0.170278  0.126616 -1.776533  0.075645  0.141248
AAAS      6.979683        0.092645  0.121797  0.857597  0.391115  0.511812
AAGAB     7.784864        0.098754  0.118458  0.955031  0.339562  0.460310
AAK1     36.135898       -0.159908  0.105963 -1.674909  0.093952  0.168381
...            ...             ...       ...       ...       ...       ...
ZXDC      6.906194       -0.010368  0.170529 -0.074928  0.940272  0.959013
ZYG11B   13.282423       -0.161043  0.126830 -1.444297  0.148655  0.242554
ZYX      70.220674        0.054015  0.103724  0.261868  0.793423  0.854810
ZZEF1    15.228744       -0.329779  0.131254 -2.801549  0.005086  0.015075
ZZZ3      8.446169       -0.125698  0.158913 -0.962417  0.335840  0.456973

[8341 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(


An error occurred for cell_type_fine 'Macrophage cycling': Factors should take at least two values, but sample-type takes the single value '['tumor']'.


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
           baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG       7.273878       -0.274401  0.176795 -1.985188  0.047124  0.120595
AAK1       7.011555        0.013140  0.134157  0.120800  0.903850  0.950277
AAMDC      5.337936       -0.219008  0.131715 -1.991510  0.046425  0.119045
AAMP       6.217452        0.116475  0.117825  1.145752  0.251898  0.409581
AASDHPPT   4.558226       -0.166799  0.133292 -1.507190  0.131762  0.257453
...             ...             ...       ...       ...       ...       ...
ZRANB1     6.458775        0.506282  0.218823  2.929058  0.003400  0.015197
ZRANB2    14.073694       -0.234042  0.114680 -2.313969  0.020669  0.062932
ZSCAN18    4.578245       -0.190199  0.147623 -1.565414  0.117486  0.237371
ZSWIM7     5.416707       -0.181009  0.114391 -1.785164  0.074235  0.170077
ZYX        4.432328        0.202358  0.178767  1.497218  0.134336  0.260889

[4156 rows x 6

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.11 seconds.

Fitting MAP dispersions...
... done in 0.11 seconds.

Fitting LFCs...
... done in 0.13 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
           baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG       9.454309       -0.207455  0.186919 -1.448889  0.147369  0.284881
A2M        6.657321        0.212783  0.243639  1.327774  0.184253  0.333556
AAK1      16.572670       -0.063354  0.134506 -0.563512  0.573087  0.716506
AAMP      11.987467       -0.142449  0.114924 -1.391016  0.164221  0.308422
AASDHPPT   6.580398       -0.065030  0.123996 -0.609078  0.542473  0.689534
...             ...             ...       ...       ...       ...       ...
ZSWIM7    13.225151       -0.230544  0.111120 -2.294516  0.021761  0.066779
ZSWIM8     5.351779        0.285790  0.172195  2.014908  0.043914  0.116481
ZYG11B     6.149222       -0.125428  0.158901 -0.985646  0.324307  0.489444
ZYX       51.009569       -0.227659  0.116083 -2.152520  0.031356  0.089107
ZZEF1      7.081415       -0.208633  0.155465 -1.605166  0.108457  0.228747

[6070 rows x 6

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
AAK1     6.140086    1.385606e-06  0.004873  1.148758  0.250656  0.925071
ABCF1    2.307554    3.332825e-07  0.004378  0.350439  0.726009  0.952466
ABHD17A  3.463129   -7.932721e-07  0.003845 -0.567002  0.570713  0.943437
ABI3     2.501600   -1.532064e-06  0.003556 -1.023412  0.306113  0.925071
ABLIM1   2.556299    6.941646e-07  0.004224  0.216917  0.828273  0.974544
...           ...             ...       ...       ...       ...       ...
ZNF331   7.383909   -2.253031e-07  0.003709 -0.197064  0.843778  0.974544
ZNF609   2.622510    3.091280e-07  0.004381  0.393038  0.694291  0.945738
ZNF683   0.939220    6.283962e-07  0.006776  0.844914  0.398159  0.933435
ZRANB2   2.805317   -7.442235e-08  0.004165 -0.059788  0.952324  0.987092
ZYX      1.831189   -3.718203e-07  0.004094 -0.326266  0.744223  0.956649

[1959 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(


An error occurred for cell_type_fine 'NKT': Factors should take at least two values, but sample-type takes the single value '['tumor']'.


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat        pvalue  \
ABCA1   13.369542       -0.454280  0.283966 -1.791658  7.318771e-02   
ABCF1    6.817342       -0.989052  0.333378 -3.357000  7.879304e-04   
ABCG1    6.380316       -0.318412  0.309457 -1.176435  2.394210e-01   
ABHD2   15.055363       -1.903781  0.320623 -6.082305  1.184672e-09   
ABHD3    7.508411       -1.285247  0.428902 -3.327244  8.770948e-04   
...           ...             ...       ...       ...           ...   
ZNF710   5.928495       -1.052331  0.560765 -2.368987  1.783690e-02   
ZNFX1    8.819475       -0.830155  0.287459 -3.128427  1.757449e-03   
ZNHIT1   4.838359       -0.016228  0.343847 -0.058316  9.534972e-01   
ZSWIM6  38.313062       -0.833090  0.518538 -2.144988  3.195382e-02   
ZYX     14.526218       -0.481344  0.221980 -2.310132  2.088085e-02   

                padj  
ABCA1   1.102011e-01  
ABCF1   2.400416e-03  
ABCG1

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     16.281936       -0.041084  0.169976 -0.265550  0.790585  0.839372
A2M     429.952066        0.189541  0.100643  1.560227  0.118706  0.176252
A4GALT   18.884899       -0.084340  0.087122 -0.998971  0.317809  0.404037
AAAS      6.640650       -0.108161  0.100820 -1.114933  0.264879  0.347443
AAGAB     5.854920        0.374620  0.124753  3.153446  0.001614  0.003855
...            ...             ...       ...       ...       ...       ...
ZXDC      4.422129        0.082926  0.189364  0.515517  0.606192  0.680434
ZYG11B   12.932549        0.156348  0.123262  1.364911  0.172281  0.241742
ZYX      37.124380        0.272774  0.101579  2.776289  0.005498  0.011572
ZZEF1     8.432914        0.125970  0.142392  0.982413  0.325897  0.411995
ZZZ3      7.633987       -0.079156  0.135791 -0.628994  0.529353  0.612036

[8631 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.07 seconds.

Fitting MAP dispersions...
... done in 0.07 seconds.

Fitting LFCs...
... done in 0.12 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
           baseMean  log2FoldChange     lfcSE      stat        pvalue  \
A1BG      95.839386       -0.248522  0.138951 -1.989612  4.663374e-02   
A1BG-AS1   5.479284        0.063204  0.137268  0.562429  5.738238e-01   
A2M        7.329310        0.702567  0.226967  3.456040  5.481738e-04   
AAAS      20.041605        0.019487  0.074489  0.272190  7.854758e-01   
AACS      13.111646        0.672522  0.104654  6.703957  2.028495e-11   
...             ...             ...       ...       ...           ...   
ZXDC       9.487164        0.354412  0.170706  2.559268  1.048928e-02   
ZYG11B    12.449299        0.147648  0.131406  1.314738  1.885979e-01   
ZYX       10.106099        0.169457  0.126722  1.540163  1.235206e-01   
ZZEF1     23.476508        0.546314  0.126036  4.637959  3.518671e-06   
ZZZ3      15.526130       -0.008381  0.137647 -0.063443  9.494136e-01   

                  padj  
A1BG      8.608572e-02  


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.11 seconds.

Fitting MAP dispersions...
... done in 0.10 seconds.

Fitting LFCs...
... done in 0.15 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
           baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG      33.239947        0.694719  0.287881  2.860202  0.004234  0.015036
A1BG-AS1   2.209594        0.736894  0.390866  2.206755  0.027331  0.067888
A2M        3.775827       -0.107127  0.399754 -0.455761  0.648562  0.745047
AAAS       8.364080        0.020792  0.209565  0.117824  0.906207  0.936515
AACS       6.409948        0.195783  0.210676  1.048501  0.294408  0.416362
...             ...             ...       ...       ...       ...       ...
ZXDC       3.122345        0.486001  0.380852  1.589968  0.111842  0.199727
ZYG11B     5.098715        0.323000  0.304091  1.320317  0.186729  0.295324
ZYX        5.656904       -1.045990  0.271816 -4.138484  0.000035  0.000246
ZZEF1     10.332299        0.353032  0.255447  1.694541  0.090162  0.169807
ZZZ3       5.859211        0.327552  0.315340  1.326553  0.184657  0.292980

[10413 rows x 

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.16 seconds.

Fitting MAP dispersions...
... done in 0.16 seconds.

Fitting LFCs...
... done in 0.17 seconds.

  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))


Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
        baseMean  log2FoldChange     lfcSE      stat    pvalue  padj
AARS1   2.392417    3.822235e-07  0.004250  0.705637  0.480414   NaN
ACADVL  6.008091    1.278185e-06  0.005968  1.467625  0.142206   NaN
ACBD3   2.989833    4.457859e-07  0.004437  0.841304  0.400177   NaN
ACIN1   1.928503    2.141197e-07  0.004014  0.460204  0.645370   NaN
ACSS1   3.167510   -4.522392e-07  0.002525 -0.622756  0.533445   NaN
...          ...             ...       ...       ...       ...   ...
ZNF638  3.038820   -2.344716e-07  0.002975 -0.321963  0.747481   NaN
ZNF655  1.477792    1.300573e-07  0.003630  0.260546  0.794443   NaN
ZNF664  1.268058    2.425407e-07  0.006606  0.730305  0.465204   NaN
ZRANB1  1.375864   -1.024205e-07  0.002946 -0.232459  0.816181   NaN
ZRANB2  3.283823   -2.071484e-07  0.003086 -0.262160  0.793198   NaN

[1187 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.03 seconds.

Fitting MAP dispersions...
... done in 0.03 seconds.

Fitting LFCs...
... done in 0.04 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE       stat        pvalue  \
A1BG    11.703373       -2.633520  0.271100  -9.808222  1.037810e-22   
AAAS     5.374672        0.459452  0.172889   2.791648  5.244032e-03   
AACS     3.270177        0.566415  0.200925   2.927175  3.420566e-03   
AAGAB    4.240917        0.996791  0.247783   4.293007  1.762695e-05   
AAK1     8.071016        0.658291  0.251296   2.990934  2.781260e-03   
...           ...             ...       ...        ...           ...   
ZSWIM6  10.442405       -2.419924  0.422730  -6.141032  8.198694e-10   
ZSWIM7   4.692425       -0.644678  0.179352  -3.828100  1.291361e-04   
ZWILCH   3.587054        3.003671  0.275335  10.670494  1.398809e-26   
ZWINT   11.019543        3.914920  0.287773  13.065487  5.185166e-39   
ZZEF1    5.273741       -0.133347  0.247499  -0.646480  5.179683e-01   

                padj  
A1BG    2.919673e-21  
AAAS    1.036469

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     8.715380       -0.225985  0.226953 -1.438433  0.150311  0.282784
A2M     56.175388        0.432413  0.272283  2.268525  0.023297  0.071784
AAK1     9.740288        0.029223  0.187485  0.216289  0.828762  0.898721
AAMDC   16.207032       -0.445155  0.111331 -4.304783  0.000017  0.000214
AAMP    12.614577        0.096490  0.113924  0.948072  0.343093  0.504227
...           ...             ...       ...       ...       ...       ...
ZSWIM6  29.494270       -0.056011  0.248197 -0.398781  0.690054  0.805194
ZSWIM7   8.132284       -0.364587  0.120901 -3.301042  0.000963  0.005940
ZYG11B   6.531349        0.082018  0.191487  0.593688  0.552721  0.698077
ZYX      8.478426        0.598915  0.232107  3.168461  0.001532  0.008576
ZZZ3     6.289510       -0.059969  0.186629 -0.445059  0.656277  0.780864

[5691 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.04 seconds.

Fitting MAP dispersions...
... done in 0.04 seconds.

Fitting LFCs...
... done in 0.04 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     9.389820       -0.081920  0.246680 -0.659299  0.509704  0.714435
AAAS     4.863333       -0.308266  0.209413 -1.888394  0.058973  0.201656
AAGAB   10.473716       -0.319054  0.178281 -2.145628  0.031903  0.136778
AAK1    64.949927        0.179821  0.159916  1.451228  0.146716  0.352246
AAMDC    4.720886        0.215826  0.265719  1.622167  0.104768  0.291289
...           ...             ...       ...       ...       ...       ...
ZXDC     4.131735       -0.026796  0.292922 -0.258693  0.795872  0.898592
ZYG11B   5.087355       -0.153105  0.251278 -1.187819  0.234905  0.462652
ZYX     18.899512       -0.355294  0.191752 -2.354525  0.018546  0.096398
ZZEF1   11.944357        0.094887  0.207769  0.715855  0.474081  0.687236
ZZZ3     7.298469       -0.202723  0.220143 -1.503362  0.132746  0.332109

[8888 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.03 seconds.

Fitting MAP dispersions...
... done in 0.04 seconds.

Fitting LFCs...
... done in 0.03 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE       stat        pvalue  \
A1CF    24.648026       -1.459383  0.139071 -10.624289  2.297564e-26   
AAAS     9.851477        0.259947  0.101995   2.644620  8.178271e-03   
AACS    12.205401        0.006289  0.102713   0.063597  9.492911e-01   
AAGAB   16.720470       -0.079220  0.104355  -0.783031  4.336092e-01   
AAK1    33.057164       -0.981507  0.144963  -6.983398  2.881246e-12   
...           ...             ...       ...        ...           ...   
ZXDC    10.993741       -0.153037  0.155694  -1.059725  2.892696e-01   
ZYG11B  16.778708       -0.886069  0.137418  -6.584333  4.569310e-11   
ZYX     25.818218        1.059756  0.100613  10.722386  7.991471e-27   
ZZEF1   37.771036       -1.407116  0.151364  -9.463251  2.985149e-21   
ZZZ3    15.563758       -0.158693  0.156325  -1.096861  2.727020e-01   

                padj  
A1CF    6.767076e-25  
AAAS    1.435824

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(


An error occurred for cell_type_fine 'cDC progenitor': Factors should take at least two values, but sample-type takes the single value '['tumor']'.


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG     4.787591       -0.350580  0.255676 -1.701757  0.088801  0.187043
A2M     10.866504        0.686684  0.485000  2.311943  0.020781  0.063291
AAAS     3.456737        0.601230  0.252616  2.773564  0.005545  0.022588
AAGAB    3.063740        0.429629  0.282741  1.940341  0.052338  0.127047
AAK1     6.290733        0.942046  0.313284  3.481769  0.000498  0.003489
...           ...             ...       ...       ...       ...       ...
ZSWIM7   5.970334       -0.375593  0.169432 -2.458299  0.013960  0.046659
ZUP1     3.128779       -0.095592  0.237135 -0.524097  0.600211  0.729218
ZYX     27.331211       -0.714867  0.179711 -4.228303  0.000024  0.000311
ZZEF1    3.777828        0.519555  0.288726  2.253994  0.024197  0.071140
ZZZ3     2.709224        0.109013  0.340277  0.490076  0.624080  0.746209

[6003 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
              baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG          6.884660       -0.623666  0.166156 -4.163943  0.000031  0.000297
A2M           9.704924       -0.127987  0.183907 -0.992698  0.320857  0.469675
AAAS          3.858986       -0.211445  0.164735 -1.642669  0.100451  0.198373
AAK1          6.904231        0.130290  0.135391  1.186681  0.235353  0.376758
AAMP          9.701489       -0.103882  0.098331 -1.181809  0.237281  0.379368
...                ...             ...       ...       ...       ...       ...
ZRANB2        9.927112       -0.060811  0.109779 -0.643497  0.519902  0.651251
ZSCAN16-AS1   4.637662       -0.068644  0.170462 -0.566506  0.571050  0.694664
ZSWIM6        7.674113        0.792275  0.225137  4.080986  0.000045  0.000402
ZSWIM7        9.368592       -0.472435  0.107767 -4.667455  0.000003  0.000044
ZYX          28.270835       -0.237573  0.149989 -1.926318

  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
  self._fit_parametric_dispersion_trend(vst)
Fitting dispersions...
... done in 0.01 seconds.

Fitting MAP dispersions...
... done in 0.01 seconds.

Fitting LFCs...
... done in 0.01 seconds.



Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
          baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
AAK1      8.458190        0.129180  0.145960  1.180259  0.237897  0.375824
ABHD17A   5.516328       -0.270806  0.152324 -2.204625  0.027480  0.080332
ABI3      5.011926       -0.727598  0.181359 -4.456490  0.000008  0.000181
ABLIM1    4.262591        0.048729  0.183377  0.429862  0.667296  0.782611
ABRACL    8.742010       -0.229255  0.129397 -2.111716  0.034711  0.094948
...            ...             ...       ...       ...       ...       ...
ZNF207    6.482335        0.452844  0.134906  3.774793  0.000160  0.001846
ZNF292    5.127079        0.192800  0.176433  1.578412  0.114471  0.220172
ZNF331   15.417421       -0.006027  0.189746 -0.053296  0.957496  0.969913
ZNHIT1    4.311618       -0.006183  0.127766 -0.059619  0.952459  0.969913
ZRANB2    5.207061        0.049566  0.157800  0.447513  0.654505  0.772656

[1406 rows x 6 columns]


  dds = DeseqDataSet(
  dds = DeseqDataSet(
                be converted to hyphens ('-').
  dds = DeseqDataSet(
Fitting dispersions...
... done in 0.02 seconds.

Fitting MAP dispersions...
... done in 0.02 seconds.

Fitting LFCs...
... done in 0.02 seconds.

  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))
  counts - (counts + size) / (1 + size * np.exp(-xbeta - offset))


Shrunk log2 fold change & Wald test p-value: sample-type tumor vs normal
         baseMean  log2FoldChange     lfcSE      stat    pvalue      padj
A1BG    10.487433    3.189644e-07  0.005136  0.179797  0.857312  0.996537
A2M      2.292853    1.222626e-07  0.005251  0.213739  0.830751  0.996537
AAAS     1.965413   -7.735946e-07  0.005376 -0.639369  0.522583  0.996537
AAK1     2.856669    1.147194e-06  0.006121  1.005784  0.314520  0.996537
AAMP     4.547064   -8.830468e-07  0.005413 -0.474306  0.635282  0.996537
...           ...             ...       ...       ...       ...       ...
ZSWIM7   3.031376   -1.263589e-06  0.005398 -0.843486  0.398957  0.996537
ZUP1     1.826346    2.251760e-08  0.005437  0.018055  0.985595  0.997666
ZYX      6.310446   -2.362966e-06  0.003995 -1.282551  0.199650  0.996537
ZZEF1    2.224566    7.403799e-07  0.005801  0.636330  0.524561  0.996537
ZZZ3     1.642247    1.054006e-06  0.006301  1.065069  0.286845  0.996537

[6284 rows x 6 columns]


## 04. Concatenate results across cell types

In [50]:
dea_df = pd.concat(dea_results)
dea_df = dea_df.reset_index().rename(columns={'level_0': groupby,'level_1':'index'}).set_index('index')
dea_df.head()

Unnamed: 0_level_0,cell_type_fine,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A1BG,B cell activated,10.115952,-0.397364,0.129769,-3.49433,0.000475,0.002824
AAGAB,B cell activated,5.653157,0.307447,0.120828,3.014928,0.00257,0.011026
AAMP,B cell activated,10.228345,0.142425,0.073479,2.129223,0.033236,0.082092
AARS1,B cell activated,3.602263,0.404288,0.12262,3.702105,0.000214,0.001484
AARSD1,B cell activated,6.179178,0.52674,0.182165,3.463734,0.000533,0.003069


## 05. Save Differential Expression results

In [55]:
dea_df.to_csv("dea.csv")

## 06. Subset the adata to those and normalize the counts

In [52]:
adata = adata[adata.obs[condition_key]=='tumor'].copy()

In [53]:
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

## 07. Combine the DEA results with the ligand-receptor interactions

In [None]:
lr_res = li.multi.df_to_lr(adata,
                           dea_df=dea_df,
                           resource_name='consensus', # NOTE: uses HUMAN gene symbols!
                           expr_prop=0.1, # calculated for adata as passed - used to filter interactions
                           groupby=groupby,
                           stat_keys=['stat', 'pvalue', 'padj'],
                           use_raw=False,
                           complex_col='stat', # NOTE: we use the Wald Stat to deal with complexes
                           verbose=True,
                           return_all_lrs=False,
                           )

## 08. Visualize distribution of the interactions

In [None]:
lr_res = lr_res.sort_values("interaction_stat", ascending=False)
lr_res['interaction_stat'].hist(bins=50)

## 09. Top interactions Wald statistic

In [None]:
li.pl.tileplot(liana_res=lr_res,
               fill = 'expr',
               label='padj',
               label_fun = lambda x: '*' if x < 0.05 else np.nan,
               top_n=15,
               orderby = 'interaction_stat',
               orderby_ascending = False,
               orderby_absolute = False,
               source_title='Ligand',
               target_title='Receptor',
               )

## 10. Ligand-Receptor Plot

In [None]:
plot = li.pl.dotplot(liana_res=lr_res,
                     colour='interaction_stat',
                     size='ligand_pvalue',
                     inverse_size=True,
                     orderby='interaction_stat',
                     orderby_ascending=False,
                     orderby_absolute=True,
                     top_n=10,
                     size_range=(0.5, 4)
                     )

# customize plot
(
    plot
    + p9.theme_bw(base_size=14)
    # fill cmap blue to red, with 0 the middle
    + p9.scale_color_cmap('RdBu_r', midpoint=0, limits=(-10, 10))
    # rotate x
    + p9.theme(axis_text_x=p9.element_text(angle=90), figure_size=(11, 6))

)