In [1]:
import scanpy as sc
import pandas as pd
import random
import numpy as np
import matplotlib as plt
import gc 
import ctypes
import scvi



In [2]:
adata = sc.read_h5ad('../data/luca_query_reannotated.h5ad')

In [3]:
adata.obs

Unnamed: 0,sample,uicc_stage,ever_smoker,age,donor_id,origin,dataset,ann_fine,cell_type_predicted,doublet_status,...,IFN_TAMs,Reg_TAMs,Inflam_TAMs,LA_TAMs,Angio_TAMs,RTM_TAMs,Prolif_TAMs,Subtype,Projection_CellType,ident
001C_AAACCTGTCAACACCA-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage alveolar,Macrophage,singlet,...,0.353933,0.571664,0.298638,0.541069,0.427279,0.565822,0.401695,Reg_TAMs,RTM_TAMs,local
001C_AAACGGGAGGCTCATT-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage,Macrophage,singlet,...,0.477315,0.451494,0.370148,0.492239,0.537751,0.511095,0.348617,Angio_TAMs,LA_TAMs,local
001C_AAACGGGGTATAATGG-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage alveolar,Macrophage,singlet,...,0.387389,0.497444,0.282365,0.403790,0.443018,0.548354,0.328568,RTM_TAMs,RTM_TAMs,local
001C_AAACGGGTCACAAACC-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage,Macrophage,singlet,...,0.333177,0.592321,0.262999,0.594631,0.444907,0.450925,0.406192,LA_TAMs,LA_TAMs,local
001C_AAAGATGAGTGCTGCC-0,Adams_Kaminski_2020_001C,non-cancer,no,22.0,Adams_Kaminski_2020_001C,normal,Adams_Kaminski_2020,Macrophage alveolar,Macrophage,singlet,...,0.358754,0.648105,0.282604,0.482257,0.408499,0.454457,0.291259,Reg_TAMs,RTM_TAMs,local
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTCAAGCTGTCTA-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.551482,0.640531,0.489101,0.623875,0.525708,0.514441,0.527691,Reg_TAMs,IFN_TAMs,local
TTTGTCAAGGATATAC-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.492014,0.699073,0.539889,0.612688,0.608443,0.551273,0.460248,Reg_TAMs,Reg_TAMs,local
TTTGTCAAGTACGCGA-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.470449,0.588994,0.611164,0.427515,0.471975,0.431026,0.455491,Inflam_TAMs,Inflam_TAMs,local
TTTGTCACATCTATGG-1-38-8,Leader_Merad_2021_414,II,no,64.0,Leader_Merad_2021_729,tumor_primary,Leader_Merad_2021_10x_3p_v2_beads,Macrophage,,singlet,...,0.515349,0.589936,0.633147,0.577088,0.550079,0.487074,0.425062,Inflam_TAMs,LA_TAMs,local


In [4]:
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

In [5]:
import gc 
import ctypes

In [6]:
def make_pseudobulk(adata, sample_colname, cell_type_colname, metacols):
    '''
    To create an an data with pseudo-bulk gene expression profiles for the samples and cell_types given.
    The function also carries metadata columns from the obs. metacols: as a list of col names.
    This is how to use:
        ps_adata = make_pseudobulk(hlca, sample_colname = 'sample', cell_type_colname = 'cell_type', metacols= ['patient', 'disease', 'tissue', 'study', 'sex'])
    '''
    ps_list = []
    for s in adata.obs[sample_colname].unique():
        adata_sub = adata[adata.obs[sample_colname] == s]
        
        for ct in adata_sub.obs[cell_type_colname].unique():
            adata_sub_ct = adata_sub[adata_sub.obs[cell_type_colname] == ct]
            
            adata_rep = sc.AnnData(X = adata_sub_ct.X.sum(axis = 0),
                                   var = adata_sub_ct.var[[]])
            samp_name=adata_sub.obs[sample_colname].iloc[0]
            cell= adata_sub_ct.obs[cell_type_colname].iloc[0]
            adata_rep.obs_names = [f"{samp_name}{cell}"]
            adata_rep.obs[cell_type_colname] = adata_sub_ct.obs[cell_type_colname].iloc[0]
            adata_rep.obs[metacols] = adata_sub_ct.obs[metacols].iloc[0]
            ps_list.append(adata_rep)

    ps_adata = sc.concat(ps_list)
    
    return ps_adata


In [13]:
import pandas as pd
import scipy.sparse

def savePBdata2(adata, filename):
    # Convert counts to dense matrix if it's sparse
    if scipy.sparse.issparse(adata.X):
        counts = adata.X.toarray()
    else:
        counts = adata.X

    # Convert to DataFrame
    counts_df = pd.DataFrame(counts, index=adata.obs_names, columns=adata.var_names)

    # (Optional) Convert to integer
    counts_df = counts_df.astype(int, errors='ignore')

    # Save AnnData object
    adata.write(filename)
    
    # Save counts as CSV for inspection (optional)
    counts_df.to_csv(filename.replace(".h5ad", "_counts.csv"))

    print(f"Saved AnnData to {filename} and count matrix to CSV.")


In [8]:
ps_adata_macs = make_pseudobulk(adata, sample_colname = 'sample', cell_type_colname = 'Projection_CellType', metacols= ['assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage'])



In [14]:
savePBdata2(ps_adata_macs, filename="../data/ps_adata_macs.h5ad")


Saved AnnData to ../data/ps_adata_macs.h5ad and count matrix to CSV.


In [None]:
with open('../data/Homo_sapiens.GRCh38.104.gtf') as f:
    gtf = list(f)

#prep the gtf file
gtf = [x for x in gtf if not x.startswith('#')]
gtf = [x for x in gtf if 'gene_id "' in x and 'gene_name "' in x]
gtf = list(map(lambda x: (x.split('gene_id "')[1].split('"')[0], x.split('gene_name "')[1].split('"')[0]), gtf))

In [10]:
ctyps = ps_adata_macs.obs['Projection_CellType'][~ps_adata_macs.obs['Projection_CellType'].isin(['Int.Node.3', 'Int.Node.4', 'Int.Node.5'])].unique()
ctyps

array(['RTM_TAMs', 'LA_TAMs', 'Prolif_TAMs', 'IFN_TAMs', 'Reg_TAMs',
       'Angio_TAMs', 'Inflam_TAMs'], dtype=object)

In [None]:
#sub_canc = ps_adata_macs[ps_adata_macs.obs['cell_type'] == 'malignant cell' ]
df = pd.DataFrame()

for c in ctyps:
    sub_c = ps_adata_macs[ps_adata_macs.obs['Projection_CellType'] == c ]
    sub_c.obs['Contrast'] = c
    pbs = [sub_c]
    print(c)
    sub_ct = ps_adata_macs[ps_adata_macs.obs['Projection_CellType'] != c ]
    sub_ct.obs['Contrast'] = 'others'
    pbs.append(sub_ct)
    pb = sc.concat(pbs)
    print(pb)
    counts= pd.DataFrame(pb.X, columns = pb.var_names)
    counts = counts.astype(int, errors='ignore')
    #To get rid off all the 0s in count matrix (to prevent problems)
    counts=counts+1
    dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
    sc.pp.filter_genes(dds, min_cells = 10)
    dds.deseq2()
    stat_res = DeseqStats(dds, contrast=('Contrast', c, 'others'))
    stat_res.summary()
    de  = stat_res.results_df
    de = de.sort_values('log2FoldChange', ascending = False)
        
    #get rownames aka gene symbols
    row_names = de.index.tolist()
    #row_names
    #add rownames as a new column 
    de['gene_symbols'] = row_names
    gtf=dict(gtf)
    de['gene_name'] = de['gene_symbols'].map(gtf)
    de = de[(de['padj'] < 0.05) & (de['log2FoldChange'] > 1.0) ]
    de['SubType'] = c
    de['inContrastTo'] = 'others'
    df = pd.concat([df, de])
    #save deg list
    df.to_csv('../results/macs_PB-DEGs.csv', index=True)

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


RTM_TAMs
AnnData object with n_obs × n_vars = 2890 × 17811
    obs: 'Projection_CellType', 'assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage', 'Contrast'


  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 2.59 seconds.

  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
  pid = os.fork()
 

Log2 fold change & Wald test p-value: Contrast RTM_TAMs vs others
                  baseMean  log2FoldChange     lfcSE       stat        pvalue  \
ENSG00000121410   3.770136        0.417685  0.060941   6.853916  7.185533e-12   
ENSG00000268895   1.432926       -0.082475  0.056925  -1.448829  1.473854e-01   
ENSG00000175899  14.883531       -1.238621  0.105889 -11.697327  1.315318e-31   
ENSG00000245105   1.178039       -1.190198  0.065009 -18.308332  7.101166e-75   
ENSG00000166535   1.172041       -1.146320  0.071008 -16.143592  1.259914e-58   
...                    ...             ...       ...        ...           ...   
ENSG00000070476   2.052263        0.544757  0.059618   9.137382  6.398234e-20   
ENSG00000203995   1.158362       -1.109067  0.071663 -15.476228  5.020585e-54   
ENSG00000162378   3.316611        0.957262  0.054561  17.544927  6.503645e-69   
ENSG00000159840  13.447196        1.026019  0.079158  12.961582  2.020381e-38   
ENSG00000074755   3.159791        0.498592 

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


LA_TAMs
AnnData object with n_obs × n_vars = 2890 × 17811
    obs: 'Projection_CellType', 'assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage', 'Contrast'


  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 2.57 seconds.

Fitting dispersions...
... done in 6.03 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.27 seconds.

Fitting MAP dispersions...
... done in 6.05 seconds.

Fitting LFCs...
... done in 7.52 seconds.

Calculating cook's distance...
... done in 4.73 seconds.

Replacing 89 outlier genes.

Fitting dispersions...
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 0.16 seconds.

Fitting LFCs...
... done in 0.18 seconds.

Running Wald tests...
... done in 5.57 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast LA_TAMs vs others
                  baseMean  log2FoldChange     lfcSE       stat        pvalue  \
ENSG00000121410   3.770136        0.589004  0.058460  10.075288  7.105251e-24   
ENSG00000268895   1.432926       -0.026855  0.061766  -0.434793  6.637129e-01   
ENSG00000175899  14.883531        1.665453  0.092967  17.914386  9.106840e-72   
ENSG00000245105   1.178039       -0.340890  0.074188  -4.594946  4.328620e-06   
ENSG00000166535   1.172041       -0.293508  0.075834  -3.870405  1.086546e-04   
...                    ...             ...       ...        ...           ...   
ENSG00000070476   2.052263        0.123876  0.062747   1.974215  4.835726e-02   
ENSG00000203995   1.158362       -0.328290  0.076939  -4.266883  1.982233e-05   
ENSG00000162378   3.316611        0.444663  0.058350   7.620588  2.525225e-14   
ENSG00000159840  13.447196        0.796940  0.075689  10.529136  6.341377e-26   
ENSG00000074755   3.159791        0.481231  

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 2.53 seconds.

Fitting dispersions...
... done in 6.49 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 6.26 seconds.

Fitting LFCs...
... done in 8.29 seconds.

Calculating cook's distance...
... done in 4.69 seconds.

Replacing 107 outlier genes.

Fitting dispersions...
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 0.21 seconds.

Fitting LFCs...
... done in 0.25 seconds.

Running Wald tests...
... done in 6.43 seconds.

  sub_c.obs['Contrast'] = c


Log2 fold change & Wald test p-value: Contrast Prolif_TAMs vs others
                  baseMean  log2FoldChange     lfcSE       stat        pvalue  \
ENSG00000121410   3.770136       -0.739641  0.080312  -9.209583  3.273943e-20   
ENSG00000268895   1.432926        0.001264  0.084802   0.014900  9.881120e-01   
ENSG00000175899  14.883531       -1.926499  0.113990 -16.900619  4.452096e-64   
ENSG00000245105   1.178039        0.543780  0.087173   6.237948  4.433474e-10   
ENSG00000166535   1.172041        0.444100  0.090260   4.920255  8.643169e-07   
...                    ...             ...       ...        ...           ...   
ENSG00000070476   2.052263       -0.489071  0.086322  -5.665672  1.464493e-08   
ENSG00000203995   1.158362        0.471778  0.090304   5.224345  1.747726e-07   
ENSG00000162378   3.316611       -0.597517  0.078195  -7.641414  2.148490e-14   
ENSG00000159840  13.447196       -1.166144  0.089106 -13.087217  3.896238e-39   
ENSG00000074755   3.159791       -0.7744

  sub_ct.obs['Contrast'] = 'others'


AnnData object with n_obs × n_vars = 2890 × 17811
    obs: 'Projection_CellType', 'assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage', 'Contrast'


  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 2.53 seconds.

Fitting dispersions...
... done in 5.92 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 5.84 seconds.

Fitting LFCs...
... done in 8.00 seconds.

Calculating cook's distance...
... done in 4.69 seconds.

Replacing 100 outlier genes.

Fitting dispersions...
... done in 0.29 seconds.

Fitting MAP dispersions...
... done in 0.22 seconds.

Fitting LFCs...
... done in 0.21 seconds.

Running Wald tests...
... done in 5.93 seconds.

  sub_c.obs['Contrast'] = c


Log2 fold change & Wald test p-value: Contrast IFN_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat    pvalue  \
ENSG00000121410   3.770136       -0.156425  0.067942 -2.302330  0.021317   
ENSG00000268895   1.432926        0.051460  0.071755  0.717168  0.473271   
ENSG00000175899  14.883531       -0.047390  0.103864 -0.456264  0.648200   
ENSG00000245105   1.178039        0.189887  0.080912  2.346822  0.018934   
ENSG00000166535   1.172041        0.075965  0.084703  0.896840  0.369805   
...                    ...             ...       ...       ...       ...   
ENSG00000070476   2.052263       -0.186803  0.072536 -2.575312  0.010015   
ENSG00000203995   1.158362        0.110658  0.084656  1.307149  0.191162   
ENSG00000162378   3.316611       -0.262807  0.067715 -3.881054  0.000104   
ENSG00000159840  13.447196        0.126516  0.081507  1.552214  0.120611   
ENSG00000074755   3.159791       -0.178127  0.071523 -2.490496  0.012756   

                     

  sub_ct.obs['Contrast'] = 'others'


AnnData object with n_obs × n_vars = 2890 × 17811
    obs: 'Projection_CellType', 'assay', 'donor_id', 'disease', 'tissue', 'study', 'sex', 'age', 'uicc_stage', 'tumor_stage', 'Contrast'


  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 2.59 seconds.

Fitting dispersions...
... done in 5.99 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 5.16 seconds.

Fitting LFCs...
... done in 7.63 seconds.

Calculating cook's distance...
... done in 4.69 seconds.

Replacing 97 outlier genes.

Fitting dispersions...
... done in 0.30 seconds.

Fitting MAP dispersions...
... done in 0.20 seconds.

Fitting LFCs...
... done in 0.22 seconds.

Running Wald tests...
... done in 5.84 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast Reg_TAMs vs others
                  baseMean  log2FoldChange     lfcSE       stat        pvalue  \
ENSG00000121410   3.770136        0.299235  0.061886   4.835261  1.329711e-06   
ENSG00000268895   1.432926        0.156329  0.064732   2.415019  1.573441e-02   
ENSG00000175899  14.883531        1.630715  0.093515  17.437993  4.247288e-68   
ENSG00000245105   1.178039        0.150484  0.074638   2.016199  4.377921e-02   
ENSG00000166535   1.172041       -0.007611  0.078945  -0.096414  9.231916e-01   
...                    ...             ...       ...        ...           ...   
ENSG00000070476   2.052263        0.046869  0.065726   0.713097  4.757855e-01   
ENSG00000203995   1.158362       -0.034590  0.079993  -0.432417  6.654381e-01   
ENSG00000162378   3.316611        0.130550  0.061525   2.121910  3.384529e-02   
ENSG00000159840  13.447196        0.304485  0.077727   3.917382  8.951579e-05   
ENSG00000074755   3.159791        0.242706 

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 2.52 seconds.

Fitting dispersions...
... done in 6.05 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 5.32 seconds.

Fitting LFCs...
... done in 7.91 seconds.

Calculating cook's distance...
... done in 4.46 seconds.

Replacing 100 outlier genes.

Fitting dispersions...
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 0.18 seconds.

Fitting LFCs...
... done in 0.21 seconds.

Running Wald tests...
... done in 5.93 seconds.

  sub_c.obs['Contrast'] = c
  sub_ct.obs['Contrast'] = 'others'


Log2 fold change & Wald test p-value: Contrast Angio_TAMs vs others
                  baseMean  log2FoldChange     lfcSE       stat        pvalue  \
ENSG00000121410   3.770136       -0.153361  0.075191  -2.039630  4.138721e-02   
ENSG00000268895   1.432926       -0.025867  0.084619  -0.305694  7.598376e-01   
ENSG00000175899  14.883531       -1.795126  0.116127 -15.458324  6.630170e-54   
ENSG00000245105   1.178039        0.364708  0.090340   4.037058  5.412578e-05   
ENSG00000166535   1.172041        0.364693  0.091371   3.991344  6.569993e-05   
...                    ...             ...       ...        ...           ...   
ENSG00000070476   2.052263       -0.330220  0.084076  -3.927648  8.578061e-05   
ENSG00000203995   1.158362        0.370522  0.091932   4.030401  5.568176e-05   
ENSG00000162378   3.316611       -0.568003  0.078679  -7.219241  5.227868e-13   
ENSG00000159840  13.447196       -0.704050  0.090269  -7.799435  6.218510e-15   
ENSG00000074755   3.159791       -0.40039

  dds = DeseqDataSet(counts = counts, metadata=pb.obs, design_factors = ["Contrast", "assay"])
Fitting size factors...


Using None as control genes, passed at DeseqDataSet initialization


... done in 2.51 seconds.

Fitting dispersions...
... done in 5.86 seconds.

Fitting dispersion trend curve...
  self._fit_parametric_dispersion_trend(vst)
... done in 0.28 seconds.

Fitting MAP dispersions...
... done in 5.43 seconds.

Fitting LFCs...
... done in 7.70 seconds.

Calculating cook's distance...
... done in 4.67 seconds.

Replacing 141 outlier genes.

Fitting dispersions...
... done in 0.37 seconds.

Fitting MAP dispersions...
... done in 0.27 seconds.

Fitting LFCs...
... done in 0.31 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: Contrast Inflam_TAMs vs others
                  baseMean  log2FoldChange     lfcSE      stat        pvalue  \
ENSG00000121410   3.770136       -0.196659  0.075473 -2.605672  9.169424e-03   
ENSG00000268895   1.432926        0.049937  0.082110  0.608170  5.430748e-01   
ENSG00000175899  14.883531       -0.820015  0.114274 -7.175863  7.185245e-13   
ENSG00000245105   1.178039        0.351399  0.089818  3.912363  9.139720e-05   
ENSG00000166535   1.172041        0.295379  0.092078  3.207910  1.337033e-03   
...                    ...             ...       ...       ...           ...   
ENSG00000070476   2.052263       -0.331311  0.083579 -3.964057  7.368670e-05   
ENSG00000203995   1.158362        0.284770  0.092993  3.062256  2.196752e-03   
ENSG00000162378   3.316611       -0.508953  0.077694 -6.550773  5.724019e-11   
ENSG00000159840  13.447196       -0.413137  0.089988 -4.591025  4.410747e-06   
ENSG00000074755   3.159791       -0.426625  0.08155

... done in 5.87 seconds.

