# Format featureCounts Output for PyDESeq2

In [16]:
def featureCount_convert(file_in):
    """Convert a featureCounts table into the samples-by-genes layout used for PyDESeq2.

    The table is read as tab-separated text with the first column as the
    index, the annotation columns (Chr, Start, End, Strand, Length) are
    dropped, and the frame is transposed so that each row is one sample.
    Sample labels are the original count-column headers with the
    'data/BAM_sorted/' prefix and the '_sorted.bam' suffix removed.

    Parameters
    ----------
    file_in : str
        Path to the featureCounts table. When the path contains
        '_table.tsv' that part is replaced by '_PyDEseq_format.tsv' to build
        the output path; otherwise the file extension is replaced by
        '_PyDEseq_format.tsv' so the input file is never overwritten.

    Returns
    -------
    str
        Path of the tab-separated file that was written.
    """
    import os
    import pandas as pd

    suffix_in = '_table.tsv'
    suffix_out = '_PyDEseq_format.tsv'
    if suffix_in in file_in:
        file_out = file_in.replace(suffix_in, suffix_out)
    else:
        # str.replace would be a no-op here and the result would be written
        # over the input file, so derive a distinct name instead.
        file_out = os.path.splitext(file_in)[0] + suffix_out

    # Raw featureCounts output starts with a '# Program:featureCounts ...'
    # banner line before the real header. Skip it when present; a first line
    # that contains tabs is treated as the header even if it starts with '#'.
    with open(file_in, errors='replace') as handle:
        first_line = handle.readline()
    banner_rows = 1 if first_line.startswith('#') and '\t' not in first_line else 0

    features_df_raw = pd.read_csv(file_in, sep='\t', index_col=0, skiprows=banner_rows)

    # keep only the per-sample count columns
    features_df = features_df_raw.drop(columns=['Chr','Start','End','Strand','Length'])

    # transpose dataframe: rows become samples, columns become genes
    features_df_final = features_df.transpose()

    # replace index (BAM paths) with bare sample names
    features_df_final.index = [
        bam_path.replace('data/BAM_sorted/','').replace('_sorted.bam','')
        for bam_path in features_df_final.index
    ]

    # save to csv
    features_df_final.to_csv(file_out, sep='\t')
    return file_out

# Analysis Function

In [58]:
def pydeseq2(counts_path,metadata_path,condition1,condition2,results_dir,n_cpus=8):
    """Run a two-condition PyDESeq2 analysis and write the results table to CSV.

    Parameters
    ----------
    counts_path : str
        Tab-separated count table with samples as rows (first column is the
        sample index) and genes as columns.
    metadata_path : str
        Comma-separated sample sheet indexed by its first column; it must
        have a ``condition`` column. Every retained sample must also be a row
        of the count table.
    condition1, condition2 : str
        The two values of ``condition`` to compare. Samples with any other
        condition are dropped before fitting.
    results_dir : str
        Directory for the output CSV; created (including parents) if missing.
    n_cpus : int, optional
        Passed to ``DefaultInference``. Defaults to 8, the value that was
        previously hard-coded.

    Returns
    -------
    str
        Path of the CSV written, named ``<tested>_vs_ref<reference>.csv``.

    Raises
    ------
    KeyError
        If the fitted model has no ``condition_<a>_vs_<b>`` coefficient for
        the two requested conditions in either direction.
    """
    import os
    import pandas as pd

    from pydeseq2.dds import DeseqDataSet
    from pydeseq2.default_inference import DefaultInference
    from pydeseq2.ds import DeseqStats

    # import data
    counts_df = pd.read_csv(counts_path, sep="\t", index_col=0)
    metadata = pd.read_csv(metadata_path, index_col=0)

    # keep only the samples belonging to the two requested conditions
    metadata = metadata[metadata.condition.isin([condition1,condition2])]

    # only keep samples in counts_df if they are in metadata
    counts_df = counts_df.loc[metadata.index]

    # data filtering: drop samples without a condition label
    samples_to_keep = ~metadata.condition.isna()
    counts_df = counts_df.loc[samples_to_keep]
    metadata = metadata.loc[samples_to_keep]

    # drop genes with fewer than 10 reads in total across the kept samples
    genes_to_keep = counts_df.columns[counts_df.sum(axis=0) >= 10]
    counts_df = counts_df[genes_to_keep]

    # read counts modeling
    inference = DefaultInference(n_cpus=n_cpus)
    dds = DeseqDataSet(
        counts=counts_df,
        metadata=metadata,
        design_factors="condition",
        refit_cooks=True,
        inference=inference,
    )

    dds.deseq2()

    print(dds.varm["LFC"])

    # statistical analysis
    stat_res = DeseqStats(dds, inference=inference)

    # wald test: once with the default settings, then again with
    # lfc_null=0.1 and alt_hypothesis="greaterAbs"
    stat_res.summary()
    stat_res.summary(lfc_null=0.1, alt_hypothesis="greaterAbs")

    # exist_ok avoids swallowing real errors (permissions, bad path) the way a
    # bare "except: pass" around os.mkdir did, and also creates parent folders.
    os.makedirs(results_dir, exist_ok=True)

    # The fitted coefficient is named condition_<tested>_vs_<reference>, and
    # which condition ends up as the reference does not follow the argument
    # order. Look the name up instead of retrying after a blanket except,
    # which also hid unrelated failures such as a failed CSV write.
    fitted_coeffs = list(dds.varm["LFC"].columns)
    for tested, reference in ((condition2, condition1), (condition1, condition2)):
        coeff = f"condition_{tested}_vs_{reference}"
        if coeff in fitted_coeffs:
            break
    else:
        raise KeyError(
            f"No coefficient comparing {condition1!r} and {condition2!r} was fitted; "
            f"available coefficients: {fitted_coeffs}"
        )

    stat_res.lfc_shrink(coeff=coeff)
    out_path = os.path.join(results_dir, f'{tested}_vs_ref{reference}.csv')
    stat_res.results_df.to_csv(out_path)
    return out_path


# BW25113

In [17]:
# Reshape the featureCounts table for PyDESeq2; forward slashes keep the path portable.
featureCount_convert('2_in_progress/featureCounts/featureCounts_table.tsv')

In [59]:
# Differential expression between the MB and AB conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='MB',
    condition2='AB',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 0.97 seconds.

Fitting dispersion trend curve...
... done in 0.12 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 1.10 seconds.

Fitting LFCs...
... done in 0.60 seconds.

Calculating cook's distance...
... done in 0.02 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_MB_vs_AB
BW25113_RS00005   3.897184            0.110445
BW25113_RS00010   6.427833            0.004568
BW25113_RS00015   5.950272            1.141036
BW25113_RS00020   6.374614            0.906156
BW25113_RS00030   4.270315            0.222734
...                    ...                 ...
BW25113_RS22915   5.704613           -0.028569
BW25113_RS22920   4.350210           -0.578052
BW25113_RS22925   9.018653            0.110239
BW25113_RS22930   7.588761            0.237398
BW25113_RS22935   5.800682           -0.124317

[4267 rows x 2 columns]


... done in 0.40 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition MB vs AB
                    baseMean  log2FoldChange     lfcSE      stat  \
BW25113_RS00005    52.112222        0.159338  0.390373  0.408169   
BW25113_RS00010   619.503563        0.006590  0.179318  0.036751   
BW25113_RS00015   792.201426        1.646167  0.166455  9.889554   
BW25113_RS00020  1018.799034        1.307306  0.157309  8.310424   
BW25113_RS00030    80.825457        0.321337  0.340277  0.944339   
...                      ...             ...       ...       ...   
BW25113_RS22915   295.949851       -0.041217  0.199031 -0.207088   
BW25113_RS22920    60.500560       -0.833953  0.361881 -2.304493   
BW25113_RS22925  8736.960750        0.159041  0.133555  1.190831   
BW25113_RS22930  2240.687332        0.342493  0.135519  2.527276   
BW25113_RS22935   311.187277       -0.179351  0.200120 -0.896218   

                       pvalue          padj  
BW25113_RS00005  6.831498e-01  7.642510e-01  
BW25113_RS00010  9.706837e-01  9.7

... done in 0.88 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition MB vs AB
                    baseMean  log2FoldChange     lfcSE      stat  \
BW25113_RS00005    52.112222        0.159338  0.390373  0.152003   
BW25113_RS00010   619.503563        0.006590  0.179318  0.000000   
BW25113_RS00015   792.201426        1.646167  0.166455  9.288792   
BW25113_RS00020  1018.799034        1.307306  0.157309  7.674733   
BW25113_RS00030    80.825457        0.321337  0.340277  0.650461   
...                      ...             ...       ...       ...   
BW25113_RS22915   295.949851       -0.041217  0.199031  0.000000   
BW25113_RS22920    60.500560       -0.833953  0.361881 -2.028160   
BW25113_RS22925  8736.960750        0.159041  0.133555  0.442075   
BW25113_RS22930  2240.687332        0.342493  0.135519  1.789371   
BW25113_RS22935   311.187277       -0.179351  0.200120 -0.396517   

                       pvalue          padj  
BW25113_RS00005  8.791843e-01  1.000000e+00  
BW25113_RS00010  1.000000e+00  1.0

... done in 1.43 seconds.



In [60]:
# Differential expression between the MB and MR conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='MB',
    condition2='MR',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 1.10 seconds.

Fitting dispersion trend curve...
... done in 0.14 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 1.29 seconds.

Fitting LFCs...
... done in 0.63 seconds.

Calculating cook's distance...
... done in 0.02 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_MR_vs_MB
BW25113_RS00005   3.881617           -0.042992
BW25113_RS00010   6.304946           -0.053229
BW25113_RS00015   6.963298            0.058544
BW25113_RS00020   7.153161            0.139209
BW25113_RS00030   4.365449           -0.324655
...                    ...                 ...
BW25113_RS22915   5.547804           -0.184138
BW25113_RS22920   3.645585            0.202356
BW25113_RS22925   9.002137            0.076019
BW25113_RS22930   7.699183            0.043008
BW25113_RS22935   5.550227           -0.075946

[4232 rows x 2 columns]


... done in 0.42 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition MR vs MB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    47.513984       -0.062025  0.433111 -0.143208  0.886126   
BW25113_RS00010   533.065467       -0.076793  0.262128 -0.292960  0.769552   
BW25113_RS00015  1088.779898        0.084462  0.260360  0.324403  0.745633   
BW25113_RS00020  1373.295932        0.200836  0.268694  0.747454  0.454790   
BW25113_RS00030    67.728838       -0.468379  0.379428 -1.234434  0.217041   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   234.836304       -0.265656  0.303528 -0.875225  0.381451   
BW25113_RS22920    42.734171        0.291938  0.448148  0.651431  0.514768   
BW25113_RS22925  8441.113416        0.109672  0.248558  0.441233  0.659044   
BW25113_RS22930  2254.976831        0.062047  0.248901  0.249284  0.803141   
BW25113_RS22935   248.062248       -0.109567  0.294802 -0.371664  0.710143   

      

... done in 0.75 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition MR vs MB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    47.513984       -0.062025  0.433111  0.000000  1.000000   
BW25113_RS00010   533.065467       -0.076793  0.262128  0.000000  1.000000   
BW25113_RS00015  1088.779898        0.084462  0.260360  0.000000  1.000000   
BW25113_RS00020  1373.295932        0.200836  0.268694  0.375283  0.707450   
BW25113_RS00030    67.728838       -0.468379  0.379428 -0.970880  0.331608   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   234.836304       -0.265656  0.303528 -0.545766  0.585226   
BW25113_RS22920    42.734171        0.291938  0.448148  0.428291  0.668439   
BW25113_RS22925  8441.113416        0.109672  0.248558  0.038912  0.968960   
BW25113_RS22930  2254.976831        0.062047  0.248901  0.000000  1.000000   
BW25113_RS22935   248.062248       -0.109567  0.294802 -0.032453  0.974110   

      

... done in 2.24 seconds.



In [61]:
# Differential expression between the AB and AR conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='AB',
    condition2='AR',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 1.09 seconds.

Fitting dispersion trend curve...
... done in 0.13 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 1.27 seconds.

Fitting LFCs...
... done in 0.63 seconds.

Calculating cook's distance...
... done in 0.01 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_AR_vs_AB
BW25113_RS00005   3.971504           -0.070297
BW25113_RS00010   6.502549            0.235871
BW25113_RS00015   6.024442           -0.047796
BW25113_RS00020   6.448892            0.075650
BW25113_RS00030   4.344968            0.252144
...                    ...                 ...
BW25113_RS22915   5.778867           -0.063341
BW25113_RS22920   4.424585           -0.128381
BW25113_RS22925   9.092950           -0.012439
BW25113_RS22930   7.663057           -0.083436
BW25113_RS22935   5.875232           -0.038617

[4269 rows x 2 columns]


... done in 0.45 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition AR vs AB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    51.293091       -0.101417  0.391308 -0.259175  0.795500   
BW25113_RS00010   754.859368        0.340290  0.163977  2.075229  0.037965   
BW25113_RS00015   403.044791       -0.068955  0.181549 -0.379817  0.704082   
BW25113_RS00020   655.855020        0.109139  0.161498  0.675795  0.499171   
BW25113_RS00030    88.794011        0.363768  0.343798  1.058085  0.290017   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   313.224406       -0.091381  0.187810 -0.486561  0.626569   
BW25113_RS22920    78.588172       -0.185215  0.320037 -0.578728  0.562773   
BW25113_RS22925  8837.252049       -0.017945  0.109874 -0.163325  0.870263   
BW25113_RS22930  2043.424468       -0.120373  0.123725 -0.972913  0.330597   
BW25113_RS22935   349.292814       -0.055713  0.178977 -0.311286  0.755583   

      

... done in 0.75 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition AR vs AB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    51.293091       -0.101417  0.391308 -0.003622  0.997110   
BW25113_RS00010   754.859368        0.340290  0.163977  1.465388  0.142815   
BW25113_RS00015   403.044791       -0.068955  0.181549  0.000000  1.000000   
BW25113_RS00020   655.855020        0.109139  0.161498  0.056592  0.954870   
BW25113_RS00030    88.794011        0.363768  0.343798  0.767216  0.442953   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   313.224406       -0.091381  0.187810  0.000000  1.000000   
BW25113_RS22920    78.588172       -0.185215  0.320037 -0.266265  0.790035   
BW25113_RS22925  8837.252049       -0.017945  0.109874  0.000000  1.000000   
BW25113_RS22930  2043.424468       -0.120373  0.123725 -0.164666  0.869207   
BW25113_RS22935   349.292814       -0.055713  0.178977  0.000000  1.000000   

      

... done in 1.57 seconds.



In [62]:
# Differential expression between the MB and MF conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='MB',
    condition2='MF',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 1.20 seconds.

Fitting dispersion trend curve...
... done in 0.13 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 1.40 seconds.

Fitting LFCs...
... done in 0.75 seconds.

Calculating cook's distance...
... done in 0.02 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_MF_vs_MB
BW25113_RS00005   4.006756           -0.500963
BW25113_RS00010   6.430420           -0.175873
BW25113_RS00015   7.088977           -0.033457
BW25113_RS00020   7.278691           -0.092789
BW25113_RS00030   4.490872           -0.308867
...                    ...                 ...
BW25113_RS22915   5.673623            0.020557
BW25113_RS22920   3.770851           -0.130803
BW25113_RS22925   9.127384            0.047682
BW25113_RS22930   7.824494           -0.014560
BW25113_RS22935   5.675163            0.002150

[4247 rows x 2 columns]


... done in 0.41 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition MF vs MB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    44.304368       -0.722737  0.437356 -1.652516  0.098429   
BW25113_RS00010   570.011400       -0.253731  0.207378 -1.223516  0.221135   
BW25113_RS00015  1178.866367       -0.048268  0.193978 -0.248834  0.803489   
BW25113_RS00020  1385.087317       -0.133866  0.187985 -0.712110  0.476397   
BW25113_RS00030    77.902254       -0.445601  0.409129 -1.089147  0.276089   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   293.921947        0.029658  0.232503  0.127560  0.898497   
BW25113_RS22920    40.798285       -0.188709  0.439251 -0.429615  0.667476   
BW25113_RS22925  9428.708176        0.068790  0.180267  0.381602  0.702757   
BW25113_RS22930  2483.535920       -0.021006  0.185081 -0.113495  0.909638   
BW25113_RS22935   291.817864        0.003101  0.237354  0.013067  0.989574   

      

... done in 0.74 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition MF vs MB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    44.304368       -0.722737  0.437356 -1.423869  0.154484   
BW25113_RS00010   570.011400       -0.253731  0.207378 -0.741305  0.458508   
BW25113_RS00015  1178.866367       -0.048268  0.193978  0.000000  1.000000   
BW25113_RS00020  1385.087317       -0.133866  0.187985 -0.180151  0.857034   
BW25113_RS00030    77.902254       -0.445601  0.409129 -0.844725  0.398264   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   293.921947        0.029658  0.232503  0.000000  1.000000   
BW25113_RS22920    40.798285       -0.188709  0.439251 -0.201954  0.839952   
BW25113_RS22925  9428.708176        0.068790  0.180267  0.000000  1.000000   
BW25113_RS22930  2483.535920       -0.021006  0.185081  0.000000  1.000000   
BW25113_RS22935   291.817864        0.003101  0.237354  0.000000  1.000000   

      

... done in 2.34 seconds.



In [63]:
# Differential expression between the AB and AF conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='AB',
    condition2='AF',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 1.24 seconds.

Fitting dispersion trend curve...
... done in 0.56 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 3.62 seconds.

Fitting LFCs...
... done in 0.69 seconds.

Calculating cook's distance...
... done in 0.02 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_AF_vs_AB
BW25113_RS00005   3.946970            0.093091
BW25113_RS00010   6.477760           -0.033355
BW25113_RS00015   5.999839           -0.072849
BW25113_RS00020   6.424202           -0.122051
BW25113_RS00030   4.319645            0.117918
...                    ...                 ...
BW25113_RS22915   5.754150            0.047161
BW25113_RS22920   4.399882           -0.054351
BW25113_RS22925   9.068332            0.019675
BW25113_RS22930   7.638430           -0.030552
BW25113_RS22935   5.850633            0.105942

[4260 rows x 2 columns]


... done in 0.51 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition AF vs AB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    54.437499        0.134302  0.375753  0.357421  0.720777   
BW25113_RS00010   638.673035       -0.048121  0.163484 -0.294348  0.768492   
BW25113_RS00015   388.402678       -0.105099  0.173295 -0.606471  0.544202   
BW25113_RS00020   580.071623       -0.176083  0.159172 -1.106242  0.268622   
BW25113_RS00030    80.361044        0.170120  0.331428  0.513293  0.607747   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   323.130919        0.068039  0.175414  0.387877  0.698107   
BW25113_RS22920    78.935126       -0.078413  0.318909 -0.245877  0.805777   
BW25113_RS22925  8761.894905        0.028385  0.105803  0.268283  0.788481   
BW25113_RS22930  2046.449724       -0.044077  0.135953 -0.324210  0.745779   
BW25113_RS22935   366.762909        0.152842  0.169792  0.900171  0.368029   

      

... done in 0.95 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition AF vs AB
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    54.437499        0.134302  0.375753  0.091289  0.927263   
BW25113_RS00010   638.673035       -0.048121  0.163484  0.000000  1.000000   
BW25113_RS00015   388.402678       -0.105099  0.173295 -0.029422  0.976528   
BW25113_RS00020   580.071623       -0.176083  0.159172 -0.477990  0.632657   
BW25113_RS00030    80.361044        0.170120  0.331428  0.211568  0.832444   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   323.130919        0.068039  0.175414  0.000000  1.000000   
BW25113_RS22920    78.935126       -0.078413  0.318909  0.000000  1.000000   
BW25113_RS22925  8761.894905        0.028385  0.105803  0.000000  1.000000   
BW25113_RS22930  2046.449724       -0.044077  0.135953  0.000000  1.000000   
BW25113_RS22935   366.762909        0.152842  0.169792  0.311217  0.755635   

      

... done in 2.24 seconds.



In [64]:
# Differential expression between the MR and MF conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='MR',
    condition2='MF',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 1.40 seconds.

Fitting dispersion trend curve...
... done in 0.16 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 1.42 seconds.

Fitting LFCs...
... done in 0.78 seconds.

Calculating cook's distance...
... done in 0.00 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_MR_vs_MF
BW25113_RS00005   3.499355            0.449446
BW25113_RS00010   6.247070            0.115173
BW25113_RS00015   7.048698            0.083374
BW25113_RS00020   7.179239            0.222939
BW25113_RS00030   4.178549           -0.027841
...                    ...                 ...
BW25113_RS22915   5.687064           -0.213670
BW25113_RS22920   3.633287            0.325876
BW25113_RS22925   9.168199            0.020367
BW25113_RS22930   7.803193            0.049348
BW25113_RS22935   5.670055           -0.084897

[4240 rows x 2 columns]


... done in 0.47 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition MR vs MF
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    42.541867        0.648414  0.456356  1.420851  0.155360   
BW25113_RS00010   547.830139        0.166160  0.288043  0.576857  0.564036   
BW25113_RS00015  1201.319411        0.120284  0.276809  0.434536  0.663899   
BW25113_RS00020  1475.583346        0.321633  0.289911  1.109418  0.267250   
BW25113_RS00030    64.791918       -0.040166  0.467142 -0.085983  0.931480   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   266.388621       -0.308260  0.315799 -0.976129  0.329001   
BW25113_RS22920    45.263228        0.470140  0.452949  1.037953  0.299292   
BW25113_RS22925  9685.861269        0.029384  0.266820  0.110126  0.912309   
BW25113_RS22930  2510.438832        0.071194  0.273754  0.260065  0.794814   
BW25113_RS22935   278.259381       -0.122481  0.305961 -0.400315  0.688925   

      

... done in 0.77 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition MR vs MF
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    42.541867        0.648414  0.456356  1.201724  0.229471   
BW25113_RS00010   547.830139        0.166160  0.288043  0.229687  0.818335   
BW25113_RS00015  1201.319411        0.120284  0.276809  0.073276  0.941586   
BW25113_RS00020  1475.583346        0.321633  0.289911  0.764485  0.444578   
BW25113_RS00030    64.791918       -0.040166  0.467142  0.000000  1.000000   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   266.388621       -0.308260  0.315799 -0.659471  0.509593   
BW25113_RS22920    45.263228        0.470140  0.452949  0.817178  0.413827   
BW25113_RS22925  9685.861269        0.029384  0.266820  0.000000  1.000000   
BW25113_RS22930  2510.438832        0.071194  0.273754  0.000000  1.000000   
BW25113_RS22935   278.259381       -0.122481  0.305961 -0.073476  0.941427   

      

... done in 2.24 seconds.



In [65]:
# Differential expression between the AR and AF conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='AR',
    condition2='AF',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 1.04 seconds.

Fitting dispersion trend curve...
... done in 0.15 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 1.24 seconds.

Fitting LFCs...
... done in 0.64 seconds.

Calculating cook's distance...
... done in 0.00 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_AR_vs_AF
BW25113_RS00005   3.993803           -0.167047
BW25113_RS00010   6.397661            0.265772
BW25113_RS00015   5.879696            0.022032
BW25113_RS00020   6.255224            0.194563
BW25113_RS00030   4.390343            0.131265
...                    ...                 ...
BW25113_RS22915   5.754191           -0.113422
BW25113_RS22920   4.297467           -0.075479
BW25113_RS22925   9.040934           -0.035219
BW25113_RS22930   7.560638           -0.055722
BW25113_RS22935   5.909453           -0.147747

[4257 rows x 2 columns]


... done in 0.40 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition AR vs AF
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    50.428367       -0.240997  0.399728 -0.602902  0.546574   
BW25113_RS00010   691.767219        0.383429  0.137235  2.793952  0.005207   
BW25113_RS00015   360.753712        0.031786  0.180592  0.176008  0.860288   
BW25113_RS00020   575.636857        0.280696  0.152535  1.840205  0.065738   
BW25113_RS00030    86.688240        0.189375  0.317121  0.597169  0.550394   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   298.215405       -0.163634  0.180625 -0.905929  0.364973   
BW25113_RS22920    70.693604       -0.108893  0.343241 -0.317249  0.751055   
BW25113_RS22925  8294.926728       -0.050810  0.093244 -0.544914  0.585812   
BW25113_RS22930  1870.615213       -0.080390  0.129119 -0.622606  0.533544   
BW25113_RS22935   343.513628       -0.213155  0.168546 -1.264664  0.205992   

      

... done in 0.75 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition AR vs AF
                    baseMean  log2FoldChange     lfcSE      stat    pvalue  \
BW25113_RS00005    50.428367       -0.240997  0.399728 -0.352733  0.724289   
BW25113_RS00010   691.767219        0.383429  0.137235  2.065276  0.038897   
BW25113_RS00015   360.753712        0.031786  0.180592  0.000000  1.000000   
BW25113_RS00020   575.636857        0.280696  0.152535  1.184617  0.236169   
BW25113_RS00030    86.688240        0.189375  0.317121  0.281832  0.778072   
...                      ...             ...       ...       ...       ...   
BW25113_RS22915   298.215405       -0.163634  0.180625 -0.352297  0.724616   
BW25113_RS22920    70.693604       -0.108893  0.343241 -0.025909  0.979330   
BW25113_RS22925  8294.926728       -0.050810  0.093244  0.000000  1.000000   
BW25113_RS22930  1870.615213       -0.080390  0.129119  0.000000  1.000000   
BW25113_RS22935   343.513628       -0.213155  0.168546 -0.671356  0.501994   

      

... done in 1.80 seconds.



In [66]:
# Differential expression between the MB and MP conditions.
pydeseq2(
    counts_path='2_in_progress/featureCounts/featureCounts_PyDEseq_format.tsv',
    metadata_path='2_in_progress/metadata.csv',
    condition1='MB',
    condition2='MP',
    results_dir='2_in_progress/pydeseq2',
)

Fitting size factors...
... done in 0.00 seconds.

Fitting dispersions...
... done in 1.18 seconds.

Fitting dispersion trend curve...
... done in 0.16 seconds.

  self.fit_dispersion_prior()
Fitting MAP dispersions...
... done in 1.30 seconds.

Fitting LFCs...
... done in 0.63 seconds.

Calculating cook's distance...
... done in 0.00 seconds.

Replacing 0 outlier genes.

Running Wald tests...


                 intercept  condition_MP_vs_MB
BW25113_RS00005   4.150655           -0.060597
BW25113_RS00010   6.575600           -0.007082
BW25113_RS00015   7.234331           -0.771738
BW25113_RS00020   7.423915           -0.388528
BW25113_RS00030   4.636285           -0.204515
...                    ...                 ...
BW25113_RS22915   5.819237            0.108477
BW25113_RS22920   3.915369            0.248166
BW25113_RS22925   9.272432            0.102905
BW25113_RS22930   7.969470           -0.216361
BW25113_RS22935   5.819639           -0.230049

[4303 rows x 2 columns]


... done in 0.36 seconds.

Running Wald tests...


Log2 fold change & Wald test p-value: condition MP vs MB
                     baseMean  log2FoldChange     lfcSE      stat  \
BW25113_RS00005     61.729979       -0.087422  0.340743 -0.256564   
BW25113_RS00010    714.777780       -0.010217  0.132134 -0.077320   
BW25113_RS00015   1013.103260       -1.113383  0.131876 -8.442675   
BW25113_RS00020   1405.638031       -0.560528  0.116154 -4.825737   
BW25113_RS00030     93.583659       -0.295053  0.274506 -1.074848   
...                       ...             ...       ...       ...   
BW25113_RS22915    355.808793        0.156500  0.166773  0.938398   
BW25113_RS22920     57.279247        0.358028  0.347456  1.030427   
BW25113_RS22925  11217.776259        0.148461  0.107214  1.384723   
BW25113_RS22930   2610.355453       -0.312143  0.107929 -2.892117   
BW25113_RS22935    302.511487       -0.331891  0.179534 -1.848629   

                       pvalue          padj  
BW25113_RS00005  7.975152e-01  8.546397e-01  
BW25113_RS00010  9.383

... done in 0.76 seconds.

Fitting MAP LFCs...


Log2 fold change & Wald test p-value: condition MP vs MB
                     baseMean  log2FoldChange     lfcSE      stat  \
BW25113_RS00005     61.729979       -0.087422  0.340743  0.000000   
BW25113_RS00010    714.777780       -0.010217  0.132134  0.000000   
BW25113_RS00015   1013.103260       -1.113383  0.131876 -7.684384   
BW25113_RS00020   1405.638031       -0.560528  0.116154 -3.964810   
BW25113_RS00030     93.583659       -0.295053  0.274506 -0.710558   
...                       ...             ...       ...       ...   
BW25113_RS22915    355.808793        0.156500  0.166773  0.338781   
BW25113_RS22920     57.279247        0.358028  0.347456  0.742620   
BW25113_RS22925  11217.776259        0.148461  0.107214  0.452006   
BW25113_RS22930   2610.355453       -0.312143  0.107929 -1.965581   
BW25113_RS22935    302.511487       -0.331891  0.179534 -1.291630   

                       pvalue          padj  
BW25113_RS00005  1.000000e+00  1.000000e+00  
BW25113_RS00010  1.000

... done in 1.57 seconds.

