# Running correlations between gene expression and OTU abundance


## Filtering lowly expressed genes and lowly abundant OTUs

 * Keep genes with more than one read per kilobase per million mapped reads (RPKM > 1) in at least 80% of the samples; all other genes are removed ([Johnson and Krishnan, 2022](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02568-9))
 * Keep OTUs with a relative abundance above 0.001 in at least 10% of the samples ([Priya et al 2022](https://www.nature.com/articles/s41564-022-01121-z))

### Filtering genes

Since RPKM will be used to filter out genes with low expression, it must be imported first:

In [1]:
from pathlib import Path

import pandas as pd

# Locate the repository checkout under the current user's home directory so the
# same cell runs unchanged on every account that keeps it in ~/Repositories
# (the notebook has been run from more than one account).
QUANTIFICATION_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/rnaseq_kremling2018/quantification'

# RPKM matrices for day and night samples, indexed by the 'Name' column.
# set_index is chained (no inplace mutation) so re-running the cell is idempotent.
kremling_expression_v5_day_rpkm = pd.read_csv(
    QUANTIFICATION_DIR / 'kremling_expression_v5_day_rpkm.tsv', sep='\t'
).set_index('Name')

kremling_expression_v5_night_rpkm = pd.read_csv(
    QUANTIFICATION_DIR / 'kremling_expression_v5_night_rpkm.tsv', sep='\t'
).set_index('Name')

These are the genes to be used in filtering steps:

In [2]:
# A gene is retained when its RPKM is above 1 in at least 80% of the columns
# (samples) of the corresponding matrix.
genes_tokeep_day = kremling_expression_v5_day_rpkm.loc[
    kremling_expression_v5_day_rpkm.gt(1).sum(axis=1).ge(kremling_expression_v5_day_rpkm.shape[1] * 0.8)
].index
genes_tokeep_night = kremling_expression_v5_night_rpkm.loc[
    kremling_expression_v5_night_rpkm.gt(1).sum(axis=1).ge(kremling_expression_v5_night_rpkm.shape[1] * 0.8)
].index

print('Genes to keep in day:', genes_tokeep_day.size)
print('Genes to keep in night:', genes_tokeep_night.size)

Genes to keep in day: 13107
Genes to keep in night: 13630


Filtering RPKM:

In [3]:
# Reuse the gene sets selected in the previous cell instead of re-deriving the
# RPKM threshold here, so the cutoff is defined in exactly one place and RPKM is
# filtered the same way as the TPM/CPM/TMM/UQ/CTF/CUF matrices below.
kremling_expression_v5_day_rpkm_filtered = kremling_expression_v5_day_rpkm[kremling_expression_v5_day_rpkm.index.isin(genes_tokeep_day)]
kremling_expression_v5_night_rpkm_filtered = kremling_expression_v5_night_rpkm[kremling_expression_v5_night_rpkm.index.isin(genes_tokeep_night)]

Importing the TPM, CPM, TMM, UQ, CTF and CUF matrices and filtering them to the genes selected above:

In [4]:
# Locate the repository checkout under the current user's home directory so the
# same cell runs unchanged on every account that keeps it in ~/Repositories.
QUANTIFICATION_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/rnaseq_kremling2018/quantification'


def _read_expression(period, method):
    """Load one expression matrix and index it by the 'Name' column.

    Parameters
    ----------
    period : str
        'day' or 'night'; selects the sample set in the file name.
    method : str
        Normalization suffix in the file name ('tpm', 'cpm', 'tmm', 'uq',
        'ctf' or 'cuf').

    Returns
    -------
    pandas.DataFrame
        Contents of 'kremling_expression_v5_<period>_<method>.tsv' with 'Name'
        as the index.
    """
    return pd.read_csv(
        QUANTIFICATION_DIR / f'kremling_expression_v5_{period}_{method}.tsv', sep='\t'
    ).set_index('Name')


# Every matrix is restricted to the genes that passed the RPKM filter
# (genes_tokeep_day / genes_tokeep_night), so all normalizations share one gene set.

# TPM
kremling_expression_v5_day_tpm = _read_expression('day', 'tpm')
kremling_expression_v5_night_tpm = _read_expression('night', 'tpm')
kremling_expression_v5_day_tpm_filtered = kremling_expression_v5_day_tpm[kremling_expression_v5_day_tpm.index.isin(genes_tokeep_day)]
kremling_expression_v5_night_tpm_filtered = kremling_expression_v5_night_tpm[kremling_expression_v5_night_tpm.index.isin(genes_tokeep_night)]

# CPM
kremling_expression_v5_day_cpm = _read_expression('day', 'cpm')
kremling_expression_v5_night_cpm = _read_expression('night', 'cpm')
kremling_expression_v5_day_cpm_filtered = kremling_expression_v5_day_cpm[kremling_expression_v5_day_cpm.index.isin(genes_tokeep_day)]
kremling_expression_v5_night_cpm_filtered = kremling_expression_v5_night_cpm[kremling_expression_v5_night_cpm.index.isin(genes_tokeep_night)]

# TMM
kremling_expression_v5_day_tmm = _read_expression('day', 'tmm')
kremling_expression_v5_night_tmm = _read_expression('night', 'tmm')
kremling_expression_v5_day_tmm_filtered = kremling_expression_v5_day_tmm[kremling_expression_v5_day_tmm.index.isin(genes_tokeep_day)]
kremling_expression_v5_night_tmm_filtered = kremling_expression_v5_night_tmm[kremling_expression_v5_night_tmm.index.isin(genes_tokeep_night)]

# UQ
kremling_expression_v5_day_uq = _read_expression('day', 'uq')
kremling_expression_v5_night_uq = _read_expression('night', 'uq')
kremling_expression_v5_day_uq_filtered = kremling_expression_v5_day_uq[kremling_expression_v5_day_uq.index.isin(genes_tokeep_day)]
kremling_expression_v5_night_uq_filtered = kremling_expression_v5_night_uq[kremling_expression_v5_night_uq.index.isin(genes_tokeep_night)]

# CTF
kremling_expression_v5_day_ctf = _read_expression('day', 'ctf')
kremling_expression_v5_night_ctf = _read_expression('night', 'ctf')
kremling_expression_v5_day_ctf_filtered = kremling_expression_v5_day_ctf[kremling_expression_v5_day_ctf.index.isin(genes_tokeep_day)]
kremling_expression_v5_night_ctf_filtered = kremling_expression_v5_night_ctf[kremling_expression_v5_night_ctf.index.isin(genes_tokeep_night)]

# CUF
kremling_expression_v5_day_cuf = _read_expression('day', 'cuf')
kremling_expression_v5_night_cuf = _read_expression('night', 'cuf')
kremling_expression_v5_day_cuf_filtered = kremling_expression_v5_day_cuf[kremling_expression_v5_day_cuf.index.isin(genes_tokeep_day)]
kremling_expression_v5_night_cuf_filtered = kremling_expression_v5_night_cuf[kremling_expression_v5_night_cuf.index.isin(genes_tokeep_night)]

### Filtering OTUs

Since relative abundance will be used to filter out OTUs with low abundance, it must be imported first:

In [5]:
import pandas as pd

# Locate the repository checkout under the current user's home directory so the
# same cell runs unchanged on every account that keeps it in ~/Repositories.
OTU_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples'

# Relative-abundance OTU tables for day and night samples, indexed by 'OTU ID'.
# set_index is chained (no inplace mutation) so re-running the cell is idempotent.
otu_table_merged_day_relative_abund = pd.read_csv(
    OTU_DIR / 'summed_day_night_otu_day_relative_abund.tsv', sep='\t'
).set_index('OTU ID')

otu_table_merged_night_relative_abund = pd.read_csv(
    OTU_DIR / 'summed_day_night_otu_night_relative_abund.tsv', sep='\t'
).set_index('OTU ID')

In [6]:
# An OTU is retained when its relative abundance is above 0.001 in at least 10%
# of the columns (samples) of the corresponding table.
otus_tokeep_day = otu_table_merged_day_relative_abund.loc[
    otu_table_merged_day_relative_abund.gt(0.001).sum(axis=1).ge(otu_table_merged_day_relative_abund.shape[1] * 0.1)
].index
otus_tokeep_night = otu_table_merged_night_relative_abund.loc[
    otu_table_merged_night_relative_abund.gt(0.001).sum(axis=1).ge(otu_table_merged_night_relative_abund.shape[1] * 0.1)
].index

OTU matrices with relative abundances are filtered (based on relative abundance) for correlations:

In [7]:
# Keep only the rows whose OTU ID is in the retained set for that sample group.
otu_table_merged_day_relative_abund_filtered = otu_table_merged_day_relative_abund.loc[
    otu_table_merged_day_relative_abund.index.isin(otus_tokeep_day)
]
otu_table_merged_night_relative_abund_filtered = otu_table_merged_night_relative_abund.loc[
    otu_table_merged_night_relative_abund.index.isin(otus_tokeep_night)
]

In [8]:
# Report (rows, columns) of the filtered tables, day first and then night.
for filtered_table in (otu_table_merged_day_relative_abund_filtered,
                       otu_table_merged_night_relative_abund_filtered):
    print(filtered_table.shape)

(3681, 176)
(3712, 228)


In [None]:
# Write next to the input tables, under the current user's home directory, so
# this cell targets the same checkout the tables were read from.
OTU_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples'

otu_table_merged_day_relative_abund_filtered.to_csv(
    OTU_DIR / 'otu_table_merged_day_relative_abund_filtered.tsv', sep='\t'
)
otu_table_merged_night_relative_abund_filtered.to_csv(
    OTU_DIR / 'otu_table_merged_night_relative_abund_filtered.tsv', sep='\t'
)

Importing the OTU CPM matrices (they are filtered in the next step):

In [9]:
# Locate the repository checkout under the current user's home directory so the
# same cell runs unchanged on every account that keeps it in ~/Repositories.
OTU_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples'

# CPM OTU tables for day and night samples, indexed by 'OTU ID'.
# set_index is chained (no inplace mutation) so re-running the cell is idempotent.
otu_table_merged_day_cpm = pd.read_csv(
    OTU_DIR / 'summed_d_n_otu_day_cpm.tsv', sep='\t'
).set_index('OTU ID')

otu_table_merged_night_cpm = pd.read_csv(
    OTU_DIR / 'summed_d_n_otu_night_cpm.tsv', sep='\t'
).set_index('OTU ID')

OTU matrices are filtered based on relative abundance:

In [10]:
# Restrict the CPM tables to the OTUs retained by the relative-abundance filter.
otu_table_merged_day_cpm_filtered = otu_table_merged_day_cpm.loc[
    otu_table_merged_day_cpm.index.isin(otus_tokeep_day)
]
otu_table_merged_night_cpm_filtered = otu_table_merged_night_cpm.loc[
    otu_table_merged_night_cpm.index.isin(otus_tokeep_night)
]

In [11]:
# Report (rows, columns) of the filtered CPM tables, day first and then night.
for filtered_table in (otu_table_merged_day_cpm_filtered,
                       otu_table_merged_night_cpm_filtered):
    print(filtered_table.shape)

(3681, 176)
(3712, 228)


Importing the OTU count matrix (it is split and filtered in the next steps):

In [None]:
# Locate the repository checkout under the current user's home directory so this
# cell reads from the same checkout as the other OTU tables.
OTU_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples'

# Raw OTU counts (single table covering day and night samples), indexed by 'OTU ID'.
# set_index is chained (no inplace mutation) so re-running the cell is idempotent.
otu_table_merged_counts = pd.read_csv(
    OTU_DIR / 'summed_day_night_otu_counts.tsv', sep='\t'
).set_index('OTU ID')

The OTU count matrix is split into day and night samples, using the sample columns of the filtered CPM matrices:

In [None]:
# Select the count columns that match the columns of the day / night CPM tables.
otu_table_merged_counts_day = otu_table_merged_counts.loc[:, otu_table_merged_day_cpm_filtered.columns]
otu_table_merged_counts_night = otu_table_merged_counts.loc[:, otu_table_merged_night_cpm_filtered.columns]

OTU counts matrices are filtered based on relative abundance:

In [None]:
# Restrict the count tables to the OTUs retained by the relative-abundance filter.
otu_table_merged_counts_day_filtered = otu_table_merged_counts_day.loc[
    otu_table_merged_counts_day.index.isin(otus_tokeep_day)
]
otu_table_merged_counts_night_filtered = otu_table_merged_counts_night.loc[
    otu_table_merged_counts_night.index.isin(otus_tokeep_night)
]

In [None]:
# Report (rows, columns) of the filtered count tables, day first and then night.
for filtered_table in (otu_table_merged_counts_day_filtered,
                       otu_table_merged_counts_night_filtered):
    print(filtered_table.shape)

Exporting filtered count tables for day and night:

In [None]:
# Write next to the input tables, under the current user's home directory, so
# this cell targets the same checkout the tables were read from.
OTU_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples'

otu_table_merged_counts_day_filtered.to_csv(
    OTU_DIR / 'otu_table_merged_counts_day_filtered.tsv', sep='\t'
)
otu_table_merged_counts_night_filtered.to_csv(
    OTU_DIR / 'otu_table_merged_counts_night_filtered.tsv', sep='\t'
)

## Filtering low variance (expression or OTUs)


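[Priya et al 2022](https://www.nature.com/articles/s41564-022-01121-z) used the 25% quantile as the cutoff for gene expression analysis. Here, the coefficient of variation (standard deviation divided by the mean) is computed for each gene or OTU, and only those with a value above the 25% quantile are kept.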

Filtering the Gene CPM, TPM, RPKM, TMM, UQ, CTF and CUF matrices:

In [12]:
import numpy as np

def _row_cv(matrix):
    """Return the coefficient of variation of every row of ``matrix``.

    The CV is the population standard deviation (``ddof=0``, the default of
    ``np.std``) divided by the mean. It is computed for all rows at once rather
    than with a Python-level ``apply`` over each row.
    """
    return matrix.std(axis=1, ddof=0) / matrix.mean(axis=1)


def _drop_low_cv(matrix, cv, q=0.25):
    """Return the rows of ``matrix`` whose CV is strictly above the ``q`` quantile of ``cv``.

    ``cv`` must be indexed like ``matrix`` (as returned by ``_row_cv``). A boolean
    mask is used so rows are never duplicated, even if index labels repeat.
    """
    return matrix.loc[cv > cv.quantile(q=q)]


# TPM
kremling_expression_v5_day_tpm_filtered_cv = _row_cv(kremling_expression_v5_day_tpm_filtered)
kremling_expression_v5_night_tpm_filtered_cv = _row_cv(kremling_expression_v5_night_tpm_filtered)
kremling_expression_v5_night_tpm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_night_tpm_filtered, kremling_expression_v5_night_tpm_filtered_cv)
kremling_expression_v5_day_tpm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_day_tpm_filtered, kremling_expression_v5_day_tpm_filtered_cv)

# CPM
kremling_expression_v5_day_cpm_filtered_cv = _row_cv(kremling_expression_v5_day_cpm_filtered)
kremling_expression_v5_night_cpm_filtered_cv = _row_cv(kremling_expression_v5_night_cpm_filtered)
kremling_expression_v5_night_cpm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_night_cpm_filtered, kremling_expression_v5_night_cpm_filtered_cv)
kremling_expression_v5_day_cpm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_day_cpm_filtered, kremling_expression_v5_day_cpm_filtered_cv)

# RPKM
kremling_expression_v5_day_rpkm_filtered_cv = _row_cv(kremling_expression_v5_day_rpkm_filtered)
kremling_expression_v5_night_rpkm_filtered_cv = _row_cv(kremling_expression_v5_night_rpkm_filtered)
kremling_expression_v5_night_rpkm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_night_rpkm_filtered, kremling_expression_v5_night_rpkm_filtered_cv)
kremling_expression_v5_day_rpkm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_day_rpkm_filtered, kremling_expression_v5_day_rpkm_filtered_cv)

# TMM
kremling_expression_v5_day_tmm_filtered_cv = _row_cv(kremling_expression_v5_day_tmm_filtered)
kremling_expression_v5_night_tmm_filtered_cv = _row_cv(kremling_expression_v5_night_tmm_filtered)
kremling_expression_v5_night_tmm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_night_tmm_filtered, kremling_expression_v5_night_tmm_filtered_cv)
kremling_expression_v5_day_tmm_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_day_tmm_filtered, kremling_expression_v5_day_tmm_filtered_cv)

# UQ
kremling_expression_v5_day_uq_filtered_cv = _row_cv(kremling_expression_v5_day_uq_filtered)
kremling_expression_v5_night_uq_filtered_cv = _row_cv(kremling_expression_v5_night_uq_filtered)
kremling_expression_v5_night_uq_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_night_uq_filtered, kremling_expression_v5_night_uq_filtered_cv)
kremling_expression_v5_day_uq_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_day_uq_filtered, kremling_expression_v5_day_uq_filtered_cv)

# CTF
kremling_expression_v5_day_ctf_filtered_cv = _row_cv(kremling_expression_v5_day_ctf_filtered)
kremling_expression_v5_night_ctf_filtered_cv = _row_cv(kremling_expression_v5_night_ctf_filtered)
kremling_expression_v5_night_ctf_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_night_ctf_filtered, kremling_expression_v5_night_ctf_filtered_cv)
kremling_expression_v5_day_ctf_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_day_ctf_filtered, kremling_expression_v5_day_ctf_filtered_cv)

# CUF
kremling_expression_v5_day_cuf_filtered_cv = _row_cv(kremling_expression_v5_day_cuf_filtered)
kremling_expression_v5_night_cuf_filtered_cv = _row_cv(kremling_expression_v5_night_cuf_filtered)
kremling_expression_v5_night_cuf_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_night_cuf_filtered, kremling_expression_v5_night_cuf_filtered_cv)
kremling_expression_v5_day_cuf_filtered_cv_filtered = _drop_low_cv(kremling_expression_v5_day_cuf_filtered, kremling_expression_v5_day_cuf_filtered_cv)

Filtering the OTU CPM matrices:

In [13]:
import numpy as np

# Coefficient of variation of each row (OTU): population standard deviation
# (ddof=0, the default of np.std) divided by the mean, computed for all rows at
# once rather than with a Python-level apply over each row.
otu_table_merged_day_cpm_filtered_cv = otu_table_merged_day_cpm_filtered.std(axis=1, ddof=0) / otu_table_merged_day_cpm_filtered.mean(axis=1)
otu_table_merged_night_cpm_filtered_cv = otu_table_merged_night_cpm_filtered.std(axis=1, ddof=0) / otu_table_merged_night_cpm_filtered.mean(axis=1)

# Keep the OTUs whose CV is strictly above the 25% quantile of all CVs. The
# boolean mask never duplicates rows, even if index labels repeat.
otu_table_merged_day_cpm_filtered_cv_filtered = otu_table_merged_day_cpm_filtered.loc[
    otu_table_merged_day_cpm_filtered_cv > otu_table_merged_day_cpm_filtered_cv.quantile(q=0.25)
]
otu_table_merged_night_cpm_filtered_cv_filtered = otu_table_merged_night_cpm_filtered.loc[
    otu_table_merged_night_cpm_filtered_cv > otu_table_merged_night_cpm_filtered_cv.quantile(q=0.25)
]

In [14]:
# Report (rows, columns) of the CV-filtered CPM tables, day first and then night.
for cv_filtered_table in (otu_table_merged_day_cpm_filtered_cv_filtered,
                          otu_table_merged_night_cpm_filtered_cv_filtered):
    print(cv_filtered_table.shape)

(2760, 176)
(2784, 228)


Filtering the OTU relative abundance matrices:

In [None]:
# Calculate the coefficient of variation for each row
otu_table_merged_day_relative_abund_filtered_cv = otu_table_merged_day_relative_abund_filtered.apply(lambda row: np.std(row) / np.mean(row), axis=1)
otu_table_merged_night_relative_abund_filtered_cv = otu_table_merged_night_relative_abund_filtered.apply(lambda row: np.std(row) / np.mean(row), axis=1)
otu_table_merged_day_relative_abund_filtered_cv_filtered = otu_table_merged_day_relative_abund_filtered.loc[otu_table_merged_day_relative_abund_filtered_cv[otu_table_merged_day_relative_abund_filtered_cv > otu_table_merged_day_relative_abund_filtered_cv.quantile(q=0.25)].index]
otu_table_merged_night_relative_abund_filtered_cv_filtered = otu_table_merged_night_relative_abund_filtered.loc[otu_table_merged_night_relative_abund_filtered_cv[otu_table_merged_night_relative_abund_filtered_cv > otu_table_merged_night_relative_abund_filtered_cv.quantile(q=0.25)].index]

In [None]:
# Report (rows, columns) of the CV-filtered relative-abundance tables, day first and then night.
for cv_filtered_table in (otu_table_merged_day_relative_abund_filtered_cv_filtered,
                          otu_table_merged_night_relative_abund_filtered_cv_filtered):
    print(cv_filtered_table.shape)

## Exporting all matrices after filtering

The OTU and gene expression matrices, after filtering out lowly expressed/abundant and low-variance features, are exported so that they can be transformed using R libraries. Exporting:

In [None]:
# Write next to the input matrices, under the current user's home directory, so
# this cell targets the same checkout the matrices were read from.
QUANTIFICATION_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/rnaseq_kremling2018/quantification'

# Each matrix is written to '<variable name>.tsv'. Dict order is the export order.
expression_matrices_to_export = {
    # Within-sample normalized data after filtering
    'kremling_expression_v5_day_tpm_filtered_cv_filtered': kremling_expression_v5_day_tpm_filtered_cv_filtered,
    'kremling_expression_v5_night_tpm_filtered_cv_filtered': kremling_expression_v5_night_tpm_filtered_cv_filtered,
    'kremling_expression_v5_day_rpkm_filtered_cv_filtered': kremling_expression_v5_day_rpkm_filtered_cv_filtered,
    'kremling_expression_v5_night_rpkm_filtered_cv_filtered': kremling_expression_v5_night_rpkm_filtered_cv_filtered,
    'kremling_expression_v5_day_cpm_filtered_cv_filtered': kremling_expression_v5_day_cpm_filtered_cv_filtered,
    'kremling_expression_v5_night_cpm_filtered_cv_filtered': kremling_expression_v5_night_cpm_filtered_cv_filtered,
    # Between-sample normalized data after filtering
    'kremling_expression_v5_day_tmm_filtered_cv_filtered': kremling_expression_v5_day_tmm_filtered_cv_filtered,
    'kremling_expression_v5_night_tmm_filtered_cv_filtered': kremling_expression_v5_night_tmm_filtered_cv_filtered,
    'kremling_expression_v5_day_uq_filtered_cv_filtered': kremling_expression_v5_day_uq_filtered_cv_filtered,
    'kremling_expression_v5_night_uq_filtered_cv_filtered': kremling_expression_v5_night_uq_filtered_cv_filtered,
    'kremling_expression_v5_day_ctf_filtered_cv_filtered': kremling_expression_v5_day_ctf_filtered_cv_filtered,
    'kremling_expression_v5_night_ctf_filtered_cv_filtered': kremling_expression_v5_night_ctf_filtered_cv_filtered,
    'kremling_expression_v5_day_cuf_filtered_cv_filtered': kremling_expression_v5_day_cuf_filtered_cv_filtered,
    'kremling_expression_v5_night_cuf_filtered_cv_filtered': kremling_expression_v5_night_cuf_filtered_cv_filtered,
}

for matrix_name, matrix in expression_matrices_to_export.items():
    matrix.to_csv(QUANTIFICATION_DIR / f'{matrix_name}.tsv', sep='\t')

In [None]:
# Exporting OTU data after filtering.
# Write next to the input tables, under the current user's home directory, so
# this cell targets the same checkout the tables were read from.
OTU_DIR = Path.home() / 'Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples'

# Each table is written to '<variable name>.tsv'. Dict order is the export order.
otu_tables_to_export = {
    'otu_table_merged_day_cpm_filtered_cv_filtered': otu_table_merged_day_cpm_filtered_cv_filtered,
    'otu_table_merged_night_cpm_filtered_cv_filtered': otu_table_merged_night_cpm_filtered_cv_filtered,
    'otu_table_merged_day_relative_abund_filtered_cv_filtered': otu_table_merged_day_relative_abund_filtered_cv_filtered,
    'otu_table_merged_night_relative_abund_filtered_cv_filtered': otu_table_merged_night_relative_abund_filtered_cv_filtered,
}

for table_name, table in otu_tables_to_export.items():
    table.to_csv(OTU_DIR / f'{table_name}.tsv', sep='\t')

The exported OTU matrices (raw counts, CPM, and relative abundance), after filtering out low-abundance and low-variance OTUs, were transformed using different methods (VST, rlog, and asinh).

Importing matrices.

### asinh

In [None]:
import pandas as pd

# asinh-transformed OTU CPM tables (per the file names), indexed by the 'OTU ID' column.
# On the other workstation the repository lives under '/home/rsantos/' instead of
# '/home/santosrac/'; adjust the two paths below accordingly.
otu_table_merged_day_cpm_asinh = pd.read_csv(
    '/home/santosrac/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_d_n_otu_day_cpm_asinh.tsv',
    sep='\t',
).set_index('OTU ID')

otu_table_merged_night_cpm_asinh = pd.read_csv(
    '/home/santosrac/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_d_n_otu_night_cpm_asinh.tsv',
    sep='\t',
).set_index('OTU ID')

### VST

### rlog

The exported expression matrices (RPKM, TPM, CPM, TMM, UQ, CTF, and CUF), after removing lowly expressed and low-variance genes, were transformed using different methods (VST, rlog, and asinh).

Importing the matrices after filtering and transformations.

### asinh

In [None]:
# Directory holding the asinh-transformed expression matrices.
# NOTE: other cells in this notebook read from the '/home/santosrac/...' checkout;
# change this single prefix when running on that machine.
kremling_quantification_dir = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/rnaseq_kremling2018/quantification'


def _read_asinh_expression(period, normalization):
    """Load one asinh-transformed expression matrix indexed by its 'Name' column.

    Parameters
    ----------
    period : str
        Sampling period tag used in the file name ('day' or 'night').
    normalization : str
        Normalization tag used in the file name ('rpkm', 'tpm', 'cpm', 'tmm',
        'uq', 'ctf' or 'cuf').

    Returns
    -------
    pandas.DataFrame
        Contents of
        '<kremling_quantification_dir>/kremling_expression_v5_<period>_<normalization>_asinh.tsv'
        with the 'Name' column set as the index.
    """
    matrix_path = f'{kremling_quantification_dir}/kremling_expression_v5_{period}_{normalization}_asinh.tsv'
    # Chained set_index (instead of inplace=True) returns a fresh frame on every call.
    return pd.read_csv(matrix_path, sep='\t').set_index('Name')


#RPKM + asinh
kremling_expression_v5_day_rpkm_asinh = _read_asinh_expression('day', 'rpkm')
kremling_expression_v5_night_rpkm_asinh = _read_asinh_expression('night', 'rpkm')

#TPM + asinh
kremling_expression_v5_day_tpm_asinh = _read_asinh_expression('day', 'tpm')
kremling_expression_v5_night_tpm_asinh = _read_asinh_expression('night', 'tpm')

#CPM + asinh
kremling_expression_v5_day_cpm_asinh = _read_asinh_expression('day', 'cpm')
kremling_expression_v5_night_cpm_asinh = _read_asinh_expression('night', 'cpm')

#TMM + asinh
kremling_expression_v5_day_tmm_asinh = _read_asinh_expression('day', 'tmm')
kremling_expression_v5_night_tmm_asinh = _read_asinh_expression('night', 'tmm')

#UQ + asinh
kremling_expression_v5_day_uq_asinh = _read_asinh_expression('day', 'uq')
kremling_expression_v5_night_uq_asinh = _read_asinh_expression('night', 'uq')

#CTF + asinh
kremling_expression_v5_day_ctf_asinh = _read_asinh_expression('day', 'ctf')
kremling_expression_v5_night_ctf_asinh = _read_asinh_expression('night', 'ctf')

#CUF + asinh
kremling_expression_v5_day_cuf_asinh = _read_asinh_expression('day', 'cuf')
kremling_expression_v5_night_cuf_asinh = _read_asinh_expression('night', 'cuf')

### VST

### rlog

## Correlations - gene (no asinh) versus OTU (CPM)

In [15]:
# Limit external libraries to a single thread through the corals helper.
# NOTE(review): this call sits before the numpy import, presumably on purpose so the
# setting is in effect when numpy loads - confirm against the corals documentation
# before reordering these lines.
from corals.threads import set_threads_for_external_libraries
set_threads_for_external_libraries(n_threads=1)
import numpy as np
# cor_full is the correlation routine called by the correlation cells below.
from corals.correlation.full.default import cor_full



### OTU (CPM) - Gene (CPM)

In [16]:
from corals.correlation.utils import derive_pvalues, multiple_test_correction

# Stack the gene matrix on top of the OTU matrix (genes first), then transpose so
# that features become columns before computing the correlation matrix.
concat_df_night = pd.concat(
    [kremling_expression_v5_night_cpm_filtered_cv_filtered,
     otu_table_merged_night_cpm_filtered_cv_filtered],
    axis=0,
)
concat_df_day = pd.concat(
    [kremling_expression_v5_day_cpm_filtered_cv_filtered,
     otu_table_merged_day_cpm_filtered_cv_filtered],
    axis=0,
)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Calculating pvalues and corrected pvalues
cor_threshold = 0.6
cor_pvalue_threshold = 0.05

n_samples_day, n_features_day = concatenated_transposed_day.shape
n_samples_night, n_features_night = concatenated_transposed_night.shape

day_pvalues = derive_pvalues(cor_values_day, n_samples_day)
day_pvalues_corrected = multiple_test_correction(day_pvalues, n_features_day, method="bonferroni")
night_pvalues = derive_pvalues(cor_values_night, n_samples_night)
night_pvalues_corrected = multiple_test_correction(night_pvalues, n_features_night, method="bonferroni")

# (row, column) positions passing each criterion on its own; note that only
# coefficients above +cor_threshold are selected.
true_positions_day_cor = np.where(cor_values_day > cor_threshold)
true_positions_night_cor = np.where(cor_values_night > cor_threshold)

true_positions_day_pvalue = np.where(day_pvalues_corrected < cor_pvalue_threshold)
true_positions_night_pvalue = np.where(night_pvalues_corrected < cor_pvalue_threshold)

# Filtering correlations based on coef. and corrected pvalues
cor_tuples_day = list(zip(true_positions_day_cor[0], true_positions_day_cor[1]))
pval_tuples_day = list(zip(true_positions_day_pvalue[0], true_positions_day_pvalue[1]))

cor_tuples_night = list(zip(true_positions_night_cor[0], true_positions_night_cor[1]))
pval_tuples_night = list(zip(true_positions_night_pvalue[0], true_positions_night_pvalue[1]))

cor_tuples_day_set = set(cor_tuples_day)
pval_tuples_day_set = set(pval_tuples_day)
cor_tuples_night_set = set(cor_tuples_night)
pval_tuples_night_set = set(pval_tuples_night)

# Positions that satisfy both the coefficient and the corrected p-value criteria.
cor_pval_intersection_day = cor_tuples_day_set & pval_tuples_day_set
cor_pval_intersection_night = cor_tuples_night_set & pval_tuples_night_set

cor_pval_intersection_day_tuple = (
    [position[0] for position in cor_pval_intersection_day],
    [position[1] for position in cor_pval_intersection_day],
)
cor_pval_intersection_night_tuple = (
    [position[0] for position in cor_pval_intersection_night],
    [position[1] for position in cor_pval_intersection_night],
)

# Number of gene rows: features at positions >= this value are OTUs.
shape_row_day = kremling_expression_v5_day_cpm_filtered_cv_filtered.shape[0]
shape_row_night = kremling_expression_v5_night_cpm_filtered_cv_filtered.shape[0]

pairs_day_genecpm_otucpm = []
pairs_night_genecpm_otucpm = []

# Keep only entries whose row is a gene and whose column is an OTU; each record is
# (column label, row label, coefficient, corrected p-value).
for row_pos, col_pos in zip(*cor_pval_intersection_day_tuple):
    if row_pos < shape_row_day and col_pos >= shape_row_day:
        pairs_day_genecpm_otucpm.append((
            str(cor_values_day.columns[col_pos]),
            str(cor_values_day.index[row_pos]),
            cor_values_day.iloc[row_pos, col_pos],
            day_pvalues_corrected[row_pos, col_pos],
        ))

for row_pos, col_pos in zip(*cor_pval_intersection_night_tuple):
    if row_pos < shape_row_night and col_pos >= shape_row_night:
        pairs_night_genecpm_otucpm.append((
            str(cor_values_night.columns[col_pos]),
            str(cor_values_night.index[row_pos]),
            cor_values_night.iloc[row_pos, col_pos],
            night_pvalues_corrected[row_pos, col_pos],
        ))

  ts = rf * rf * (df / (1 - rf * rf))


In [73]:
# Number of retained gene-OTU pairs: day first, then night.
print(len(pairs_day_genecpm_otucpm), len(pairs_night_genecpm_otucpm), sep='\n')

599
110


### OTU (CPM) - Gene (TPM)

In [74]:
from corals.correlation.utils import derive_pvalues, multiple_test_correction

# Stack the TPM gene matrix on top of the OTU CPM matrix (genes first), then
# transpose so that features become columns before computing the correlation matrix.
concat_df_night = pd.concat(
    [kremling_expression_v5_night_tpm_filtered_cv_filtered,
     otu_table_merged_night_cpm_filtered_cv_filtered],
    axis=0,
)
concat_df_day = pd.concat(
    [kremling_expression_v5_day_tpm_filtered_cv_filtered,
     otu_table_merged_day_cpm_filtered_cv_filtered],
    axis=0,
)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Calculating pvalues and corrected pvalues
cor_threshold = 0.6
cor_pvalue_threshold = 0.05

n_samples_day, n_features_day = concatenated_transposed_day.shape
n_samples_night, n_features_night = concatenated_transposed_night.shape

day_pvalues = derive_pvalues(cor_values_day, n_samples_day)
day_pvalues_corrected = multiple_test_correction(day_pvalues, n_features_day, method="bonferroni")
night_pvalues = derive_pvalues(cor_values_night, n_samples_night)
night_pvalues_corrected = multiple_test_correction(night_pvalues, n_features_night, method="bonferroni")

# (row, column) positions passing each criterion on its own; note that only
# coefficients above +cor_threshold are selected.
true_positions_day_cor = np.where(cor_values_day > cor_threshold)
true_positions_night_cor = np.where(cor_values_night > cor_threshold)

true_positions_day_pvalue = np.where(day_pvalues_corrected < cor_pvalue_threshold)
true_positions_night_pvalue = np.where(night_pvalues_corrected < cor_pvalue_threshold)

# Filtering correlations based on coef. and corrected pvalues
cor_tuples_day = list(zip(true_positions_day_cor[0], true_positions_day_cor[1]))
pval_tuples_day = list(zip(true_positions_day_pvalue[0], true_positions_day_pvalue[1]))

cor_tuples_night = list(zip(true_positions_night_cor[0], true_positions_night_cor[1]))
pval_tuples_night = list(zip(true_positions_night_pvalue[0], true_positions_night_pvalue[1]))

cor_tuples_day_set = set(cor_tuples_day)
pval_tuples_day_set = set(pval_tuples_day)
cor_tuples_night_set = set(cor_tuples_night)
pval_tuples_night_set = set(pval_tuples_night)

# Positions that satisfy both the coefficient and the corrected p-value criteria.
cor_pval_intersection_day = cor_tuples_day_set & pval_tuples_day_set
cor_pval_intersection_night = cor_tuples_night_set & pval_tuples_night_set

cor_pval_intersection_day_tuple = (
    [position[0] for position in cor_pval_intersection_day],
    [position[1] for position in cor_pval_intersection_day],
)
cor_pval_intersection_night_tuple = (
    [position[0] for position in cor_pval_intersection_night],
    [position[1] for position in cor_pval_intersection_night],
)

# Number of gene rows in the matrix concatenated in THIS cell (TPM): features at
# positions >= this value are OTUs. This was previously read from the CPM matrix,
# which misplaces the gene/OTU boundary whenever the two matrices keep a different
# number of genes after filtering.
shape_row_day = kremling_expression_v5_day_tpm_filtered_cv_filtered.shape[0]
shape_row_night = kremling_expression_v5_night_tpm_filtered_cv_filtered.shape[0]

pairs_day_genetpm_otucpm = []
pairs_night_genetpm_otucpm = []

# Keep only entries whose row is a gene and whose column is an OTU; each record is
# (column label, row label, coefficient, corrected p-value).
for row_pos, col_pos in zip(*cor_pval_intersection_day_tuple):
    if row_pos < shape_row_day and col_pos >= shape_row_day:
        pairs_day_genetpm_otucpm.append((
            str(cor_values_day.columns[col_pos]),
            str(cor_values_day.index[row_pos]),
            cor_values_day.iloc[row_pos, col_pos],
            day_pvalues_corrected[row_pos, col_pos],
        ))

for row_pos, col_pos in zip(*cor_pval_intersection_night_tuple):
    if row_pos < shape_row_night and col_pos >= shape_row_night:
        pairs_night_genetpm_otucpm.append((
            str(cor_values_night.columns[col_pos]),
            str(cor_values_night.index[row_pos]),
            cor_values_night.iloc[row_pos, col_pos],
            night_pvalues_corrected[row_pos, col_pos],
        ))


  ts = rf * rf * (df / (1 - rf * rf))


In [75]:
# Number of retained gene-OTU pairs: day first, then night.
print(len(pairs_day_genetpm_otucpm), len(pairs_night_genetpm_otucpm), sep='\n')

552
112


### OTU (CPM) - Gene (TMM)

In [76]:
from corals.correlation.utils import derive_pvalues, multiple_test_correction

# Stack the TMM gene matrix on top of the OTU CPM matrix (genes first), then
# transpose so that features become columns before computing the correlation matrix.
concat_df_night = pd.concat(
    [kremling_expression_v5_night_tmm_filtered_cv_filtered,
     otu_table_merged_night_cpm_filtered_cv_filtered],
    axis=0,
)
concat_df_day = pd.concat(
    [kremling_expression_v5_day_tmm_filtered_cv_filtered,
     otu_table_merged_day_cpm_filtered_cv_filtered],
    axis=0,
)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Calculating pvalues and corrected pvalues
cor_threshold = 0.6
cor_pvalue_threshold = 0.05

n_samples_day, n_features_day = concatenated_transposed_day.shape
n_samples_night, n_features_night = concatenated_transposed_night.shape

day_pvalues = derive_pvalues(cor_values_day, n_samples_day)
day_pvalues_corrected = multiple_test_correction(day_pvalues, n_features_day, method="bonferroni")
night_pvalues = derive_pvalues(cor_values_night, n_samples_night)
night_pvalues_corrected = multiple_test_correction(night_pvalues, n_features_night, method="bonferroni")

# (row, column) positions passing each criterion on its own; note that only
# coefficients above +cor_threshold are selected.
true_positions_day_cor = np.where(cor_values_day > cor_threshold)
true_positions_night_cor = np.where(cor_values_night > cor_threshold)

true_positions_day_pvalue = np.where(day_pvalues_corrected < cor_pvalue_threshold)
true_positions_night_pvalue = np.where(night_pvalues_corrected < cor_pvalue_threshold)

# Filtering correlations based on coef. and corrected pvalues
cor_tuples_day = list(zip(true_positions_day_cor[0], true_positions_day_cor[1]))
pval_tuples_day = list(zip(true_positions_day_pvalue[0], true_positions_day_pvalue[1]))

cor_tuples_night = list(zip(true_positions_night_cor[0], true_positions_night_cor[1]))
pval_tuples_night = list(zip(true_positions_night_pvalue[0], true_positions_night_pvalue[1]))

cor_tuples_day_set = set(cor_tuples_day)
pval_tuples_day_set = set(pval_tuples_day)
cor_tuples_night_set = set(cor_tuples_night)
pval_tuples_night_set = set(pval_tuples_night)

# Positions that satisfy both the coefficient and the corrected p-value criteria.
cor_pval_intersection_day = cor_tuples_day_set & pval_tuples_day_set
cor_pval_intersection_night = cor_tuples_night_set & pval_tuples_night_set

cor_pval_intersection_day_tuple = (
    [position[0] for position in cor_pval_intersection_day],
    [position[1] for position in cor_pval_intersection_day],
)
cor_pval_intersection_night_tuple = (
    [position[0] for position in cor_pval_intersection_night],
    [position[1] for position in cor_pval_intersection_night],
)

# Number of gene rows in the matrix concatenated in THIS cell (TMM): features at
# positions >= this value are OTUs. This was previously read from the CPM matrix,
# which misplaces the gene/OTU boundary whenever the two matrices keep a different
# number of genes after filtering.
shape_row_day = kremling_expression_v5_day_tmm_filtered_cv_filtered.shape[0]
shape_row_night = kremling_expression_v5_night_tmm_filtered_cv_filtered.shape[0]

pairs_day_genetmm_otucpm = []
pairs_night_genetmm_otucpm = []

# Keep only entries whose row is a gene and whose column is an OTU; each record is
# (column label, row label, coefficient, corrected p-value).
for row_pos, col_pos in zip(*cor_pval_intersection_day_tuple):
    if row_pos < shape_row_day and col_pos >= shape_row_day:
        pairs_day_genetmm_otucpm.append((
            str(cor_values_day.columns[col_pos]),
            str(cor_values_day.index[row_pos]),
            cor_values_day.iloc[row_pos, col_pos],
            day_pvalues_corrected[row_pos, col_pos],
        ))

for row_pos, col_pos in zip(*cor_pval_intersection_night_tuple):
    if row_pos < shape_row_night and col_pos >= shape_row_night:
        pairs_night_genetmm_otucpm.append((
            str(cor_values_night.columns[col_pos]),
            str(cor_values_night.index[row_pos]),
            cor_values_night.iloc[row_pos, col_pos],
            night_pvalues_corrected[row_pos, col_pos],
        ))

  ts = rf * rf * (df / (1 - rf * rf))


In [77]:
# Number of retained gene-OTU pairs: day first, then night.
print(len(pairs_day_genetmm_otucpm), len(pairs_night_genetmm_otucpm), sep='\n')

620
92


### OTU (CPM) - Gene (RPKM)

In [78]:
from corals.correlation.utils import derive_pvalues, multiple_test_correction

# Stack the RPKM gene matrix on top of the OTU CPM matrix (genes first), then
# transpose so that features become columns before computing the correlation matrix.
concat_df_night = pd.concat(
    [kremling_expression_v5_night_rpkm_filtered_cv_filtered,
     otu_table_merged_night_cpm_filtered_cv_filtered],
    axis=0,
)
concat_df_day = pd.concat(
    [kremling_expression_v5_day_rpkm_filtered_cv_filtered,
     otu_table_merged_day_cpm_filtered_cv_filtered],
    axis=0,
)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Calculating pvalues and corrected pvalues
cor_threshold = 0.6
cor_pvalue_threshold = 0.05

n_samples_day, n_features_day = concatenated_transposed_day.shape
n_samples_night, n_features_night = concatenated_transposed_night.shape

day_pvalues = derive_pvalues(cor_values_day, n_samples_day)
day_pvalues_corrected = multiple_test_correction(day_pvalues, n_features_day, method="bonferroni")
night_pvalues = derive_pvalues(cor_values_night, n_samples_night)
night_pvalues_corrected = multiple_test_correction(night_pvalues, n_features_night, method="bonferroni")

# (row, column) positions passing each criterion on its own; note that only
# coefficients above +cor_threshold are selected.
true_positions_day_cor = np.where(cor_values_day > cor_threshold)
true_positions_night_cor = np.where(cor_values_night > cor_threshold)

true_positions_day_pvalue = np.where(day_pvalues_corrected < cor_pvalue_threshold)
true_positions_night_pvalue = np.where(night_pvalues_corrected < cor_pvalue_threshold)

# Filtering correlations based on coef. and corrected pvalues
cor_tuples_day = list(zip(true_positions_day_cor[0], true_positions_day_cor[1]))
pval_tuples_day = list(zip(true_positions_day_pvalue[0], true_positions_day_pvalue[1]))

cor_tuples_night = list(zip(true_positions_night_cor[0], true_positions_night_cor[1]))
pval_tuples_night = list(zip(true_positions_night_pvalue[0], true_positions_night_pvalue[1]))

cor_tuples_day_set = set(cor_tuples_day)
pval_tuples_day_set = set(pval_tuples_day)
cor_tuples_night_set = set(cor_tuples_night)
pval_tuples_night_set = set(pval_tuples_night)

# Positions that satisfy both the coefficient and the corrected p-value criteria.
cor_pval_intersection_day = cor_tuples_day_set & pval_tuples_day_set
cor_pval_intersection_night = cor_tuples_night_set & pval_tuples_night_set

cor_pval_intersection_day_tuple = (
    [position[0] for position in cor_pval_intersection_day],
    [position[1] for position in cor_pval_intersection_day],
)
cor_pval_intersection_night_tuple = (
    [position[0] for position in cor_pval_intersection_night],
    [position[1] for position in cor_pval_intersection_night],
)

# Number of gene rows in the matrix concatenated in THIS cell (RPKM): features at
# positions >= this value are OTUs. This was previously read from the CPM matrix,
# which misplaces the gene/OTU boundary whenever the two matrices keep a different
# number of genes after filtering.
shape_row_day = kremling_expression_v5_day_rpkm_filtered_cv_filtered.shape[0]
shape_row_night = kremling_expression_v5_night_rpkm_filtered_cv_filtered.shape[0]

pairs_day_generpkm_otucpm = []
pairs_night_generpkm_otucpm = []

# Keep only entries whose row is a gene and whose column is an OTU; each record is
# (column label, row label, coefficient, corrected p-value).
for row_pos, col_pos in zip(*cor_pval_intersection_day_tuple):
    if row_pos < shape_row_day and col_pos >= shape_row_day:
        pairs_day_generpkm_otucpm.append((
            str(cor_values_day.columns[col_pos]),
            str(cor_values_day.index[row_pos]),
            cor_values_day.iloc[row_pos, col_pos],
            day_pvalues_corrected[row_pos, col_pos],
        ))

for row_pos, col_pos in zip(*cor_pval_intersection_night_tuple):
    if row_pos < shape_row_night and col_pos >= shape_row_night:
        pairs_night_generpkm_otucpm.append((
            str(cor_values_night.columns[col_pos]),
            str(cor_values_night.index[row_pos]),
            cor_values_night.iloc[row_pos, col_pos],
            night_pvalues_corrected[row_pos, col_pos],
        ))

  ts = rf * rf * (df / (1 - rf * rf))


In [85]:
# Number of retained gene-OTU pairs: day first, then night.
print(len(pairs_day_generpkm_otucpm), len(pairs_night_generpkm_otucpm), sep='\n')

599
110


### OTU (CPM) - Gene (UQ)

In [79]:
from corals.correlation.utils import derive_pvalues, multiple_test_correction

# Stack the UQ gene matrix on top of the OTU CPM matrix (genes first), then
# transpose so that features become columns before computing the correlation matrix.
concat_df_night = pd.concat(
    [kremling_expression_v5_night_uq_filtered_cv_filtered,
     otu_table_merged_night_cpm_filtered_cv_filtered],
    axis=0,
)
concat_df_day = pd.concat(
    [kremling_expression_v5_day_uq_filtered_cv_filtered,
     otu_table_merged_day_cpm_filtered_cv_filtered],
    axis=0,
)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Calculating pvalues and corrected pvalues
cor_threshold = 0.6
cor_pvalue_threshold = 0.05

n_samples_day, n_features_day = concatenated_transposed_day.shape
n_samples_night, n_features_night = concatenated_transposed_night.shape

day_pvalues = derive_pvalues(cor_values_day, n_samples_day)
day_pvalues_corrected = multiple_test_correction(day_pvalues, n_features_day, method="bonferroni")
night_pvalues = derive_pvalues(cor_values_night, n_samples_night)
night_pvalues_corrected = multiple_test_correction(night_pvalues, n_features_night, method="bonferroni")

# (row, column) positions passing each criterion on its own; note that only
# coefficients above +cor_threshold are selected.
true_positions_day_cor = np.where(cor_values_day > cor_threshold)
true_positions_night_cor = np.where(cor_values_night > cor_threshold)

true_positions_day_pvalue = np.where(day_pvalues_corrected < cor_pvalue_threshold)
true_positions_night_pvalue = np.where(night_pvalues_corrected < cor_pvalue_threshold)

# Filtering correlations based on coef. and corrected pvalues
cor_tuples_day = list(zip(true_positions_day_cor[0], true_positions_day_cor[1]))
pval_tuples_day = list(zip(true_positions_day_pvalue[0], true_positions_day_pvalue[1]))

cor_tuples_night = list(zip(true_positions_night_cor[0], true_positions_night_cor[1]))
pval_tuples_night = list(zip(true_positions_night_pvalue[0], true_positions_night_pvalue[1]))

cor_tuples_day_set = set(cor_tuples_day)
pval_tuples_day_set = set(pval_tuples_day)
cor_tuples_night_set = set(cor_tuples_night)
pval_tuples_night_set = set(pval_tuples_night)

# Positions that satisfy both the coefficient and the corrected p-value criteria.
cor_pval_intersection_day = cor_tuples_day_set & pval_tuples_day_set
cor_pval_intersection_night = cor_tuples_night_set & pval_tuples_night_set

cor_pval_intersection_day_tuple = (
    [position[0] for position in cor_pval_intersection_day],
    [position[1] for position in cor_pval_intersection_day],
)
cor_pval_intersection_night_tuple = (
    [position[0] for position in cor_pval_intersection_night],
    [position[1] for position in cor_pval_intersection_night],
)

# Number of gene rows in the matrix concatenated in THIS cell (UQ): features at
# positions >= this value are OTUs. This was previously read from the CPM matrix,
# which misplaces the gene/OTU boundary whenever the two matrices keep a different
# number of genes after filtering.
shape_row_day = kremling_expression_v5_day_uq_filtered_cv_filtered.shape[0]
shape_row_night = kremling_expression_v5_night_uq_filtered_cv_filtered.shape[0]

pairs_day_geneuq_otucpm = []
pairs_night_geneuq_otucpm = []

# Keep only entries whose row is a gene and whose column is an OTU; each record is
# (column label, row label, coefficient, corrected p-value).
for row_pos, col_pos in zip(*cor_pval_intersection_day_tuple):
    if row_pos < shape_row_day and col_pos >= shape_row_day:
        pairs_day_geneuq_otucpm.append((
            str(cor_values_day.columns[col_pos]),
            str(cor_values_day.index[row_pos]),
            cor_values_day.iloc[row_pos, col_pos],
            day_pvalues_corrected[row_pos, col_pos],
        ))

for row_pos, col_pos in zip(*cor_pval_intersection_night_tuple):
    if row_pos < shape_row_night and col_pos >= shape_row_night:
        pairs_night_geneuq_otucpm.append((
            str(cor_values_night.columns[col_pos]),
            str(cor_values_night.index[row_pos]),
            cor_values_night.iloc[row_pos, col_pos],
            night_pvalues_corrected[row_pos, col_pos],
        ))

  ts = rf * rf * (df / (1 - rf * rf))


In [80]:
# Number of retained gene-OTU pairs: day first, then night.
print(len(pairs_day_geneuq_otucpm), len(pairs_night_geneuq_otucpm), sep='\n')

1931
95


### OTU (CPM) - Gene (CTF)

In [81]:
# OTU (CPM) versus gene (CTF): collect gene-OTU pairs whose correlation is
# above `cor_threshold` and whose corrected p-value is below `cor_pvalue_threshold`.
# Genes are stacked above OTUs, so in the transposed (samples x features) table
# the first `shape_row_*` features are genes and the remaining ones are OTUs.
concat_df_night = pd.concat([kremling_expression_v5_night_ctf_filtered_cv_filtered, otu_table_merged_night_cpm_filtered_cv_filtered], axis=0)
concat_df_day = pd.concat([kremling_expression_v5_day_ctf_filtered_cv_filtered, otu_table_merged_day_cpm_filtered_cv_filtered], axis=0)

concatenated_transposed_day = concat_df_day.transpose()
concatenated_transposed_night = concat_df_night.transpose()

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Calculating pvalues and corrected pvalues
from corals.correlation.utils import derive_pvalues, multiple_test_correction

cor_threshold = 0.6
cor_pvalue_threshold = 0.05

n_samples_day, n_features_day = concatenated_transposed_day.shape
n_samples_night, n_features_night = concatenated_transposed_night.shape

day_pvalues = derive_pvalues(cor_values_day, n_samples_day)
day_pvalues_corrected = multiple_test_correction(day_pvalues, n_features_day, method="bonferroni")
night_pvalues = derive_pvalues(cor_values_night, n_samples_night)
night_pvalues_corrected = multiple_test_correction(night_pvalues, n_features_night, method="bonferroni")

# (row indices, column indices) of the cells that pass each filter
true_positions_day_cor = np.where(cor_values_day > cor_threshold)
true_positions_night_cor = np.where(cor_values_night > cor_threshold)

true_positions_day_pvalue = np.where(day_pvalues_corrected < cor_pvalue_threshold)
true_positions_night_pvalue = np.where(night_pvalues_corrected < cor_pvalue_threshold)

# Filtering correlations based on coef. and corrected pvalues:
# turn the parallel index arrays into (row, col) tuples so they can be intersected
cor_tuples_day = list(zip(*true_positions_day_cor))
pval_tuples_day = list(zip(*true_positions_day_pvalue))
cor_tuples_night = list(zip(*true_positions_night_cor))
pval_tuples_night = list(zip(*true_positions_night_pvalue))

cor_tuples_day_set = set(cor_tuples_day)
pval_tuples_day_set = set(pval_tuples_day)
cor_tuples_night_set = set(cor_tuples_night)
pval_tuples_night_set = set(pval_tuples_night)

cor_pval_intersection_day = cor_tuples_day_set & pval_tuples_day_set
cor_pval_intersection_night = cor_tuples_night_set & pval_tuples_night_set

# Same positions, split back into parallel (rows, cols) lists
cor_pval_intersection_day_tuple = ([t[0] for t in cor_pval_intersection_day],
                                   [t[1] for t in cor_pval_intersection_day])
cor_pval_intersection_night_tuple = ([t[0] for t in cor_pval_intersection_night],
                                     [t[1] for t in cor_pval_intersection_night])

# The gene/OTU boundary must come from the CTF tables that were actually
# concatenated above (the CPM tables may hold a different number of genes).
shape_row_day = kremling_expression_v5_day_ctf_filtered_cv_filtered.shape[0]
shape_row_night = kremling_expression_v5_night_ctf_filtered_cv_filtered.shape[0]

# Keep only gene (row) x OTU (column) cells; each entry is
# (OTU, gene, correlation, corrected p-value)
pairs_day_genectf_otucpm = [
    (str(cor_values_day.columns[col]),
     str(cor_values_day.index[row]),
     cor_values_day.iloc[row, col],
     day_pvalues_corrected[row, col])
    for row, col in zip(*cor_pval_intersection_day_tuple)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_genectf_otucpm = [
    (str(cor_values_night.columns[col]),
     str(cor_values_night.index[row]),
     cor_values_night.iloc[row, col],
     night_pvalues_corrected[row, col])
    for row, col in zip(*cor_pval_intersection_night_tuple)
    if col >= shape_row_night and row < shape_row_night
]

  ts = rf * rf * (df / (1 - rf * rf))


In [82]:
# Number of gene (CTF) - OTU (CPM) pairs retained: day first, then night
print(len(pairs_day_genectf_otucpm), len(pairs_night_genectf_otucpm), sep='\n')

1045
170


### OTU (CPM) - Gene (CUF)

In [83]:
# OTU (CPM) versus gene (CUF): collect gene-OTU pairs whose correlation is
# above `cor_threshold` and whose corrected p-value is below `cor_pvalue_threshold`.
# Genes are stacked above OTUs, so in the transposed (samples x features) table
# the first `shape_row_*` features are genes and the remaining ones are OTUs.
concat_df_night = pd.concat([kremling_expression_v5_night_cuf_filtered_cv_filtered, otu_table_merged_night_cpm_filtered_cv_filtered], axis=0)
concat_df_day = pd.concat([kremling_expression_v5_day_cuf_filtered_cv_filtered, otu_table_merged_day_cpm_filtered_cv_filtered], axis=0)

concatenated_transposed_day = concat_df_day.transpose()
concatenated_transposed_night = concat_df_night.transpose()

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Calculating pvalues and corrected pvalues
from corals.correlation.utils import derive_pvalues, multiple_test_correction

cor_threshold = 0.6
cor_pvalue_threshold = 0.05

n_samples_day, n_features_day = concatenated_transposed_day.shape
n_samples_night, n_features_night = concatenated_transposed_night.shape

day_pvalues = derive_pvalues(cor_values_day, n_samples_day)
day_pvalues_corrected = multiple_test_correction(day_pvalues, n_features_day, method="bonferroni")
night_pvalues = derive_pvalues(cor_values_night, n_samples_night)
night_pvalues_corrected = multiple_test_correction(night_pvalues, n_features_night, method="bonferroni")

# (row indices, column indices) of the cells that pass each filter
true_positions_day_cor = np.where(cor_values_day > cor_threshold)
true_positions_night_cor = np.where(cor_values_night > cor_threshold)

true_positions_day_pvalue = np.where(day_pvalues_corrected < cor_pvalue_threshold)
true_positions_night_pvalue = np.where(night_pvalues_corrected < cor_pvalue_threshold)

# Filtering correlations based on coef. and corrected pvalues:
# turn the parallel index arrays into (row, col) tuples so they can be intersected
cor_tuples_day = list(zip(*true_positions_day_cor))
pval_tuples_day = list(zip(*true_positions_day_pvalue))
cor_tuples_night = list(zip(*true_positions_night_cor))
pval_tuples_night = list(zip(*true_positions_night_pvalue))

cor_tuples_day_set = set(cor_tuples_day)
pval_tuples_day_set = set(pval_tuples_day)
cor_tuples_night_set = set(cor_tuples_night)
pval_tuples_night_set = set(pval_tuples_night)

cor_pval_intersection_day = cor_tuples_day_set & pval_tuples_day_set
cor_pval_intersection_night = cor_tuples_night_set & pval_tuples_night_set

# Same positions, split back into parallel (rows, cols) lists
cor_pval_intersection_day_tuple = ([t[0] for t in cor_pval_intersection_day],
                                   [t[1] for t in cor_pval_intersection_day])
cor_pval_intersection_night_tuple = ([t[0] for t in cor_pval_intersection_night],
                                     [t[1] for t in cor_pval_intersection_night])

# The gene/OTU boundary must come from the CUF tables that were actually
# concatenated above (the CPM tables may hold a different number of genes).
shape_row_day = kremling_expression_v5_day_cuf_filtered_cv_filtered.shape[0]
shape_row_night = kremling_expression_v5_night_cuf_filtered_cv_filtered.shape[0]

# Keep only gene (row) x OTU (column) cells; each entry is
# (OTU, gene, correlation, corrected p-value)
pairs_day_genecuf_otucpm = [
    (str(cor_values_day.columns[col]),
     str(cor_values_day.index[row]),
     cor_values_day.iloc[row, col],
     day_pvalues_corrected[row, col])
    for row, col in zip(*cor_pval_intersection_day_tuple)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_genecuf_otucpm = [
    (str(cor_values_night.columns[col]),
     str(cor_values_night.index[row]),
     cor_values_night.iloc[row, col],
     night_pvalues_corrected[row, col])
    for row, col in zip(*cor_pval_intersection_night_tuple)
    if col >= shape_row_night and row < shape_row_night
]

  ts = rf * rf * (df / (1 - rf * rf))


In [84]:
# Number of gene (CUF) - OTU (CPM) pairs retained: day first, then night
print(len(pairs_day_genecuf_otucpm), len(pairs_night_genecuf_otucpm), sep='\n')

928
158


In [None]:
import upsetplot

from upsetplot import from_contents

In [None]:
from upsetplot import UpSet

# Pair lists produced with each gene-expression normalisation (day samples)
day_pairs_by_method = {
    "day gene (cpm) otu (cpm)": pairs_day_genecpm_otucpm,
    "day gene (tpm) otu (cpm)": pairs_day_genetpm_otucpm,
    "day gene (tmm) otu (cpm)": pairs_day_genetmm_otucpm,
    "day gene (rpkm) otu (cpm)": pairs_day_generpkm_otucpm,
    "day gene (uq) otu (cpm)": pairs_day_geneuq_otucpm,
    "day gene (ctf) otu (cpm)": pairs_day_genectf_otucpm,
    "day gene (cuf) otu (cpm)": pairs_day_genecuf_otucpm,
}

# Compare methods on the (OTU, gene) identifiers only. The stored tuples also
# carry the correlation and corrected p-value, which differ between
# normalisations, so whole tuples would never be shared between methods.
correlations_day = from_contents(
    {label: sorted({pair[:2] for pair in pairs})
     for label, pairs in day_pairs_by_method.items()}
)

ax_dict = UpSet(correlations_day, subset_size="count").plot()

In [None]:
from upsetplot import UpSet

# Pair lists produced with each gene-expression normalisation (night samples)
night_pairs_by_method = {
    "night gene (cpm) otu (cpm)": pairs_night_genecpm_otucpm,
    "night gene (tpm) otu (cpm)": pairs_night_genetpm_otucpm,
    "night gene (tmm) otu (cpm)": pairs_night_genetmm_otucpm,
    "night gene (rpkm) otu (cpm)": pairs_night_generpkm_otucpm,
    "night gene (uq) otu (cpm)": pairs_night_geneuq_otucpm,
    "night gene (ctf) otu (cpm)": pairs_night_genectf_otucpm,
    "night gene (cuf) otu (cpm)": pairs_night_genecuf_otucpm,
}

# Compare methods on the (OTU, gene) identifiers only. The stored tuples also
# carry the correlation and corrected p-value, which differ between
# normalisations, so whole tuples would never be shared between methods.
correlations_night = from_contents(
    {label: sorted({pair[:2] for pair in pairs})
     for label, pairs in night_pairs_by_method.items()}
)

ax_dict = UpSet(correlations_night, subset_size="count").plot()

## Summarizing results of different normalization methods (not transformed) versus OTU (CPM)

In [None]:
# Gene-OTU pairs recovered with every gene-expression normalisation method.
# Only the (OTU, gene) identifiers are compared: the stored tuples also hold the
# correlation and corrected p-value, which differ between normalisations, so
# intersecting whole tuples would discard pairs that every method agrees on.
night_pair_lists = [
    pairs_night_genecpm_otucpm,
    pairs_night_genetpm_otucpm,
    pairs_night_genetmm_otucpm,
    pairs_night_generpkm_otucpm,
    pairs_night_geneuq_otucpm,
    pairs_night_genectf_otucpm,
    pairs_night_genecuf_otucpm,
]
day_pair_lists = [
    pairs_day_genecpm_otucpm,
    pairs_day_genetpm_otucpm,
    pairs_day_genetmm_otucpm,
    pairs_day_generpkm_otucpm,
    pairs_day_geneuq_otucpm,
    pairs_day_genectf_otucpm,
    pairs_day_genecuf_otucpm,
]

otucpm_severalgeneexpmethods_night = set.intersection(
    *({pair[:2] for pair in pairs} for pairs in night_pair_lists)
)

otucpm_severalgeneexpmethods_day = set.intersection(
    *({pair[:2] for pair in pairs} for pairs in day_pair_lists)
)

In [None]:
# Pairs found in either period, and pairs found in both periods
union_day_night = set(otucpm_severalgeneexpmethods_night) | set(otucpm_severalgeneexpmethods_day)
intersection_day_night = set(otucpm_severalgeneexpmethods_night) & set(otucpm_severalgeneexpmethods_day)

In [None]:
# Pairs found in only one period. The operands were previously swapped, so
# `unique_day` actually held the night-only pairs and vice versa.
unique_day = set(otucpm_severalgeneexpmethods_day).difference(otucpm_severalgeneexpmethods_night)
unique_night = set(otucpm_severalgeneexpmethods_night).difference(otucpm_severalgeneexpmethods_day)

In [None]:
# Sizes of the union, intersection, day-only and night-only pair sets (in that order)
set_sizes = [len(pair_set) for pair_set in (union_day_night,
                                            intersection_day_night,
                                            unique_day,
                                            unique_night)]
print(*set_sizes)

In [None]:
# Display the pairs present in both the night and the day result sets
intersection_day_night

In [None]:
# Distinct first elements (OTU identifiers) of the shared pairs
{corpair[0] for corpair in intersection_day_night}

In [None]:
# Distinct second elements (gene identifiers) of the shared pairs
list({corpair[1] for corpair in intersection_day_night})

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Genes (second element of each pair) shared by the day and night results
shared_genes = list({corpair[1] for corpair in intersection_day_night})

# One line per selected gene, taken from the night TPM table
ax = kremling_expression_v5_night_tpm.loc[shared_genes].transpose().plot(kind='line')

# Title, axis labels, and no x tick marks
ax.set_title('Line Plot')
ax.set_ylabel('Value')
ax.set_xlabel('Samples')
ax.set_xticks([])

plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Genes (second element of each pair) shared by the day and night results
shared_genes = list({corpair[1] for corpair in intersection_day_night})

# One line per selected gene, taken from the day TPM table
ax = kremling_expression_v5_day_tpm.loc[shared_genes].transpose().plot(kind='line')

# Title, axis labels, and no x tick marks
ax.set_title('Line Plot')
ax.set_ylabel('Value')
ax.set_xlabel('Samples')
ax.set_xticks([])

plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import zscore

# Genes (second element of each pair) shared by the day and night results
shared_genes = list({corpair[1] for corpair in intersection_day_night})

# Row-wise z-scores of the night TPM values for the selected genes
night_tpm_zscores = kremling_expression_v5_night_tpm.loc[shared_genes].apply(zscore, axis=1)
ax = night_tpm_zscores.transpose().plot(kind='line')

# Title, axis labels, and no x tick marks
ax.set_title('Line Plot')
ax.set_ylabel('Value')
ax.set_xlabel('Samples')
ax.set_xticks([])

plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import zscore

# Genes (second element of each pair) shared by the day and night results
shared_genes = list({corpair[1] for corpair in intersection_day_night})

# Row-wise z-scores of the day TPM values for the selected genes
day_tpm_zscores = kremling_expression_v5_day_tpm.loc[shared_genes].apply(zscore, axis=1)
ax = day_tpm_zscores.transpose().plot(kind='line')

# Title, axis labels, and no x tick marks
ax.set_title('Line Plot')
ax.set_ylabel('Value')
ax.set_xlabel('Samples')
ax.set_xticks([])

plt.show()

### Getting maize annotation and OTU taxonomy

#### Maize annotation

Annotation file was downloaded from Phytozome for the maize v5 genome.

In [None]:
import pandas as pd

# Read the tab-separated annotation table and index it by transcript name
zmays_v5_annotation = pd.read_csv(
    "/home/rsantos/Repositories/maize_microbiome_transcriptomics/rnaseq_kremling2018/annotation/Phytozome/PhytozomeV13/Zmays/Zm-B73-REFERENCE-NAM-5.0.55/annotation/Zmays_833_Zm-B73-REFERENCE-NAM-5.0.55.annotation_info.txt",
    sep='\t',
).set_index('transcriptName')
zmays_v5_annotation.head()

In [None]:
# Annotation rows for the genes (second element of each pair) shared by day and night
zmays_v5_annotation.loc[list({corpair[1] for corpair in intersection_day_night})]

#### OTU taxonomy

Taxonomy assignment was carried out using qiime2 and GTDB version 214.

For some reason, taxa are assigned only down to the genus level. This is likely due to a bug: the file apparently contains species-level labels, but the field is broken at the whitespace.

In [None]:
import pandas as pd

# Read the tab-separated taxonomy table and index it by feature (OTU) ID
otu_assignment_gtdb2014 = pd.read_csv(
    "/media/rsantos/4TB_drive/Projects/UGA_RACS/16S/Wallace_et_al_2018_2/Classification_GTDB/gtdb-214_rep_set_taxonomy/taxonomy.tsv",
    sep='\t',
).set_index('Feature ID')
otu_assignment_gtdb2014.head()

In [None]:
# Widen columns so the full taxonomy strings are shown
pd.set_option('max_colwidth', 1000)
# Taxon of every OTU (first element of each pair) shared by day and night
shared_otus = list({corpair[0] for corpair in intersection_day_night})
otu_assignment_gtdb2014.loc[shared_otus, 'Taxon']

In [None]:
# Restore pandas' own default column width instead of hard-coding a guessed value
pd.reset_option('display.max_colwidth')

## Correlations - gene (asinh) versus OTU (CPM - asinh)

Initial tests with gene expression transformed using the asinh function did not return any correlations when the OTU values were left untransformed (CPM without asinh). Therefore, I tested correlations after transforming all data in the same way (both OTU abundances and gene expression values were asinh-transformed).

In [None]:
# Call the corals thread helper with n_threads=1 before numpy is imported in this cell.
# NOTE(review): presumably the limit only takes effect if it is set before numpy is
# first imported in the kernel - confirm against the corals documentation.
from corals.threads import set_threads_for_external_libraries
set_threads_for_external_libraries(n_threads=1)
import numpy as np
from corals.correlation.full.default import cor_full

### OTU (CPM) - Gene (CPM) - ASINH

In [None]:
# Stack genes (CPM, asinh) above OTUs (CPM, asinh) and transpose so that
# every gene and every OTU becomes one column of the table given to cor_full.
concat_df_day = pd.concat([kremling_expression_v5_day_cpm_asinh, otu_table_merged_day_cpm_asinh], axis=0)
concat_df_night = pd.concat([kremling_expression_v5_night_cpm_asinh, otu_table_merged_night_cpm_asinh], axis=0)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows = position of the gene/OTU boundary in the matrix
shape_row_day = kremling_expression_v5_day_cpm_asinh.shape[0]
shape_row_night = kremling_expression_v5_night_cpm_asinh.shape[0]

# (row indices, column indices) of correlations above 0.5
true_positions_day = np.where(cor_values_day > 0.5)
true_positions_night = np.where(cor_values_night > 0.5)

# Keep gene (row) x OTU (column) cells only; each entry is (OTU, gene)
pairs_day_genecpmasinh_otucpm = [
    (str(cor_values_day.columns[col]), str(cor_values_day.index[row]))
    for row, col in zip(*true_positions_day)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_genecpmasinh_otucpm = [
    (str(cor_values_night.columns[col]), str(cor_values_night.index[row]))
    for row, col in zip(*true_positions_night)
    if col >= shape_row_night and row < shape_row_night
]

In [None]:
# Number of gene (CPM, asinh) - OTU pairs retained: day first, then night
print(len(pairs_day_genecpmasinh_otucpm), len(pairs_night_genecpmasinh_otucpm), sep='\n')

### OTU (CPM) - Gene (TPM) - ASINH

In [None]:
# Stack genes (TPM, asinh) above OTUs (CPM, asinh) and transpose so that
# every gene and every OTU becomes one column of the table given to cor_full.
concat_df_day = pd.concat([kremling_expression_v5_day_tpm_asinh, otu_table_merged_day_cpm_asinh], axis=0)
concat_df_night = pd.concat([kremling_expression_v5_night_tpm_asinh, otu_table_merged_night_cpm_asinh], axis=0)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows = position of the gene/OTU boundary in the matrix
shape_row_day = kremling_expression_v5_day_tpm_asinh.shape[0]
shape_row_night = kremling_expression_v5_night_tpm_asinh.shape[0]

# (row indices, column indices) of correlations above 0.5
true_positions_day = np.where(cor_values_day > 0.5)
true_positions_night = np.where(cor_values_night > 0.5)

# Keep gene (row) x OTU (column) cells only; each entry is (OTU, gene)
pairs_day_genetpmasinh_otucpm = [
    (str(cor_values_day.columns[col]), str(cor_values_day.index[row]))
    for row, col in zip(*true_positions_day)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_genetpmasinh_otucpm = [
    (str(cor_values_night.columns[col]), str(cor_values_night.index[row]))
    for row, col in zip(*true_positions_night)
    if col >= shape_row_night and row < shape_row_night
]

In [None]:
# Number of gene (TPM, asinh) - OTU pairs retained: day first, then night
print(len(pairs_day_genetpmasinh_otucpm), len(pairs_night_genetpmasinh_otucpm), sep='\n')

### OTU (CPM) - Gene (RPKM) - ASINH

In [None]:
# Stack genes (RPKM, asinh) above OTUs (CPM, asinh) and transpose so that
# every gene and every OTU becomes one column of the table given to cor_full.
concat_df_day = pd.concat([kremling_expression_v5_day_rpkm_asinh, otu_table_merged_day_cpm_asinh], axis=0)
concat_df_night = pd.concat([kremling_expression_v5_night_rpkm_asinh, otu_table_merged_night_cpm_asinh], axis=0)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows = position of the gene/OTU boundary in the matrix
shape_row_day = kremling_expression_v5_day_rpkm_asinh.shape[0]
shape_row_night = kremling_expression_v5_night_rpkm_asinh.shape[0]

# (row indices, column indices) of correlations above 0.5
true_positions_day = np.where(cor_values_day > 0.5)
true_positions_night = np.where(cor_values_night > 0.5)

# Keep gene (row) x OTU (column) cells only; each entry is (OTU, gene)
pairs_day_generpkmasinh_otucpm = [
    (str(cor_values_day.columns[col]), str(cor_values_day.index[row]))
    for row, col in zip(*true_positions_day)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_generpkmasinh_otucpm = [
    (str(cor_values_night.columns[col]), str(cor_values_night.index[row]))
    for row, col in zip(*true_positions_night)
    if col >= shape_row_night and row < shape_row_night
]

In [None]:
# Number of gene (RPKM, asinh) - OTU pairs retained: day first, then night
print(len(pairs_day_generpkmasinh_otucpm), len(pairs_night_generpkmasinh_otucpm), sep='\n')

### OTU (CPM) - Gene (TMM) - ASINH

In [None]:
# Stack genes (TMM, asinh) above OTUs (CPM, asinh) and transpose so that
# every gene and every OTU becomes one column of the table given to cor_full.
concat_df_day = pd.concat([kremling_expression_v5_day_tmm_asinh, otu_table_merged_day_cpm_asinh], axis=0)
concat_df_night = pd.concat([kremling_expression_v5_night_tmm_asinh, otu_table_merged_night_cpm_asinh], axis=0)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows = position of the gene/OTU boundary in the matrix
shape_row_day = kremling_expression_v5_day_tmm_asinh.shape[0]
shape_row_night = kremling_expression_v5_night_tmm_asinh.shape[0]

# (row indices, column indices) of correlations above 0.5
true_positions_day = np.where(cor_values_day > 0.5)
true_positions_night = np.where(cor_values_night > 0.5)

# Keep gene (row) x OTU (column) cells only; each entry is (OTU, gene)
pairs_day_genetmmasinh_otucpm = [
    (str(cor_values_day.columns[col]), str(cor_values_day.index[row]))
    for row, col in zip(*true_positions_day)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_genetmmasinh_otucpm = [
    (str(cor_values_night.columns[col]), str(cor_values_night.index[row]))
    for row, col in zip(*true_positions_night)
    if col >= shape_row_night and row < shape_row_night
]

In [None]:
# Number of gene (TMM, asinh) - OTU pairs retained: day first, then night
print(len(pairs_day_genetmmasinh_otucpm), len(pairs_night_genetmmasinh_otucpm), sep='\n')

### OTU (CPM) - Gene (UQ) - ASINH

In [None]:
# Stack genes (UQ, asinh) above OTUs (CPM, asinh) and transpose so that
# every gene and every OTU becomes one column of the table given to cor_full.
concat_df_day = pd.concat([kremling_expression_v5_day_uq_asinh, otu_table_merged_day_cpm_asinh], axis=0)
concat_df_night = pd.concat([kremling_expression_v5_night_uq_asinh, otu_table_merged_night_cpm_asinh], axis=0)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows = position of the gene/OTU boundary in the matrix
shape_row_day = kremling_expression_v5_day_uq_asinh.shape[0]
shape_row_night = kremling_expression_v5_night_uq_asinh.shape[0]

# (row indices, column indices) of correlations above 0.5
true_positions_day = np.where(cor_values_day > 0.5)
true_positions_night = np.where(cor_values_night > 0.5)

# Keep gene (row) x OTU (column) cells only; each entry is (OTU, gene)
pairs_day_geneuqasinh_otucpm = [
    (str(cor_values_day.columns[col]), str(cor_values_day.index[row]))
    for row, col in zip(*true_positions_day)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_geneuqasinh_otucpm = [
    (str(cor_values_night.columns[col]), str(cor_values_night.index[row]))
    for row, col in zip(*true_positions_night)
    if col >= shape_row_night and row < shape_row_night
]

In [None]:
# Number of gene (UQ, asinh) - OTU pairs retained: day first, then night
print(len(pairs_day_geneuqasinh_otucpm), len(pairs_night_geneuqasinh_otucpm), sep='\n')

### OTU (CPM) - Gene (CTF) - ASINH

In [None]:
# Stack genes (CTF, asinh) above OTUs (CPM, asinh) and transpose so that
# every gene and every OTU becomes one column of the table given to cor_full.
concat_df_day = pd.concat([kremling_expression_v5_day_ctf_asinh, otu_table_merged_day_cpm_asinh], axis=0)
concat_df_night = pd.concat([kremling_expression_v5_night_ctf_asinh, otu_table_merged_night_cpm_asinh], axis=0)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows = position of the gene/OTU boundary in the matrix
shape_row_day = kremling_expression_v5_day_ctf_asinh.shape[0]
shape_row_night = kremling_expression_v5_night_ctf_asinh.shape[0]

# (row indices, column indices) of correlations above 0.5
true_positions_day = np.where(cor_values_day > 0.5)
true_positions_night = np.where(cor_values_night > 0.5)

# Keep gene (row) x OTU (column) cells only; each entry is (OTU, gene)
pairs_day_genectfasinh_otucpm = [
    (str(cor_values_day.columns[col]), str(cor_values_day.index[row]))
    for row, col in zip(*true_positions_day)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_genectfasinh_otucpm = [
    (str(cor_values_night.columns[col]), str(cor_values_night.index[row]))
    for row, col in zip(*true_positions_night)
    if col >= shape_row_night and row < shape_row_night
]

In [None]:
# Number of gene (CTF, asinh) - OTU pairs retained: day first, then night
print(len(pairs_day_genectfasinh_otucpm), len(pairs_night_genectfasinh_otucpm), sep='\n')

### OTU (CPM) - Gene (CUF) - ASINH

In [None]:
# Stack genes (CUF, asinh) above OTUs (CPM, asinh) and transpose so that
# every gene and every OTU becomes one column of the table given to cor_full.
concat_df_day = pd.concat([kremling_expression_v5_day_cuf_asinh, otu_table_merged_day_cpm_asinh], axis=0)
concat_df_night = pd.concat([kremling_expression_v5_night_cuf_asinh, otu_table_merged_night_cpm_asinh], axis=0)

concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows = position of the gene/OTU boundary in the matrix
shape_row_day = kremling_expression_v5_day_cuf_asinh.shape[0]
shape_row_night = kremling_expression_v5_night_cuf_asinh.shape[0]

# (row indices, column indices) of correlations above 0.5
true_positions_day = np.where(cor_values_day > 0.5)
true_positions_night = np.where(cor_values_night > 0.5)

# Keep gene (row) x OTU (column) cells only; each entry is (OTU, gene)
pairs_day_genecufasinh_otucpm = [
    (str(cor_values_day.columns[col]), str(cor_values_day.index[row]))
    for row, col in zip(*true_positions_day)
    if col >= shape_row_day and row < shape_row_day
]

pairs_night_genecufasinh_otucpm = [
    (str(cor_values_night.columns[col]), str(cor_values_night.index[row]))
    for row, col in zip(*true_positions_night)
    if col >= shape_row_night and row < shape_row_night
]

In [None]:
# Number of gene (CUF, asinh) - OTU pairs retained: day first, then night
print(len(pairs_day_genecufasinh_otucpm), len(pairs_night_genecufasinh_otucpm), sep='\n')

In [None]:
import upsetplot

from upsetplot import from_contents

In [None]:
from upsetplot import UpSet

# asinh-transformed night pair lists, one entry per gene-expression normalisation
night_asinh_pairs_by_method = {
    "night gene (cpm) otu (cpm)": pairs_night_genecpmasinh_otucpm,
    "night gene (tpm) otu (cpm)": pairs_night_genetpmasinh_otucpm,
    "night gene (tmm) otu (cpm)": pairs_night_genetmmasinh_otucpm,
    "night gene (rpkm) otu (cpm)": pairs_night_generpkmasinh_otucpm,
    "night gene (uq) otu (cpm)": pairs_night_geneuqasinh_otucpm,
    "night gene (ctf) otu (cpm)": pairs_night_genectfasinh_otucpm,
    "night gene (cuf) otu (cpm)": pairs_night_genecufasinh_otucpm,
}
correlations_asinh_night = from_contents(night_asinh_pairs_by_method)

# UpSet plot of how many pairs each combination of methods shares
upset_asinh_night = UpSet(correlations_asinh_night, subset_size="count")
ax_dict = upset_asinh_night.plot()

In [None]:
from upsetplot import UpSet

# asinh-transformed day pair lists, one entry per gene-expression normalisation
day_asinh_pairs_by_method = {
    "day gene (cpm) otu (cpm)": pairs_day_genecpmasinh_otucpm,
    "day gene (tpm) otu (cpm)": pairs_day_genetpmasinh_otucpm,
    "day gene (tmm) otu (cpm)": pairs_day_genetmmasinh_otucpm,
    "day gene (rpkm) otu (cpm)": pairs_day_generpkmasinh_otucpm,
    "day gene (uq) otu (cpm)": pairs_day_geneuqasinh_otucpm,
    "day gene (ctf) otu (cpm)": pairs_day_genectfasinh_otucpm,
    "day gene (cuf) otu (cpm)": pairs_day_genecufasinh_otucpm,
}
correlations_asinh_day = from_contents(day_asinh_pairs_by_method)

# UpSet plot of how many pairs each combination of methods shares
upset_asinh_day = UpSet(correlations_asinh_day, subset_size="count")
ax_dict = upset_asinh_day.plot()

In [None]:
from upsetplot import UpSet

# Day pair lists with and without the asinh transformation
day_pairs_asinh_and_raw = {
    "day gene (cpm) otu (cpm) (ASINH)": pairs_day_genecpmasinh_otucpm,
    "day gene (tpm) otu (cpm) (ASINH)": pairs_day_genetpmasinh_otucpm,
    "day gene (tmm) otu (cpm) (ASINH)": pairs_day_genetmmasinh_otucpm,
    "day gene (rpkm) otu (cpm) (ASINH)": pairs_day_generpkmasinh_otucpm,
    "day gene (uq) otu (cpm) (ASINH)": pairs_day_geneuqasinh_otucpm,
    "day gene (ctf) otu (cpm) (ASINH)": pairs_day_genectfasinh_otucpm,
    "day gene (cuf) otu (cpm) (ASINH)": pairs_day_genecufasinh_otucpm,
    "day gene (cpm) otu (cpm)": pairs_day_genecpm_otucpm,
    "day gene (tpm) otu (cpm)": pairs_day_genetpm_otucpm,
    "day gene (tmm) otu (cpm)": pairs_day_genetmm_otucpm,
    "day gene (rpkm) otu (cpm)": pairs_day_generpkm_otucpm,
    "day gene (uq) otu (cpm)": pairs_day_geneuq_otucpm,
    "day gene (ctf) otu (cpm)": pairs_day_genectf_otucpm,
    "day gene (cuf) otu (cpm)": pairs_day_genecuf_otucpm,
}

# The asinh lists hold (OTU, gene) tuples while the untransformed lists also
# carry the correlation and corrected p-value. Reduce everything to
# (OTU, gene) so the same pair is recognised across all categories.
correlations_asinh_notasinh_day = from_contents(
    {label: sorted({pair[:2] for pair in pairs})
     for label, pairs in day_pairs_asinh_and_raw.items()}
)

ax_dict = UpSet(correlations_asinh_notasinh_day, subset_size="count").plot()

In [None]:
from upsetplot import UpSet

# Night pair lists with and without the asinh transformation.
# The category labels previously said "day" although the data are night pairs.
night_pairs_asinh_and_raw = {
    "night gene (cpm) otu (cpm) (ASINH)": pairs_night_genecpmasinh_otucpm,
    "night gene (tpm) otu (cpm) (ASINH)": pairs_night_genetpmasinh_otucpm,
    "night gene (tmm) otu (cpm) (ASINH)": pairs_night_genetmmasinh_otucpm,
    "night gene (rpkm) otu (cpm) (ASINH)": pairs_night_generpkmasinh_otucpm,
    "night gene (uq) otu (cpm) (ASINH)": pairs_night_geneuqasinh_otucpm,
    "night gene (ctf) otu (cpm) (ASINH)": pairs_night_genectfasinh_otucpm,
    "night gene (cuf) otu (cpm) (ASINH)": pairs_night_genecufasinh_otucpm,
    "night gene (cpm) otu (cpm)": pairs_night_genecpm_otucpm,
    "night gene (tpm) otu (cpm)": pairs_night_genetpm_otucpm,
    "night gene (tmm) otu (cpm)": pairs_night_genetmm_otucpm,
    "night gene (rpkm) otu (cpm)": pairs_night_generpkm_otucpm,
    "night gene (uq) otu (cpm)": pairs_night_geneuq_otucpm,
    "night gene (ctf) otu (cpm)": pairs_night_genectf_otucpm,
    "night gene (cuf) otu (cpm)": pairs_night_genecuf_otucpm,
}

# The asinh lists hold (OTU, gene) tuples while the untransformed lists also
# carry the correlation and corrected p-value. Reduce everything to
# (OTU, gene) so the same pair is recognised across all categories.
correlations_asinh_notasinh_night = from_contents(
    {label: sorted({pair[:2] for pair in pairs})
     for label, pairs in night_pairs_asinh_and_raw.items()}
)

# Plot the night frame built above (the day frame was plotted here by mistake)
ax_dict = UpSet(correlations_asinh_notasinh_night, subset_size="count").plot()

## Correlations - gene (VST) versus OTU (CPM - VST)

Testing correlations after transforming all data equally (OTUs and Gene expressions were transformed using DESeq2 VST function).

## Correlations - gene (no asinh) versus OTU (relative abundance)

Testing correlations of OTUs (relative abundances after filtering) and Gene expressions (all normalizations, after filtering).

In [None]:
# Call the corals thread helper with n_threads=1 before numpy is imported in this cell.
# NOTE(review): presumably the limit only takes effect if it is set before numpy is
# first imported in the kernel - confirm against the corals documentation.
from corals.threads import set_threads_for_external_libraries
set_threads_for_external_libraries(n_threads=1)
import numpy as np
from corals.correlation.full.default import cor_full

### OTU (relative abundances) - Gene (CPM)

In [None]:
# Stack the filtered CPM gene table on top of the filtered OTU relative
# abundance table (gene rows first), separately for day and night.
concat_df_day = pd.concat(
    [
        kremling_expression_v5_day_cpm_filtered_cv_filtered,
        otu_table_merged_day_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)
concat_df_night = pd.concat(
    [
        kremling_expression_v5_night_cpm_filtered_cv_filtered,
        otu_table_merged_night_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)

# Transpose so the stacked features become columns before calling cor_full.
concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows in each stacked table. Assuming cor_full keeps the
# stacked feature order, positions below this value are genes and positions
# at or above it are OTUs.
shape_row_day = len(kremling_expression_v5_day_cpm_filtered_cv_filtered.index)
shape_row_night = len(kremling_expression_v5_night_cpm_filtered_cv_filtered.index)

# (row positions, column positions) of every correlation above 0.6.
true_positions_day = np.where(cor_values_day > 0.6)
true_positions_night = np.where(cor_values_night > 0.6)

# Keep only the hits whose row lies in the gene range and whose column lies in
# the OTU range; each pair is stored as (column label, row label).
pairs_day_genecpm_oturelabund = [
    (str(cor_values_day.columns[col_pos]), str(cor_values_day.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_day)
    if row_pos < shape_row_day and col_pos >= shape_row_day
]
pairs_night_genecpm_oturelabund = [
    (str(cor_values_night.columns[col_pos]), str(cor_values_night.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_night)
    if row_pos < shape_row_night and col_pos >= shape_row_night
]

In [None]:
# Number of gene (CPM) - OTU pairs above the threshold: day first, then night.
print(
    len(pairs_day_genecpm_oturelabund),
    len(pairs_night_genecpm_oturelabund),
    sep="\n",
)

### OTU (relative abundances) - Gene (RPKM)

In [None]:
# Stack the filtered RPKM gene table on top of the filtered OTU relative
# abundance table (gene rows first), separately for day and night.
concat_df_day = pd.concat(
    [
        kremling_expression_v5_day_rpkm_filtered_cv_filtered,
        otu_table_merged_day_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)
concat_df_night = pd.concat(
    [
        kremling_expression_v5_night_rpkm_filtered_cv_filtered,
        otu_table_merged_night_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)

# Transpose so the stacked features become columns before calling cor_full.
concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows in each stacked table. Assuming cor_full keeps the
# stacked feature order, positions below this value are genes and positions
# at or above it are OTUs.
shape_row_day = len(kremling_expression_v5_day_rpkm_filtered_cv_filtered.index)
shape_row_night = len(kremling_expression_v5_night_rpkm_filtered_cv_filtered.index)

# (row positions, column positions) of every correlation above 0.6.
true_positions_day = np.where(cor_values_day > 0.6)
true_positions_night = np.where(cor_values_night > 0.6)

# Keep only the hits whose row lies in the gene range and whose column lies in
# the OTU range; each pair is stored as (column label, row label).
pairs_day_generpkm_oturelabund = [
    (str(cor_values_day.columns[col_pos]), str(cor_values_day.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_day)
    if row_pos < shape_row_day and col_pos >= shape_row_day
]
pairs_night_generpkm_oturelabund = [
    (str(cor_values_night.columns[col_pos]), str(cor_values_night.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_night)
    if row_pos < shape_row_night and col_pos >= shape_row_night
]

In [None]:
# Number of gene (RPKM) - OTU pairs above the threshold: day first, then night.
print(
    len(pairs_day_generpkm_oturelabund),
    len(pairs_night_generpkm_oturelabund),
    sep="\n",
)

### OTU (relative abundances) - Gene (TPM)

In [None]:
# Stack the filtered TPM gene table on top of the filtered OTU relative
# abundance table (gene rows first), separately for day and night.
concat_df_day = pd.concat(
    [
        kremling_expression_v5_day_tpm_filtered_cv_filtered,
        otu_table_merged_day_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)
concat_df_night = pd.concat(
    [
        kremling_expression_v5_night_tpm_filtered_cv_filtered,
        otu_table_merged_night_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)

# Transpose so the stacked features become columns before calling cor_full.
concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows in each stacked table. Assuming cor_full keeps the
# stacked feature order, positions below this value are genes and positions
# at or above it are OTUs.
shape_row_day = len(kremling_expression_v5_day_tpm_filtered_cv_filtered.index)
shape_row_night = len(kremling_expression_v5_night_tpm_filtered_cv_filtered.index)

# (row positions, column positions) of every correlation above 0.6.
true_positions_day = np.where(cor_values_day > 0.6)
true_positions_night = np.where(cor_values_night > 0.6)

# Keep only the hits whose row lies in the gene range and whose column lies in
# the OTU range; each pair is stored as (column label, row label).
pairs_day_genetpm_oturelabund = [
    (str(cor_values_day.columns[col_pos]), str(cor_values_day.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_day)
    if row_pos < shape_row_day and col_pos >= shape_row_day
]
pairs_night_genetpm_oturelabund = [
    (str(cor_values_night.columns[col_pos]), str(cor_values_night.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_night)
    if row_pos < shape_row_night and col_pos >= shape_row_night
]

In [None]:
# Number of gene (TPM) - OTU pairs above the threshold: day first, then night.
print(
    len(pairs_day_genetpm_oturelabund),
    len(pairs_night_genetpm_oturelabund),
    sep="\n",
)

### OTU (relative abundances) - Gene (TMM)

In [None]:
# Stack the filtered TMM gene table on top of the filtered OTU relative
# abundance table (gene rows first), separately for day and night.
concat_df_day = pd.concat(
    [
        kremling_expression_v5_day_tmm_filtered_cv_filtered,
        otu_table_merged_day_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)
concat_df_night = pd.concat(
    [
        kremling_expression_v5_night_tmm_filtered_cv_filtered,
        otu_table_merged_night_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)

# Transpose so the stacked features become columns before calling cor_full.
concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows in each stacked table. Assuming cor_full keeps the
# stacked feature order, positions below this value are genes and positions
# at or above it are OTUs.
shape_row_day = len(kremling_expression_v5_day_tmm_filtered_cv_filtered.index)
shape_row_night = len(kremling_expression_v5_night_tmm_filtered_cv_filtered.index)

# (row positions, column positions) of every correlation above 0.6.
true_positions_day = np.where(cor_values_day > 0.6)
true_positions_night = np.where(cor_values_night > 0.6)

# Keep only the hits whose row lies in the gene range and whose column lies in
# the OTU range; each pair is stored as (column label, row label).
pairs_day_genetmm_oturelabund = [
    (str(cor_values_day.columns[col_pos]), str(cor_values_day.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_day)
    if row_pos < shape_row_day and col_pos >= shape_row_day
]
pairs_night_genetmm_oturelabund = [
    (str(cor_values_night.columns[col_pos]), str(cor_values_night.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_night)
    if row_pos < shape_row_night and col_pos >= shape_row_night
]

In [None]:
# Number of gene (TMM) - OTU pairs above the threshold: day first, then night.
print(
    len(pairs_day_genetmm_oturelabund),
    len(pairs_night_genetmm_oturelabund),
    sep="\n",
)

### OTU (relative abundances) - Gene (UQ)

In [None]:
# Stack the filtered UQ gene table on top of the filtered OTU relative
# abundance table (gene rows first), separately for day and night.
concat_df_day = pd.concat(
    [
        kremling_expression_v5_day_uq_filtered_cv_filtered,
        otu_table_merged_day_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)
concat_df_night = pd.concat(
    [
        kremling_expression_v5_night_uq_filtered_cv_filtered,
        otu_table_merged_night_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)

# Transpose so the stacked features become columns before calling cor_full.
concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows in each stacked table. Assuming cor_full keeps the
# stacked feature order, positions below this value are genes and positions
# at or above it are OTUs.
shape_row_day = len(kremling_expression_v5_day_uq_filtered_cv_filtered.index)
shape_row_night = len(kremling_expression_v5_night_uq_filtered_cv_filtered.index)

# (row positions, column positions) of every correlation above 0.6.
true_positions_day = np.where(cor_values_day > 0.6)
true_positions_night = np.where(cor_values_night > 0.6)

# Keep only the hits whose row lies in the gene range and whose column lies in
# the OTU range; each pair is stored as (column label, row label).
pairs_day_geneuq_oturelabund = [
    (str(cor_values_day.columns[col_pos]), str(cor_values_day.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_day)
    if row_pos < shape_row_day and col_pos >= shape_row_day
]
pairs_night_geneuq_oturelabund = [
    (str(cor_values_night.columns[col_pos]), str(cor_values_night.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_night)
    if row_pos < shape_row_night and col_pos >= shape_row_night
]

In [None]:
# Number of gene (UQ) - OTU pairs above the threshold: day first, then night.
print(
    len(pairs_day_geneuq_oturelabund),
    len(pairs_night_geneuq_oturelabund),
    sep="\n",
)

### OTU (relative abundances) - Gene (CTF)

In [None]:
# Stack the filtered CTF gene table on top of the filtered OTU relative
# abundance table (gene rows first), separately for day and night.
concat_df_day = pd.concat(
    [
        kremling_expression_v5_day_ctf_filtered_cv_filtered,
        otu_table_merged_day_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)
concat_df_night = pd.concat(
    [
        kremling_expression_v5_night_ctf_filtered_cv_filtered,
        otu_table_merged_night_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)

# Transpose so the stacked features become columns before calling cor_full.
concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows in each stacked table. Assuming cor_full keeps the
# stacked feature order, positions below this value are genes and positions
# at or above it are OTUs.
shape_row_day = len(kremling_expression_v5_day_ctf_filtered_cv_filtered.index)
shape_row_night = len(kremling_expression_v5_night_ctf_filtered_cv_filtered.index)

# (row positions, column positions) of every correlation above 0.6.
true_positions_day = np.where(cor_values_day > 0.6)
true_positions_night = np.where(cor_values_night > 0.6)

# Keep only the hits whose row lies in the gene range and whose column lies in
# the OTU range; each pair is stored as (column label, row label).
pairs_day_genectf_oturelabund = [
    (str(cor_values_day.columns[col_pos]), str(cor_values_day.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_day)
    if row_pos < shape_row_day and col_pos >= shape_row_day
]
pairs_night_genectf_oturelabund = [
    (str(cor_values_night.columns[col_pos]), str(cor_values_night.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_night)
    if row_pos < shape_row_night and col_pos >= shape_row_night
]

In [None]:
# Number of gene (CTF) - OTU pairs above the threshold: day first, then night.
print(
    len(pairs_day_genectf_oturelabund),
    len(pairs_night_genectf_oturelabund),
    sep="\n",
)

### OTU (relative abundances) - Gene (CUF)

In [None]:
# Stack the filtered CUF gene table on top of the filtered OTU relative
# abundance table (gene rows first), separately for day and night.
concat_df_day = pd.concat(
    [
        kremling_expression_v5_day_cuf_filtered_cv_filtered,
        otu_table_merged_day_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)
concat_df_night = pd.concat(
    [
        kremling_expression_v5_night_cuf_filtered_cv_filtered,
        otu_table_merged_night_relative_abund_filtered_cv_filtered,
    ],
    axis=0,
)

# Transpose so the stacked features become columns before calling cor_full.
concatenated_transposed_day = concat_df_day.T
concatenated_transposed_night = concat_df_night.T

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Number of gene rows in each stacked table. Assuming cor_full keeps the
# stacked feature order, positions below this value are genes and positions
# at or above it are OTUs.
shape_row_day = len(kremling_expression_v5_day_cuf_filtered_cv_filtered.index)
shape_row_night = len(kremling_expression_v5_night_cuf_filtered_cv_filtered.index)

# (row positions, column positions) of every correlation above 0.6.
true_positions_day = np.where(cor_values_day > 0.6)
true_positions_night = np.where(cor_values_night > 0.6)

# Keep only the hits whose row lies in the gene range and whose column lies in
# the OTU range; each pair is stored as (column label, row label).
pairs_day_genecuf_oturelabund = [
    (str(cor_values_day.columns[col_pos]), str(cor_values_day.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_day)
    if row_pos < shape_row_day and col_pos >= shape_row_day
]
pairs_night_genecuf_oturelabund = [
    (str(cor_values_night.columns[col_pos]), str(cor_values_night.index[row_pos]))
    for row_pos, col_pos in zip(*true_positions_night)
    if row_pos < shape_row_night and col_pos >= shape_row_night
]

In [None]:
# Number of gene (CUF) - OTU pairs above the threshold: day first, then night.
print(
    len(pairs_day_genecuf_oturelabund),
    len(pairs_night_genecuf_oturelabund),
    sep="\n",
)