# Running correlations between gene expression and OTU abundance


Filtering steps (low abundance and low expression, or low variance genes/OTUs) were described in another notebook.
Here, I (RACS) use all filtered matrices for running correlations for each group of maize genotypes.

Importing the within-sample (RPKM, TPM, CPM) and between-sample (TMM, UQ, CTF, CUF) normalized matrices for day and night:

In [1]:
import pandas as pd

# Filtered gene-expression matrices, one per time point (day/night) and
# normalization method. Every file is tab-separated and lives in the same
# quantification directory, so a small loader keeps the call sites short.
_EXPRESSION_DIR = '/home/santosrac/Repositories/maize_microbiome_transcriptomics/rnaseq_kremling2018/quantification/'


def _read_expression(file_name):
    """Read one tab-separated expression matrix from the quantification directory."""
    return pd.read_csv(_EXPRESSION_DIR + file_name, sep='\t')


# Within-sample normalized data after filtering (TPM, RPKM, CPM)
kremling_expression_v5_day_tpm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_day_tpm_filtered_cv_filtered.tsv')
kremling_expression_v5_night_tpm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_night_tpm_filtered_cv_filtered.tsv')
kremling_expression_v5_day_rpkm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_day_rpkm_filtered_cv_filtered.tsv')
kremling_expression_v5_night_rpkm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_night_rpkm_filtered_cv_filtered.tsv')
kremling_expression_v5_day_cpm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_day_cpm_filtered_cv_filtered.tsv')
kremling_expression_v5_night_cpm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_night_cpm_filtered_cv_filtered.tsv')

# Between-sample normalized data after filtering (TMM, UQ, CTF, CUF)
kremling_expression_v5_day_tmm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_day_tmm_filtered_cv_filtered.tsv')
kremling_expression_v5_night_tmm_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_night_tmm_filtered_cv_filtered.tsv')
kremling_expression_v5_day_uq_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_day_uq_filtered_cv_filtered.tsv')
kremling_expression_v5_night_uq_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_night_uq_filtered_cv_filtered.tsv')
kremling_expression_v5_day_ctf_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_day_ctf_filtered_cv_filtered.tsv')
kremling_expression_v5_night_ctf_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_night_ctf_filtered_cv_filtered.tsv')
kremling_expression_v5_day_cuf_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_day_cuf_filtered_cv_filtered.tsv')
kremling_expression_v5_night_cuf_filtered_cv_filtered = _read_expression(
    'kremling_expression_v5_night_cuf_filtered_cv_filtered.tsv')

Importing OTU (CPM) matrices for day and night:

In [2]:
# Filtered OTU abundance tables (CPM) for the day and night samples.
# Both files are tab-separated and sit in the same directory.
_OTU_DIR = '/home/santosrac/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/'

otu_table_merged_day_cpm_filtered_cv_filtered = pd.read_csv(
    _OTU_DIR + 'otu_table_merged_day_cpm_filtered_cv_filtered.tsv',
    sep='\t',
)
otu_table_merged_night_cpm_filtered_cv_filtered = pd.read_csv(
    _OTU_DIR + 'otu_table_merged_night_cpm_filtered_cv_filtered.tsv',
    sep='\t',
)

## Importing groups

In [4]:
# Group assignments table, read as tab-separated text and re-indexed by its
# 'Plot_Day' column. The last expression previews the first rows.
_GROUP_ASSIGNMENTS_FILE = '/home/santosrac/Repositories/maize_microbiome_transcriptomics/genotype_information/wallace_et_al_2018_group_assignments_romay2013.tsv'

wallace_et_al_2018_group_assignments_romay2013 = (
    pd.read_csv(_GROUP_ASSIGNMENTS_FILE, sep='\t')
    .set_index('Plot_Day')
)
wallace_et_al_2018_group_assignments_romay2013.head()

Unnamed: 0_level_0,Genotype,Accesion N,N GBS samples,N Plants,Avg. IBS,% missing,Breeding program,Pop structure
Plot_Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
14A0309_26,33-16,Ames26771,4.0,3.0,0.998,0.34,Other,unclassified
14A0233_8,38-11,Ames26604,6.0,5.0,0.992,0.21,Other,unclassified
14A0079_26,4226,NSL30904,4.0,3.0,0.998,0.29,Other,unclassified
14A0311_8,4722,PI587130,3.0,2.0,0.998,0.39,Other,unclassified
14A0021_8,A188,Ames22443,8.0,7.0,0.996,0.29,Minnesota,unclassified


## Running correlations

I (RACS) will start by analyzing correlations in the three largest groups:
* stiff stalk (39)
* tropical (38)
* non-stiff stalk (27)

In [None]:
# Ask CorALS to limit the external libraries it relies on to one thread (n_threads=1).
# NOTE(review): the call is deliberately placed ahead of `import numpy`, presumably
# because the limit has to be set before numpy is first loaded — confirm against the
# CorALS documentation. pandas was already imported in an earlier cell and may have
# loaded numpy by then; verify that the setting still takes effect in this notebook.
from corals.threads import set_threads_for_external_libraries
set_threads_for_external_libraries(n_threads=1)
import numpy as np
# Full correlation computation from CorALS, used by the correlation cells below.
from corals.correlation.full.default import cor_full

### stiff stalk

In [44]:
# NOTE(review): this cell sits under the "stiff stalk" heading, but no subsetting by
# genotype group is visible here — the matrices are used as loaded. Confirm whether
# the group filter is meant to be applied before the correlations are computed.

# Minimum correlation for a pair to be reported. Only values strictly above the
# threshold pass, so strong negative correlations are not reported.
COR_THRESHOLD = 0.6


def _cross_pairs(cor_values, true_positions, n_expression_rows):
    """Collect the label pairs that link the expression block to the OTU block.

    Parameters
    ----------
    cor_values : table returned by ``cor_full``; its ``index`` and ``columns``
        supply the labels of the reported pairs.
    true_positions : tuple ``(row_positions, column_positions)`` as returned by
        ``np.where`` on the thresholded correlation table.
    n_expression_rows : number of rows contributed by the expression matrix,
        which is stacked first, ahead of the OTU rows.

    Returns
    -------
    list of ``(column_label, row_label)`` string tuples, in the order the hits
    appear in ``true_positions``.
    """
    row_pos, col_pos = true_positions
    # Keep hits whose row falls in the first (expression) block and whose column
    # falls after it. Hits inside a single block, including the diagonal, are
    # dropped, and each cross pair is reported once rather than twice.
    keep = (col_pos > (n_expression_rows - 1)) & (row_pos < n_expression_rows)
    col_labels = cor_values.columns[col_pos[keep]]
    row_labels = cor_values.index[row_pos[keep]]
    return [(str(col_label), str(row_label))
            for col_label, row_label in zip(col_labels, row_labels)]


# Stack the expression rows on top of the OTU rows.
concat_df_night = pd.concat([kremling_expression_v5_night_cuf_filtered_cv_filtered, otu_table_merged_night_cpm_filtered_cv_filtered], axis=0)
concat_df_day = pd.concat([kremling_expression_v5_day_cuf_filtered_cv_filtered, otu_table_merged_day_cpm_filtered_cv_filtered], axis=0)

# Transpose so that each gene/OTU becomes a column before calling cor_full.
concatenated_transposed_day = concat_df_day.transpose()
concatenated_transposed_night = concat_df_night.transpose()

cor_values_day = cor_full(concatenated_transposed_day)
cor_values_night = cor_full(concatenated_transposed_night)

# Positions of all entries above the threshold, and the size of the expression
# block that separates gene positions from OTU positions.
true_positions_day = np.where(cor_values_day > COR_THRESHOLD)
shape_row_day = kremling_expression_v5_day_cuf_filtered_cv_filtered.shape[0]
true_positions_night = np.where(cor_values_night > COR_THRESHOLD)
shape_row_night = kremling_expression_v5_night_cuf_filtered_cv_filtered.shape[0]

# Each tuple is (label from the OTU block, label from the expression block).
pairs_day_genecuf_otucpm = _cross_pairs(cor_values_day, true_positions_day, shape_row_day)
pairs_night_genecuf_otucpm = _cross_pairs(cor_values_night, true_positions_night, shape_row_night)