# Normalization methods for metataxonomics

| Data type      | Normalization         | Normalization category    | Condition  |
|----------------|-----------------------|---------------------------|------------|
| Metataxonomics | Estimated counts     |   | D and N |
| Metataxonomics | Relative abundance     |   | D and N |
| Metataxonomics | CPM     |   | D and N |


## Importing the count table

In [26]:
import pandas as pd

# Load the summed day/night OTU count table (tab-separated), using the
# 'OTU ID' column as the row index.
otu_counts_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_day_night_otu_counts.tsv'
otu_table_merged_d_n = pd.read_csv(otu_counts_path, sep='\t', index_col='OTU ID')

# Preview the first rows to check that the table was parsed as expected.
otu_table_merged_d_n.head()

Unnamed: 0_level_0,14A0247_8,14A0051_8,14A0381_26,14A0533_26,14A0281_8,14A0295_8,14A0169_26,14A0069_8,14A0497_26,14A0023_8,...,14A0345_8,14A0267_8,14A0009_8,14A0007_8,14A0093_26,14A0137_26,14A0265_8,14A0155_26,14A0167_26,14A0481_26
OTU ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4479944,1.0,2.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
995900,0.0,0.0,0.0,0.0,5.0,8.0,6.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1124709,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
541139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
533625,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,40.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,2.0


## Importing the Kremling day and night expression matrices so the OTU table can be split by condition and each subset normalized separately

In [27]:
# Load the night and day Kremling expression tables (tab-separated) and index
# each of them by its 'Name' column.
kremling_night_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/correlations_rnaseq_metataxonomics/kremling_expression_v5_night.tsv'
kremling_raw_expression_v5_night = pd.read_csv(kremling_night_path, sep='\t').set_index('Name')

kremling_day_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/correlations_rnaseq_metataxonomics/kremling_expression_v5_day.tsv'
kremling_raw_expression_v5_day = pd.read_csv(kremling_day_path, sep='\t').set_index('Name')

In [28]:
# Split the OTU table by condition: keep only the columns listed in the
# corresponding Kremling matrix, in that matrix's column order.
day_columns = kremling_raw_expression_v5_day.columns
night_columns = kremling_raw_expression_v5_night.columns

otu_table_merged_d_n_day = otu_table_merged_d_n.loc[:, day_columns]
otu_table_merged_d_n_night = otu_table_merged_d_n.loc[:, night_columns]

## Normalizing with Counts per Million (CPM)

In [29]:
# Counts per million: divide every column by its own total, then scale by 1e6.
# NOTE(review): columns are presumably samples; a column whose total is 0
# yields NaN values here - confirm that is acceptable downstream.
day_column_totals = otu_table_merged_d_n_day.sum()
otu_table_merged_day_cpm = (otu_table_merged_d_n_day / day_column_totals) * 1e6

night_column_totals = otu_table_merged_d_n_night.sum()
otu_table_merged_night_cpm = (otu_table_merged_d_n_night / night_column_totals) * 1e6

In [30]:
# Write the CPM-normalized day and night tables as tab-separated files.
day_cpm_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_d_n_otu_day_cpm.tsv'
otu_table_merged_day_cpm.to_csv(day_cpm_path, sep='\t')

night_cpm_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_d_n_otu_night_cpm.tsv'
otu_table_merged_night_cpm.to_csv(night_cpm_path, sep='\t')

## Normalizing with Relative Abundance

In [31]:
# Relative abundance: divide every column by its own total, then express the
# result as a percentage (x 100).
day_totals_for_abund = otu_table_merged_d_n_day.sum()
otu_table_merged_day_relative_abund = (otu_table_merged_d_n_day / day_totals_for_abund) * 100

night_totals_for_abund = otu_table_merged_d_n_night.sum()
otu_table_merged_night_relative_abund = (otu_table_merged_d_n_night / night_totals_for_abund) * 100

In [32]:
# Write the relative-abundance day and night tables as tab-separated files.
day_relative_abund_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_day_night_otu_day_relative_abund.tsv'
otu_table_merged_day_relative_abund.to_csv(day_relative_abund_path, sep='\t')

night_relative_abund_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_day_night_otu_night_relative_abund.tsv'
otu_table_merged_night_relative_abund.to_csv(night_relative_abund_path, sep='\t')