# Normalization methods for metataxonomics

Normalization of OTU abundance was carried out at three levels: the original OTU counts, the OTU counts collapsed at the genus level, and the OTU counts collapsed at the family level. Each normalization was applied separately to the day (D) and night (N) samples, as summarized here:

| Taxonomy level | Data type      | Normalization         | Normalization category    | Condition  |
|--------|----------------|-----------------------|---------------------------|------------|
| Original OTUs  | Metataxonomics | Estimated counts     |   | D and N |
| Original OTUs | Metataxonomics | Relative abundance     |   | D and N |
| Original OTUs | Metataxonomics | CPM     |   | D and N |
| Family (GTDB) | Metataxonomics | Relative abundance     |   | D and N |
| Family (GTDB) | Metataxonomics | CPM     |   | D and N |
| Genus (GTDB) | Metataxonomics | Relative abundance     |   | D and N |
| Genus (GTDB) | Metataxonomics | CPM     |   | D and N |


## Importing the count tables

In [1]:
import pandas as pd

# Input files: the OTU-level count table and its genus- and family-level
# collapsed versions.
otu_counts_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_day_night_otu_counts.tsv'
genus_counts_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/colapse_counts_gtdb/genus_counts.tsv'
family_counts_path = '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/colapse_counts_gtdb/family_counts.tsv'

# Every table is tab-separated; the column holding the taxon label
# ('OTU ID', 'Genus' or 'Family') becomes the row index.
otu_table_merged_d_n = pd.read_csv(
    otu_counts_path,
    sep='\t',
    index_col='OTU ID',
)
genus_table_merged_d_n = pd.read_csv(
    genus_counts_path,
    sep='\t',
    index_col='Genus',
)
family_table_merged_d_n = pd.read_csv(
    family_counts_path,
    sep='\t',
    index_col='Family',
)

## Importing Kremling day and night matrices to make sure normalization is carried out separately

In [2]:
# Kremling night matrix: read the tab-separated file, then use its 'Name'
# column as the row index (chained instead of mutating the frame in place).
kremling_raw_expression_v5_night = pd.read_csv(
    '/home/rsantos/Repositories/maize_microbiome_transcriptomics/correlations_rnaseq_metataxonomics/kremling_expression_v5_night.tsv',
    sep='\t',
).set_index('Name')

# Kremling day matrix, loaded the same way.
kremling_raw_expression_v5_day = pd.read_csv(
    '/home/rsantos/Repositories/maize_microbiome_transcriptomics/correlations_rnaseq_metataxonomics/kremling_expression_v5_day.tsv',
    sep='\t',
).set_index('Name')

In [3]:
# The column labels of the Kremling day and night matrices decide which
# columns of each count table belong to the day and to the night subset.
# Presumably these labels are sample identifiers shared by both data sets
# (a label missing from a count table raises KeyError) -- TODO confirm.
day_sample_ids = kremling_raw_expression_v5_day.columns
night_sample_ids = kremling_raw_expression_v5_night.columns

# Original OTUs
otu_table_merged_d_n_day = otu_table_merged_d_n[day_sample_ids]
otu_table_merged_d_n_night = otu_table_merged_d_n[night_sample_ids]

# Genus level
genus_table_merged_d_n_day = genus_table_merged_d_n[day_sample_ids]
genus_table_merged_d_n_night = genus_table_merged_d_n[night_sample_ids]

# Family level
family_table_merged_d_n_day = family_table_merged_d_n[day_sample_ids]
family_table_merged_d_n_night = family_table_merged_d_n[night_sample_ids]

## Normalizing with Counts per Million (CPM)

In [4]:
def counts_per_million(counts):
    """Convert a count table to counts per million (CPM), column by column.

    Every value is divided by the total of its own column and the result
    is multiplied by 1e6.

    Parameters
    ----------
    counts : pandas.DataFrame
        Count table. Rows are the taxa (OTU, genus or family); columns are
        presumably the samples -- TODO confirm.

    Returns
    -------
    pandas.DataFrame
        A new table with the same row and column labels as ``counts``;
        ``counts`` itself is not modified.

    Notes
    -----
    A column whose total is 0 is not special-cased: its values are the
    result of dividing by zero (NaN for 0/0).
    """
    # DataFrame.sum() returns one total per column; divide() matches those
    # totals to the columns by label, so each column is scaled by its own sum.
    column_totals = counts.sum()
    return counts.divide(column_totals) * 1e6


# Original OTUs
otu_table_merged_day_cpm = counts_per_million(otu_table_merged_d_n_day)
otu_table_merged_night_cpm = counts_per_million(otu_table_merged_d_n_night)

# Genus level
genus_table_merged_d_n_day_cpm = counts_per_million(genus_table_merged_d_n_day)
genus_table_merged_d_n_night_cpm = counts_per_million(genus_table_merged_d_n_night)

# Family level
family_table_merged_d_n_day_cpm = counts_per_million(family_table_merged_d_n_day)
family_table_merged_d_n_night_cpm = counts_per_million(family_table_merged_d_n_night)

In [5]:
# (table, destination) pairs for the six CPM tables, listed in the order in
# which the files are written.
cpm_outputs = [
    (otu_table_merged_day_cpm,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_d_n_otu_day_cpm.tsv'),
    (otu_table_merged_night_cpm,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_d_n_otu_night_cpm.tsv'),
    (genus_table_merged_d_n_day_cpm,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/genus_table_merged_d_n_day_cpm.tsv'),
    (genus_table_merged_d_n_night_cpm,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/genus_table_merged_d_n_night_cpm.tsv'),
    (family_table_merged_d_n_day_cpm,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/family_table_merged_d_n_day_cpm.tsv'),
    (family_table_merged_d_n_night_cpm,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/family_table_merged_d_n_night_cpm.tsv'),
]

# Each table is saved as a tab-separated file, row index included.
for cpm_table, cpm_path in cpm_outputs:
    cpm_table.to_csv(cpm_path, sep='\t')

## Normalizing with Relative Abundance

In [6]:
def relative_abundance_percent(counts):
    """Convert a count table to relative abundance in percent, column by column.

    Every value is divided by the total of its own column and the result
    is multiplied by 100.

    Parameters
    ----------
    counts : pandas.DataFrame
        Count table. Rows are the taxa (OTU, genus or family); columns are
        presumably the samples -- TODO confirm.

    Returns
    -------
    pandas.DataFrame
        A new table with the same row and column labels as ``counts``;
        ``counts`` itself is not modified.

    Notes
    -----
    A column whose total is 0 is not special-cased: its values are the
    result of dividing by zero (NaN for 0/0).
    """
    # DataFrame.sum() returns one total per column; divide() matches those
    # totals to the columns by label, so each column is scaled by its own sum.
    column_totals = counts.sum()
    return counts.divide(column_totals) * 100


# Original OTUs
otu_table_merged_day_relative_abund = relative_abundance_percent(otu_table_merged_d_n_day)
otu_table_merged_night_relative_abund = relative_abundance_percent(otu_table_merged_d_n_night)

# Genus level
genus_table_merged_d_n_day_relative_abund = relative_abundance_percent(genus_table_merged_d_n_day)
genus_table_merged_d_n_night_relative_abund = relative_abundance_percent(genus_table_merged_d_n_night)

# Family level
family_table_merged_d_n_day_relative_abund = relative_abundance_percent(family_table_merged_d_n_day)
family_table_merged_d_n_night_relative_abund = relative_abundance_percent(family_table_merged_d_n_night)

In [7]:
# (table, destination) pairs for the six relative-abundance tables, listed in
# the order in which the files are written.
relative_abund_outputs = [
    (otu_table_merged_day_relative_abund,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_day_night_otu_day_relative_abund.tsv'),
    (otu_table_merged_night_relative_abund,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/summed_day_night_otu_night_relative_abund.tsv'),
    (genus_table_merged_d_n_day_relative_abund,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/genus_table_merged_d_n_day_relative_abund.tsv'),
    (genus_table_merged_d_n_night_relative_abund,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/genus_table_merged_d_n_night_relative_abund.tsv'),
    (family_table_merged_d_n_day_relative_abund,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/family_table_merged_d_n_day_relative_abund.tsv'),
    (family_table_merged_d_n_night_relative_abund,
     '/home/rsantos/Repositories/maize_microbiome_transcriptomics/16S_wallace2018/combine_day_night_samples/family_table_merged_d_n_night_relative_abund.tsv'),
]

# Each table is saved as a tab-separated file, row index included.
for abund_table, abund_path in relative_abund_outputs:
    abund_table.to_csv(abund_path, sep='\t')
