# Run RPCA with Gemelli

- Run each dataset individually (no ASVs are shared between datasets)

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import glob
import os

#current directory
# All relative paths used below are resolved against this directory.
print('current directory:',os.getcwd())

#For illustrator import:
# fonttype 42 makes matplotlib embed TrueType fonts in PDF/PS output
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

# Directory (relative to the current directory) that RPCA outputs are written to
results_directory = '../RPCA/'

current directory: /Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/scripts


### File Locations

In [2]:
# Prefix shared by the merged input files (e.g. GLOMICON_asv_merged.csv)
prefix = 'GLOMICON'

#Data Directory
directory = '../data/'

#Directory for saving Figures
plot_dir = '../figures/RPCA/'
print(plot_dir)

../figures/RPCA/


### Functions

In [3]:
# Dada2 Banzai Output Functions
# Taxonomic ranks, ordered from broadest to most specific; these are the
# column names of the taxonomy table loaded below.
levels = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

def make_metadata(infile):
    """Read a sample-metadata CSV and return it indexed by sample name.

    The first column of ``infile`` becomes the index and is labelled
    ``sample_name``. The number of samples read is printed.
    (Date parsing is currently disabled:
    ``df['date'] = pd.to_datetime(df['date'])``.)
    """
    metadata = pd.read_csv(infile, index_col=0)
    metadata.index.name = 'sample_name'
    n_samples = len(metadata)
    print('Number samples:', n_samples)
    return metadata

def make_taxa_asv(infile):
    """Read an ASV-level CSV (counts, taxonomy or sequences) as-is.

    No index is set here: the table comes back with the default integer
    index, and callers set the 'ASV' index themselves. The number of rows
    read is printed.
    """
    table = pd.read_csv(infile)
    n_asvs = len(table)
    print('Number ASVs:', n_asvs)
    return table

#from metadata file, limit OTU table and taxa table to those present in those samples
def from_metadata_to_taxareads(meta_data, otu_table, taxa_table):
    """Subset the count and taxonomy tables to the samples in ``meta_data``.

    Parameters
    ----------
    meta_data : pandas.DataFrame
        Sample metadata with sample names as the index.
    otu_table : pandas.DataFrame
        Count table with ASVs/OTUs as the index and sample names as columns.
    taxa_table : pandas.DataFrame
        Taxonomy table with ASVs/OTUs as the index.

    Returns
    -------
    otu_lim : pandas.DataFrame
        ``otu_table`` restricted to the samples present in ``meta_data`` and
        to the ASVs whose summed count across those samples is > 0.
    taxa_lim : pandas.DataFrame
        ``taxa_table`` restricted to the ASVs kept in ``otu_lim``.
    """
    meta_cols = list(meta_data)
    # Inner join on sample name keeps only samples found in both tables;
    # the metadata columns are only needed for the join and are dropped again.
    merged = pd.concat([meta_data, otu_table.T], join='inner', axis=1)
    otu_lim = merged.drop(columns=meta_cols).T
    # Keep the row totals in a separate Series rather than a temporary
    # 'Total' column, so a sample that is itself named 'Total' is neither
    # overwritten nor dropped. NaN counts are skipped by sum().
    row_totals = otu_lim.sum(axis=1)
    otu_lim = otu_lim.loc[row_totals > 0]
    sample_cols = list(otu_lim)
    # Inner join on ASV id, then discard the count columns again.
    taxa_lim = pd.concat([otu_lim, taxa_table], axis=1, join='inner')
    taxa_lim = taxa_lim.drop(columns=sample_cols)
    return otu_lim, taxa_lim

# AWI

In [4]:
# Institute whose samples are analyzed in this section; used below to filter
# the metadata and to name the output files and subdirectory.
Analizing_Institute = 'AWI'

## Load Data

In [5]:
marker = '18S'

print('#####' + marker + '#####')

# ASV count table, indexed by ASV id
file = prefix + '_asv_merged.csv'
print(directory + file)
df = make_taxa_asv(directory + file).set_index('ASV')
otu_all = df.copy()

# taxonomy table: drop the 'Unnamed: 0' column, then index by ASV id
file = prefix + '_taxa_merged_updated.csv'
print(directory + file)
df = make_taxa_asv(directory + file).drop(columns='Unnamed: 0').set_index('ASV')
taxa_all = df.copy()

# sample metadata (make_metadata already sets the sample_name index)
file = prefix + '_meta_merged.csv'
print(directory + file)
df = make_metadata(directory + file)
meta_all = df.copy()

# sequence table, indexed by ASV id
file = prefix + '_seq_merged.csv'
print(directory + file)
df = make_taxa_asv(directory + file).set_index('ASV')
seq_all = df.copy()

seq_all.head()

#####18S#####
../data/GLOMICON_asv_merged.csv
Number ASVs: 14547
../data/GLOMICON_taxa_merged_updated.csv
Number ASVs: 14547
../data/GLOMICON_meta_merged.csv
Number samples: 170
../data/GLOMICON_seq_merged.csv
Number ASVs: 14547


Unnamed: 0_level_0,sequence,Analyzing_Institute
ASV,Unnamed: 1_level_1,Unnamed: 2_level_1
ASV_1,CAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTCGG...,UDAL
ASV_2,GCACCTACCGATTGAATGGTCCGGTGAAGACTCGGGATTGTGGTCT...,MBARI
ASV_3,GCACCTACCGATTGAATGGTCCGGTGAGGCCTCGGGATCGTGGCGA...,MBARI
ASV_4,GCACCTACCGATTGAATGGTCCGGTGAAGCCTCGGGATTGTGGTTG...,MBARI
ASV_5,GCTCCTACCGATTGAGTGATCCGGTGAATAATTCGGACTGCAGCAG...,MBARI


In [6]:
# List the taxonomy column names, then preview the first rows
print(list(taxa_all))
taxa_all.head()

['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']


Unnamed: 0_level_0,Kingdom,Phylum,Class,Order,Family,Genus,Species
ASV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ASV_1,,Haptophyta,,Phaeocystales,Phaeocystaceae,Phaeocystis,Phaeocystis pouchetii
ASV_10,,Bacillariophyta,Mediophyceae,,,,
ASV_100,,,Dinophyceae,Gymnodiniales,,,
ASV_1000,,Ciliophora,Spirotrichea,,,,
ASV_10000,,,Dinophyceae,,,,


In [7]:
# Preview the sample metadata (indexed by sample_name)
meta_all.head()

Unnamed: 0_level_0,Analyzing_Institute,Collecting_Institute
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Davenport_02_0008,AWI,MBARI
Davenport_06_0008,AWI,MBARI
Davenport_11_0008,AWI,MBARI
Davenport_15_0008,AWI,MBARI
Davenport_19_0008,AWI,MBARI


In [8]:
# Preview the ASV count table (ASVs as rows, samples as columns)
otu_all.head()

Unnamed: 0_level_0,Davenport_02_0008,Davenport_06_0008,Davenport_11_0008,Davenport_15_0008,Davenport_19_0008,Framstrait_01_0008,Framstrait_05_0008,Framstrait_09_0008,Framstrait_13_0008,Framstrait_17_0008,...,E-G30-eMockB-18,E-G29-eMockA-11,E-G32-bMock2,G11r-DAL30,E-G35-bMock22,G20r-ROS12,E-G31-eMockB-20,E-G34-bMock20,E-G1-AWI11,E-G33-bMock9
ASV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ASV_1,,,,,,,,,,,...,1712.0,1266.0,33675.0,124.0,34286.0,4.0,1383.0,56835.0,360.0,34134.0
ASV_2,,,,,,,,,,,...,,,,,,,,,,
ASV_3,,,,,,,,,,,...,,,,,,,,,,
ASV_4,,,,,,,,,,,...,,,,,,,,,,
ASV_5,,,,,,,,,,,...,,,,,,,,,,


## Limit data by metadata parameters

- Run each Analyzing Institute separately
- remove control sequences

In [12]:
df = meta_all.copy()
#print(df['depth'].max())
# Keep only the samples processed by the institute analyzed in this section.
df = df.loc[df['Analyzing_Institute'] == Analizing_Institute]
# Remove the control (mock) samples, then samples with no collecting institute.
df = df.loc[~df['Collecting_Institute'].isin(['BLOOMMOCK', 'EVENMOCK'])]
df = df.loc[df['Collecting_Institute'].notna()]
print(df['Collecting_Institute'].unique())

# Metadata subset used by the following cells
meta_lim = df.copy()
df.head()

['MBARI' 'AWI' 'UDalhousie' 'NOAA' 'NOC']


Unnamed: 0_level_0,Analyzing_Institute,Collecting_Institute
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Davenport_02_0008,AWI,MBARI
Davenport_06_0008,AWI,MBARI
Davenport_11_0008,AWI,MBARI
Davenport_15_0008,AWI,MBARI
Davenport_19_0008,AWI,MBARI


In [13]:
# Restrict the count and taxonomy tables to the samples kept in meta_lim
# (ASVs with no reads in those samples are dropped by the helper).
otu_lim, taxa_lim = from_metadata_to_taxareads(meta_lim, otu_all, taxa_all)

## Create Biom File

In [14]:
# create limited biom file

# One entry per TSV to write: (file name, table, header for the index column).
exports = [
    ("Qiime2_asv.tsv", otu_lim, '#OTUID'),       # asv table
    ("Qiime2_taxa.tsv", taxa_lim, '#OTUID'),     # taxa table
    ("Qiime2_meta.tsv", meta_lim, '#SampleID'),  # metadata table
]
for basename, frame, index_header in exports:
    filename = results_directory + basename
    #check filename
    print(filename)
    # Work on a copy so renaming the index does not touch the source table.
    df = frame.copy()
    df.index.names = [index_header]
    df.to_csv(filename, sep='\t')


../RPCA/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/Qiime2_meta.tsv


## Make BIOM file

 - biom commands in python seem buggy - run in bash for now.
 - EDIT FILE PATH BELOW TO BE CORRECT DIRECTORY

In [15]:
%%bash
# Stop at the first failing command, so a failed cd or conversion is never
# followed by add-metadata running on a missing or stale table.
set -euo pipefail
cd /Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA/
pwd
# run in correct conda environment (gemelli)
# Make biom file (JSON format) from the ASV count table
conda run -n gemelli biom convert -i Qiime2_asv.tsv -o table.from_txt_json.biom --table-type="OTU table" --to-json
# Add metadata to the biom file: Qiime2_taxa.tsv as observation metadata, Qiime2_meta.tsv as sample metadata
conda run -n gemelli biom add-metadata -i table.from_txt_json.biom -o table.w_md.biom --observation-metadata-fp Qiime2_taxa.tsv --sample-metadata-fp Qiime2_meta.tsv

/Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA


## Run RPCA

In [16]:
from biom import load_table
from gemelli.rpca import rpca
# import the data table
# Load from results_directory (where the TSV inputs were written) instead of
# repeating a machine-specific absolute path.
table = load_table(results_directory + 'table.w_md.biom')
# perform RPCA
# NOTE(review): min_sample_count=500 presumably drops samples with fewer than
# 500 total reads - confirm against the gemelli documentation.
ordination, distance = rpca(table, min_sample_count=500)


  mat = np.log(matrix_closure(mat))


In [17]:
# Proportion of variance explained by each principal component
ordination.proportion_explained

PC1    0.573971
PC2    0.388929
PC3    0.037100
dtype: float64

In [18]:
# Common path stem for every file exported from this run:
# results directory followed by the institute name.
out_stem = results_directory + Analizing_Institute

# save whole ordination object
file = out_stem + '_ordination'
ordination.write(file=file, format="ordination")

# export proportion explained
file = out_stem + '_prop_explained.csv'
print(file)
ordination.proportion_explained.to_csv(file)

# export scores: sample coordinates with the sample metadata columns appended
file = out_stem + '_scores.csv'
print(file)
scores = pd.concat([ordination.samples, meta_lim], axis=1)
scores.to_csv(file)

# export loadings: feature coordinates with the taxonomy columns appended
file = out_stem + '_loadings.csv'
print(file)
loadings = pd.concat([ordination.features, taxa_lim], axis=1)
loadings.to_csv(file)

# distance matrix: square table labelled by sample id on both axes
file = out_stem + '_distance.csv'
print(file)
df_distance = pd.DataFrame(data=distance.data, index=distance.ids, columns=distance.ids)
df_distance.to_csv(file)
df_distance.head()

../RPCA/AWI_prop_explained.csv
../RPCA/AWI_scores.csv
../RPCA/AWI_loadings.csv
../RPCA/AWI_distance.csv


Unnamed: 0,Davenport_02_0008,Davenport_06_0008,Davenport_11_0008,Davenport_15_0008,Davenport_19_0008,Framstrait_01_0008,Framstrait_05_0008,Framstrait_09_0008,Framstrait_13_0008,Framstrait_17_0008,...,LaJolla_03_0049,LaJolla_09_0049,LaJolla_15_0049,LaJolla_21_0049,LaJolla_27_0049,Plymouth_06_0049,Plymouth_13_0049,Plymouth_19_0049,Plymouth_29_0049,Plymouth_30_0049
Davenport_02_0008,0.0,0.054618,0.143042,0.255648,0.266803,2.223301,2.173862,1.872766,2.174032,2.07299,...,2.71522,2.747794,2.495645,2.694622,2.710109,2.268517,2.275382,2.430715,2.232895,2.236141
Davenport_06_0008,0.054618,0.0,0.114646,0.274594,0.254935,2.246254,2.195373,1.89093,2.195566,2.093756,...,2.670957,2.702669,2.45135,2.650051,2.665206,2.252983,2.258782,2.411235,2.21466,2.217766
Davenport_11_0008,0.143042,0.114646,0.0,0.222953,0.148166,2.35916,2.307922,2.003806,2.307951,2.206183,...,2.704261,2.733058,2.48432,2.68219,2.696572,2.339459,2.345135,2.493924,2.297524,2.297978
Davenport_15_0008,0.255648,0.274594,0.222953,0.0,0.164459,2.420401,2.37697,2.085174,2.377473,2.278945,...,2.896053,2.926399,2.67295,2.87318,2.891511,2.456569,2.46884,2.625003,2.420341,2.413605
Davenport_19_0008,0.266803,0.254935,0.148166,0.164459,0.0,2.488833,2.439779,2.139385,2.439846,2.339029,...,2.800326,2.826908,2.579202,2.776883,2.791998,2.459778,2.467573,2.615371,2.416359,2.41187


### Make New Folder and move new files there:

- be careful that naming structure works because files are being deleted from one place and moved to another

In [19]:
#current directory where files are located:
print(results_directory)

# Disabled date-stamp snippet, kept for reference. This notebook uses
# `import datetime`, so the call has to be datetime.datetime.now():
# now = datetime.datetime.now()
# print("now =", now)
# dt_string = now.strftime("%Y%m%d")
# print("date and time =", dt_string)

new_dir = results_directory + Analizing_Institute + '/'
#New directory files will be moved to:
print(new_dir)

../RPCA/
../RPCA/AWI/


In [20]:
# Create the per-institute subdirectory; no error if it already exists.
os.makedirs(new_dir, exist_ok=True)
search = results_directory+'*.csv'
print(search)
# Get list of files present in current directory (to move):
# every csv/tsv/biom file plus the saved ordination object.
patterns = ['*.csv', '*.tsv', '*.biom', '*ordination']
files = [path for pattern in patterns for path in glob.glob(results_directory + pattern)]
print(files)
print('Moving files to subdirectory:')
for file in files:
    # Build the target from the file's base name, so only the directory part
    # changes even if the directory string reappears later in the path.
    new_file = os.path.join(new_dir, os.path.basename(file))
    print(file)
    print(new_file)
    os.rename(file, new_file)

../RPCA/*.csv
['../RPCA/AWI_scores.csv', '../RPCA/AWI_prop_explained.csv', '../RPCA/AWI_distance.csv', '../RPCA/AWI_loadings.csv', '../RPCA/Qiime2_meta.tsv', '../RPCA/Qiime2_asv.tsv', '../RPCA/Qiime2_taxa.tsv', '../RPCA/table.from_txt_json.biom', '../RPCA/table.w_md.biom', '../RPCA/AWI_ordination']
Moving files to subdirectory:
../RPCA/AWI_scores.csv
../RPCA/AWI/AWI_scores.csv
../RPCA/AWI_prop_explained.csv
../RPCA/AWI/AWI_prop_explained.csv
../RPCA/AWI_distance.csv
../RPCA/AWI/AWI_distance.csv
../RPCA/AWI_loadings.csv
../RPCA/AWI/AWI_loadings.csv
../RPCA/Qiime2_meta.tsv
../RPCA/AWI/Qiime2_meta.tsv
../RPCA/Qiime2_asv.tsv
../RPCA/AWI/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/AWI/Qiime2_taxa.tsv
../RPCA/table.from_txt_json.biom
../RPCA/AWI/table.from_txt_json.biom
../RPCA/table.w_md.biom
../RPCA/AWI/table.w_md.biom
../RPCA/AWI_ordination
../RPCA/AWI/AWI_ordination


# SBR

In [21]:
# Institute whose samples are analyzed in this section; used below to filter
# the metadata and to name the output files and subdirectory.
Analizing_Institute = 'SBR'

## Limit data by metadata parameters

- Run each Analyzing Institute separately
- remove control sequences

In [22]:
df = meta_all.copy()
#print(df['depth'].max())
# Keep only the samples processed by the institute analyzed in this section.
df = df.loc[df['Analyzing_Institute'] == Analizing_Institute]
# Remove the mock samples, then samples with no collecting institute.
df = df.loc[~df['Collecting_Institute'].isin(['BLOOMMOCK', 'EVENMOCK'])]
df = df.loc[df['Collecting_Institute'].notna()]
print(df['Collecting_Institute'].unique())
# Metadata subset used by the following cells
meta_lim = df.copy()
df.head()

['UDalhousie' 'MBARI' 'NOAA' 'SBR' 'NOC']


Unnamed: 0_level_0,Analyzing_Institute,Collecting_Institute
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1
cj-DAL03,SBR,UDalhousie
cj-DAL09,SBR,UDalhousie
cj-DAL15,SBR,UDalhousie
cj-DAL21,SBR,UDalhousie
cj-DAL27,SBR,UDalhousie


In [23]:
# Restrict the count and taxonomy tables to the samples kept in meta_lim
# (ASVs with no reads in those samples are dropped by the helper).
otu_lim, taxa_lim = from_metadata_to_taxareads(meta_lim, otu_all, taxa_all)

## Create Biom File

In [24]:
# create limited biom file

# One entry per TSV to write: (file name, table, header for the index column).
exports = [
    ("Qiime2_asv.tsv", otu_lim, '#OTUID'),       # asv table
    ("Qiime2_taxa.tsv", taxa_lim, '#OTUID'),     # taxa table
    ("Qiime2_meta.tsv", meta_lim, '#SampleID'),  # metadata table
]
for basename, frame, index_header in exports:
    filename = results_directory + basename
    #check filename
    print(filename)
    # Work on a copy so renaming the index does not touch the source table.
    df = frame.copy()
    df.index.names = [index_header]
    df.to_csv(filename, sep='\t')


../RPCA/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/Qiime2_meta.tsv


## Make BIOM file

 - biom commands in python seem buggy - run in bash for now.
 - EDIT FILE PATH BELOW TO BE CORRECT DIRECTORY

In [25]:
%%bash
# Stop at the first failing command, so a failed cd or conversion is never
# followed by add-metadata running on a missing or stale table.
set -euo pipefail
cd /Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA/
pwd
# run in correct conda environment (gemelli)
# Make biom file (JSON format) from the ASV count table
conda run -n gemelli biom convert -i Qiime2_asv.tsv -o table.from_txt_json.biom --table-type="OTU table" --to-json
# Add metadata to the biom file: Qiime2_taxa.tsv as observation metadata, Qiime2_meta.tsv as sample metadata
conda run -n gemelli biom add-metadata -i table.from_txt_json.biom -o table.w_md.biom --observation-metadata-fp Qiime2_taxa.tsv --sample-metadata-fp Qiime2_meta.tsv

/Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA


## Run RPCA

In [26]:
from biom import load_table
from gemelli.rpca import rpca
# import the data table
# Load from results_directory (where the TSV inputs were written) instead of
# repeating a machine-specific absolute path.
table = load_table(results_directory + 'table.w_md.biom')
# perform RPCA
# NOTE(review): min_sample_count=500 presumably drops samples with fewer than
# 500 total reads - confirm against the gemelli documentation.
ordination, distance = rpca(table, min_sample_count=500)


  mat = np.log(matrix_closure(mat))


In [27]:
# Proportion of variance explained by each principal component
ordination.proportion_explained

PC1    0.539678
PC2    0.418951
PC3    0.041372
dtype: float64

In [28]:
# Common path stem for every file exported from this run:
# results directory followed by the institute name.
out_stem = results_directory + Analizing_Institute

# save whole ordination object
file = out_stem + '_ordination'
ordination.write(file=file, format="ordination")

# export proportion explained
file = out_stem + '_prop_explained.csv'
print(file)
ordination.proportion_explained.to_csv(file)

# export scores: sample coordinates with the sample metadata columns appended
file = out_stem + '_scores.csv'
print(file)
scores = pd.concat([ordination.samples, meta_lim], axis=1)
scores.to_csv(file)

# export loadings: feature coordinates with the taxonomy columns appended
file = out_stem + '_loadings.csv'
print(file)
loadings = pd.concat([ordination.features, taxa_lim], axis=1)
loadings.to_csv(file)

# distance matrix: square table labelled by sample id on both axes
file = out_stem + '_distance.csv'
print(file)
df_distance = pd.DataFrame(data=distance.data, index=distance.ids, columns=distance.ids)
df_distance.to_csv(file)
df_distance.head()

../RPCA/SBR_prop_explained.csv
../RPCA/SBR_scores.csv
../RPCA/SBR_loadings.csv
../RPCA/SBR_distance.csv


Unnamed: 0,cj-DAL03,cj-DAL09,cj-DAL15,cj-DAL21,cj-DAL27,cj-MBA03,cj-MBA07,cj-MBA12,cj-MBA16,cj-MBA20,...,cj-ROS01,cj-ROS05,cj-ROS09,cj-ROS13,cj-ROS17,cj-SOC05,cj-SOC12,cj-SOC18,cj-SOC24,cj-SOC28
cj-DAL03,0.0,0.257658,0.478394,0.576236,0.23371,2.922666,3.062676,2.958917,2.569675,2.795052,...,3.061888,3.000755,3.011132,2.910567,2.882491,1.777126,1.597629,1.524727,1.603831,1.616649
cj-DAL09,0.257658,0.0,0.224059,0.331184,0.115012,2.765269,2.882369,2.792616,2.382412,2.621774,...,2.967741,2.904746,2.924157,2.824443,2.803867,1.578928,1.390807,1.36465,1.401571,1.408961
cj-DAL15,0.478394,0.224059,0.0,0.186749,0.29582,2.602465,2.701442,2.623326,2.19962,2.446609,...,2.862552,2.797909,2.824892,2.727244,2.711782,1.409944,1.22081,1.236045,1.22932,1.237374
cj-DAL21,0.576236,0.331184,0.186749,0.0,0.353595,2.626079,2.709448,2.636,2.191048,2.46468,...,2.872325,2.809437,2.83959,2.738463,2.73346,1.317381,1.109967,1.152967,1.141544,1.12803
cj-DAL27,0.23371,0.115012,0.29582,0.353595,0.0,2.791601,2.913238,2.817344,2.405793,2.653754,...,2.952822,2.891549,2.908645,2.8062,2.788278,1.565975,1.376996,1.330959,1.392672,1.396278


### Make New Folder and move new files there:

- be careful that naming structure works because files are being deleted from one place and moved to another

In [29]:
#current directory where files are located:
print(results_directory)

# Disabled date-stamp snippet, kept for reference. This notebook uses
# `import datetime`, so the call has to be datetime.datetime.now():
# now = datetime.datetime.now()
# print("now =", now)
# dt_string = now.strftime("%Y%m%d")
# print("date and time =", dt_string)

new_dir = results_directory + Analizing_Institute + '/'
#New directory files will be moved to:
print(new_dir)

../RPCA/
../RPCA/SBR/


In [30]:
# Create the per-institute subdirectory; no error if it already exists.
os.makedirs(new_dir, exist_ok=True)
search = results_directory+'*.csv'
print(search)
# Get list of files present in current directory (to move):
# every csv/tsv/biom file plus the saved ordination object.
patterns = ['*.csv', '*.tsv', '*.biom', '*ordination']
files = [path for pattern in patterns for path in glob.glob(results_directory + pattern)]
print(files)
print('Moving files to subdirectory:')
for file in files:
    # Build the target from the file's base name, so only the directory part
    # changes even if the directory string reappears later in the path.
    new_file = os.path.join(new_dir, os.path.basename(file))
    print(file)
    print(new_file)
    os.rename(file, new_file)

../RPCA/*.csv
['../RPCA/SBR_scores.csv', '../RPCA/SBR_prop_explained.csv', '../RPCA/SBR_distance.csv', '../RPCA/SBR_loadings.csv', '../RPCA/Qiime2_meta.tsv', '../RPCA/Qiime2_asv.tsv', '../RPCA/Qiime2_taxa.tsv', '../RPCA/table.from_txt_json.biom', '../RPCA/table.w_md.biom', '../RPCA/SBR_ordination']
Moving files to subdirectory:
../RPCA/SBR_scores.csv
../RPCA/SBR/SBR_scores.csv
../RPCA/SBR_prop_explained.csv
../RPCA/SBR/SBR_prop_explained.csv
../RPCA/SBR_distance.csv
../RPCA/SBR/SBR_distance.csv
../RPCA/SBR_loadings.csv
../RPCA/SBR/SBR_loadings.csv
../RPCA/Qiime2_meta.tsv
../RPCA/SBR/Qiime2_meta.tsv
../RPCA/Qiime2_asv.tsv
../RPCA/SBR/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/SBR/Qiime2_taxa.tsv
../RPCA/table.from_txt_json.biom
../RPCA/SBR/table.from_txt_json.biom
../RPCA/table.w_md.biom
../RPCA/SBR/table.w_md.biom
../RPCA/SBR_ordination
../RPCA/SBR/SBR_ordination


# MBARI

In [31]:
# Institute whose samples are analyzed in this section; used below to filter
# the metadata and to name the output files and subdirectory.
Analizing_Institute = 'MBARI'

## Limit data by metadata parameters

- Run each Analyzing Institute separately
- remove control sequences

In [32]:
df = meta_all.copy()
#print(df['depth'].max())
# Keep only the samples processed by the institute analyzed in this section.
df = df.loc[df['Analyzing_Institute'] == Analizing_Institute]
# Remove the mock samples, then samples with no collecting institute.
df = df.loc[~df['Collecting_Institute'].isin(['BLOOMMOCK', 'EVENMOCK'])]
df = df.loc[df['Collecting_Institute'].notna()]
print(df['Collecting_Institute'].unique())
# Metadata subset used by the following cells
meta_lim = df.copy()
df.head()

['MBARI' 'NOAA' 'NOC' 'UDalhousie']


Unnamed: 0_level_0,Analyzing_Institute,Collecting_Institute
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1
CN18Sc37_12_Rep_Stdy5_AO,MBARI,MBARI
CN18Sc37_12_Rep_Stdy10_AO,MBARI,MBARI
CN18Sc37_12_Rep_Stdy14_AO,MBARI,MBARI
CN18Sc37_12_Rep_Stdy18_AO,MBARI,MBARI
CN18Sc37_12_Rep_Stdy22_AO,MBARI,MBARI


In [33]:
# Restrict the count and taxonomy tables to the samples kept in meta_lim
# (ASVs with no reads in those samples are dropped by the helper).
otu_lim, taxa_lim = from_metadata_to_taxareads(meta_lim, otu_all, taxa_all)

## Create Biom File

In [34]:
# create limited biom file

# One entry per TSV to write: (file name, table, header for the index column).
exports = [
    ("Qiime2_asv.tsv", otu_lim, '#OTUID'),       # asv table
    ("Qiime2_taxa.tsv", taxa_lim, '#OTUID'),     # taxa table
    ("Qiime2_meta.tsv", meta_lim, '#SampleID'),  # metadata table
]
for basename, frame, index_header in exports:
    filename = results_directory + basename
    #check filename
    print(filename)
    # Work on a copy so renaming the index does not touch the source table.
    df = frame.copy()
    df.index.names = [index_header]
    df.to_csv(filename, sep='\t')


../RPCA/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/Qiime2_meta.tsv


## Make BIOM file

 - biom commands in python seem buggy - run in bash for now.
 - EDIT FILE PATH BELOW TO BE CORRECT DIRECTORY

In [35]:
%%bash
# Stop at the first failing command, so a failed cd or conversion is never
# followed by add-metadata running on a missing or stale table.
set -euo pipefail
cd /Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA/
pwd
# run in correct conda environment (gemelli)
# Make biom file (JSON format) from the ASV count table
conda run -n gemelli biom convert -i Qiime2_asv.tsv -o table.from_txt_json.biom --table-type="OTU table" --to-json
# Add metadata to the biom file: Qiime2_taxa.tsv as observation metadata, Qiime2_meta.tsv as sample metadata
conda run -n gemelli biom add-metadata -i table.from_txt_json.biom -o table.w_md.biom --observation-metadata-fp Qiime2_taxa.tsv --sample-metadata-fp Qiime2_meta.tsv

/Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA


## Run RPCA

In [36]:
from biom import load_table
from gemelli.rpca import rpca
# import the data table
# Load from results_directory (where the TSV inputs were written) instead of
# repeating a machine-specific absolute path.
table = load_table(results_directory + 'table.w_md.biom')
# perform RPCA
# NOTE(review): min_sample_count=500 presumably drops samples with fewer than
# 500 total reads - confirm against the gemelli documentation.
ordination, distance = rpca(table, min_sample_count=500)


  mat = np.log(matrix_closure(mat))


In [37]:
# Proportion of variance explained by each principal component
ordination.proportion_explained

PC1    0.513191
PC2    0.481574
PC3    0.005235
dtype: float64

In [38]:
# Common path stem for every file exported from this run:
# results directory followed by the institute name.
out_stem = results_directory + Analizing_Institute

# save whole ordination object
file = out_stem + '_ordination'
ordination.write(file=file, format="ordination")

# export proportion explained
file = out_stem + '_prop_explained.csv'
print(file)
ordination.proportion_explained.to_csv(file)

# export scores: sample coordinates with the sample metadata columns appended
file = out_stem + '_scores.csv'
print(file)
scores = pd.concat([ordination.samples, meta_lim], axis=1)
scores.to_csv(file)

# export loadings: feature coordinates with the taxonomy columns appended
file = out_stem + '_loadings.csv'
print(file)
loadings = pd.concat([ordination.features, taxa_lim], axis=1)
loadings.to_csv(file)

# distance matrix: square table labelled by sample id on both axes
file = out_stem + '_distance.csv'
print(file)
df_distance = pd.DataFrame(data=distance.data, index=distance.ids, columns=distance.ids)
df_distance.to_csv(file)
df_distance.head()

../RPCA/MBARI_prop_explained.csv
../RPCA/MBARI_scores.csv
../RPCA/MBARI_loadings.csv
../RPCA/MBARI_distance.csv


Unnamed: 0,CN18Sc37_12_Rep_Stdy5_AO,CN18Sc37_12_Rep_Stdy10_AO,CN18Sc37_12_Rep_Stdy14_AO,CN18Sc37_12_Rep_Stdy18_AO,CN18Sc37_12_Rep_Stdy22_AO,NOAA11_AO,NOAA14_AO,NOAA20_AO,NOC2_AO,NOC9_AO,NOC12_AO,NOC31_AO,NOC51_AO,UDalhousie2_AO,UDalhousie8_AO,UDalhousie14_AO,UDalhousie20_AO,UDalhousie26_AO
CN18Sc37_12_Rep_Stdy5_AO,0.0,0.283544,0.283763,0.280933,0.313148,1.330479,1.28912,1.451128,2.648804,2.650215,2.554497,2.495903,2.655605,2.632686,2.746353,2.637815,2.665468,2.721659
CN18Sc37_12_Rep_Stdy10_AO,0.283544,0.0,0.152406,0.141485,0.176835,1.448886,1.414348,1.589437,2.748127,2.743078,2.650771,2.591364,2.747154,2.461958,2.581043,2.468614,2.49877,2.557126
CN18Sc37_12_Rep_Stdy14_AO,0.283763,0.152406,0.0,0.104579,0.216762,1.544572,1.505472,1.679233,2.846519,2.842286,2.746321,2.693236,2.84797,2.602783,2.721853,2.609297,2.640426,2.696657
CN18Sc37_12_Rep_Stdy18_AO,0.280933,0.141485,0.104579,0.0,0.115076,1.472572,1.432919,1.610932,2.76497,2.75917,2.662166,2.613077,2.765488,2.545689,2.66201,2.551051,2.582875,2.634206
CN18Sc37_12_Rep_Stdy22_AO,0.313148,0.176835,0.216762,0.115076,0.0,1.387033,1.348201,1.530441,2.66854,2.661034,2.564093,2.517025,2.667499,2.460206,2.573877,2.464528,2.496717,2.544002


### Make New Folder and move new files there:

- be careful that naming structure works because files are being deleted from one place and moved to another

In [39]:
#current directory where files are located:
print(results_directory)

# Disabled date-stamp snippet, kept for reference. This notebook uses
# `import datetime`, so the call has to be datetime.datetime.now():
# now = datetime.datetime.now()
# print("now =", now)
# dt_string = now.strftime("%Y%m%d")
# print("date and time =", dt_string)

new_dir = results_directory + Analizing_Institute + '/'
#New directory files will be moved to:
print(new_dir)

../RPCA/
../RPCA/MBARI/


In [40]:
# Create the per-institute subdirectory; no error if it already exists.
os.makedirs(new_dir, exist_ok=True)
search = results_directory+'*.csv'
print(search)
# Get list of files present in current directory (to move):
# every csv/tsv/biom file plus the saved ordination object.
patterns = ['*.csv', '*.tsv', '*.biom', '*ordination']
files = [path for pattern in patterns for path in glob.glob(results_directory + pattern)]
print(files)
print('Moving files to subdirectory:')
for file in files:
    # Build the target from the file's base name, so only the directory part
    # changes even if the directory string reappears later in the path.
    new_file = os.path.join(new_dir, os.path.basename(file))
    print(file)
    print(new_file)
    os.rename(file, new_file)

../RPCA/*.csv
['../RPCA/MBARI_scores.csv', '../RPCA/MBARI_distance.csv', '../RPCA/MBARI_prop_explained.csv', '../RPCA/MBARI_loadings.csv', '../RPCA/Qiime2_meta.tsv', '../RPCA/Qiime2_asv.tsv', '../RPCA/Qiime2_taxa.tsv', '../RPCA/table.from_txt_json.biom', '../RPCA/table.w_md.biom', '../RPCA/MBARI_ordination']
Moving files to subdirectory:
../RPCA/MBARI_scores.csv
../RPCA/MBARI/MBARI_scores.csv
../RPCA/MBARI_distance.csv
../RPCA/MBARI/MBARI_distance.csv
../RPCA/MBARI_prop_explained.csv
../RPCA/MBARI/MBARI_prop_explained.csv
../RPCA/MBARI_loadings.csv
../RPCA/MBARI/MBARI_loadings.csv
../RPCA/Qiime2_meta.tsv
../RPCA/MBARI/Qiime2_meta.tsv
../RPCA/Qiime2_asv.tsv
../RPCA/MBARI/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/MBARI/Qiime2_taxa.tsv
../RPCA/table.from_txt_json.biom
../RPCA/MBARI/table.from_txt_json.biom
../RPCA/table.w_md.biom
../RPCA/MBARI/table.w_md.biom
../RPCA/MBARI_ordination
../RPCA/MBARI/MBARI_ordination


# NOAA

In [41]:
# Institute whose samples are analyzed in this section; used below to filter
# the metadata and to name the output files and subdirectory.
Analizing_Institute = 'NOAA'

## Limit data by metadata parameters

- Run each Analyzing Institute separately
- remove control sequences

In [42]:
df = meta_all.copy()
#print(df['depth'].max())
# Keep only the samples processed by the institute analyzed in this section.
df = df.loc[df['Analyzing_Institute'] == Analizing_Institute]
# Remove the mock samples, then samples with no collecting institute.
df = df.loc[~df['Collecting_Institute'].isin(['BLOOMMOCK', 'EVENMOCK'])]
df = df.loc[df['Collecting_Institute'].notna()]
print(df['Collecting_Institute'].unique())
# Metadata subset used by the following cells
meta_lim = df.copy()
df.head()

['AWI' 'NOC' 'SBR']


Unnamed: 0_level_0,Analyzing_Institute,Collecting_Institute
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1
GLOMICON_AWI_12,NOAA,AWI
GLOMICON_AWI_16,NOAA,AWI
GLOMICON_AWI_20,NOAA,AWI
GLOMICON_AWI_4,NOAA,AWI
GLOMICON_AWI_8,NOAA,AWI


In [43]:
# Restrict the count and taxonomy tables to the samples kept in meta_lim
# (ASVs with no reads in those samples are dropped by the helper).
otu_lim, taxa_lim = from_metadata_to_taxareads(meta_lim, otu_all, taxa_all)

## Create Biom File

In [44]:
# create limited biom file

# One entry per TSV to write: (file name, table, header for the index column).
exports = [
    ("Qiime2_asv.tsv", otu_lim, '#OTUID'),       # asv table
    ("Qiime2_taxa.tsv", taxa_lim, '#OTUID'),     # taxa table
    ("Qiime2_meta.tsv", meta_lim, '#SampleID'),  # metadata table
]
for basename, frame, index_header in exports:
    filename = results_directory + basename
    #check filename
    print(filename)
    # Work on a copy so renaming the index does not touch the source table.
    df = frame.copy()
    df.index.names = [index_header]
    df.to_csv(filename, sep='\t')


../RPCA/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/Qiime2_meta.tsv


## Make BIOM file

 - biom commands in python seem buggy - run in bash for now.
 - EDIT FILE PATH BELOW TO BE CORRECT DIRECTORY

In [45]:
%%bash
# Stop at the first failing command, so a failed cd or conversion is never
# followed by add-metadata running on a missing or stale table.
set -euo pipefail
cd /Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA/
pwd
# run in correct conda environment (gemelli)
# Make biom file (JSON format) from the ASV count table
conda run -n gemelli biom convert -i Qiime2_asv.tsv -o table.from_txt_json.biom --table-type="OTU table" --to-json
# Add metadata to the biom file: Qiime2_taxa.tsv as observation metadata, Qiime2_meta.tsv as sample metadata
conda run -n gemelli biom add-metadata -i table.from_txt_json.biom -o table.w_md.biom --observation-metadata-fp Qiime2_taxa.tsv --sample-metadata-fp Qiime2_meta.tsv

/Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA


## Run RPCA

In [46]:
from biom import load_table
from gemelli.rpca import rpca
# import the data table
# Load from results_directory (where the TSV inputs were written) instead of
# repeating a machine-specific absolute path.
table = load_table(results_directory + 'table.w_md.biom')
# perform RPCA
# NOTE(review): min_sample_count=500 presumably drops samples with fewer than
# 500 total reads - confirm against the gemelli documentation.
ordination, distance = rpca(table, min_sample_count=500)


  mat = np.log(matrix_closure(mat))


In [47]:
# Proportion of variance explained by each principal component
ordination.proportion_explained

PC1    0.588569
PC2    0.401785
PC3    0.009646
dtype: float64

In [48]:
# Common path stem for every file exported from this run:
# results directory followed by the institute name.
out_stem = results_directory + Analizing_Institute

# save whole ordination object
file = out_stem + '_ordination'
ordination.write(file=file, format="ordination")

# export proportion explained
file = out_stem + '_prop_explained.csv'
print(file)
ordination.proportion_explained.to_csv(file)

# export scores: sample coordinates with the sample metadata columns appended
file = out_stem + '_scores.csv'
print(file)
scores = pd.concat([ordination.samples, meta_lim], axis=1)
scores.to_csv(file)

# export loadings: feature coordinates with the taxonomy columns appended
file = out_stem + '_loadings.csv'
print(file)
loadings = pd.concat([ordination.features, taxa_lim], axis=1)
loadings.to_csv(file)

# distance matrix: square table labelled by sample id on both axes
file = out_stem + '_distance.csv'
print(file)
df_distance = pd.DataFrame(data=distance.data, index=distance.ids, columns=distance.ids)
df_distance.to_csv(file)
df_distance.head()

../RPCA/NOAA_prop_explained.csv
../RPCA/NOAA_scores.csv
../RPCA/NOAA_loadings.csv
../RPCA/NOAA_distance.csv


Unnamed: 0,GLOMICON_AWI_12,GLOMICON_AWI_16,GLOMICON_AWI_20,GLOMICON_AWI_4,GLOMICON_AWI_8,GLOMICON_NOC_10,GLOMICON_NOC_16,GLOMICON_NOC_22,GLOMICON_NOC_3,GLOMICON_NOC_32,GLOMICON_Roscoff_11,GLOMICON_Roscoff_15,GLOMICON_Roscoff_19,GLOMICON_Roscoff_3,GLOMICON_Roscoff_7
GLOMICON_AWI_12,0.0,0.085268,0.148385,0.157048,0.228674,2.374761,2.569904,2.379059,2.606283,2.503908,2.740263,2.468494,2.12624,2.49518,2.643843
GLOMICON_AWI_16,0.085268,0.0,0.19412,0.208968,0.15084,2.324451,2.523655,2.332603,2.566793,2.458967,2.714493,2.438659,2.08208,2.471294,2.621791
GLOMICON_AWI_20,0.148385,0.19412,0.0,0.260097,0.318058,2.306495,2.494421,2.306108,2.519942,2.426663,2.764448,2.486976,2.140965,2.517067,2.667649
GLOMICON_AWI_4,0.157048,0.208968,0.260097,0.0,0.296725,2.458144,2.651631,2.457438,2.686395,2.58484,2.666883,2.407004,2.095955,2.42122,2.565102
GLOMICON_AWI_8,0.228674,0.15084,0.318058,0.296725,0.0,2.242374,2.447449,2.254222,2.501637,2.384664,2.608658,2.329156,1.958309,2.368621,2.520953


### Make New Folder and move new files there:

- be careful that naming structure works because files are being deleted from one place and moved to another

In [49]:
#current directory where files are located:
print(results_directory)

# Disabled date-stamp snippet, kept for reference. This notebook uses
# `import datetime`, so the call has to be datetime.datetime.now():
# now = datetime.datetime.now()
# print("now =", now)
# dt_string = now.strftime("%Y%m%d")
# print("date and time =", dt_string)

new_dir = results_directory + Analizing_Institute + '/'
#New directory files will be moved to:
print(new_dir)

../RPCA/
../RPCA/NOAA/


In [50]:
# Create the per-institute subdirectory; no error if it already exists.
os.makedirs(new_dir, exist_ok=True)
search = results_directory+'*.csv'
print(search)
# Get list of files present in current directory (to move):
# every csv/tsv/biom file plus the saved ordination object.
patterns = ['*.csv', '*.tsv', '*.biom', '*ordination']
files = [path for pattern in patterns for path in glob.glob(results_directory + pattern)]
print(files)
print('Moving files to subdirectory:')
for file in files:
    # Build the target from the file's base name, so only the directory part
    # changes even if the directory string reappears later in the path.
    new_file = os.path.join(new_dir, os.path.basename(file))
    print(file)
    print(new_file)
    os.rename(file, new_file)

../RPCA/*.csv
['../RPCA/NOAA_scores.csv', '../RPCA/NOAA_distance.csv', '../RPCA/NOAA_loadings.csv', '../RPCA/NOAA_prop_explained.csv', '../RPCA/Qiime2_meta.tsv', '../RPCA/Qiime2_asv.tsv', '../RPCA/Qiime2_taxa.tsv', '../RPCA/table.from_txt_json.biom', '../RPCA/table.w_md.biom', '../RPCA/NOAA_ordination']
Moving files to subdirectory:
../RPCA/NOAA_scores.csv
../RPCA/NOAA/NOAA_scores.csv
../RPCA/NOAA_distance.csv
../RPCA/NOAA/NOAA_distance.csv
../RPCA/NOAA_loadings.csv
../RPCA/NOAA/NOAA_loadings.csv
../RPCA/NOAA_prop_explained.csv
../RPCA/NOAA/NOAA_prop_explained.csv
../RPCA/Qiime2_meta.tsv
../RPCA/NOAA/Qiime2_meta.tsv
../RPCA/Qiime2_asv.tsv
../RPCA/NOAA/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/NOAA/Qiime2_taxa.tsv
../RPCA/table.from_txt_json.biom
../RPCA/NOAA/table.from_txt_json.biom
../RPCA/table.w_md.biom
../RPCA/NOAA/table.w_md.biom
../RPCA/NOAA_ordination
../RPCA/NOAA/NOAA_ordination


# UDalhousie (UDAL)

In [51]:
# Institute whose samples are analyzed in this section. 'UDAL' is the value
# matched against the Analyzing_Institute metadata column below
# (the Collecting_Institute column spells it 'UDalhousie').
Analizing_Institute = 'UDAL'

## Limit data by metadata parameters

- Run each Analyzing Institute separately
- remove control sequences

In [52]:
df = meta_all.copy()
#print(df['depth'].max())
# Keep only the samples processed by the institute analyzed in this section.
df = df.loc[df['Analyzing_Institute'] == Analizing_Institute]
# Remove the mock samples, then samples with no collecting institute.
df = df.loc[~df['Collecting_Institute'].isin(['BLOOMMOCK', 'EVENMOCK'])]
df = df.loc[df['Collecting_Institute'].notna()]
print(df['Collecting_Institute'].unique())
# Metadata subset used by the following cells
meta_lim = df.copy()
df.head()

['AWI' 'NOC' 'NOAA' 'UDalhousie' 'SBR']


Unnamed: 0_level_0,Analyzing_Institute,Collecting_Institute
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1
G8r-AWI19,UDAL,AWI
G19r-AWI7,UDAL,AWI
G12r-NOC23,UDAL,NOC
E-G2-NOAA29,UDAL,NOAA
G22r-DAL24,UDAL,UDalhousie


In [53]:
# Restrict the ASV count table and the taxonomy table to the samples kept in
# meta_lim (ASVs with zero total reads in those samples are dropped).
otu_lim, taxa_lim = from_metadata_to_taxareads(
    meta_data=meta_lim,
    otu_table=otu_all,
    taxa_table=taxa_all,
)

## Create Biom File

In [54]:
# Write the three tab-separated inputs needed to build the limited BIOM file.
# Each entry: (output file name, source table, name given to the index column).
qiime2_exports = (
    ("Qiime2_asv.tsv", otu_lim, '#OTUID'),       # asv table
    ("Qiime2_taxa.tsv", taxa_lim, '#OTUID'),     # taxa table
    ("Qiime2_meta.tsv", meta_lim, '#SampleID'),  # metadata table
)

for basename, source_table, index_label in qiime2_exports:
    filename = results_directory + basename
    # check filename
    print(filename)

    # Work on a copy so renaming the index does not touch the source table.
    df = source_table.copy()
    df.index.names = [index_label]
    df.to_csv(filename, sep='\t')


../RPCA/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/Qiime2_meta.tsv


## Make BIOM file

 - The biom commands seem buggy when called from Python, so they are run in bash for now.
 - EDIT THE FILE PATH IN THE CELL BELOW SO THAT IT POINTS TO THE CORRECT DIRECTORY.

In [55]:
%%bash
# Stop at the first failing command, so that the biom steps never run in the
# wrong directory (failed cd) or on a stale intermediate file (failed convert).
set -e
# Directory holding Qiime2_asv.tsv, Qiime2_taxa.tsv and Qiime2_meta.tsv.
cd /Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA/
pwd
# Run in the correct conda environment (gemelli).
# Step 1: convert the ASV count table into a JSON-format BIOM file.
conda run -n gemelli biom convert -i Qiime2_asv.tsv -o table.from_txt_json.biom --table-type="OTU table" --to-json
# Step 2: attach the taxonomy (observation) and sample metadata to the BIOM file.
conda run -n gemelli biom add-metadata -i table.from_txt_json.biom -o table.w_md.biom --observation-metadata-fp Qiime2_taxa.tsv --sample-metadata-fp Qiime2_meta.tsv

/Users/kpitz/github/GLOMICON/intercomparison/Merged_Datasets/RPCA


## Run RPCA

In [56]:
from biom import load_table
from gemelli.rpca import rpca

# Load the metadata-annotated BIOM table from the shared results directory.
# Using results_directory (instead of a machine-specific absolute path) keeps
# this cell in step with the cells that write and later move table.w_md.biom.
table = load_table(results_directory + 'table.w_md.biom')

# Perform RPCA; returns the ordination results and the sample distance matrix.
# min_sample_count=500 is handed to gemelli's sample filter
# (presumably a minimum total read count per sample -- confirm in gemelli docs).
ordination, distance = rpca(table, min_sample_count=500)


  mat = np.log(matrix_closure(mat))


In [57]:
# Display the proportion explained by each ordination axis (PC1, PC2, ...).
ordination.proportion_explained

PC1    0.554822
PC2    0.379224
PC3    0.065955
dtype: float64

In [58]:
# Every output of this run shares the same '<results dir><institute>' prefix.
out_prefix = results_directory + Analizing_Institute

# Whole ordination object, written in the "ordination" format (path not printed).
file = out_prefix + '_ordination'
ordination.write(file=file, format="ordination")

# Proportion explained per axis.
file = out_prefix + '_prop_explained.csv'
print(file)
ordination.proportion_explained.to_csv(file)

# Sample scores, with the sample metadata appended as extra columns.
file = out_prefix + '_scores.csv'
print(file)
scores = pd.concat([ordination.samples, meta_lim], axis=1)
scores.to_csv(file)

# Feature loadings, with the taxonomy appended as extra columns.
file = out_prefix + '_loadings.csv'
print(file)
loadings = pd.concat([ordination.features, taxa_lim], axis=1)
loadings.to_csv(file)

# Distance matrix as a square table labelled by sample id on both axes.
file = out_prefix + '_distance.csv'
print(file)
df_distance = pd.DataFrame(
    data=distance.data,
    index=distance.ids,
    columns=distance.ids,
)
df_distance.to_csv(file)
df_distance.head()

../RPCA/UDAL_prop_explained.csv
../RPCA/UDAL_scores.csv
../RPCA/UDAL_loadings.csv
../RPCA/UDAL_distance.csv


Unnamed: 0,G8r-AWI19,G19r-AWI7,G12r-NOC23,E-G2-NOAA29,G22r-DAL24,E-G16-NOC11,G4r-ROS16,G7r-NOC20,E-G14-ROS20,E-G13-NOAA23,...,E-G3-DAL6,G9r-NOAA17,G23r-NOAA12,G17r-DAL12,G25r-ROS8,E-G6-DAL18,E-G15-AWI3,G11r-DAL30,G20r-ROS12,E-G1-AWI11
G8r-AWI19,0.0,0.264801,1.772755,1.785528,3.080397,1.773684,3.462982,2.133165,2.718589,1.696005,...,2.718259,1.638764,1.423419,3.091847,3.251021,3.18775,0.227845,3.016081,3.363564,0.051225
G19r-AWI7,0.264801,0.0,1.890152,1.899402,2.990714,1.880322,3.594093,2.233382,2.844422,1.804158,...,2.682503,1.785078,1.429424,2.999132,3.369366,3.123533,0.235951,2.933013,3.491712,0.258206
G12r-NOC23,1.772755,1.890152,0.0,0.890163,3.104786,0.323908,2.039363,0.451474,1.474042,0.87441,...,2.743862,1.261231,0.930157,3.140819,1.796511,3.163053,1.664953,3.095381,1.965983,1.731339
E-G2-NOAA29,1.785528,1.899402,0.890163,0.0,2.441426,0.592976,1.700835,0.830991,0.948354,0.102048,...,1.966383,0.522024,0.880481,2.479369,1.480811,2.431961,1.730151,2.403579,1.595286,1.751639
G22r-DAL24,3.080397,2.990714,3.104786,2.441426,0.0,2.81021,3.220551,2.954352,2.708599,2.410254,...,0.71991,2.480154,2.323504,0.051172,3.03059,0.356652,3.006134,0.17461,3.109776,3.055572


### Make New Folder and move new files there:

- Check that the file-name patterns match only this run's outputs: the files are moved (not copied), so they no longer exist in the original directory afterwards.

In [59]:
# Show the directory that currently holds the freshly written result files.
print(results_directory)

# Disabled date-stamp snippet, kept as an inert string literal (it is
# evaluated and discarded). NOTE: this notebook does `import datetime`, so
# re-enabling it would need `datetime.datetime.now()`.
'''# datetime object containing current date and time
now = datetime.now()
print("now =", now)
dt_string = now.strftime("%Y%m%d")
print("date and time =", dt_string)'''

# Per-institute subdirectory that the result files will be moved into.
new_dir = "".join([results_directory, Analizing_Institute, "/"])
print(new_dir)

../RPCA/
../RPCA/UDAL/


In [60]:
# Create the destination directory; exist_ok avoids the check-then-create race
# and makes re-running the cell harmless.
os.makedirs(new_dir, exist_ok=True)

# Printed only as a quick sanity check of where we are searching
# (the full list of patterns is below).
search = results_directory+'*.csv'
print(search)

# Collect every artefact produced by this run: CSV exports, Qiime2 TSVs,
# BIOM tables and the saved ordination object (which has no extension).
patterns = ('*.csv', '*.tsv', '*.biom', '*ordination')
files = [
    path
    for pattern in patterns
    for path in glob.glob(results_directory + pattern)
]
print(files)

print('Moving files to subdirectory:')
for file in files:
    # Build the destination from the file's base name instead of a substring
    # replace, so a directory name that recurs inside the path cannot be
    # rewritten by accident.
    new_file = os.path.join(new_dir, os.path.basename(file))
    print(file)
    print(new_file)
    # This is a move, not a copy: the file no longer exists at `file` afterwards.
    os.rename(file, new_file)

../RPCA/*.csv
['../RPCA/UDAL_prop_explained.csv', '../RPCA/UDAL_loadings.csv', '../RPCA/UDAL_scores.csv', '../RPCA/UDAL_distance.csv', '../RPCA/Qiime2_meta.tsv', '../RPCA/Qiime2_asv.tsv', '../RPCA/Qiime2_taxa.tsv', '../RPCA/table.from_txt_json.biom', '../RPCA/table.w_md.biom', '../RPCA/UDAL_ordination']
Moving files to subdirectory:
../RPCA/UDAL_prop_explained.csv
../RPCA/UDAL/UDAL_prop_explained.csv
../RPCA/UDAL_loadings.csv
../RPCA/UDAL/UDAL_loadings.csv
../RPCA/UDAL_scores.csv
../RPCA/UDAL/UDAL_scores.csv
../RPCA/UDAL_distance.csv
../RPCA/UDAL/UDAL_distance.csv
../RPCA/Qiime2_meta.tsv
../RPCA/UDAL/Qiime2_meta.tsv
../RPCA/Qiime2_asv.tsv
../RPCA/UDAL/Qiime2_asv.tsv
../RPCA/Qiime2_taxa.tsv
../RPCA/UDAL/Qiime2_taxa.tsv
../RPCA/table.from_txt_json.biom
../RPCA/UDAL/table.from_txt_json.biom
../RPCA/table.w_md.biom
../RPCA/UDAL/table.w_md.biom
../RPCA/UDAL_ordination
../RPCA/UDAL/UDAL_ordination
