In [9]:
import pandas as pd
from functools import reduce

In [24]:
def abundance_table(file_list, species=True):
    """Merge per-sample abundance reports into one taxon-by-sample table.

    Each file is read as a tab-separated table and only its ``name`` and
    ``fraction_total_reads`` columns are kept (the notebook passes Bracken
    output files). The per-file tables are outer-merged on the taxon name,
    taxa missing from a sample get an abundance of 0, and rows that are 0 in
    every sample are dropped.

    Parameters
    ----------
    file_list : iterable of str or path-like
        One report file per sample. Lists and tuples are processed in the
        order given. Sets have no defined order, so they are sorted first to
        make the file -> ``SampleN`` assignment reproducible.
    species : bool, default True
        Label the taxon column ``'Species'`` when true, ``'Genus'`` otherwise.

    Returns
    -------
    pandas.DataFrame
        Columns are the taxon label followed by ``Sample1`` .. ``SampleN``,
        where N is the number of files. The index is the one produced by the
        merge with the all-zero rows removed (it is not reset).

    Raises
    ------
    ValueError
        If ``file_list`` is empty.
    """
    # Iterating a set of strings can yield a different order on every kernel
    # start, which would silently shuffle the sample columns between runs.
    if isinstance(file_list, (set, frozenset)):
        files = sorted(file_list)
    else:
        files = list(file_list)

    if not files:
        raise ValueError("file_list must contain at least one report file")

    # Derive the labels from the number of files instead of assuming four.
    sample_columns = ['Sample{}'.format(i) for i in range(1, len(files) + 1)]
    taxon_column = 'Species' if species else 'Genus'

    abundance_frames = []
    for file, sample_column in zip(files, sample_columns):
        report = pd.read_csv(file, sep='\t')
        # Assign the final column names up front: they are unique per file,
        # so the merges below can never hit a duplicate-column suffix clash.
        abundance_frames.append(
            report[['name', 'fraction_total_reads']].rename(
                columns={
                    'name': taxon_column,
                    'fraction_total_reads': sample_column,
                }
            )
        )

    # The outer join keeps taxa seen in any sample; gaps become 0 abundance.
    abundance_matrix = reduce(
        lambda left, right: pd.merge(left, right, on=taxon_column, how='outer'),
        abundance_frames,
    ).fillna(0)

    # Keep only taxa with a non-zero abundance in at least one sample.
    has_reads = (abundance_matrix[sample_columns] != 0).any(axis=1)
    return abundance_matrix.loc[has_reads]

In [25]:
# Soil microbe species
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode13..barcode16, in order.
abundance_matrix_soil = abundance_table([
    'barcode13.02.bracken.microb.ncbi.txt',
    'barcode14.02.bracken.microb.ncbi.txt',
    'barcode15.02.bracken.microb.ncbi.txt',
    'barcode16.02.bracken.microb.ncbi.txt'
], species=True)

abundance_matrix_soil.to_csv('species_abundance_soil.csv', index=False)
print(abundance_matrix_soil)

                            Species  Sample1  Sample2  Sample3  Sample4
0    s__2-12-FULL-68-20 sp001770655  0.00710  0.00000  0.00000  0.00000
1               s__AV55 sp003219335  0.00031  0.00000  0.00000  0.00000
2               s__AV55 sp003219435  0.00044  0.00000  0.00000  0.00000
3      s__Acinetobacter sp004336635  0.00305  0.00000  0.00000  0.00000
4     s__Actinomadura_B rubrobrunea  0.00178  0.01466  0.00610  0.00000
..                              ...      ...      ...      ...      ...
198           s__VBAS01 sp005882315  0.00000  0.00000  0.00177  0.00000
199       s__Variovorax sp001424835  0.00131  0.00000  0.00100  0.00000
200             s__Williamsia faeni  0.02030  0.00000  0.01482  0.00000
201          s__ZC4RG25 sp002919265  0.00417  0.00862  0.02649  0.02326
202          s__ZC4RG25 sp003242645  0.00928  0.04569  0.10282  0.06478

[203 rows x 5 columns]


In [32]:
# Soil microbe genus
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode13..barcode16, in order.
abundance_matrix_soil_G = abundance_table([
    'barcode13.G02.bracken.microb.txt',
    'barcode14.G02.bracken.microb.txt',
    'barcode15.G02.bracken.microb.txt',
    'barcode16.G02.bracken.microb.txt'
], species=False)

abundance_matrix_soil_G.to_csv('genus_abundance_soil.csv', index=False)
print(abundance_matrix_soil_G)

                  Genus  Sample1  Sample2  Sample3  Sample4
0    g__2-12-FULL-68-20  0.00000  0.00000  0.00336  0.00000
1        g__20CM-2-65-7  0.00000  0.00000  0.00333  0.00000
2               g__AR12  0.00000  0.00000  0.00028  0.00000
3               g__AR31  0.00000  0.00103  0.00275  0.00000
4                g__AV2  0.00000  0.00000  0.00024  0.00000
..                  ...      ...      ...      ...      ...
175           g__VAZQ01  0.00000  0.00000  0.00026  0.00000
176           g__VBAS01  0.00000  0.00119  0.00000  0.00000
177       g__Variovorax  0.00000  0.00343  0.00283  0.00000
178       g__Williamsia  0.00000  0.00762  0.00976  0.00000
179          g__ZC4RG25  0.06308  0.08948  0.00983  0.04023

[180 rows x 5 columns]


In [31]:
# Soil core species
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode13..barcode16, in order.
abundance_matrix_soil_core = abundance_table([
    'barcode13.bracken.txt',
    'barcode14.bracken.txt',
    'barcode15.bracken.txt',
    'barcode16.bracken.txt'
], species=True)

abundance_matrix_soil_core.to_csv('species_abundance_soil_core.csv', index=False)
print(abundance_matrix_soil_core)

                       Species  Sample1  Sample2  Sample3  Sample4
0       Acetivibrio saccincola  0.00000  0.00000  0.00000  0.00084
1     Acetivibrio thermocellus  0.00000  0.00000  0.00014  0.00077
2    Acidobacteriota bacterium  0.00000  0.00000  0.00003  0.00000
3      Acinetobacter baumannii  0.00000  0.00000  0.00000  0.00864
4      Acinetobacter johnsonii  0.00000  0.00000  0.00096  0.00000
..                         ...      ...      ...      ...      ...
365   uncultured crenarchaeote  0.01208  0.02012  0.01418  0.03986
366          uncultured fungus  0.00587  0.00589  0.01693  0.00287
367        uncultured organism  0.00000  0.00000  0.02196  0.00000
368      uncultured prokaryote  0.00000  0.00000  0.01284  0.00000
369  uncultured soil bacterium  0.00000  0.00000  0.00808  0.00000

[367 rows x 5 columns]


In [33]:
# Soil core genus
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode13..barcode16, in order.
abundance_matrix_soil_core_G = abundance_table([
    'barcode13.G.bracken.txt',
    'barcode14.G.bracken.txt',
    'barcode15.G.bracken.txt',
    'barcode16.G.bracken.txt'
], species=False)

abundance_matrix_soil_core_G.to_csv('genus_abundance_soil_core.csv', index=False)
print(abundance_matrix_soil_core_G)

             Genus  Sample1  Sample2  Sample3  Sample4
0      Acetivibrio  0.00168  0.00167  0.00017  0.00737
1    Achromobacter  0.00056  0.00144  0.00018  0.00123
2       Acidovorax  0.00000  0.00161  0.00008  0.00000
3    Acinetobacter  0.00967  0.00236  0.00289  0.00000
4     Actinomadura  0.00200  0.00420  0.00004  0.00255
..             ...      ...      ...      ...      ...
211   Ureibacillus  0.00573  0.00449  0.00105  0.00242
212  Usitatibacter  0.00000  0.00081  0.00164  0.00107
213     Variovorax  0.00447  0.00409  0.00137  0.00533
214   Xanthobacter  0.00000  0.00000  0.00021  0.00000
215    Xanthomonas  0.00000  0.00000  0.00000  0.00074

[216 rows x 5 columns]


In [28]:
# Kefir microbe species
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode01..barcode04, in order.
abundance_matrix_kefir = abundance_table([
    'barcode01.bracken.ncbi.txt',
    'barcode02.bracken.ncbi.txt',
    'barcode03.bracken.ncbi.txt',
    'barcode04.bracken.ncbi.txt'
], species=True)

abundance_matrix_kefir.to_csv('species_abundance_kefir.csv', index=False)
print(abundance_matrix_kefir)

                           Species  Sample1  Sample2  Sample3  Sample4
1           s__Acetobacter fabarum  0.00001  0.00014  0.00000  0.00000
2     s__Acetobacter okinawensis_A  0.00000  0.00001  0.00000  0.00000
4        s__Acinetobacter albensis  0.00000  0.00000  0.00004  0.00000
10    s__Acinetobacter sp000214135  0.00000  0.00000  0.00001  0.00000
13    s__Acinetobacter sp002135415  0.00000  0.00000  0.00125  0.00000
..                             ...      ...      ...      ...      ...
150   s__Streptococcus ruminantium  0.00000  0.00000  0.00002  0.00000
151  s__Streptomyces griseocarneus  0.00003  0.00008  0.00001  0.00003
152  s__Tetragenococcus halophilus  0.00000  0.00001  0.00000  0.00001
153  s__Tetragenococcus muriaticus  0.00000  0.00002  0.00001  0.00002
154        s__Trueperella pyogenes  0.00000  0.00000  0.00001  0.00000

[91 rows x 5 columns]


In [29]:
# Kefir microbe genus
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode01..barcode04, in order.
abundance_matrix_kefir_G = abundance_table([
    'barcode01.G.bracken.txt',
    'barcode02.G.bracken.txt',
    'barcode03.G.bracken.txt',
    'barcode04.G.bracken.txt'
], species=False)

abundance_matrix_kefir_G.to_csv('genus_abundance_kefir.csv', index=False)
print(abundance_matrix_kefir_G)

                      Genus  Sample1  Sample2  Sample3  Sample4
0            g__Acetobacter  0.00028  0.00000  0.00002  0.00000
1          g__Acinetobacter  0.00014  0.00005  0.00004  0.00199
2              g__Aeromonas  0.00000  0.00000  0.00000  0.00001
4            g__Brochothrix  0.00000  0.00000  0.00000  0.00016
5         g__Carnobacterium  0.00000  0.00000  0.00000  0.00001
6            g__Citrobacter  0.00001  0.00000  0.00000  0.00000
8          g__Clostridium_P  0.00000  0.00002  0.00000  0.00000
9   g__Companilactobacillus  0.00005  0.00004  0.00002  0.00005
10       g__Corynebacterium  0.00001  0.00000  0.00000  0.00000
12          g__Enterobacter  0.00001  0.00000  0.00000  0.00000
13        g__Enterococcus_B  0.00009  0.00001  0.00000  0.00004
14        g__Enterococcus_G  0.00002  0.00001  0.00004  0.00002
18  g__Furfurilactobacillus  0.00022  0.00010  0.00034  0.00018
19       g__Fusobacterium_C  0.00001  0.00000  0.00000  0.00001
22          g__Klebsiella_A  0.00002  0.

In [27]:
# Kefir core species
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode01..barcode04, in order.
abundance_matrix_kefir_core = abundance_table([
    'barcode01.bracken.core.txt',
    'barcode02.bracken.core.txt',
    'barcode03.bracken.core.txt',
    'barcode04.bracken.core.txt'
], species=True)

abundance_matrix_kefir_core.to_csv('species_abundance_kefir_core.csv', index=False)
print(abundance_matrix_kefir_core)

                        Species  Sample1  Sample2  Sample3  Sample4
0        Acetobacter orientalis  0.00000  0.00000  0.00001  0.00000
1       Acinetobacter baumannii  0.00007  0.00000  0.00000  0.00000
3       Acinetobacter johnsonii  0.00004  0.00000  0.00000  0.00000
4           Acinetobacter junii  0.00001  0.00000  0.00000  0.00000
5         Acinetobacter lwoffii  0.00018  0.00000  0.00000  0.00000
..                          ...      ...      ...      ...      ...
111     Staphylococcus simulans  0.00000  0.00001  0.00001  0.00010
112  Streptococcus dysgalactiae  0.00000  0.00001  0.00000  0.00000
113   Streptococcus sp. ZY19097  0.00000  0.00000  0.00000  0.00001
114        Streptococcus uberis  0.00001  0.00000  0.00000  0.00000
115        Trueperella pyogenes  0.00001  0.00000  0.00000  0.00000

[101 rows x 5 columns]


In [30]:
# Kefir core genus
# The files are passed as a list, not a set: a set has no defined iteration
# order, so the barcode -> Sample1..Sample4 assignment could change between
# kernel sessions. Here Sample1..Sample4 are barcode01..barcode04, in order.
abundance_matrix_kefir_core_G = abundance_table([
    'barcode01.G.bracken.core.txt',
    'barcode02.G.bracken.core.txt',
    'barcode03.G.bracken.core.txt',
    'barcode04.G.bracken.core.txt'
], species=False)

abundance_matrix_kefir_core_G.to_csv('genus_abundance_kefir_core.csv', index=False)
print(abundance_matrix_kefir_core_G)

                   Genus  Sample1  Sample2  Sample3  Sample4
0            Acetobacter  0.00001  0.00000  0.00001  0.00000
1          Acinetobacter  0.00000  0.00000  0.00001  0.00046
2              Aeromonas  0.00000  0.00000  0.00000  0.00002
3                    Bos  0.00005  0.00008  0.00016  0.00002
4            Brochothrix  0.00000  0.00000  0.00000  0.00017
5                Bubalus  0.00002  0.00004  0.00012  0.00001
6                  Capra  0.00123  0.00169  0.00480  0.00034
7         Carnobacterium  0.00000  0.00000  0.00000  0.00001
8            Citrobacter  0.00000  0.00000  0.00005  0.00000
9            Clostridium  0.00000  0.00005  0.00000  0.00000
10  Companilactobacillus  0.00000  0.00001  0.00002  0.00001
11       Corynebacterium  0.00019  0.00043  0.00024  0.00011
12         Cutibacterium  0.00000  0.00001  0.00001  0.00000
13             Dioscorea  0.00000  0.00000  0.00000  0.00053
14          Enterococcus  0.00001  0.00001  0.00011  0.00008
15               Erwinia