# Construct tables

In [1]:
import os
import csv
import functools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
CLOSE_PLOTS = True

In [3]:
OUTDIR = "out/tables"
IMGDIR = "out/images"

os.makedirs(OUTDIR, exist_ok=True)
os.makedirs(IMGDIR, exist_ok=True)

In [4]:
SOILS = [
    'Soil3', 'Soil5', 'Soil6', 'Soil9', 'Soil11', 
    'Soil12', 'Soil14', 'Soil15', 'Soil16', 'Soil17'
]

In [5]:
DMD_TOPHITS_FPATH = "out/dmnd_combined_top_hits.tsv"
df_dmd_tophits = pd.read_csv(
    DMD_TOPHITS_FPATH,
    sep="\t",
)
df_dmd_tophits

Unnamed: 0,taxid,ko,qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore
0,1,K12266,pbru:LZC95_42830,EHHOFP_00040,43.5,331,164,6,116,438,118,433,1.030000e-72,233.0
1,1,K12266,mrm:A7982_00688,EHHOFP_00785,47.4,247,121,1,150,396,140,377,5.900000e-59,199.0
2,1,K00372,artu:E7Y32_00510,EHHOFP_02725,24.4,664,404,24,71,665,76,710,2.250000e-19,89.7
3,1,K00360,sen:SACE_4911,EHHOFP_03050,27.6,243,147,10,28,261,81,303,1.390000e-08,53.5
4,1,K04748,cser:CCO03_02485,EHHOFP_03455,27.8,133,91,3,17,148,202,330,3.460000e-06,44.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1356,983594,K00363,bve:AK36_3846,WP_027791810.1,93.0,115,8,0,1,115,1,115,1.130000e-76,219.0
1357,983594,K00362,bam:Bamb_3936,WP_027791811.1,94.7,851,45,0,1,851,1,851,0.000000e+00,1620.0
1358,983594,K26138,pve:UC34_18440,WP_027792274.1,39.5,114,65,4,8,118,5,117,1.230000e-21,81.3
1359,983594,K26139,maq:Maqu_0173,WP_049096615.1,36.7,283,173,3,22,303,130,407,1.370000e-42,154.0


In [6]:
KO_INFO_FPATH = "data/ko_information.tsv"
DF_KO_INFO = pd.read_csv(
    KO_INFO_FPATH, sep="\t", index_col=0
)

def _mapfunc(s):
    s = s.lower()
    if "nitrate reductase" in s:
        return "nitrate reductase"
    elif "nitrite reductase" in s:
        return "nitrite reductase"
    elif "nitric oxide reductase" in s:
        return "nitric oxide reductase"
    elif "nitrous oxide reductase" in s or "nitrous-oxide reductase" in s:
        return "nitrous oxide reductase"
    elif "hydroxylamine reductase" in s:
        return "hydroxylamine reductase"
    else:
        return "other"
    
DF_KO_INFO["category"] = DF_KO_INFO["NAME"].apply(_mapfunc)

KO_LIST = list(DF_KO_INFO.index)

DF_KO_INFO

Unnamed: 0_level_0,SYMBOL,NAME,category
ENTRY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
K00360,nasB,assimilatory nitrate reductase electron transf...,nitrate reductase
K00361,nasB,nitrite reductase [NAD(P)H] [EC:1.7.1.4],nitrite reductase
K00362,nirB,nitrite reductase (NADH) large subunit [EC:1.7...,nitrite reductase
K00363,nirD,nitrite reductase (NADH) small subunit [EC:1.7...,nitrite reductase
K00366,nirA,ferredoxin-nitrite reductase [EC:1.7.7.1],nitrite reductase
K00367,narB,ferredoxin-nitrate reductase [EC:1.7.7.2],nitrate reductase
K00368,nirK,nitrite reductase (NO-forming) [EC:1.7.2.1],nitrite reductase
K00370,"narG, narZ, nxrA","nitrate reductase / nitrite oxidoreductase, al...",nitrate reductase
K00371,"narH, narY, nxrB","nitrate reductase / nitrite oxidoreductase, be...",nitrate reductase
K00372,"nasC, nasA",assimilatory nitrate reductase catalytic subun...,nitrate reductase


In [7]:
KO_CATEGORIES = sorted(DF_KO_INFO["category"].unique())
KO_CATEGORY_SETS = {
    cat: DF_KO_INFO[DF_KO_INFO["category"] == cat].index.values
    for cat in KO_CATEGORIES
}
for k, v in KO_CATEGORY_SETS.items():
    print(f"{k}:\n{v}\n")


hydroxylamine reductase:
['K05601']

nitrate reductase:
['K00360' 'K00367' 'K00370' 'K00371' 'K00372' 'K00373' 'K00374' 'K02567'
 'K02568' 'K02570' 'K02571' 'K10534']

nitric oxide reductase:
['K02164' 'K02305' 'K02448' 'K04561' 'K04747' 'K04748' 'K12264' 'K12265'
 'K12266' 'K15877']

nitrite reductase:
['K00361' 'K00362' 'K00363' 'K00366' 'K00368' 'K03385' 'K04017' 'K04018'
 'K15864' 'K15876' 'K17877' 'K19343' 'K26138' 'K26139']

nitrous oxide reductase:
['K00376' 'K19339']

other:
['K00844']



In [8]:
TAXA_INFO_FPATH = "data/taxid_to_scaffold.csv"

TAXA_DF = pd.read_csv(TAXA_INFO_FPATH)
TAXA_LIST = list(TAXA_DF[~pd.isna(TAXA_DF["taxid"])]["taxid"].values)

TAXA_DF


Unnamed: 0,taxid,species
0,1,Lee_A8Q_1_Ecoli_contig_1_polypolish
1,2849180,Parabacteroides_sp._MSK.9.14
2,10710,
3,305,Ralstonia_solanacearum_strain_KACC_10722
4,1280,CP013959.1_Staphylococcus_aureus_strain_V605
5,511145,
6,190485,NC_003902.1_Xanthomonas_campestris_pv._campest...
7,267608,NC_003295.1_Ralstonia_solanacearum_GMI1000
8,216595,NC_012660.1_Pseudomonas_fluorescens_SBW25
9,272558,NC_002570.2_Bacillus_halodurans_C-125


In [9]:
COVERAGE_DIR = "data/coverage_arrays"

coverage_filelist = os.listdir(COVERAGE_DIR)
coverage_filelist = [f for f in coverage_filelist if f.endswith(".npz")]

print(f"coverage files ({(len(coverage_filelist))}):", coverage_filelist)

coverage files (249): ['coverage_arrays_Soil17_CE_251_N3_16_None_T9.npz', 'coverage_arrays_Soil5_CE_201_0_None_T9.npz', 'coverage_arrays_Soil17_CE_251_N3_24_None_T9.npz', 'coverage_arrays_Soil9_CE_277_0_CHL_T9.npz', 'coverage_arrays_Soil15_CE_251_N5_0_None_T0.npz', 'coverage_arrays_Soil5_CE_201_-28_None_T9.npz', 'coverage_arrays_Soil12_CE_229_-6_None_T9.npz', 'coverage_arrays_Soil3_CE_239_9_None_T9.npz', 'coverage_arrays_Soil3_CE_239_0_CHL_No_Nitrate_T9.npz', 'coverage_arrays_Soil6_CE_73_-70_CHL_T9.npz', 'coverage_arrays_Soil5_CE_201_7_CHL_T9.npz', 'coverage_arrays_Soil15_CE_251_N5_22_CHL_T9.npz', 'coverage_arrays_Soil3_CE_239_0_None_No_Nitrate_T9.npz', 'coverage_arrays_Soil17_CE_251_N3_0_None_T0.npz', 'coverage_arrays_Soil17_CE_251_N3_6_CHL_T9.npz', 'coverage_arrays_Soil9_CE_277_-13_CHL_T9.npz', 'coverage_arrays_Soil14_CE_251_N2_0_None_No_Nitrate_T9.npz', 'coverage_arrays_Soil12_CE_229_3_None_T9.npz', 'coverage_arrays_Soil3_CE_239_0_None_T9.npz', 'coverage_arrays_Soil5_CE_201_-55_CHL_

In [10]:
sample_ids = [
    covfile.removeprefix("coverage_arrays_").removesuffix(".npz")
    for covfile in coverage_filelist
]
print(f"sample ids ({(len(sample_ids))}):", sample_ids)

sample ids (249): ['Soil17_CE_251_N3_16_None_T9', 'Soil5_CE_201_0_None_T9', 'Soil17_CE_251_N3_24_None_T9', 'Soil9_CE_277_0_CHL_T9', 'Soil15_CE_251_N5_0_None_T0', 'Soil5_CE_201_-28_None_T9', 'Soil12_CE_229_-6_None_T9', 'Soil3_CE_239_9_None_T9', 'Soil3_CE_239_0_CHL_No_Nitrate_T9', 'Soil6_CE_73_-70_CHL_T9', 'Soil5_CE_201_7_CHL_T9', 'Soil15_CE_251_N5_22_CHL_T9', 'Soil3_CE_239_0_None_No_Nitrate_T9', 'Soil17_CE_251_N3_0_None_T0', 'Soil17_CE_251_N3_6_CHL_T9', 'Soil9_CE_277_-13_CHL_T9', 'Soil14_CE_251_N2_0_None_No_Nitrate_T9', 'Soil12_CE_229_3_None_T9', 'Soil3_CE_239_0_None_T9', 'Soil5_CE_201_-55_CHL_T9', 'Soil11_CE_234_-10_CHL_T9', 'Soil17_CE_251_N3_-4_None_T9', 'Soil5_CE_201_-9_CHL_T9', 'Soil12_CE_229_-32_CHL_T9', 'Soil5_CE_201_-19_CHL_T9', 'Soil12_CE_229_-6_CHL_T9', 'Soil9_CE_277_0_None_T9', 'Soil16_CE_251_N6_8_None_T9', 'Soil6_CE_73_-17_CHL_T9', 'Soil11_CE_234_0_None_T0', 'Soil16_CE_251_N6_28_None_T9', 'Soil9_CE_277_-5_None_T9', 'Soil12_CE_229_6_None_T9', 'Soil17_CE_251_N3_28_None_T9', 'So

### Accepted Reads

In [11]:
KO_TABLE_DIR = "data/ko_tables"

data_rows = {}
for f in [f for f in os.listdir(KO_TABLE_DIR) if f.endswith(".tsv")]:
    with open(f"{KO_TABLE_DIR}/{f}", "r") as f:
        csvreader = csv.reader(f, delimiter="\t")
        header = next(csvreader)
        for row in csvreader:  # process each row
            sample_id, ko, avg_depth = row[0:3]
            if sample_id not in data_rows:
                data_rows[sample_id] = {}
            if ko in KO_LIST:
                data_rows[sample_id][ko] = avg_depth

# Convert nested dict → DataFrame
df_full = pd.DataFrame.from_dict(data_rows, orient="index", dtype=float)

# Optional: ensure columns follow KO_LIST order
df_full = df_full.reindex(columns=KO_LIST)

# Ensure all sample_ids appear as rows
all_sample_ids = set(df_full.index.values)
missing_samples = [s for s in all_sample_ids if s not in sample_ids]
df_full = df_full.reindex(all_sample_ids)
# Fill nan values
df_full = df_full.fillna(0.)

print(len(df_full))
df_full = df_full.drop(index=missing_samples)  # DROP PROBLEM SAMPLE

print(len(df_full))
for sample_id in missing_samples:
    all_sample_ids.remove(sample_id)
print(f"Dropped {missing_samples}")

# Add boolean screens for CHL+/-, T0/T9, Nitrate/No_Nitrate
screen_df = pd.DataFrame({
    "no_nitrate": df_full.index.str.contains("No_Nitrate"),
    "nitrate": ~df_full.index.str.contains("No_Nitrate"),
    "t0": df_full.index.str.contains("T0"),
    "t9": df_full.index.str.contains("T9"),
    "chl_pos": df_full.index.str.contains("CHL"),
    "chl_neg": df_full.index.str.contains("None"),
}, index=df_full.index)

# Prepend these to the KO columns
nscreens = screen_df.shape[1]
print(f"{nscreens} screen columns")
df_full = pd.concat([screen_df, df_full], axis=1)
df_full

260
249
Dropped ['Soil13_CE_251_N7_0_None_T0', 'Soil7_CE_153_0_None_T0', 'Soil19_CE_251_N4_0_None_T0', 'Soil20_CE_251_0_None_T0', 'Soil15_CE_251_N5_7_CHL_T9', 'Soil4_CE_56b_0_None_T0', 'Soil10_CE_253_0_None_T0', 'Soil1_CN_A4_0_None_T0', 'Soil8_CE_56a_0_None_T0', 'Soil18_CE_251_N1_0_None_T0', 'Soil2_CE_239_A12_0_None_T0']
6 screen columns


Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil12_CE_229_-2_CHL_T9,False,True,False,True,True,False,17.19,4.40,439.53,0.0,...,0.00,626.43,0.00,0.0,1.25,43.40,13.15,1.90,0.26,0.31
Soil5_CE_201_-19_None_T9,False,True,False,True,False,True,46.73,0.68,944.65,0.0,...,0.00,838.63,0.00,0.0,0.00,19.13,128.81,22.07,0.32,0.00
Soil16_CE_251_N6_11_CHL_T9,False,True,False,True,True,False,9.62,6.15,530.71,0.0,...,0.00,542.39,0.00,0.0,3.25,48.20,41.12,1.71,0.43,0.19
Soil3_CE_239_-22_None_T9,False,True,False,True,False,True,15.52,3.06,931.11,0.0,...,0.00,1078.38,0.00,0.0,0.00,14.25,282.76,38.42,0.00,0.00
Soil16_CE_251_N6_-100_CHL_T9,False,True,False,True,True,False,30.22,10.64,841.91,0.0,...,0.00,587.09,0.00,0.0,7.51,79.74,67.15,2.19,1.54,1.31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Soil14_CE_251_N2_-2_None_T9,False,True,False,True,False,True,15.78,6.35,483.33,0.0,...,0.00,526.20,0.00,0.0,1.88,41.61,25.07,2.48,1.03,0.54
Soil6_CE_73_-70_CHL_T9,False,True,False,True,True,False,36.58,16.27,573.67,0.0,...,0.00,1608.86,0.00,0.0,1.65,48.12,84.80,1.46,0.00,0.53
Soil9_CE_277_0_None_T9,False,True,False,True,False,True,28.57,15.56,536.81,0.0,...,0.00,1024.81,0.42,0.0,0.00,26.73,66.90,9.22,0.00,0.00
Soil15_CE_251_N5_34_None_T9,False,True,False,True,False,True,35.85,8.76,627.05,0.0,...,0.00,438.33,0.00,0.0,2.77,96.07,34.16,0.18,0.82,0.70


In [12]:
# KO_ACCEPTED_SUBSETS_DIR = "data/ko_expression_accepted"
# map_to_total_depth = {}  # map KO -> SAMPLE_ID -> total avg depth
# all_sample_ids = set()
# for ko_idx, ko in enumerate(KO_LIST):
#     ko_results = {}
#     map_to_total_depth[ko] = ko_results
#     for soil_idx, soil in enumerate(SOILS):
#         # print(ko, soil_idx, soil)
#         bedfpath = f"{KO_ACCEPTED_SUBSETS_DIR}/subset_{ko}/{soil}_all_samples_{ko}.bed"
#         if not os.path.isfile(bedfpath):
#             continue
#         with open(bedfpath, "r") as f:
#             csvreader = csv.reader(f, delimiter=" ")
#             for row in csvreader:  # process each row
#                 sample_id, orf_id, avg_depth = row
#                 all_sample_ids.add(sample_id)
#                 avg_depth = float(avg_depth)
#                 if sample_id not in ko_results:
#                     ko_results[sample_id] = 0
#                 ko_results[sample_id] += avg_depth


# # Convert the nested dict into a DataFrame
# df_full = pd.DataFrame.from_dict(map_to_total_depth, orient="columns")

# # Ensure all sample_ids appear as rows
# df_full = df_full.reindex(all_sample_ids)
# # Fill nan values
# df_full = df_full.fillna(0.)

# print(len(df_full))
# df_full = df_full.drop(index="Soil15_CE_251_N5_7_CHL_T9")  # DROP PROBLEM SAMPLE
# print(len(df_full))
# all_sample_ids.remove("Soil15_CE_251_N5_7_CHL_T9")
# print("Dropped Soil15_CE_251_N5_7_CHL_T9")

# # Add boolean screens for CHL+/-, T0/T9, Nitrate/No_Nitrate
# screen_df = pd.DataFrame({
#     "no_nitrate": df_full.index.str.contains("No_Nitrate"),
#     "nitrate": ~df_full.index.str.contains("No_Nitrate"),
#     "t0": df_full.index.str.contains("T0"),
#     "t9": df_full.index.str.contains("T9"),
#     "chl_pos": df_full.index.str.contains("CHL"),
#     "chl_neg": df_full.index.str.contains("None"),
# }, index=df_full.index)

# # Prepend these to the KO columns
# nscreens = screen_df.shape[1]
# print(f"{nscreens} screen columns")
# df_full = pd.concat([screen_df, df_full], axis=1)
# df_full

In [13]:
# for index in df_full.index:
#     row1 = df_full.loc[index]
#     row2 = df_full_v2.loc[index]
#     for ko in row1[nscreens:].index:
#         if not np.allclose(row1[ko], row2[ko]):
#             print(ko, row1[ko], row2[ko])
#         # print(row1[ko], type(row1[ko]), row2[ko], type(row2[ko]))

In [14]:
##############################################################################
##  Construct sample subsets satisfying conditions of interest

sample_subsets = {}
print(f"{len(all_sample_ids)} total samples")

sample_subsets["no_nitrate"] = sorted(list(filter(
    lambda s: "No_Nitrate" in s, 
    all_sample_ids
)))
print(f"{len(sample_subsets["no_nitrate"])} no-nitrate samples")

sample_subsets["t0_samples"] = sorted(list(filter(
    lambda s: ("T0" in s) and ("No_Nitrate" not in s),
    all_sample_ids
)))
print(f"{len(sample_subsets["t0_samples"])} T0 samples")

sample_subsets["chl_pos_samples"] = sorted(list(filter(
    lambda s: ("T9" in s) and ("No_Nitrate" not in s) and ("CHL" in s),
    all_sample_ids
)))
print(f"{len(sample_subsets["chl_pos_samples"])} T9 CHL+ samples")

sample_subsets["chl_neg_samples"] = sorted(list(filter(
    lambda s: ("T9" in s) and ("No_Nitrate" not in s) and ("None" in s),
    all_sample_ids
)))
print(f"{len(sample_subsets["chl_neg_samples"])} T9 CHL- samples")

assert len(all_sample_ids) == len(functools.reduce(
    lambda x, y: x | y, (set(v) for v in sample_subsets.values())))

249 total samples
20 no-nitrate samples
10 T0 samples
109 T9 CHL+ samples
110 T9 CHL- samples


In [15]:
##############################################################################
##  Dataframe subsets

DF_SAMP_SUBSETS_ACCEPTED = {}
for k in ["t0_samples", "chl_pos_samples", "chl_neg_samples", "no_nitrate"]:
    DF_SAMP_SUBSETS_ACCEPTED[k] = df_full.loc[sample_subsets[k],:]    

In [16]:
DF_SAMP_SUBSETS_ACCEPTED["t0_samples"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_0_None_T0,False,True,True,False,False,True,15.03,6.19,321.02,0.0,...,0.0,864.36,0.51,0.0,0.35,22.7,22.89,0.28,0.0,0.0
Soil12_CE_229_0_None_T0,False,True,True,False,False,True,20.19,9.36,493.21,0.0,...,0.0,755.07,0.0,0.0,2.14,57.76,16.2,1.97,2.41,1.51
Soil14_CE_251_N2_0_None_T0,False,True,True,False,False,True,15.46,10.96,447.95,0.0,...,0.0,621.5,0.0,0.0,3.12,42.0,27.07,0.3,0.65,0.59
Soil15_CE_251_N5_0_None_T0,False,True,True,False,False,True,29.21,13.74,826.92,0.0,...,0.0,1148.24,0.0,0.0,4.03,85.13,90.28,1.28,0.68,0.75
Soil16_CE_251_N6_0_None_T0,False,True,True,False,False,True,25.52,13.65,992.73,0.0,...,0.0,1024.82,0.0,0.0,7.1,86.1,93.71,2.36,1.7,1.08
Soil17_CE_251_N3_0_None_T0,False,True,True,False,False,True,27.43,11.37,649.49,0.0,...,0.38,831.74,0.0,0.0,4.53,74.06,44.73,5.6,0.0,1.81
Soil3_CE_239_0_None_T0,False,True,True,False,False,True,11.38,1.69,269.81,0.0,...,0.0,758.62,0.0,0.0,0.12,14.95,29.75,2.21,0.0,0.0
Soil5_CE_201_0_None_T0,False,True,True,False,False,True,17.12,3.53,301.0,0.0,...,0.0,651.3,0.0,0.0,0.01,15.19,38.64,2.91,0.82,0.0
Soil6_CE_73_0_None_T0,False,True,True,False,False,True,40.5,12.19,672.75,0.0,...,0.0,1414.82,0.0,0.0,1.29,52.28,75.95,1.86,0.0,0.36
Soil9_CE_277_0_None_T0,False,True,True,False,False,True,34.62,21.7,558.06,0.0,...,0.0,951.91,0.17,0.0,0.0,30.41,46.34,2.77,0.21,0.66


In [17]:
DF_SAMP_SUBSETS_ACCEPTED["chl_pos_samples"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_-100_CHL_T9,False,True,False,True,True,False,24.01,5.63,382.16,0.0,...,0.0,1087.24,0.58,0.0,0.0,27.77,40.78,0.56,0.56,0.0
Soil11_CE_234_-10_CHL_T9,False,True,False,True,True,False,18.84,5.03,334.19,0.0,...,0.0,1107.29,0.49,0.0,0.0,26.29,38.10,0.57,0.53,0.0
Soil11_CE_234_-14_CHL_T9,False,True,False,True,True,False,14.78,6.13,325.79,0.0,...,0.0,907.05,0.26,0.0,0.0,20.67,35.72,1.72,0.56,0.0
Soil11_CE_234_-18_CHL_T9,False,True,False,True,True,False,18.17,5.16,364.49,0.0,...,0.0,1022.13,0.14,0.0,0.0,21.32,41.78,0.71,0.70,0.0
Soil11_CE_234_-25_CHL_T9,False,True,False,True,True,False,16.86,6.50,329.80,0.0,...,0.0,962.05,0.54,0.0,0.0,21.84,37.04,0.64,0.56,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Soil9_CE_277_-5_CHL_T9,False,True,False,True,True,False,35.30,12.55,521.23,0.0,...,0.0,1020.05,0.35,0.0,0.0,28.27,52.82,4.72,0.00,0.0
Soil9_CE_277_-75_CHL_T9,False,True,False,True,True,False,27.54,16.25,486.21,0.0,...,0.0,850.84,0.98,0.0,0.0,25.96,46.13,2.64,0.13,0.0
Soil9_CE_277_-7_CHL_T9,False,True,False,True,True,False,31.78,11.96,558.83,0.0,...,0.0,1099.48,0.55,0.0,0.0,30.88,55.42,2.87,0.21,0.0
Soil9_CE_277_0_CHL_T9,False,True,False,True,True,False,39.13,34.51,736.74,0.0,...,0.0,1142.98,1.20,0.0,0.0,41.70,51.34,4.14,0.18,0.0


In [18]:
DF_SAMP_SUBSETS_ACCEPTED["chl_neg_samples"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_-100_None_T9,False,True,False,True,False,True,20.35,4.15,242.75,0.0,...,0.0,500.95,0.68,0.0,0.0,173.96,23.07,0.65,0.00,0.0
Soil11_CE_234_-10_None_T9,False,True,False,True,False,True,28.99,12.93,582.40,0.0,...,0.0,1457.43,0.89,0.0,0.0,33.43,169.04,3.94,2.02,0.0
Soil11_CE_234_-14_None_T9,False,True,False,True,False,True,20.30,21.95,566.47,0.0,...,0.0,1200.08,0.97,0.0,0.0,25.66,59.25,5.69,1.80,0.0
Soil11_CE_234_-18_None_T9,False,True,False,True,False,True,22.49,21.37,549.68,0.0,...,0.0,1042.39,0.26,0.0,0.0,24.12,67.82,9.85,0.99,0.0
Soil11_CE_234_-25_None_T9,False,True,False,True,False,True,26.91,70.08,577.07,0.0,...,0.0,947.24,0.51,0.0,0.0,40.99,93.64,17.31,3.29,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Soil9_CE_277_-5_None_T9,False,True,False,True,False,True,25.35,12.71,438.47,0.0,...,0.0,913.69,0.71,0.0,0.0,27.58,62.44,12.00,0.00,0.0
Soil9_CE_277_-75_None_T9,False,True,False,True,False,True,33.91,9.42,404.28,0.0,...,0.0,489.36,1.62,0.0,0.0,518.85,42.51,3.99,0.00,0.0
Soil9_CE_277_-7_None_T9,False,True,False,True,False,True,28.89,12.66,438.91,0.0,...,0.0,946.98,0.71,0.0,0.0,25.60,57.25,13.57,0.21,0.0
Soil9_CE_277_0_None_T9,False,True,False,True,False,True,28.57,15.56,536.81,0.0,...,0.0,1024.81,0.42,0.0,0.0,26.73,66.90,9.22,0.00,0.0


In [19]:
DF_SAMP_SUBSETS_ACCEPTED["no_nitrate"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,21.48,9.76,587.74,0.0,...,0.0,1507.59,0.16,0.0,0.0,38.29,37.89,0.98,0.0,0.0
Soil11_CE_234_0_None_No_Nitrate_T9,True,False,False,True,False,True,12.61,4.68,336.68,0.0,...,0.0,856.37,0.37,0.0,0.0,22.33,31.18,1.28,0.0,0.0
Soil12_CE_229_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,16.3,10.47,472.64,0.0,...,0.0,760.73,0.0,0.0,2.68,51.86,21.31,2.21,0.0,0.39
Soil12_CE_229_0_None_No_Nitrate_T9,True,False,False,True,False,True,16.6,6.94,494.62,0.0,...,0.0,809.56,0.0,0.0,0.96,68.46,22.64,2.14,0.83,0.72
Soil14_CE_251_N2_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,13.66,6.13,410.86,0.0,...,0.0,583.97,0.0,0.0,3.73,53.2,22.44,1.08,0.18,1.61
Soil14_CE_251_N2_0_None_No_Nitrate_T9,True,False,False,True,False,True,18.08,8.57,492.61,0.0,...,0.0,574.62,0.0,0.0,2.17,43.68,30.99,1.28,1.71,1.13
Soil15_CE_251_N5_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,25.89,13.95,714.83,0.0,...,0.0,825.0,0.0,0.0,5.16,77.31,68.12,0.39,2.02,1.28
Soil15_CE_251_N5_0_None_No_Nitrate_T9,True,False,False,True,False,True,30.91,18.38,1097.85,1.19,...,0.0,1332.04,0.0,0.0,7.34,110.84,127.18,2.62,2.31,0.78
Soil16_CE_251_N6_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,17.69,7.36,649.69,0.0,...,0.0,679.74,0.0,0.0,4.89,50.8,64.44,1.33,0.0,1.05
Soil16_CE_251_N6_0_None_No_Nitrate_T9,True,False,False,True,False,True,25.49,13.88,1237.18,0.0,...,0.0,1178.19,0.0,0.0,6.4,97.2,119.58,7.11,0.0,1.18


## Tables for rejected reads

### Aggregate over taxa

In [20]:
KO_REJECTED_SUBSETS_DIR = "out/ko_expression"

contam_dfs = {}
for sample_id, fname in zip(sample_ids, coverage_filelist):
    fpath = os.path.join(KO_REJECTED_SUBSETS_DIR, fname)
    fpath = fpath.replace("coverage_arrays_", "coverage_")
    fpath = fpath.replace(".npz", ".csv")
    contam_dfs[sample_id] = pd.read_csv(
        os.path.join(fpath)
    )



In [21]:
DF_SAMP_SUBSETS_REJECTED = {}
for key in DF_SAMP_SUBSETS_ACCEPTED:
    print(key)
    df_rej = DF_SAMP_SUBSETS_ACCEPTED[key].copy()
    df_rej.iloc[:,nscreens:] = np.nan
    ko_values_rej = {
        sample_id: df.groupby("ko")["avg_depth"].sum()
        for sample_id, df in contam_dfs.items()
    }

    for sample_id, series in ko_values_rej.items():
        if sample_id in df_rej.index:
            for ko, value in series.items():
                if ko in df_rej.columns:
                    df_rej.at[sample_id, ko] = value
    
    DF_SAMP_SUBSETS_REJECTED[key] = df_rej


t0_samples
chl_pos_samples
chl_neg_samples
no_nitrate


In [22]:
DF_SAMP_SUBSETS_REJECTED["t0_samples"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_0_None_T0,False,True,True,False,False,True,58.800344,2.490947,142.947259,23.682198,...,105.787692,375.646771,0.391934,55.834215,19.436027,,199.206804,,60.939673,113.976952
Soil12_CE_229_0_None_T0,False,True,True,False,False,True,80.891805,8.947075,210.796783,28.422587,...,179.556505,514.592974,1.93508,89.268806,25.915354,,256.271235,,98.335024,182.253041
Soil14_CE_251_N2_0_None_T0,False,True,True,False,False,True,56.47506,4.502539,157.183021,18.69361,...,144.87017,385.684085,0.359738,75.650368,21.317245,,210.702489,,72.37259,141.028344
Soil15_CE_251_N5_0_None_T0,False,True,True,False,False,True,132.418542,17.512535,273.215651,48.394974,...,263.708285,815.915507,2.043163,155.385041,41.478111,,534.122566,,160.977283,320.309444
Soil16_CE_251_N6_0_None_T0,False,True,True,False,False,True,120.726433,23.394847,333.371076,44.834416,...,311.802384,864.597187,5.33245,143.689473,53.46793,,514.214413,,195.084469,314.253028
Soil17_CE_251_N3_0_None_T0,False,True,True,False,False,True,85.064698,10.68454,215.101248,28.716376,...,212.976318,574.678644,1.540764,104.359031,37.915807,,364.914021,,130.093279,209.856054
Soil3_CE_239_0_None_T0,False,True,True,False,False,True,55.516187,1.841226,114.213983,26.787012,...,57.022597,328.707437,0.0,57.406563,8.10595,,139.848033,,43.944825,109.856897
Soil5_CE_201_0_None_T0,False,True,True,False,False,True,45.256543,0.630919,140.935407,23.818555,...,71.019675,277.359098,0.0,42.618791,11.610713,,133.398218,,48.858524,88.937342
Soil6_CE_73_0_None_T0,False,True,True,False,False,True,145.335202,4.175487,333.780007,52.073622,...,204.891397,893.563084,0.0,154.926911,30.08645,,432.457531,,145.455225,292.671464
Soil9_CE_277_0_None_T0,False,True,True,False,False,True,82.105138,3.700557,233.293615,19.605649,...,173.051609,522.121011,0.427005,91.189592,24.581193,,309.451839,,111.740114,201.32453


In [23]:
DF_SAMP_SUBSETS_REJECTED["chl_pos_samples"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_-100_CHL_T9,False,True,False,True,True,False,118.401849,3.690111,288.976759,40.404764,...,190.344452,713.216307,0.000000,131.791709,23.371136,,338.968680,,87.665999,219.830947
Soil11_CE_234_-10_CHL_T9,False,True,False,True,True,False,80.046927,2.719359,152.485696,31.880266,...,120.412017,484.766146,0.000000,92.894037,22.422144,,231.696560,,74.279704,156.742045
Soil11_CE_234_-14_CHL_T9,False,True,False,True,True,False,83.181420,1.579387,145.923034,33.398773,...,107.683326,471.386115,2.000000,79.303047,18.244649,,204.499072,,72.158478,146.045119
Soil11_CE_234_-18_CHL_T9,False,True,False,True,True,False,89.846709,2.944290,162.907587,40.347479,...,126.004470,504.201421,0.000000,101.418384,23.541748,,241.042265,,80.086103,158.520080
Soil11_CE_234_-25_CHL_T9,False,True,False,True,True,False,71.490533,1.894150,156.562639,22.089828,...,116.756049,429.509000,0.000000,75.108413,20.881968,,217.283920,,81.368333,133.702823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Soil9_CE_277_-5_CHL_T9,False,True,False,True,True,False,108.492764,2.849582,224.909401,33.960991,...,167.469013,643.352310,0.089934,105.621174,23.912158,,341.849214,,93.659396,233.430866
Soil9_CE_277_-75_CHL_T9,False,True,False,True,True,False,84.165501,3.683148,250.380033,34.526290,...,185.256626,573.551145,0.269803,98.955348,20.746538,,314.462803,,106.826650,202.501660
Soil9_CE_277_-7_CHL_T9,False,True,False,True,True,False,120.226944,2.097493,234.373074,48.691771,...,171.571363,706.426446,0.089934,133.944060,23.988778,,362.747835,,94.442532,233.300088
Soil9_CE_277_0_CHL_T9,False,True,False,True,True,False,108.450830,5.683148,319.169044,36.440078,...,258.620274,811.802404,0.000000,117.853497,33.550385,,460.523578,,142.635343,303.446036


In [24]:
DF_SAMP_SUBSETS_REJECTED["chl_neg_samples"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_-100_None_T9,False,True,False,True,False,True,119.376302,0.210306,152.640085,57.266871,...,98.960209,653.988073,0.000000,115.804981,11.567878,,256.491142,,64.825810,190.465319
Soil11_CE_234_-10_None_T9,False,True,False,True,False,True,82.961433,11.538997,254.860095,33.570978,...,366.635695,813.160626,0.179869,96.275386,51.282321,,779.536714,,243.222485,458.717873
Soil11_CE_234_-14_None_T9,False,True,False,True,False,True,76.476637,15.558496,290.794574,27.600602,...,410.004685,821.545439,0.179869,90.757279,29.230882,,788.945347,,203.705421,438.013276
Soil11_CE_234_-18_None_T9,False,True,False,True,False,True,83.400920,21.130919,296.479036,35.632823,...,431.647271,948.153927,1.059806,100.864055,41.843156,,893.053694,,256.500379,527.060984
Soil11_CE_234_-25_None_T9,False,True,False,True,False,True,118.731751,54.136490,380.751045,52.603730,...,496.872859,1336.597444,4.190069,110.933776,50.060970,,1114.518230,,254.634538,742.890373
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Soil9_CE_277_-5_None_T9,False,True,False,True,False,True,71.717361,2.732591,206.049091,46.270677,...,280.148612,654.732701,1.426437,102.030032,39.780467,,662.933294,,276.180818,359.737860
Soil9_CE_277_-75_None_T9,False,True,False,True,False,True,143.257745,0.935933,220.560325,50.607352,...,136.711336,946.083032,0.100936,161.489320,15.352039,,381.095026,,83.721328,261.395459
Soil9_CE_277_-7_None_T9,False,True,False,True,False,True,79.294833,3.364903,200.323131,45.993566,...,319.034221,780.190895,2.269803,98.181896,37.801318,,759.696121,,302.103739,404.154228
Soil9_CE_277_0_None_T9,False,True,False,True,False,True,86.486216,3.784819,238.530228,31.837838,...,245.869193,721.573440,0.000000,97.154010,35.205986,,584.259813,,192.318232,339.595126


In [25]:
DF_SAMP_SUBSETS_REJECTED["no_nitrate"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,103.990968,2.839136,231.055196,37.62427,...,156.560576,569.723969,0.179869,95.376126,30.555316,,287.309769,,94.925841,177.010974
Soil11_CE_234_0_None_No_Nitrate_T9,True,False,False,True,False,True,53.202993,1.787604,140.302932,17.892581,...,147.313287,411.394799,0.0,64.470487,25.688924,,246.81207,,82.878313,151.048361
Soil12_CE_229_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,77.976855,7.95195,202.819728,26.338945,...,176.004332,525.343538,0.179869,89.765991,23.067975,,268.981312,,69.16502,176.582323
Soil12_CE_229_0_None_No_Nitrate_T9,True,False,False,True,False,True,79.930675,9.085655,208.169983,27.188782,...,223.120718,591.94875,0.237183,150.015127,33.921672,,424.771365,,118.583303,227.254818
Soil14_CE_251_N2_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,63.273585,5.576602,149.371836,22.523665,...,138.848731,369.23301,0.449672,67.781588,22.839625,,209.101267,,73.312764,137.620594
Soil14_CE_251_N2_0_None_No_Nitrate_T9,True,False,False,True,False,True,46.487576,6.568942,139.753757,12.414092,...,149.123664,338.850263,0.615843,59.114774,19.580237,,262.877383,,105.548194,160.341171
Soil15_CE_251_N5_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,129.101452,13.026462,244.617168,50.067026,...,236.008869,784.960043,1.090215,147.572847,30.601357,,461.586859,,133.961793,279.55912
Soil15_CE_251_N5_0_None_No_Nitrate_T9,True,False,False,True,False,True,132.14085,30.547354,315.616253,46.194128,...,372.937681,1114.117149,2.541674,173.837474,41.509293,,996.751699,,208.716011,421.975024
Soil16_CE_251_N6_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,84.656109,11.596797,205.7709,35.494992,...,189.070144,549.236586,1.980345,115.420298,25.22905,,308.041284,,102.273774,206.822144
Soil16_CE_251_N6_0_None_No_Nitrate_T9,True,False,False,True,False,True,109.962335,28.592618,308.004464,39.433182,...,358.225829,924.659302,4.565981,154.408056,50.705747,,820.314374,,202.201348,407.563733


## Plots and tables

In [26]:
def make_violin_plot(
        df_subsets, 
        ko_set,
        keys,
        ko_labels=None,
        width=None,
        spacing=None,
        gap=None,
        margin=0,
        legend=True,
        legend_labels=None, 
        colors=None,
        alpha=None,
        hatch=None,
        ax=None,
        verbosity=1,
        **kwargs
):
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=kwargs.get("figsize", (8, 10)))

    n = len(ko_set)
    k = len(keys)

    width = width if width else 0.9 * (1 / k) # width of violin
    d = spacing if spacing else 0.01 * (1 / k)  # spacing between violins
    gap = gap if gap else 0.5 * (1 / k)  # gap between KO groups

    legend_handles = []
    legend_labels = keys if legend_labels is None else legend_labels
    ko_group_width = k * width + (k - 1) * d
    for i, key in enumerate(keys):
        df = df_subsets[key]
        data = [np.log10(1+df[col]) for col in ko_set]
        pos = margin + np.arange(n) * (ko_group_width + gap) + i * (width + d)
        li = ax.violinplot(
            data, pos, 
            orientation="horizontal",
            # showmeans=True, 
            showmedians=True, 
            showextrema=True,
            widths=width,
        )
        if isinstance(colors, list):
            c = colors[i]
            for body in li["bodies"]:
                body.set_facecolor(c)
                body.set_edgecolor(c)
            for partname in ['cbars','cmins','cmaxes','cmeans','cmedians']:
                if partname in li:
                    vp = li[partname]
                    vp.set_edgecolor(c)
        
        for body in li["bodies"]:
            if hatch:
                body.set_hatch(hatch)
            if alpha:
                body.set_alpha(alpha)
        
        legend_handles.append(li["bodies"][0])
    
    if legend:
        if ax.get_legend() is None:
            old_handles, old_labels = [], []
        else:
            old_handles = ax.get_legend().legend_handles
            old_labels = [t.get_text() for t in ax.get_legend().texts]
        legend_handles.reverse()
        legend_labels.reverse()
        all_handles = old_handles + legend_handles
        all_labels = old_labels + legend_labels
        ax.legend(
            all_handles, all_labels,
            bbox_to_anchor=(1.05, 1),
            loc="upper left"
        )

    # Set xticks
    xticklabels = np.array(
        [10**i for i in range(0, int(np.ceil(ax.get_xlim()[1])))]
    )
    ax.set_xticks(np.log10(1 + xticklabels), labels=xticklabels)

    # Set yticks
    yticks = margin + np.arange(n) * (ko_group_width + gap) + (ko_group_width - width) / 2 
    ax.set_yticks(
        yticks, 
        labels=ko_labels,
    )

    # Add labels and title
    ax.set_xlabel("total avg depth")
    ax.set_ylabel("KO")
    ax.set_title("")
    ax.set_ylim(-gap, pos.max() + gap)
    
    return ax



#### Accepted reads

In [27]:
colors_acc = ["g", "b", "r"]
colors_rej = ["brown", "cyan", "orange"]

alpha_acc = None
alpha_rej = None

hatch_acc = None
hatch_rej = "///"

In [28]:
for key in DF_SAMP_SUBSETS_ACCEPTED:
    df = DF_SAMP_SUBSETS_ACCEPTED[key]
    df.iloc[:,nscreens:].to_csv(
        f"{OUTDIR}/{key}_accepted.csv", float_format="%.3f", na_rep=np.nan
    )

In [29]:
##############################################################################
##  Violin plots (ACCEPTED)

for category in KO_CATEGORIES:
    ko_set = KO_CATEGORY_SETS[category]
    ko_labels = [ko + "\n" + DF_KO_INFO.loc[ko, "SYMBOL"] for ko in ko_set]
    keys = ["t0_samples", "chl_pos_samples", "chl_neg_samples",]

    fig, ax = plt.subplots(1, 1, figsize=[8,10])

    make_violin_plot(
        DF_SAMP_SUBSETS_ACCEPTED, ko_set, keys, 
        ko_labels=ko_labels,
        ax=ax,
        colors=colors_acc,
        alpha=alpha_acc,
        hatch=hatch_acc,
    )

    ax.set_title(f"{category} (accepted)")

    saveas = f"{IMGDIR}/{category.replace(" ", "_")}_accepted.png"
    print(f"Saving {saveas}")
    plt.savefig(saveas, bbox_inches="tight")
    if CLOSE_PLOTS:
        plt.close()

plt.show()

Saving out/images/hydroxylamine_reductase_accepted.png
Saving out/images/nitrate_reductase_accepted.png
Saving out/images/nitric_oxide_reductase_accepted.png
Saving out/images/nitrite_reductase_accepted.png
Saving out/images/nitrous_oxide_reductase_accepted.png
Saving out/images/other_accepted.png


#### Rejected reads

In [30]:
for key in DF_SAMP_SUBSETS_REJECTED:
    df = DF_SAMP_SUBSETS_REJECTED[key]
    df.iloc[:,nscreens:].to_csv(
        f"{OUTDIR}/{key}_rejected.csv", float_format="%.3f", na_rep=np.nan
    )

In [31]:
##############################################################################
##  Violin plots (REJECTED)

for category in KO_CATEGORIES:
    ko_set = KO_CATEGORY_SETS[category]
    ko_labels = [ko + "\n" + DF_KO_INFO.loc[ko, "SYMBOL"] for ko in ko_set]
    keys = ["t0_samples", "chl_pos_samples", "chl_neg_samples",]

    fig, ax = plt.subplots(1, 1, figsize=[8,10])

    make_violin_plot(
        DF_SAMP_SUBSETS_REJECTED, ko_set, keys, 
        ko_labels=ko_labels,
        ax=ax,
        colors=colors_rej,
        alpha=alpha_rej,
        hatch=hatch_rej,
    )

    ax.set_title(f"{category} (rejected)")
    
    saveas = f"{IMGDIR}/{category.replace(" ", "_")}_rejected.png"
    print(f"Saving {saveas}")
    plt.savefig(saveas, bbox_inches="tight")
    if CLOSE_PLOTS:
        plt.close()

plt.show()

Saving out/images/hydroxylamine_reductase_rejected.png
Saving out/images/nitrate_reductase_rejected.png
Saving out/images/nitric_oxide_reductase_rejected.png
Saving out/images/nitrite_reductase_rejected.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/nitrous_oxide_reductase_rejected.png
Saving out/images/other_rejected.png


In [32]:
DF_SAMP_SUBSETS_REJECTED["no_nitrate"]

Unnamed: 0,no_nitrate,nitrate,t0,t9,chl_pos,chl_neg,K00360,K00361,K00362,K00363,...,K12265,K12266,K15864,K15876,K15877,K17877,K19339,K19343,K26138,K26139
Soil11_CE_234_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,103.990968,2.839136,231.055196,37.62427,...,156.560576,569.723969,0.179869,95.376126,30.555316,,287.309769,,94.925841,177.010974
Soil11_CE_234_0_None_No_Nitrate_T9,True,False,False,True,False,True,53.202993,1.787604,140.302932,17.892581,...,147.313287,411.394799,0.0,64.470487,25.688924,,246.81207,,82.878313,151.048361
Soil12_CE_229_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,77.976855,7.95195,202.819728,26.338945,...,176.004332,525.343538,0.179869,89.765991,23.067975,,268.981312,,69.16502,176.582323
Soil12_CE_229_0_None_No_Nitrate_T9,True,False,False,True,False,True,79.930675,9.085655,208.169983,27.188782,...,223.120718,591.94875,0.237183,150.015127,33.921672,,424.771365,,118.583303,227.254818
Soil14_CE_251_N2_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,63.273585,5.576602,149.371836,22.523665,...,138.848731,369.23301,0.449672,67.781588,22.839625,,209.101267,,73.312764,137.620594
Soil14_CE_251_N2_0_None_No_Nitrate_T9,True,False,False,True,False,True,46.487576,6.568942,139.753757,12.414092,...,149.123664,338.850263,0.615843,59.114774,19.580237,,262.877383,,105.548194,160.341171
Soil15_CE_251_N5_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,129.101452,13.026462,244.617168,50.067026,...,236.008869,784.960043,1.090215,147.572847,30.601357,,461.586859,,133.961793,279.55912
Soil15_CE_251_N5_0_None_No_Nitrate_T9,True,False,False,True,False,True,132.14085,30.547354,315.616253,46.194128,...,372.937681,1114.117149,2.541674,173.837474,41.509293,,996.751699,,208.716011,421.975024
Soil16_CE_251_N6_0_CHL_No_Nitrate_T9,True,False,False,True,True,False,84.656109,11.596797,205.7709,35.494992,...,189.070144,549.236586,1.980345,115.420298,25.22905,,308.041284,,102.273774,206.822144
Soil16_CE_251_N6_0_None_No_Nitrate_T9,True,False,False,True,False,True,109.962335,28.592618,308.004464,39.433182,...,358.225829,924.659302,4.565981,154.408056,50.705747,,820.314374,,202.201348,407.563733


### Combined

In [33]:
DF_SAMP_SUBSETS_REJECTED.keys()

dict_keys(['t0_samples', 'chl_pos_samples', 'chl_neg_samples', 'no_nitrate'])

In [34]:
##############################################################################
##  Violin plots (COMBINED)

for category in KO_CATEGORIES:
    ko_set = KO_CATEGORY_SETS[category]
    ko_labels = [ko + "\n" + DF_KO_INFO.loc[ko, "SYMBOL"] for ko in ko_set]
    keys = ["t0_samples", "chl_pos_samples", "chl_neg_samples",]

    fig, ax = plt.subplots(1, 1, figsize=(8,10))

    make_violin_plot(
        DF_SAMP_SUBSETS_ACCEPTED, ko_set, keys, 
        ko_labels=ko_labels,
        ax=ax,
        legend=True,
        legend_labels=[k + " (accepted)" for k in keys],
        colors=colors_acc,
        alpha=alpha_acc,
        hatch=hatch_acc,
    )

    make_violin_plot(
        DF_SAMP_SUBSETS_REJECTED, ko_set, keys, 
        ko_labels=ko_labels,
        ax=ax,
        legend=True,
        legend_labels=[k + " (rejected)" for k in keys],
        colors=colors_rej,
        alpha=alpha_rej,
        hatch=hatch_rej,
    )

    ax.set_title(f"{category} (accepted vs rejected)")

    saveas = f"{IMGDIR}/{category.replace(" ", "_")}_comparison.png"
    print(f"Saving {saveas}")
    plt.savefig(saveas, bbox_inches="tight")
    if CLOSE_PLOTS:
        plt.close()

plt.show()

Saving out/images/hydroxylamine_reductase_comparison.png
Saving out/images/nitrate_reductase_comparison.png
Saving out/images/nitric_oxide_reductase_comparison.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/nitrite_reductase_comparison.png
Saving out/images/nitrous_oxide_reductase_comparison.png
Saving out/images/other_comparison.png


# Disambiguate by taxa

In [35]:
DF_SAMP_SUBSETS_REJECTED_BY_TAXA = {}
for taxid in TAXA_LIST:
    DF_SAMP_SUBSETS_REJECTED_BY_TAXA[taxid] = {}
    for key in DF_SAMP_SUBSETS_ACCEPTED:    
        df_rej = DF_SAMP_SUBSETS_ACCEPTED[key].copy()
        df_rej.iloc[:,nscreens:] = np.nan
        ko_values_rej = {
            sample_id: df[df["taxid"] == taxid].groupby("ko")["avg_depth"].sum()
            for sample_id, df in contam_dfs.items()
        }

        for sample_id, series in ko_values_rej.items():
            if sample_id in df_rej.index:
                for ko, value in series.items():
                    if ko in df_rej.columns:
                        df_rej.at[sample_id, ko] = value
        DF_SAMP_SUBSETS_REJECTED_BY_TAXA[taxid][key] = df_rej


In [36]:
suboutdir = f"{OUTDIR}/by_taxa"
os.makedirs(suboutdir, exist_ok=True)

for taxid in TAXA_LIST:
    dftaxa = DF_SAMP_SUBSETS_REJECTED_BY_TAXA[taxid]
    for key in dftaxa:
        df = dftaxa[key]
        df.iloc[:,nscreens:].to_csv(
            f"{suboutdir}/{key}_{taxid}_rejected.csv", 
            float_format="%.3f", na_rep=np.nan
        )

In [37]:
##############################################################################
##  Violin plots (COMPARISON, DISAMBIGUATED)

subimgdir = f"{IMGDIR}/by_taxa"
os.makedirs(subimgdir, exist_ok=True)

for category in KO_CATEGORIES:
    ko_set = KO_CATEGORY_SETS[category]
    ko_labels = [ko + "\n" + DF_KO_INFO.loc[ko, "SYMBOL"] for ko in ko_set]
    keys = ["t0_samples", "chl_pos_samples", "chl_neg_samples",]

    for taxid in TAXA_LIST:
        fig, ax = plt.subplots(1, 1, figsize=(8,10))

        make_violin_plot(
            DF_SAMP_SUBSETS_ACCEPTED, ko_set, keys, 
            ko_labels=ko_labels,
            ax=ax,
            legend=True,
            legend_labels=[k + " (accepted)" for k in keys],
            colors=colors_acc,
            alpha=alpha_acc,
            hatch=hatch_acc,
        )

        make_violin_plot(
            DF_SAMP_SUBSETS_REJECTED_BY_TAXA[taxid], ko_set, keys, 
            ko_labels=ko_labels,
            ax=ax,
            legend=True,
            legend_labels=[k + " (rejected)" for k in keys],
            colors=colors_rej,
            alpha=alpha_rej,
            hatch=hatch_rej,
        )

        spec = TAXA_DF[TAXA_DF["taxid"] == taxid]["species"].values[0]
        ax.set_title(f"{category} (accepted vs rejected)\n {taxid} ({spec})")

        saveas = f"{subimgdir}/{category.replace(" ", "_")}_comparison_{taxid}.png"
        print(f"Saving {saveas}")
        plt.savefig(saveas, bbox_inches="tight")
        if CLOSE_PLOTS:
            plt.close()

plt.show()

Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_2849180.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_10710.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_305.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1280.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_511145.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_190485.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_267608.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_216595.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_272558.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_176279.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_205922.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_232721.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_398578.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_391008.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_418699.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_522373.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_688245.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_757424.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_742013.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1114970.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1144306.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_358220.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_745310.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1265504.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1217690.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_983594.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_348824.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1495331.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1333852.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_68895.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_80878.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1267562.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1242245.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_470.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_1125630.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_300267.png
Saving out/images/by_taxa/hydroxylamine_reductase_comparison_198214.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_1.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_2849180.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_10710.png
Saving out/images/by_taxa/nitrate_reductase_comparison_305.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_1280.png
Saving out/images/by_taxa/nitrate_reductase_comparison_511145.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_190485.png
Saving out/images/by_taxa/nitrate_reductase_comparison_267608.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_216595.png
Saving out/images/by_taxa/nitrate_reductase_comparison_272558.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_176279.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_205922.png
Saving out/images/by_taxa/nitrate_reductase_comparison_232721.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_398578.png
Saving out/images/by_taxa/nitrate_reductase_comparison_391008.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_418699.png
Saving out/images/by_taxa/nitrate_reductase_comparison_522373.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_688245.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_757424.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_742013.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_1114970.png
Saving out/images/by_taxa/nitrate_reductase_comparison_1144306.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_358220.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_745310.png
Saving out/images/by_taxa/nitrate_reductase_comparison_1265504.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_1217690.png
Saving out/images/by_taxa/nitrate_reductase_comparison_983594.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_348824.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_1495331.png
Saving out/images/by_taxa/nitrate_reductase_comparison_1333852.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_68895.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_80878.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_1267562.png
Saving out/images/by_taxa/nitrate_reductase_comparison_1242245.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_470.png
Saving out/images/by_taxa/nitrate_reductase_comparison_1125630.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_300267.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrate_reductase_comparison_198214.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_2849180.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_10710.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_305.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1280.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_511145.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_190485.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_267608.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_216595.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_272558.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_176279.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_205922.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_232721.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_398578.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_391008.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_418699.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_522373.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_688245.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_757424.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_742013.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1114970.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1144306.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_358220.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_745310.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1265504.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1217690.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_983594.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_348824.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1495331.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1333852.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_68895.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_80878.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1267562.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1242245.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_470.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_1125630.png
Saving out/images/by_taxa/nitric_oxide_reductase_comparison_300267.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitric_oxide_reductase_comparison_198214.png
Saving out/images/by_taxa/nitrite_reductase_comparison_1.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_2849180.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_10710.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_305.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_1280.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_511145.png
Saving out/images/by_taxa/nitrite_reductase_comparison_190485.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_267608.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_216595.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_272558.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_176279.png
Saving out/images/by_taxa/nitrite_reductase_comparison_205922.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_232721.png
Saving out/images/by_taxa/nitrite_reductase_comparison_398578.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_391008.png
Saving out/images/by_taxa/nitrite_reductase_comparison_418699.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_522373.png
Saving out/images/by_taxa/nitrite_reductase_comparison_688245.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_757424.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_742013.png
Saving out/images/by_taxa/nitrite_reductase_comparison_1114970.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_1144306.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_358220.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_745310.png
Saving out/images/by_taxa/nitrite_reductase_comparison_1265504.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_1217690.png
Saving out/images/by_taxa/nitrite_reductase_comparison_983594.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_348824.png
Saving out/images/by_taxa/nitrite_reductase_comparison_1495331.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_1333852.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_68895.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_80878.png
Saving out/images/by_taxa/nitrite_reductase_comparison_1267562.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_1242245.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_470.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_1125630.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrite_reductase_comparison_300267.png
Saving out/images/by_taxa/nitrite_reductase_comparison_198214.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_2849180.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_10710.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_305.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1280.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_511145.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_190485.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_267608.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_216595.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_272558.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_176279.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_205922.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_232721.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_398578.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_391008.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_418699.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_522373.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_688245.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_757424.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_742013.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1114970.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1144306.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_358220.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_745310.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1265504.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1217690.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_983594.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_348824.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1495331.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1333852.png


  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_68895.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_80878.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1267562.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1242245.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_470.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_1125630.png
Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_300267.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/nitrous_oxide_reductase_comparison_198214.png
Saving out/images/by_taxa/other_comparison_1.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_2849180.png
Saving out/images/by_taxa/other_comparison_10710.png
Saving out/images/by_taxa/other_comparison_305.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_1280.png
Saving out/images/by_taxa/other_comparison_511145.png
Saving out/images/by_taxa/other_comparison_190485.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_267608.png
Saving out/images/by_taxa/other_comparison_216595.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_272558.png
Saving out/images/by_taxa/other_comparison_176279.png
Saving out/images/by_taxa/other_comparison_205922.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_232721.png
Saving out/images/by_taxa/other_comparison_398578.png
Saving out/images/by_taxa/other_comparison_391008.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_418699.png
Saving out/images/by_taxa/other_comparison_522373.png
Saving out/images/by_taxa/other_comparison_688245.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_757424.png
Saving out/images/by_taxa/other_comparison_742013.png
Saving out/images/by_taxa/other_comparison_1114970.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_1144306.png
Saving out/images/by_taxa/other_comparison_358220.png
Saving out/images/by_taxa/other_comparison_745310.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_1265504.png
Saving out/images/by_taxa/other_comparison_1217690.png
Saving out/images/by_taxa/other_comparison_983594.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_348824.png
Saving out/images/by_taxa/other_comparison_1495331.png
Saving out/images/by_taxa/other_comparison_1333852.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_68895.png
Saving out/images/by_taxa/other_comparison_80878.png
Saving out/images/by_taxa/other_comparison_1267562.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)


Saving out/images/by_taxa/other_comparison_1242245.png
Saving out/images/by_taxa/other_comparison_470.png
Saving out/images/by_taxa/other_comparison_1125630.png
Saving out/images/by_taxa/other_comparison_300267.png
Saving out/images/by_taxa/other_comparison_198214.png


  r = _umath_linalg.det(a, signature=signature)
  r = _umath_linalg.det(a, signature=signature)
