In [None]:
import os

# The data folder is given relative to the notebook's own location, so a plain
# os.chdir() is not idempotent: executing this cell a second time in the same
# kernel would resolve the relative path from inside the data folder and fail.
# Only change directory when the kernel is not already in the target folder.
_TEST_DATA_DIR = "../../test_data/system_tests/Spectronaut_LargeFC/"
if os.path.basename(os.getcwd()) != os.path.basename(os.path.normpath(_TEST_DATA_DIR)):
    os.chdir(_TEST_DATA_DIR)

In [None]:
import os

# --- Files handed to the pipeline (relative paths) ---
INPUT_FILE = "20210210_154121_S209-S-1-240min_Reportv2.tsv"
SAMPLEMAP_FILE = "samples.map.tsv"
# The original cell carried "housekeeping_genes.tsv" as a commented-out alternative.
PROTEIN_SUBSET_FOR_NORMALIZATION = "housekeeping_proteins.tsv"
SHARED_PEPTIDES_BETWEEN_SPECIES_FILE = "../../databases/intersecting_peptides_human_yeast_cael_ecoli.tsv"

# --- Reference workbook read with pandas further down ---
SPECTRONAUT_FILE = "MP-LFC-OT.xlsx"

# --- Output location and the condition pair that is compared ---
RESULTS_DIR = "results"
CONDPAIR = ("S1", "S2")

In [None]:
import alphaquant.run_pipeline as run_pipeline

# Run the AlphaQuant pipeline for the single configured condition pair.
# All file locations come from the constants defined in the configuration cell.
run_pipeline.run_pipeline(
    input_file=INPUT_FILE,
    samplemap_file=SAMPLEMAP_FILE,
    results_dir=RESULTS_DIR,
    runtime_plots=True,
    protein_subset_for_normalization_file=PROTEIN_SUBSET_FOR_NORMALIZATION,
    annotation_columns=["PG.Organisms"],
    condpairs_list=[CONDPAIR],
    peptides_to_exclude_file=SHARED_PEPTIDES_BETWEEN_SPECIES_FILE,
    input_type_to_use="spectronaut_fragion_isotopes_protein",
    minrep_c1=2,
    minrep_c2=0,
)

In [None]:
import pandas as pd

# Results table written for the configured condition pair.
alphaquant_file = f"{RESULTS_DIR}/{CONDPAIR[0]}_VS_{CONDPAIR[1]}.results.tsv"

# Keep only the columns used downstream, give them method-specific names,
# and drop rows that have no organism annotation.
df_alphaquant = (
    pd.read_csv(alphaquant_file, sep="\t")[
        ["protein", "log2fc", "fdr", "quality_score", "PG.Organisms", "total_intensity"]
    ]
    .rename(
        columns={
            "PG.Organisms": "organism",
            "total_intensity": "intensity_alphaquant",
            "fdr": "fdr_alphaquant",
            "log2fc": "log2fc_alphaquant",
        }
    )
    .dropna(subset=["organism"])
)

# Discard entries whose organism field contains ";" (more than one organism listed).
df_alphaquant = df_alphaquant[[";" not in organism for organism in df_alphaquant["organism"]]]

# Third sheet (0-based index 2) of the Spectronaut workbook.
df_spectronaut = pd.read_excel(SPECTRONAUT_FILE, sheet_name=2)

display(df_spectronaut, df_alphaquant)

In [None]:
import alphaquant.benchm.sensitivity as aq_benchm_sensititivy
# Build the per-species classification table from the AlphaQuant results,
# passing "Homo sapiens" as the decoy organism.
classification_benchmark = aq_benchm_sensititivy.RatioClassificationTableGenerator(
    df_alphaquant,
    method_suffixes=["_alphaquant"],
    decoy_organism="Homo sapiens",
)
classification_df = classification_benchmark.per_species_results_df
display(classification_df)

# Bar plot of the hits per species for the same table.
ax = aq_benchm_sensititivy.plot_sighits_barplot(
    classification_benchmark.per_species_results_df,
    ["_alphaquant"],
    "Homo sapiens",
)



In [None]:
def assert_fdr_is_not_violated(classification_df, suffix = "_alphaquant",  decoy_organism="Homo sapiens"):
    """Assert that the decoy organism does not have more hits than allowed.

    Args:
        classification_df: table with an "organism" column plus the columns
            f"allowed_decoy_hits{suffix}" and f"hits{suffix}".
        suffix: method suffix appended to the column names that are read.
        decoy_organism: value of the "organism" column identifying the decoy row.

    Raises:
        IndexError: if no row of ``classification_df`` matches ``decoy_organism``.
        AssertionError: if the decoy hits exceed the allowed hits plus a 10%
            tolerance (rounded down to an integer).
    """
    decoy_rows = classification_df[classification_df["organism"] == decoy_organism]
    if decoy_rows.empty:
        # Previously this surfaced as an opaque IndexError from ``.values[0]``;
        # the exception type is kept, only the message is made explicit.
        raise IndexError(f"decoy organism {decoy_organism!r} not found in classification_df['organism']")

    num_allowed_hits = decoy_rows[f"allowed_decoy_hits{suffix}"].values[0]
    num_allowed_hits_w_tolerance = int(1.1*num_allowed_hits)
    num_actual_hits = decoy_rows[f"hits{suffix}"].values[0]

    # Inclusive comparison: int() truncates, so for small allowed counts the
    # tolerance adds nothing, and a strict "<" would reject a run that has
    # exactly the allowed number of decoy hits (including 0 allowed / 0 hits).
    assert num_actual_hits <= num_allowed_hits_w_tolerance, (
        f"more false positives than expected: {num_actual_hits} hits for {decoy_organism!r}, "
        f"at most {num_allowed_hits_w_tolerance} allowed (including tolerance)"
    )



def assert_sensitivity_is_as_expected(classification_df, suffix="_alphaquant"):
    """Assert that each benchmark organism has more hits than its fixed minimum.

    Args:
        classification_df: table with an "organism" column and an
            f"hits{suffix}" column.
        suffix: method suffix appended to "hits" to select the column.

    Raises:
        KeyError: if one of the expected organisms is absent from the table.
        AssertionError: if an organism's hit count is not strictly above its minimum.
    """
    hits_by_organism = {
        organism: num_hits
        for organism, num_hits in zip(classification_df["organism"], classification_df[f"hits{suffix}"])
    }

    # (organism key, exclusive lower bound on hits, failure message), checked in this order.
    expectations = (
        (
            "Saccharomyces cerevisiae (strain ATCC 204508 / S288c)",
            370,
            "sensitivity of Saccharomyces cerevisiae (strain ATCC 204508 / S288c) lower than expected",
        ),
        (
            "Escherichia coli (strain K12)",
            445,
            "sensitivity of Escherichia coli lower than expected",
        ),
        (
            "Caenorhabditis elegans",
            1100,
            "sensitivity of Caenorhabditis elegans lower than expected",
        ),
    )
    for organism, min_hits, failure_message in expectations:
        assert hits_by_organism[organism] > min_hits, failure_message

# Run both checks on the per-species table with their default suffix / decoy organism.
assert_fdr_is_not_violated(classification_df=classification_df)
assert_sensitivity_is_as_expected(classification_df=classification_df)

In [None]:
# IPython autoreload extension, mode 2: imported modules are reloaded before
# code is executed, so edits to the package are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [None]:
import alphaquant.benchm.ratios as aq_benchm_ratios
# Yeast-only subset of the results. Note: both plotters below are given the
# full df_alphaquant table, not this subset.
df_alphaquant_onlyyeast = df_alphaquant.loc[
    df_alphaquant["organism"].str.contains("Saccharomyces cerevisiae")
]

# Scatter plot over all organisms with the expected log2 fold changes.
scatter_plotter = aq_benchm_ratios.MixedSpeciesScatterPlotter(
    df_combined=df_alphaquant,
    method_suffixes=["_alphaquant"],
    expected_log2fcs=[-2.2, -1.2, 0, 2],
    figure_size=[10, 6],
)

# Box plot for the same table and expected fold changes (last expression of the cell).
aq_benchm_ratios.MixedSpeciesBoxPlotter(
    df_combined=df_alphaquant,
    method_suffixes=["_alphaquant"],
    expected_log2fcs=[-2.2, -1.2, 0, 2],
    figure_size=[6, 6],
)


