# Quicktests on Mixed Species Dataset

This notebook runs AlphaQuant on a mixed-species dataset that has been subset to around 60 proteins. We check that the differential expression results neither violate the FDR nor are less sensitive than expected.

In [None]:
#download test files for benchmarking

import alphaquant.benchm.testfile_handling
test_folder = "../../test_data/"
links_yaml_quicktest_files = "../../alphaquant/config/download_links_for_testfiles_quicktest.yaml"



# Fetch the quicktest files of every subfolder this notebook relies on.
# A TestFileDownloader is created per subfolder, and download_missing_files() has to be
# called on each instance; creating two downloaders and calling it only on the last one
# would fetch the "databases" files but not the "mixed_species" files.
for subfolder_of_interest in ["mixed_species", "databases"]:
    testfieldownloader = alphaquant.benchm.testfile_handling.TestFileDownloader(
        test_folder=test_folder,
        links_yaml=links_yaml_quicktest_files,
        subfolder_of_interest=subfolder_of_interest,
    )
    testfieldownloader.download_missing_files()

In [None]:
import os
# Make the mixed-species quicktest folder the working directory, so that the relative
# file names used in the following cells resolve against it.
# NOTE: the path is relative to the current working directory, so this cell is meant to
# be executed exactly once per kernel session.
mixed_species_test_dir = "../../test_data/quicktests/mixed_species"
os.chdir(mixed_species_test_dir)

In [None]:
# Input/output locations for the pipeline run. All paths are relative to the working
# directory that was set via os.chdir in the previous cell.

# Zipped .tsv report that is handed to run_pipeline as `input_file`.
INPUT_FILE = "20210210_154121_S209-S-1-240min_Report_quicktest_shortened.tsv.zip"
# Sample map table, handed to run_pipeline as `samplemap_file`.
SAMPLEMAP = "samplemap.tsv"
# Output folder of the pipeline; the results table is later read from here.
RESULTS_DIR = "results"
# Table handed to run_pipeline as `peptides_to_exclude_file`
# (presumably peptides shared between the species, judging by the file name).
SHARED_PEPTIDES_BETWEEN_SPECIES_FILE = "../../databases/intersecting_peptides_human_yeast_cael_ecoli.tsv"

In [None]:
import alphaquant.run_pipeline as run_pipeline

# Run AlphaQuant on the quicktest input. "PG.Organisms" is requested as an annotation
# column because the benchmark cells below read it from the results table.
run_pipeline.run_pipeline(
    input_file=INPUT_FILE,
    samplemap_file=SAMPLEMAP,
    results_dir=RESULTS_DIR,
    runtime_plots=True,
    minrep_either=2,
    take_median_ion=True,
    annotation_columns=["PG.Genes", "PG.Organisms"],
    input_type_to_use="spectronaut_fragion_isotopes_protein",
    peptides_to_exclude_file=SHARED_PEPTIDES_BETWEEN_SPECIES_FILE,
)

In [None]:
import pandas as pd
# Location of the S1-vs-S2 results table inside the pipeline's results directory.
resuls_table = f"{RESULTS_DIR}/S1_VS_S2.results.tsv"

# Load the tab-separated results and show them in the notebook.
results_df = pd.read_csv(resuls_table, sep="\t")
display(results_df)

# Reduce the table to the columns needed by the classification benchmark and give the
# method-specific columns the "_alphaquant" suffix.
classification_input_columns = ["protein", "PG.Organisms", "log2fc", "fdr"]
alphaquant_column_names = {
    "PG.Organisms": "organism_alphaquant",
    "log2fc": "log2fc_alphaquant",
    "fdr": "fdr_alphaquant",
}
results_df_reformat = results_df[classification_input_columns].rename(columns=alphaquant_column_names)

In [None]:
import numpy as np
import alphaquant.benchm.sensitivity as aq_benchm_sensitivity

# Build the per-species classification table, using "Homo sapiens" as decoy organism.
classification_benchmark = aq_benchm_sensitivity.RatioClassificationTableGenerator(
    results_df_reformat,
    decoy_organism="Homo sapiens",
    method_suffixes=["_alphaquant"],
)

# Replace missing entries with 0 so that the checks further below operate on numbers only.
per_species_results_df = classification_benchmark.per_species_results_df
classification_df = per_species_results_df.replace(np.nan, 0)
display(classification_df)

# Bar plot of the significant hits per species.
aq_benchm_sensitivity.plot_sighits_barplot(
    classification_df,
    suffixes=["_alphaquant"],
    decoy_organism="Homo sapiens",
)



In [None]:
def assert_fdr_is_not_violated(classification_df, suffix = "_alphaquant",  decoy_organism="Homo sapiens"):
    """Check that the decoy organism has no more significant hits than allowed.

    The allowed number of hits is read from the ``allowed_decoy_hits{suffix}`` column of the
    decoy organism's row and extended by a 10% tolerance (truncated to an integer). The
    actual number of hits is read from the ``hits{suffix}`` column of the same row.

    Args:
        classification_df: DataFrame indexed by organism name that contains the columns
            ``allowed_decoy_hits{suffix}`` and ``hits{suffix}``.
        suffix: Method suffix of the columns to check.
        decoy_organism: Index label of the organism whose hits count as false positives.

    Raises:
        IndexError: If ``decoy_organism`` is not part of the index of ``classification_df``.
        AssertionError: If the decoy organism has more hits than allowed (incl. tolerance).
    """
    if decoy_organism not in classification_df.index:
        # Indexing an empty selection would raise an IndexError anyway; the exception type
        # is kept, only the message is made meaningful.
        raise IndexError(f"decoy organism '{decoy_organism}' not found in the index of classification_df")

    # Select with a list so a DataFrame is returned even if the label occurs more than once;
    # the first matching row is used.
    decoy_rows = classification_df.loc[[decoy_organism]]
    num_allowed_hits = decoy_rows[f"allowed_decoy_hits{suffix}"].iloc[0]
    num_allowed_hits_w_tolerance = int(1.1*num_allowed_hits)
    num_actual_hits = decoy_rows[f"hits{suffix}"].iloc[0]

    # "<=" instead of "<": exactly reaching the allowed number (in particular 0 hits with
    # 0 allowed) is not a violation, only exceeding it is.
    assert num_actual_hits <= num_allowed_hits_w_tolerance, (
        f"more false positives than expected: {num_actual_hits} hits for '{decoy_organism}', "
        f"at most {num_allowed_hits_w_tolerance} allowed"
    )


def assert_sensitivity_is_as_expected(classification_df):
    """Check that each non-decoy organism reaches its minimum number of AlphaQuant hits.

    Args:
        classification_df: DataFrame indexed by organism name with a ``hits_alphaquant``
            column.

    Raises:
        AssertionError: If an organism has fewer hits than its required minimum.
    """
    # Minimum number of hits required per organism, checked in this order.
    minimum_hits_per_organism = {
        "Caenorhabditis elegans": 19,
        "Escherichia coli (strain K12)": 14,
        "Saccharomyces cerevisiae (strain ATCC 204508 / S288c)": 13,
    }
    for organism, minimum_hits in minimum_hits_per_organism.items():
        observed_hits = classification_df.loc[organism, "hits_alphaquant"]
        assert observed_hits >= minimum_hits


# Execute both checks on the classification table; a failing check raises an
# AssertionError and thereby makes this cell (and the quicktest) fail.
assert_fdr_is_not_violated(classification_df)
assert_sensitivity_is_as_expected(classification_df)

In [None]:
import alphaquant.benchm.ratios as aq_benchm_ratios

# Reduce the results to the columns needed for the ratio plots and give the
# method-specific columns the "_alphaquant" suffix.
ratio_input_columns = ["protein", "total_intensity", "log2fc", "PG.Organisms"]
ratio_column_names = {
    "PG.Organisms": "organism_alphaquant",
    "log2fc": "log2fc_alphaquant",
    "total_intensity": "intensity_alphaquant",
}
results_df_ratio = results_df[ratio_input_columns].rename(columns=ratio_column_names)

# Scatter plot first, then box plot; `species_plotter` ends up bound to the box plotter.
species_plotter = aq_benchm_ratios.MixedSpeciesScatterPlotter(
    results_df_ratio,
    method_suffixes=["_alphaquant"],
    expected_log2fcs=[-2.2, -1.2, 0, 2],
)
species_plotter = aq_benchm_ratios.MixedSpeciesBoxPlotter(
    results_df_ratio,
    method_suffixes=["_alphaquant"],
    expected_log2fcs=[-2.2, -1.2, 0, 2],
    figure_size=[6, 8],
)
