diff --git a/pyprophet/data_handling.py b/pyprophet/data_handling.py index d27247b..d290d3c 100644 --- a/pyprophet/data_handling.py +++ b/pyprophet/data_handling.py @@ -14,6 +14,7 @@ def profile(fun): return fun +# selection of scores with low cross-correlation for metabolomics scoring def use_metabolomics_scores(): return [ "var_ms1_isotope_overlap_score", @@ -29,6 +30,17 @@ def use_metabolomics_scores(): "var_norm_rt_score" ] +# extracts the scores and writes it into an SQL command +# in some cases some post processing has to be performed depending on which +# position the statement should be inserted (e.g. export_compounds.py) +def write_scores_sql_command(con, score_sql, feature_name, var_replacement): + feature = pd.read_sql_query("""PRAGMA table_info(%s)""" % feature_name, con) + score_names_sql = [name for name in feature["name"].tolist() if name.startswith("VAR")] + score_names_lower = [name.lower().replace("var_", var_replacement) for name in score_names_sql] + for i in range(0,len(score_names_sql)): + score_sql = score_sql + str(feature_name + "." 
+ score_names_sql[i] + " AS " + score_names_lower[i] + ", ") + return score_sql + # Parameter transformation functions def transform_pi0_lambda(ctx, param, value): if value[1] == 0 and value[2] == 0: diff --git a/pyprophet/export.py b/pyprophet/export.py index bbec92c..094613e 100644 --- a/pyprophet/export.py +++ b/pyprophet/export.py @@ -5,6 +5,7 @@ import os from .data_handling import check_sqlite_table +from .data_handling import write_scores_sql_command from .report import plot_scores @@ -12,401 +13,451 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t con = sqlite3.connect(infile) - ipf_present = False - if ipf: - ipf_present = check_sqlite_table(con, "SCORE_IPF") - - # Main query for peptidoform IPF - if ipf_present and ipf=='peptidoform': - idx_query = ''' -CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID); -CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_precursor_id ON PRECURSOR_PEPTIDE_MAPPING (PRECURSOR_ID); -CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID); - -CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_peptide_id ON PRECURSOR_PEPTIDE_MAPPING (PEPTIDE_ID); -CREATE INDEX IF NOT EXISTS idx_peptide_peptide_id ON PEPTIDE (ID); - -CREATE INDEX IF NOT EXISTS idx_run_run_id ON RUN (ID); -CREATE INDEX IF NOT EXISTS idx_feature_run_id ON FEATURE (RUN_ID); - -CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID); -''' - if check_sqlite_table(con, "FEATURE_MS1"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms1_feature_id ON FEATURE_MS1 (FEATURE_ID);" - if check_sqlite_table(con, "FEATURE_MS2"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms2_feature_id ON FEATURE_MS2 (FEATURE_ID);" - if check_sqlite_table(con, "SCORE_MS1"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms1_feature_id ON SCORE_MS1 (FEATURE_ID);" - score_ms1_pep = "SCORE_MS1.PEP" - link_ms1 = "LEFT JOIN SCORE_MS1 ON SCORE_MS1.FEATURE_ID = FEATURE.ID" - 
else: - score_ms1_pep = "NULL" - link_ms1 = "" - if check_sqlite_table(con, "SCORE_MS2"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);" - if check_sqlite_table(con, "SCORE_IPF"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ipf_feature_id ON SCORE_IPF (FEATURE_ID);" - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ipf_peptide_id ON SCORE_IPF (PEPTIDE_ID);" - - query = ''' -SELECT RUN.ID AS id_run, - PEPTIDE.ID AS id_peptide, - PEPTIDE_IPF.MODIFIED_SEQUENCE || '_' || PRECURSOR.ID AS transition_group_id, - PRECURSOR.DECOY AS decoy, - RUN.ID AS run_id, - RUN.FILENAME AS filename, - FEATURE.EXP_RT AS RT, - FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, - FEATURE.DELTA_RT AS delta_rt, - FEATURE.NORM_RT AS iRT, - PRECURSOR.LIBRARY_RT AS assay_iRT, - FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, - FEATURE.ID AS id, - PEPTIDE_IPF.UNMODIFIED_SEQUENCE AS Sequence, - PEPTIDE_IPF.MODIFIED_SEQUENCE AS FullPeptideName, - PRECURSOR.CHARGE AS Charge, - PRECURSOR.PRECURSOR_MZ AS mz, - FEATURE_MS2.AREA_INTENSITY AS Intensity, - FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, - FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, - FEATURE.LEFT_WIDTH AS leftWidth, - FEATURE.RIGHT_WIDTH AS rightWidth, - %s AS ms1_pep, - SCORE_MS2.PEP AS ms2_pep, - SCORE_IPF.PRECURSOR_PEAKGROUP_PEP AS precursor_pep, - SCORE_IPF.PEP AS ipf_pep, - SCORE_MS2.RANK AS peak_group_rank, - SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS ms2_m_score, - SCORE_IPF.QVALUE AS m_score -FROM PRECURSOR -INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID -INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID -INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID -INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID -LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID -LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID -%s -LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = 
FEATURE.ID -LEFT JOIN SCORE_IPF ON SCORE_IPF.FEATURE_ID = FEATURE.ID -INNER JOIN PEPTIDE AS PEPTIDE_IPF ON SCORE_IPF.PEPTIDE_ID = PEPTIDE_IPF.ID -WHERE SCORE_MS2.QVALUE < %s AND SCORE_IPF.PEP < %s -ORDER BY transition_group_id, - peak_group_rank; -''' % (score_ms1_pep, link_ms1, max_rs_peakgroup_qvalue, ipf_max_peptidoform_pep) - # Main query for augmented IPF - elif ipf_present and ipf=='augmented': - idx_query = ''' -CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID); -CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_precursor_id ON PRECURSOR_PEPTIDE_MAPPING (PRECURSOR_ID); -CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID); - -CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_peptide_id ON PRECURSOR_PEPTIDE_MAPPING (PEPTIDE_ID); -CREATE INDEX IF NOT EXISTS idx_peptide_peptide_id ON PEPTIDE (ID); - -CREATE INDEX IF NOT EXISTS idx_run_run_id ON RUN (ID); -CREATE INDEX IF NOT EXISTS idx_feature_run_id ON FEATURE (RUN_ID); - -CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID); -''' - if check_sqlite_table(con, "FEATURE_MS1"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms1_feature_id ON FEATURE_MS1 (FEATURE_ID);" - if check_sqlite_table(con, "FEATURE_MS2"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms2_feature_id ON FEATURE_MS2 (FEATURE_ID);" - if check_sqlite_table(con, "SCORE_MS1"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms1_feature_id ON SCORE_MS1 (FEATURE_ID);" - score_ms1_pep = "SCORE_MS1.PEP" - link_ms1 = "LEFT JOIN SCORE_MS1 ON SCORE_MS1.FEATURE_ID = FEATURE.ID" - else: - score_ms1_pep = "NULL" - link_ms1 = "" - if check_sqlite_table(con, "SCORE_MS2"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);" - if check_sqlite_table(con, "SCORE_IPF"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ipf_feature_id ON SCORE_IPF (FEATURE_ID);" - idx_query += "CREATE INDEX IF NOT EXISTS 
idx_score_ipf_peptide_id ON SCORE_IPF (PEPTIDE_ID);" - - query = ''' -SELECT RUN.ID AS id_run, - PEPTIDE.ID AS id_peptide, - PRECURSOR.ID AS transition_group_id, - PRECURSOR.DECOY AS decoy, - RUN.ID AS run_id, - RUN.FILENAME AS filename, - FEATURE.EXP_RT AS RT, - FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, - FEATURE.DELTA_RT AS delta_rt, - FEATURE.NORM_RT AS iRT, - PRECURSOR.LIBRARY_RT AS assay_iRT, - FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, - FEATURE.ID AS id, - PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, - PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, - PRECURSOR.CHARGE AS Charge, - PRECURSOR.PRECURSOR_MZ AS mz, - FEATURE_MS2.AREA_INTENSITY AS Intensity, - FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, - FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, - FEATURE.LEFT_WIDTH AS leftWidth, - FEATURE.RIGHT_WIDTH AS rightWidth, - SCORE_MS2.RANK AS peak_group_rank, - SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score, - %s AS ms1_pep, - SCORE_MS2.PEP AS ms2_pep -FROM PRECURSOR -INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID -INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID -INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID -INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID -LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID -LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID -%s -LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID -WHERE SCORE_MS2.QVALUE < %s -ORDER BY transition_group_id, - peak_group_rank; -''' % (score_ms1_pep, link_ms1, max_rs_peakgroup_qvalue) - query_augmented = ''' -SELECT FEATURE_ID AS id, - MODIFIED_SEQUENCE AS ipf_FullUniModPeptideName, - PRECURSOR_PEAKGROUP_PEP AS ipf_precursor_peakgroup_pep, - PEP AS ipf_peptidoform_pep, - QVALUE AS ipf_peptidoform_m_score -FROM SCORE_IPF -INNER JOIN PEPTIDE ON SCORE_IPF.PEPTIDE_ID = PEPTIDE.ID -WHERE SCORE_IPF.PEP < %s; -''' % ipf_max_peptidoform_pep - # Main query for standard OpenSWATH 
- else: - idx_query = ''' -CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID); -CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_precursor_id ON PRECURSOR_PEPTIDE_MAPPING (PRECURSOR_ID); -CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID); - -CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_peptide_id ON PRECURSOR_PEPTIDE_MAPPING (PEPTIDE_ID); -CREATE INDEX IF NOT EXISTS idx_peptide_peptide_id ON PEPTIDE (ID); - -CREATE INDEX IF NOT EXISTS idx_run_run_id ON RUN (ID); -CREATE INDEX IF NOT EXISTS idx_feature_run_id ON FEATURE (RUN_ID); - -CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID); -''' - if check_sqlite_table(con, "FEATURE_MS1"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms1_feature_id ON FEATURE_MS1 (FEATURE_ID);" - if check_sqlite_table(con, "FEATURE_MS2"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms2_feature_id ON FEATURE_MS2 (FEATURE_ID);" - if check_sqlite_table(con, "SCORE_MS2"): - idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);" - - query = ''' -SELECT RUN.ID AS id_run, - PEPTIDE.ID AS id_peptide, - PRECURSOR.ID AS transition_group_id, - PRECURSOR.DECOY AS decoy, - RUN.ID AS run_id, - RUN.FILENAME AS filename, - FEATURE.EXP_RT AS RT, - FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, - FEATURE.DELTA_RT AS delta_rt, - FEATURE.NORM_RT AS iRT, - PRECURSOR.LIBRARY_RT AS assay_iRT, - FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, - FEATURE.ID AS id, - PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, - PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, - PRECURSOR.CHARGE AS Charge, - PRECURSOR.PRECURSOR_MZ AS mz, - FEATURE_MS2.AREA_INTENSITY AS Intensity, - FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, - FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, - FEATURE.LEFT_WIDTH AS leftWidth, - FEATURE.RIGHT_WIDTH AS rightWidth, - SCORE_MS2.RANK AS peak_group_rank, - SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS 
m_score -FROM PRECURSOR -INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID -INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID -INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID -INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID -LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID -LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID -LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID -WHERE SCORE_MS2.QVALUE < %s -ORDER BY transition_group_id, - peak_group_rank; -''' % max_rs_peakgroup_qvalue - - # Execute main SQLite query - click.echo("Info: Reading peak group-level results.") - con.executescript(idx_query) # Add indices - data = pd.read_sql_query(query, con) - - # Augment OpenSWATH results with IPF scores - if ipf_present and ipf=='augmented': - data_augmented = pd.read_sql_query(query_augmented, con) - - data_augmented = data_augmented.groupby('id').apply(lambda x: pd.Series({'ipf_FullUniModPeptideName': ";".join(x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_FullUniModPeptideName']), 'ipf_precursor_peakgroup_pep': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_precursor_peakgroup_pep'].values[0], 'ipf_peptidoform_pep': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_peptidoform_pep'].values[0], 'ipf_peptidoform_m_score': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_peptidoform_m_score'].values[0]})).reset_index(level='id') - - data = pd.merge(data, data_augmented, how='left', on='id') - - # Append transition-level quantities - if transition_quantification: - if check_sqlite_table(con, "SCORE_TRANSITION"): - idx_transition_query = ''' -CREATE INDEX IF NOT EXISTS idx_feature_transition_transition_id ON FEATURE_TRANSITION (TRANSITION_ID); -CREATE INDEX IF NOT EXISTS idx_transition_transition_id ON TRANSITION (ID); - -CREATE INDEX IF NOT EXISTS 
idx_feature_transition_transition_id_feature_id ON FEATURE_TRANSITION (TRANSITION_ID, FEATURE_ID); -CREATE INDEX IF NOT EXISTS idx_score_transition_transition_id_feature_id ON SCORE_TRANSITION (TRANSITION_ID, FEATURE_ID); -CREATE INDEX IF NOT EXISTS idx_feature_transition_feature_id ON FEATURE_TRANSITION (FEATURE_ID); -''' - transition_query = ''' -SELECT FEATURE_TRANSITION.FEATURE_ID AS id, - GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area, - GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex, - GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation -FROM FEATURE_TRANSITION -INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID -INNER JOIN SCORE_TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = SCORE_TRANSITION.TRANSITION_ID AND FEATURE_TRANSITION.FEATURE_ID = SCORE_TRANSITION.FEATURE_ID -WHERE TRANSITION.DECOY == 0 AND SCORE_TRANSITION.PEP < %s -GROUP BY FEATURE_TRANSITION.FEATURE_ID -''' % max_transition_pep - else: - idx_transition_query = ''' -CREATE INDEX IF NOT EXISTS idx_feature_transition_transition_id ON FEATURE_TRANSITION (TRANSITION_ID); -CREATE INDEX IF NOT EXISTS idx_transition_transition_id ON TRANSITION (ID); - -CREATE INDEX IF NOT EXISTS idx_feature_transition_feature_id ON FEATURE_TRANSITION (FEATURE_ID); -''' - transition_query = ''' -SELECT FEATURE_ID AS id, - GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area, - GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex, - GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation -FROM FEATURE_TRANSITION -INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID -GROUP BY FEATURE_ID -''' - click.echo("Info: Reading transition-level results.") - con.executescript(idx_transition_query) # Add indices - data_transition = pd.read_sql_query(transition_query, con) - data = pd.merge(data, data_transition, how='left', 
on=['id']) - - # Append concatenated protein identifier - click.echo("Info: Reading protein identifiers.") - con.executescript(''' -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); -CREATE INDEX IF NOT EXISTS idx_protein_protein_id ON PROTEIN (ID); - -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); -''') - data_protein = pd.read_sql_query(''' -SELECT PEPTIDE_ID AS id_peptide, - GROUP_CONCAT(PROTEIN.PROTEIN_ACCESSION,';') AS ProteinName -FROM PEPTIDE_PROTEIN_MAPPING -INNER JOIN PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = PROTEIN.ID -GROUP BY PEPTIDE_ID; -''', con) - data = pd.merge(data, data_protein, how='inner', on=['id_peptide']) - - # Append peptide error-rate control - peptide_present = False - if peptide: - peptide_present = check_sqlite_table(con, "SCORE_PEPTIDE") - - if peptide_present and peptide: - click.echo("Info: Reading peptide-level results.") - data_peptide_run = pd.read_sql_query(''' -SELECT RUN_ID AS id_run, - PEPTIDE_ID AS id_peptide, - QVALUE AS m_score_peptide_run_specific -FROM SCORE_PEPTIDE -WHERE CONTEXT == 'run-specific'; -''', con) - if len(data_peptide_run.index) > 0: - data = pd.merge(data, data_peptide_run, how='inner', on=['id_run','id_peptide']) - - data_peptide_experiment = pd.read_sql_query(''' -SELECT RUN_ID AS id_run, - PEPTIDE_ID AS id_peptide, - QVALUE AS m_score_peptide_experiment_wide -FROM SCORE_PEPTIDE -WHERE CONTEXT == 'experiment-wide'; -''', con) - if len(data_peptide_experiment.index) > 0: - data = pd.merge(data, data_peptide_experiment, on=['id_run','id_peptide']) - - data_peptide_global = pd.read_sql_query(''' -SELECT PEPTIDE_ID AS id_peptide, - QVALUE AS m_score_peptide_global -FROM SCORE_PEPTIDE -WHERE CONTEXT == 'global'; -''', con) - if len(data_peptide_global.index) > 0: - data = pd.merge(data, data_peptide_global[data_peptide_global['m_score_peptide_global'] < max_global_peptide_qvalue], 
on=['id_peptide']) + # output for merged but not scored pyprophet input + if (check_sqlite_table(con, "SCORE_MS1") is False and check_sqlite_table(con, "SCORE_MS2") is False and check_sqlite_table(con, "SCORE_IPF") is False and check_sqlite_table(con, "SCORE_PEPTIDE") is False and check_sqlite_table(con, "SCORE_PROTEIN") is False ): + + score_sql = "" + + if (check_sqlite_table(con, "FEATURE_MS1")): + score_sql = write_scores_sql_command(con, score_sql, "FEATURE_MS1", "var_ms1_") + + if (check_sqlite_table(con, "FEATURE_MS2")): + score_sql = write_scores_sql_command(con, score_sql, "FEATURE_MS2", "var_ms2_") + + # remove last comma from sql statement, since a "FROM" is following + if (len(score_sql) > 0): + score_sql = ", " + score_sql # add comma at the beginning to fit to statement + score_sql = score_sql[:-2] # remove additional space and comma from the end of the string + + data = pd.read_sql_query(""" + SELECT + RUN.ID AS id_run, + PEPTIDE.ID AS id_peptide, + PRECURSOR.ID AS transition_group_id, + PRECURSOR.DECOY AS decoy, + RUN.ID AS run_id, + RUN.FILENAME AS filename, + FEATURE.EXP_RT AS RT, + FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, + FEATURE.DELTA_RT AS delta_rt, + PRECURSOR.LIBRARY_RT AS assay_RT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_RT, + FEATURE.ID AS id, + PRECURSOR.CHARGE AS Charge, + PRECURSOR.PRECURSOR_MZ AS mz, + FEATURE_MS2.AREA_INTENSITY AS Intensity, + FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, + FEATURE.LEFT_WIDTH AS leftWidth, + FEATURE.RIGHT_WIDTH AS rightWidth + %s + FROM PRECURSOR + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID + INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID + INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID + LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = 
FEATURE.ID + ORDER BY transition_group_id + """ % score_sql, con) - # Append protein error-rate control - protein_present = False - if protein: - protein_present = check_sqlite_table(con, "SCORE_PROTEIN") - - if protein_present and protein: - click.echo("Info: Reading protein-level results.") - con.executescript(''' -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); -CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); -CREATE INDEX IF NOT EXISTS idx_score_protein_run_id ON SCORE_PROTEIN (RUN_ID); -''') - data_protein_run = pd.read_sql_query(''' -SELECT RUN_ID AS id_run, - PEPTIDE_ID AS id_peptide, - MIN(QVALUE) AS m_score_protein_run_specific -FROM PEPTIDE_PROTEIN_MAPPING -INNER JOIN SCORE_PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = SCORE_PROTEIN.PROTEIN_ID -WHERE CONTEXT == 'run-specific' -GROUP BY RUN_ID, - PEPTIDE_ID; -''', con) - if len(data_protein_run.index) > 0: - data = pd.merge(data, data_protein_run, how='inner', on=['id_run','id_peptide']) - - con.executescript(''' -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); -CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); -CREATE INDEX IF NOT EXISTS idx_score_protein_run_id ON SCORE_PROTEIN (RUN_ID); -''') - data_protein_experiment = pd.read_sql_query(''' -SELECT RUN_ID AS id_run, - PEPTIDE_ID AS id_peptide, - MIN(QVALUE) AS m_score_protein_experiment_wide -FROM PEPTIDE_PROTEIN_MAPPING -INNER JOIN SCORE_PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = SCORE_PROTEIN.PROTEIN_ID -WHERE CONTEXT == 'experiment-wide' -GROUP BY RUN_ID, - PEPTIDE_ID; -''', con) - if len(data_protein_experiment.index) > 0: - data = 
pd.merge(data, data_protein_experiment, how='inner', on=['id_run','id_peptide']) + else: + ipf_present = False + if ipf: + ipf_present = check_sqlite_table(con, "SCORE_IPF") + + # Main query for peptidoform IPF + if ipf_present and ipf=='peptidoform': + idx_query = ''' + CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID); + CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_precursor_id ON PRECURSOR_PEPTIDE_MAPPING (PRECURSOR_ID); + CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID); + + CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_peptide_id ON PRECURSOR_PEPTIDE_MAPPING (PEPTIDE_ID); + CREATE INDEX IF NOT EXISTS idx_peptide_peptide_id ON PEPTIDE (ID); + + CREATE INDEX IF NOT EXISTS idx_run_run_id ON RUN (ID); + CREATE INDEX IF NOT EXISTS idx_feature_run_id ON FEATURE (RUN_ID); + + CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID); + ''' + if check_sqlite_table(con, "FEATURE_MS1"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms1_feature_id ON FEATURE_MS1 (FEATURE_ID);" + if check_sqlite_table(con, "FEATURE_MS2"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms2_feature_id ON FEATURE_MS2 (FEATURE_ID);" + if check_sqlite_table(con, "SCORE_MS1"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms1_feature_id ON SCORE_MS1 (FEATURE_ID);" + score_ms1_pep = "SCORE_MS1.PEP" + link_ms1 = "LEFT JOIN SCORE_MS1 ON SCORE_MS1.FEATURE_ID = FEATURE.ID" + else: + score_ms1_pep = "NULL" + link_ms1 = "" + if check_sqlite_table(con, "SCORE_MS2"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);" + if check_sqlite_table(con, "SCORE_IPF"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ipf_feature_id ON SCORE_IPF (FEATURE_ID);" + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ipf_peptide_id ON SCORE_IPF (PEPTIDE_ID);" + + query = ''' + SELECT RUN.ID AS id_run, + PEPTIDE.ID AS id_peptide, + PEPTIDE_IPF.MODIFIED_SEQUENCE || '_' 
|| PRECURSOR.ID AS transition_group_id, + PRECURSOR.DECOY AS decoy, + RUN.ID AS run_id, + RUN.FILENAME AS filename, + FEATURE.EXP_RT AS RT, + FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, + FEATURE.DELTA_RT AS delta_rt, + FEATURE.NORM_RT AS iRT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, + FEATURE.ID AS id, + PEPTIDE_IPF.UNMODIFIED_SEQUENCE AS Sequence, + PEPTIDE_IPF.MODIFIED_SEQUENCE AS FullPeptideName, + PRECURSOR.CHARGE AS Charge, + PRECURSOR.PRECURSOR_MZ AS mz, + FEATURE_MS2.AREA_INTENSITY AS Intensity, + FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, + FEATURE.LEFT_WIDTH AS leftWidth, + FEATURE.RIGHT_WIDTH AS rightWidth, + %s AS ms1_pep, + SCORE_MS2.PEP AS ms2_pep, + SCORE_IPF.PRECURSOR_PEAKGROUP_PEP AS precursor_pep, + SCORE_IPF.PEP AS ipf_pep, + SCORE_MS2.RANK AS peak_group_rank, + SCORE_MS2.SCORE AS d_score, + SCORE_MS2.QVALUE AS ms2_m_score, + SCORE_IPF.QVALUE AS m_score + FROM PRECURSOR + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID + INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID + INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID + LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + %s + LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID + LEFT JOIN SCORE_IPF ON SCORE_IPF.FEATURE_ID = FEATURE.ID + INNER JOIN PEPTIDE AS PEPTIDE_IPF ON SCORE_IPF.PEPTIDE_ID = PEPTIDE_IPF.ID + WHERE SCORE_MS2.QVALUE < %s AND SCORE_IPF.PEP < %s + ORDER BY transition_group_id, + peak_group_rank; + ''' % (score_ms1_pep, link_ms1, max_rs_peakgroup_qvalue, ipf_max_peptidoform_pep) + # Main query for augmented IPF + elif ipf_present and ipf=='augmented': + idx_query = ''' + CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID); + CREATE INDEX IF NOT 
EXISTS idx_precursor_peptide_mapping_precursor_id ON PRECURSOR_PEPTIDE_MAPPING (PRECURSOR_ID); + CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID); + + CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_peptide_id ON PRECURSOR_PEPTIDE_MAPPING (PEPTIDE_ID); + CREATE INDEX IF NOT EXISTS idx_peptide_peptide_id ON PEPTIDE (ID); + + CREATE INDEX IF NOT EXISTS idx_run_run_id ON RUN (ID); + CREATE INDEX IF NOT EXISTS idx_feature_run_id ON FEATURE (RUN_ID); + + CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID); + ''' + if check_sqlite_table(con, "FEATURE_MS1"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms1_feature_id ON FEATURE_MS1 (FEATURE_ID);" + if check_sqlite_table(con, "FEATURE_MS2"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms2_feature_id ON FEATURE_MS2 (FEATURE_ID);" + if check_sqlite_table(con, "SCORE_MS1"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms1_feature_id ON SCORE_MS1 (FEATURE_ID);" + score_ms1_pep = "SCORE_MS1.PEP" + link_ms1 = "LEFT JOIN SCORE_MS1 ON SCORE_MS1.FEATURE_ID = FEATURE.ID" + else: + score_ms1_pep = "NULL" + link_ms1 = "" + if check_sqlite_table(con, "SCORE_MS2"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);" + if check_sqlite_table(con, "SCORE_IPF"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ipf_feature_id ON SCORE_IPF (FEATURE_ID);" + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ipf_peptide_id ON SCORE_IPF (PEPTIDE_ID);" + + query = ''' + SELECT RUN.ID AS id_run, + PEPTIDE.ID AS id_peptide, + PRECURSOR.ID AS transition_group_id, + PRECURSOR.DECOY AS decoy, + RUN.ID AS run_id, + RUN.FILENAME AS filename, + FEATURE.EXP_RT AS RT, + FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, + FEATURE.DELTA_RT AS delta_rt, + FEATURE.NORM_RT AS iRT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, + FEATURE.ID AS id, + PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, + 
PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, + PRECURSOR.CHARGE AS Charge, + PRECURSOR.PRECURSOR_MZ AS mz, + FEATURE_MS2.AREA_INTENSITY AS Intensity, + FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, + FEATURE.LEFT_WIDTH AS leftWidth, + FEATURE.RIGHT_WIDTH AS rightWidth, + SCORE_MS2.RANK AS peak_group_rank, + SCORE_MS2.SCORE AS d_score, + SCORE_MS2.QVALUE AS m_score, + %s AS ms1_pep, + SCORE_MS2.PEP AS ms2_pep + FROM PRECURSOR + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID + INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID + INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID + LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + %s + LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID + WHERE SCORE_MS2.QVALUE < %s + ORDER BY transition_group_id, + peak_group_rank; + ''' % (score_ms1_pep, link_ms1, max_rs_peakgroup_qvalue) + query_augmented = ''' + SELECT FEATURE_ID AS id, + MODIFIED_SEQUENCE AS ipf_FullUniModPeptideName, + PRECURSOR_PEAKGROUP_PEP AS ipf_precursor_peakgroup_pep, + PEP AS ipf_peptidoform_pep, + QVALUE AS ipf_peptidoform_m_score + FROM SCORE_IPF + INNER JOIN PEPTIDE ON SCORE_IPF.PEPTIDE_ID = PEPTIDE.ID + WHERE SCORE_IPF.PEP < %s; + ''' % ipf_max_peptidoform_pep + # Main query for standard OpenSWATH + else: + idx_query = ''' + CREATE INDEX IF NOT EXISTS idx_precursor_precursor_id ON PRECURSOR (ID); + CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_precursor_id ON PRECURSOR_PEPTIDE_MAPPING (PRECURSOR_ID); + CREATE INDEX IF NOT EXISTS idx_feature_precursor_id ON FEATURE (PRECURSOR_ID); + + CREATE INDEX IF NOT EXISTS idx_precursor_peptide_mapping_peptide_id ON PRECURSOR_PEPTIDE_MAPPING (PEPTIDE_ID); + CREATE INDEX IF NOT EXISTS idx_peptide_peptide_id ON PEPTIDE (ID); + + CREATE INDEX IF NOT 
EXISTS idx_run_run_id ON RUN (ID); + CREATE INDEX IF NOT EXISTS idx_feature_run_id ON FEATURE (RUN_ID); + + CREATE INDEX IF NOT EXISTS idx_feature_feature_id ON FEATURE (ID); + ''' + if check_sqlite_table(con, "FEATURE_MS1"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms1_feature_id ON FEATURE_MS1 (FEATURE_ID);" + if check_sqlite_table(con, "FEATURE_MS2"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_feature_ms2_feature_id ON FEATURE_MS2 (FEATURE_ID);" + if check_sqlite_table(con, "SCORE_MS2"): + idx_query += "CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID);" + + query = ''' + SELECT RUN.ID AS id_run, + PEPTIDE.ID AS id_peptide, + PRECURSOR.ID AS transition_group_id, + PRECURSOR.DECOY AS decoy, + RUN.ID AS run_id, + RUN.FILENAME AS filename, + FEATURE.EXP_RT AS RT, + FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, + FEATURE.DELTA_RT AS delta_rt, + FEATURE.NORM_RT AS iRT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, + FEATURE.ID AS id, + PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, + PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, + PRECURSOR.CHARGE AS Charge, + PRECURSOR.PRECURSOR_MZ AS mz, + FEATURE_MS2.AREA_INTENSITY AS Intensity, + FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, + FEATURE.LEFT_WIDTH AS leftWidth, + FEATURE.RIGHT_WIDTH AS rightWidth, + SCORE_MS2.RANK AS peak_group_rank, + SCORE_MS2.SCORE AS d_score, + SCORE_MS2.QVALUE AS m_score + FROM PRECURSOR + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID + INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID + INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID + LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID + WHERE 
SCORE_MS2.QVALUE < %s + ORDER BY transition_group_id, + peak_group_rank; + ''' % max_rs_peakgroup_qvalue + + # Execute main SQLite query + click.echo("Info: Reading peak group-level results.") + con.executescript(idx_query) # Add indices + data = pd.read_sql_query(query, con) + + # Augment OpenSWATH results with IPF scores + if ipf_present and ipf=='augmented': + data_augmented = pd.read_sql_query(query_augmented, con) + + data_augmented = data_augmented.groupby('id').apply(lambda x: pd.Series({'ipf_FullUniModPeptideName': ";".join(x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_FullUniModPeptideName']), 'ipf_precursor_peakgroup_pep': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_precursor_peakgroup_pep'].values[0], 'ipf_peptidoform_pep': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_peptidoform_pep'].values[0], 'ipf_peptidoform_m_score': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_peptidoform_m_score'].values[0]})).reset_index(level='id') + + data = pd.merge(data, data_augmented, how='left', on='id') + + # Append transition-level quantities + if transition_quantification: + if check_sqlite_table(con, "SCORE_TRANSITION"): + idx_transition_query = ''' + CREATE INDEX IF NOT EXISTS idx_feature_transition_transition_id ON FEATURE_TRANSITION (TRANSITION_ID); + CREATE INDEX IF NOT EXISTS idx_transition_transition_id ON TRANSITION (ID); + + CREATE INDEX IF NOT EXISTS idx_feature_transition_transition_id_feature_id ON FEATURE_TRANSITION (TRANSITION_ID, FEATURE_ID); + CREATE INDEX IF NOT EXISTS idx_score_transition_transition_id_feature_id ON SCORE_TRANSITION (TRANSITION_ID, FEATURE_ID); + CREATE INDEX IF NOT EXISTS idx_feature_transition_feature_id ON FEATURE_TRANSITION (FEATURE_ID); + ''' + transition_query = ''' + SELECT FEATURE_TRANSITION.FEATURE_ID AS id, + GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area, + GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex, + 
GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation + FROM FEATURE_TRANSITION + INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID + INNER JOIN SCORE_TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = SCORE_TRANSITION.TRANSITION_ID AND FEATURE_TRANSITION.FEATURE_ID = SCORE_TRANSITION.FEATURE_ID + WHERE TRANSITION.DECOY == 0 AND SCORE_TRANSITION.PEP < %s + GROUP BY FEATURE_TRANSITION.FEATURE_ID + ''' % max_transition_pep + else: + idx_transition_query = ''' + CREATE INDEX IF NOT EXISTS idx_feature_transition_transition_id ON FEATURE_TRANSITION (TRANSITION_ID); + CREATE INDEX IF NOT EXISTS idx_transition_transition_id ON TRANSITION (ID); + + CREATE INDEX IF NOT EXISTS idx_feature_transition_feature_id ON FEATURE_TRANSITION (FEATURE_ID); + ''' + transition_query = ''' + SELECT FEATURE_ID AS id, + GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area, + GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex, + GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation + FROM FEATURE_TRANSITION + INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID + GROUP BY FEATURE_ID + ''' + click.echo("Info: Reading transition-level results.") + con.executescript(idx_transition_query) # Add indices + data_transition = pd.read_sql_query(transition_query, con) + data = pd.merge(data, data_transition, how='left', on=['id']) + + # Append concatenated protein identifier + click.echo("Info: Reading protein identifiers.") con.executescript(''' -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); -CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); -CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); -''') - data_protein_global = pd.read_sql_query(''' 
-SELECT PEPTIDE_ID AS id_peptide, - MIN(QVALUE) AS m_score_protein_global -FROM PEPTIDE_PROTEIN_MAPPING -INNER JOIN SCORE_PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = SCORE_PROTEIN.PROTEIN_ID -WHERE CONTEXT == 'global' -GROUP BY PEPTIDE_ID; -''', con) - if len(data_protein_global.index) > 0: - data = pd.merge(data, data_protein_global[data_protein_global['m_score_protein_global'] < max_global_protein_qvalue], how='inner', on=['id_peptide']) + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); + CREATE INDEX IF NOT EXISTS idx_protein_protein_id ON PROTEIN (ID); + + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); + ''') + data_protein = pd.read_sql_query(''' + SELECT PEPTIDE_ID AS id_peptide, + GROUP_CONCAT(PROTEIN.PROTEIN_ACCESSION,';') AS ProteinName + FROM PEPTIDE_PROTEIN_MAPPING + INNER JOIN PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = PROTEIN.ID + GROUP BY PEPTIDE_ID; + ''', con) + data = pd.merge(data, data_protein, how='inner', on=['id_peptide']) + + # Append peptide error-rate control + peptide_present = False + if peptide: + peptide_present = check_sqlite_table(con, "SCORE_PEPTIDE") + + if peptide_present and peptide: + click.echo("Info: Reading peptide-level results.") + data_peptide_run = pd.read_sql_query(''' + SELECT RUN_ID AS id_run, + PEPTIDE_ID AS id_peptide, + QVALUE AS m_score_peptide_run_specific + FROM SCORE_PEPTIDE + WHERE CONTEXT == 'run-specific'; + ''', con) + if len(data_peptide_run.index) > 0: + data = pd.merge(data, data_peptide_run, how='inner', on=['id_run','id_peptide']) + + data_peptide_experiment = pd.read_sql_query(''' + SELECT RUN_ID AS id_run, + PEPTIDE_ID AS id_peptide, + QVALUE AS m_score_peptide_experiment_wide + FROM SCORE_PEPTIDE + WHERE CONTEXT == 'experiment-wide'; + ''', con) + if len(data_peptide_experiment.index) > 0: + data = pd.merge(data, data_peptide_experiment, on=['id_run','id_peptide']) + + 
data_peptide_global = pd.read_sql_query(''' + SELECT PEPTIDE_ID AS id_peptide, + QVALUE AS m_score_peptide_global + FROM SCORE_PEPTIDE + WHERE CONTEXT == 'global'; + ''', con) + if len(data_peptide_global.index) > 0: + data = pd.merge(data, data_peptide_global[data_peptide_global['m_score_peptide_global'] < max_global_peptide_qvalue], on=['id_peptide']) + + # Append protein error-rate control + protein_present = False + if protein: + protein_present = check_sqlite_table(con, "SCORE_PROTEIN") + + if protein_present and protein: + click.echo("Info: Reading protein-level results.") + con.executescript(''' + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); + CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); + CREATE INDEX IF NOT EXISTS idx_score_protein_run_id ON SCORE_PROTEIN (RUN_ID); + ''') + data_protein_run = pd.read_sql_query(''' + SELECT RUN_ID AS id_run, + PEPTIDE_ID AS id_peptide, + MIN(QVALUE) AS m_score_protein_run_specific + FROM PEPTIDE_PROTEIN_MAPPING + INNER JOIN SCORE_PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = SCORE_PROTEIN.PROTEIN_ID + WHERE CONTEXT == 'run-specific' + GROUP BY RUN_ID, + PEPTIDE_ID; + ''', con) + if len(data_protein_run.index) > 0: + data = pd.merge(data, data_protein_run, how='inner', on=['id_run','id_peptide']) + + con.executescript(''' + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); + CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); + CREATE INDEX IF NOT EXISTS idx_score_protein_run_id ON SCORE_PROTEIN (RUN_ID); + ''') + data_protein_experiment = pd.read_sql_query(''' + SELECT RUN_ID AS id_run, + PEPTIDE_ID AS id_peptide, + 
MIN(QVALUE) AS m_score_protein_experiment_wide + FROM PEPTIDE_PROTEIN_MAPPING + INNER JOIN SCORE_PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = SCORE_PROTEIN.PROTEIN_ID + WHERE CONTEXT == 'experiment-wide' + GROUP BY RUN_ID, + PEPTIDE_ID; + ''', con) + if len(data_protein_experiment.index) > 0: + data = pd.merge(data, data_protein_experiment, how='inner', on=['id_run','id_peptide']) + + con.executescript(''' + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); + CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); + CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); + ''') + data_protein_global = pd.read_sql_query(''' + SELECT PEPTIDE_ID AS id_peptide, + MIN(QVALUE) AS m_score_protein_global + FROM PEPTIDE_PROTEIN_MAPPING + INNER JOIN SCORE_PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = SCORE_PROTEIN.PROTEIN_ID + WHERE CONTEXT == 'global' + GROUP BY PEPTIDE_ID; + ''', con) + if len(data_protein_global.index) > 0: + data = pd.merge(data, data_protein_global[data_protein_global['m_score_protein_global'] < max_global_protein_qvalue], how='inner', on=['id_peptide']) if outcsv: sep = "," @@ -428,7 +479,6 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t con.close() - def export_score_plots(infile): con = sqlite3.connect(infile) diff --git a/pyprophet/export_compound.py b/pyprophet/export_compound.py index dfab937..98e4b69 100644 --- a/pyprophet/export_compound.py +++ b/pyprophet/export_compound.py @@ -2,12 +2,65 @@ import sqlite3 from .data_handling import check_sqlite_table +from .data_handling import write_scores_sql_command from .report import plot_scores def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue): con = sqlite3.connect(infile) - if check_sqlite_table(con, "SCORE_MS1"): + + # output for merged but not scored pyprophet input + if 
(check_sqlite_table(con, "SCORE_MS1") is False and check_sqlite_table(con, "SCORE_MS2") is False): # No scoring performed + + score_sql = "" + + if (check_sqlite_table(con, "FEATURE_MS1")): + score_sql = write_scores_sql_command(con, score_sql, "FEATURE_MS1", "var_ms1_") + + if (check_sqlite_table(con, "FEATURE_MS2")): + score_sql = write_scores_sql_command(con, score_sql, "FEATURE_MS2", "var_ms2_") + + # remove last comma from sql statement, since a "FROM" is following + if (len(score_sql) > 0): + score_sql = ", " + score_sql # add comma at the beginning to fit to statement + score_sql = score_sql[:-2] # remove additional space and comma from the end of the string + + data = pd.read_sql_query(""" + SELECT + RUN.ID AS id_run, + COMPOUND.ID AS id_compound, + PRECURSOR.ID AS transition_group_id, + PRECURSOR.DECOY AS decoy, + RUN.ID AS run_id, + RUN.FILENAME AS filename, + FEATURE.EXP_RT AS RT, + FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, + FEATURE.DELTA_RT AS delta_rt, + PRECURSOR.LIBRARY_RT AS assay_RT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_RT, + FEATURE.ID AS id, + COMPOUND.SUM_FORMULA AS sum_formula, + COMPOUND.COMPOUND_NAME AS compound_name, + COMPOUND.ADDUCTS AS Adducts, + PRECURSOR.CHARGE AS Charge, + PRECURSOR.PRECURSOR_MZ AS mz, + FEATURE_MS2.AREA_INTENSITY AS Intensity, + FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, + FEATURE.LEFT_WIDTH AS leftWidth, + FEATURE.RIGHT_WIDTH AS rightWidth + %s + FROM PRECURSOR + INNER JOIN PRECURSOR_COMPOUND_MAPPING ON PRECURSOR.ID = PRECURSOR_COMPOUND_MAPPING.PRECURSOR_ID + INNER JOIN COMPOUND ON PRECURSOR_COMPOUND_MAPPING.COMPOUND_ID = COMPOUND.ID + INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID + LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + ORDER BY transition_group_id + """ % score_sql, con) + + elif
check_sqlite_table(con, "SCORE_MS1"): # MS1 scoring performed data = pd.read_sql_query(""" SELECT RUN.ID AS id_run, @@ -47,7 +100,7 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue ORDER BY transition_group_id, peak_group_rank; """ % max_rs_peakgroup_qvalue, con) - else: + else: # MS2 or MS1MS2 scoring performed data = pd.read_sql_query(""" SELECT RUN.ID AS id_run, @@ -105,4 +158,3 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue data = data[['transition_group_id','sum_formula','compound_name', 'Adducts','filename','Intensity']] data = data.pivot_table(index=['transition_group_id','sum_formula','compound_name','Adducts'], columns='filename', values='Intensity') data.to_csv(outfile, sep=sep, index=True) - diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_compound_unscored.out b/tests/_regtest_outputs/test_pyprophet_export.test_compound_unscored.out new file mode 100644 index 0000000..42d4316 --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_compound_unscored.out @@ -0,0 +1,14 @@ + Adducts Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_RT assay_rt compound_name decoy delta_RT delta_rt filename id leftWidth mz rightWidth run_id sum_formula transition_group_id var_ms1_im_ms1_delta_score var_ms1_isotope_correlation_score var_ms1_isotope_overlap_score var_ms1_massdev_score var_ms1_mi_combined_score var_ms1_mi_contrast_score var_ms1_mi_score var_ms1_xcorr_coelution var_ms1_xcorr_coelution_combined var_ms1_xcorr_coelution_contrast var_ms1_xcorr_shape var_ms1_xcorr_shape_combined var_ms1_xcorr_shape_contrast var_ms2_bseries_score var_ms2_dotprod_score var_ms2_elution_model_fit_score var_ms2_im_delta_score var_ms2_im_xcorr_coelution var_ms2_im_xcorr_shape var_ms2_intensity_score var_ms2_isotope_correlation_score var_ms2_isotope_overlap_score var_ms2_library_corr var_ms2_library_dotprod var_ms2_library_manhattan var_ms2_library_rmsd
var_ms2_library_rootmeansquare var_ms2_library_sangle var_ms2_log_sn_score var_ms2_manhattan_score var_ms2_massdev_score var_ms2_massdev_score_weighted var_ms2_mi_ratio_score var_ms2_mi_score var_ms2_mi_weighted_score var_ms2_norm_rt_score var_ms2_sonar_lag var_ms2_sonar_log_diff var_ms2_sonar_log_sn var_ms2_sonar_log_trend var_ms2_sonar_rsq var_ms2_sonar_shape var_ms2_xcorr_coelution var_ms2_xcorr_coelution_weighted var_ms2_xcorr_shape var_ms2_xcorr_shape_weighted var_ms2_yseries_score +0 M+H+ 1 6225.5100 112.742 73086.0000 314988.6293 117.983 117.9827 Methamidophos 0 -5.241 -5.2406 /Users/alka/Documents/work/projects/OpenSWATH_... 6629051171722998253 108.687 142.0086 121.853 3140099155998074833 C2H8NO2PS 0 NaN 0.9999 0.0014 5.1551 NaN NaN NaN 0.0000 16.5692 18.6667 0.9377 0.4422 0.2955 0.0 0.4198 NaN NaN NaN NaN 0.8171 0.9986 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 2.8562 1.4708 0.6429 0.3704 NaN NaN NaN 5.5165e-03 NaN NaN NaN NaN NaN NaN 19.8653 15.4097 0.1667 0.0369 0.0 +1 M+H+ 1 9.1700 123.499 457.0000 680.6210 117.983 117.9830 Methamidophos 0 5.516 5.5160 /Users/alka/Documents/work/projects/OpenSWATH_... 5754119136871178291 121.853 142.0086 125.145 3140099155998074833 C2H8NO2PS 0 NaN 0.0000 0.3990 2.1159 NaN NaN NaN 4.1909 4.7088 4.8749 0.4609 0.2621 0.1932 0.0 0.1824 NaN NaN NaN NaN 0.0012 0.5902 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 0.0000 1.6955 3.1363 1.8072 NaN NaN NaN 5.8063e-03 NaN NaN NaN NaN NaN NaN 4.9663 3.8524 0.1667 0.0369 0.0 +2 M+H+ 1 758.0860 117.364 15110.0000 49705.2486 117.983 117.9831 Methamidophos 0 -0.619 -0.6191 /Users/alka/Documents/work/projects/OpenSWATH_... 
8877729345763666378 114.448 142.0086 121.854 2408145804652532658 C2H8NO2PS 0 NaN 0.0000 0.0017 6.1092 NaN NaN NaN 1.3367 9.3543 10.3531 0.8274 0.3707 0.2022 0.0 0.4061 NaN NaN NaN NaN 0.7938 0.9959 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 3.6594 1.4708 1.7058 0.9830 NaN NaN NaN 6.5173e-04 NaN NaN NaN NaN NaN NaN 11.1742 8.6680 0.1667 0.0369 0.0 +3 M+H+ 1 198.6610 114.092 2967.0000 12507.2948 117.983 117.9832 Methamidophos 0 -3.891 -3.8912 /Users/alka/Documents/work/projects/OpenSWATH_... 85932679098247514 110.333 142.0086 119.385 1007350642398073598 C2H8NO2PS 0 NaN 0.9908 0.0090 9.8699 NaN NaN NaN 11.5249 12.9492 13.4061 0.4405 0.2372 0.1487 0.0 0.2990 NaN NaN NaN NaN 0.7852 0.8089 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 2.5903 1.6034 3.4600 1.9938 NaN NaN NaN 4.0960e-03 NaN NaN NaN NaN NaN NaN 13.6574 10.5942 0.1667 0.0369 0.0 +4 M+H+ 1 165.0120 117.559 2930.0000 7454.3224 117.983 117.9831 Methamidophos 0 -0.424 -0.4241 /Users/alka/Documents/work/projects/OpenSWATH_... 8707546894788545451 115.270 142.0086 120.208 214379053049545951 C2H8NO2PS 0 NaN 0.0000 0.0103 1.2196 NaN NaN NaN 7.3341 8.2174 8.4327 0.4717 0.2578 0.1760 0.0 0.3276 NaN NaN NaN NaN 0.7501 0.9725 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 3.3673 1.6283 2.0336 1.1719 NaN NaN NaN 4.4643e-04 NaN NaN NaN NaN NaN NaN 8.6911 6.7417 0.1667 0.0369 0.0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 M+Na+ 1 6.1690 461.710 62.0000 50.9950 475.718 475.7176 Isoprothiolane_decoy 1 -14.008 -14.0076 /Users/alka/Documents/work/projects/OpenSWATH_... 
1964030463263715205 460.065 313.0539 463.356 6344784885616328495 C12H18O4S2 4 NaN 0.0000 0.0000 159.7169 NaN NaN NaN 3.7247 3.5925 3.4444 0.1667 0.1201 0.0579 0.0 0.4601 NaN NaN NaN NaN 0.0290 -0.4941 0.0 -0.5000 0.3184 1.5837 0.5991 0.6960 1.4453 0.0000 1.3431 6.0865 14.5563 NaN NaN NaN 1.4745e-02 NaN NaN NaN NaN NaN NaN 3.7247 2.9692 0.1667 0.0103 0.0 +96 M+Na+ 1 9.8760 472.007 0.0000 0.0000 475.718 475.7183 Isoprothiolane_decoy 1 -3.711 -3.7113 /Users/alka/Documents/work/projects/OpenSWATH_... 2859267736951343070 471.061 313.0539 472.707 5621422513449725403 C12H18O4S2 4 NaN -0.3997 0.0000 29.1235 NaN NaN NaN 2.0000 2.3868 2.0000 0.0000 0.1190 0.0000 0.0 0.5684 NaN NaN NaN NaN 0.0606 -0.3470 0.0 -0.9897 0.4499 1.1673 0.5315 0.5647 1.3934 1.4663 1.0597 13.0114 25.3056 NaN NaN NaN 3.9066e-03 NaN NaN NaN NaN NaN NaN 2.1499 1.9383 0.4167 0.0308 0.0 +97 M+Na+ 1 21.3980 468.593 39.4751 51.0260 475.718 475.7180 Isoprothiolane_decoy 1 -7.125 -7.1250 /Users/alka/Documents/work/projects/OpenSWATH_... 7551648122635593597 466.124 313.0539 471.061 5621422513449725403 C12H18O4S2 4 NaN 0.9682 0.6667 56.4466 NaN NaN NaN 8.6911 8.5100 8.5556 0.1667 0.1175 0.0519 0.0 0.5610 NaN NaN NaN NaN 0.1313 0.5428 0.0 1.0000 0.8929 0.8327 0.1352 0.1434 0.1780 0.0000 1.0697 13.8422 17.2104 NaN NaN NaN 7.5000e-03 NaN NaN NaN NaN NaN NaN 8.6911 2.5514 0.1667 0.6355 0.0 +98 M+Na+ 1 9.0475 486.697 0.0000 0.0000 475.718 475.7184 Isoprothiolane_decoy 1 10.979 10.9786 /Users/alka/Documents/work/projects/OpenSWATH_... 
1499988340730146690 485.051 313.0539 488.342 5621422513449725403 C12H18O4S2 4 NaN 0.0000 0.0000 159.7169 NaN NaN NaN 5.0000 5.8530 5.0000 0.0000 0.0476 0.0000 0.0 0.4831 NaN NaN NaN NaN 0.0555 -0.4200 0.0 1.0000 0.8929 0.8327 0.1352 0.1434 0.1780 1.2993 1.1348 24.0979 12.6527 NaN NaN NaN 1.1556e-02 NaN NaN NaN NaN NaN NaN 6.2079 1.8224 0.1667 0.6355 0.0 +99 M+Na+ 1 44.4405 475.691 3081.0000 8395.4228 475.718 475.7179 Isoprothiolane_decoy 1 -0.027 -0.0269 /Users/alka/Documents/work/projects/OpenSWATH_... 1898773645914670705 473.231 313.0539 478.991 5621422513449725403 C12H18O4S2 4 NaN 0.2104 0.0000 0.3133 NaN NaN NaN 0.0000 5.3096 5.9138 0.8544 0.5499 0.3979 0.0 0.5071 NaN NaN NaN NaN 0.2726 -0.2917 0.0 0.9245 0.9266 0.4163 0.1176 0.1252 0.2708 0.0000 1.0414 58.9369 65.1163 NaN NaN NaN 2.8275e-05 NaN NaN NaN NaN NaN NaN 7.3364 1.5093 0.4733 0.7816 0.0 + +[100 rows x 67 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_unscored.out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_unscored.out new file mode 100644 index 0000000..3055576 --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_unscored.out @@ -0,0 +1,14 @@ + Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_RT assay_rt decoy delta_RT delta_rt filename id leftWidth mz rightWidth run_id transition_group_id var_ms1_isotope_correlation_score var_ms1_isotope_overlap_score var_ms1_massdev_score var_ms1_xcorr_coelution var_ms1_xcorr_shape var_ms2_bseries_score var_ms2_dotprod_score var_ms2_elution_model_fit_score var_ms2_intensity_score var_ms2_isotope_correlation_score var_ms2_isotope_overlap_score var_ms2_library_corr var_ms2_library_dotprod var_ms2_library_manhattan var_ms2_library_rmsd var_ms2_library_rootmeansquare var_ms2_library_sangle var_ms2_log_sn_score var_ms2_manhattan_score var_ms2_massdev_score var_ms2_massdev_score_weighted var_ms2_norm_rt_score var_ms2_sonar_lag var_ms2_sonar_log_diff var_ms2_sonar_log_sn var_ms2_sonar_log_trend 
var_ms2_sonar_rsq var_ms2_sonar_shape var_ms2_xcorr_coelution var_ms2_xcorr_coelution_weighted var_ms2_xcorr_shape var_ms2_xcorr_shape_weighted var_ms2_yseries_score +0 2 207283.0 2661.55 117220.7482 854645.0 26.5 2595.5788 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz -4409520928686189639 2640.5100 728.8795 2705.3701 -8670811102654834151 0 0.9835 0.1247 1.3707 0.0000 0.9907 9.0 0.7708 NaN 0.7811 0.9962 0.0000 0.9987 0.9978 0.0659 0.0239 0.0262 0.0725 4.7388 0.7451 0.3398 0.1793 0.0194 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9936 0.9958 11.0 +1 2 6385.0 2605.74 8790.7812 104006.0 26.5 2595.5733 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 260819276075322832 2575.6399 728.8795 2623.4399 -8670811102654834151 0 0.9555 0.2667 5.4202 5.1430 0.6532 2.0 0.7610 NaN 0.0241 0.9216 0.1104 0.8271 0.9764 0.2223 0.0995 0.1102 0.3579 1.3130 0.7675 4.5391 3.5103 0.0032 NaN NaN NaN NaN NaN NaN 7.0474 2.3104 0.7806 0.8341 6.0 +2 2 5180.0 2832.77 10419.7435 241873.0 26.5 2595.5778 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 5163914660633416481 2811.2000 728.8795 2855.5801 -8670811102654834151 0 0.6123 0.4707 8.9907 4.0083 0.5985 2.0 0.7923 NaN 0.0195 0.8418 0.0911 0.9916 0.9960 0.0958 0.0387 0.0426 0.1243 0.6699 0.6863 4.7328 2.9948 0.0690 NaN NaN NaN NaN NaN NaN 4.3568 2.0950 0.6909 0.6974 6.0 +3 2 2693.0 2795.06 4036.5600 25862.3 26.5 2595.5754 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 6932937885234622359 2790.7200 728.8795 2811.2000 -8670811102654834151 0 0.1872 2.4435 1.8505 4.0083 0.6422 4.0 0.7883 NaN 0.0101 0.6804 0.1794 0.4554 0.9481 0.3084 0.1494 0.1882 0.6202 0.6284 0.6986 5.4811 3.8885 0.0581 NaN NaN NaN NaN NaN NaN 1.6487 0.9186 0.7955 0.7971 6.0 +4 2 3838.0 2708.53 5750.4716 73215.2 26.5 2595.5750 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 8534214264242363560 2705.3701 728.8795 2736.0901 -8670811102654834151 0 -0.3692 0.7498 7.1610 6.7500 0.4827 3.0 0.8181 NaN 0.0145 0.7660 0.1334 0.8344 0.9736 0.2367 0.1055 0.1166 0.3772 0.6034 0.6468 2.5636 
1.1471 0.0330 NaN NaN NaN NaN NaN NaN 3.4656 0.9347 0.6790 0.7379 5.0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 10959.0 2538.15 10810.2698 56553.9 16.3 2245.2307 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz -5430403952310232561 2520.5100 612.3184 2544.4099 -8670811102654834151 19 0.7016 0.5650 8.3246 1.8165 0.5954 2.0 0.6942 NaN 0.0121 0.4590 0.6792 -0.1524 0.9216 0.3502 0.1629 0.1915 0.6979 0.6856 0.7628 4.0046 4.4541 0.0856 NaN NaN NaN NaN NaN NaN 1.4718 0.6660 0.7586 0.6855 6.0 +96 2 20746.0 2120.97 10568.0806 85676.6 16.3 2245.2373 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz -610141049182829192 2100.6001 612.3184 2141.5701 -8670811102654834151 19 0.5016 0.9312 12.3692 1.8165 0.6723 4.0 0.7176 NaN 0.0230 0.6551 0.4036 0.5606 0.9754 0.2021 0.0921 0.1010 0.3719 0.7732 0.7428 4.7941 2.9011 0.0354 NaN NaN NaN NaN NaN NaN 2.5491 1.1289 0.8015 0.7928 5.0 +97 2 48058.0 2291.53 6480.8607 75465.1 16.3 2245.2345 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 2043199813358518344 2267.8799 612.3184 2298.6101 -8670811102654834151 19 0.7397 3.0841 3.4962 5.3116 0.4407 5.0 0.6916 NaN 0.0533 0.7474 0.0866 -0.3525 0.8138 0.5478 0.2791 0.3394 1.0605 1.2194 0.8070 2.2562 2.3997 0.0141 NaN NaN NaN NaN NaN NaN 4.4101 1.9926 0.7236 0.7141 7.0 +98 2 16553.0 2317.38 9656.7598 89588.0 16.3 2245.2345 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 6262215160571261022 2302.0200 612.3184 2332.7400 -8670811102654834151 19 0.4212 0.8821 2.1071 7.7080 0.4296 4.0 0.7153 NaN 0.0183 0.0829 0.2014 -0.0107 0.9622 0.2794 0.1337 0.1370 0.5120 0.2548 0.7596 1.9435 1.6447 0.0216 NaN NaN NaN NaN NaN NaN 0.7830 0.1896 0.7600 0.7459 7.0 +99 2 597887.0 2230.18 269150.5777 1192530.0 16.3 2245.2318 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 6870255268859409918 2213.2600 612.3184 2247.3999 -8670811102654834151 19 0.9939 
0.1300 2.0203 0.0000 0.9990 8.0 0.7684 NaN 0.6626 0.9928 0.0000 0.8658 0.9884 0.1569 0.0719 0.0725 0.2613 4.7826 0.7659 1.6355 1.5429 0.0037 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9992 0.9993 8.0 + +[100 rows x 50 columns] diff --git a/tests/test_pyprophet_export.py b/tests/test_pyprophet_export.py index 60bd4fa..7b393f3 100644 --- a/tests/test_pyprophet_export.py +++ b/tests/test_pyprophet_export.py @@ -60,6 +60,17 @@ def _run_osw(regtest, temp_folder, transition_quantification=False, peptide=Fals print(pd.read_csv("test_data.tsv", sep="\t", nrows=100).sort_index(axis=1),file=regtest) +def _run_osw_unscored(regtest, temp_folder, transition_quantification=False, peptide=False, protein=False): + os.chdir(temp_folder) + data_path = os.path.join(DATA_FOLDER, "test_data.osw") + shutil.copy(data_path, temp_folder) + + # export + cmdline = "pyprophet export --in=test_data.osw --out=test_data.tsv --format=legacy_merged" + + stdout = _run_cmdline(cmdline) + + print(pd.read_csv("test_data.tsv", sep="\t", nrows=100).sort_index(axis=1),file=regtest) def _run_ipf(regtest, temp_folder, transition_quantification=False, ipf="disable"): os.chdir(temp_folder) @@ -88,14 +99,24 @@ def _run_ipf(regtest, temp_folder, transition_quantification=False, ipf="disable stdout = _run_cmdline(cmdline) print(pd.read_csv("test_data.tsv", sep="\t", nrows=100).sort_index(axis=1),file=regtest) + +def _run_compound_unscored(regtest, temp_folder): + os.chdir(temp_folder) + data_path= os.path.join(DATA_FOLDER, "test_data_compound.osw") + shutil.copy(data_path, temp_folder) + + # export + cmdline = "pyprophet export-compound --in=test_data_compound.osw --out=test_data_compound_unscored.tsv --format=legacy_merged" + stdout = _run_cmdline(cmdline) + print(pd.read_csv("test_data_compound_unscored.tsv", sep="\t", nrows=100).sort_index(axis=1),file=regtest) def _run_compound_ms1(regtest, temp_folder): os.chdir(temp_folder) data_path= os.path.join(DATA_FOLDER, "test_data_compound.osw") shutil.copy(data_path, 
temp_folder) - # MS2-level + # MS1-level cmdline = "pyprophet score --in=test_data_compound.osw --level=ms1 --test" # export @@ -109,7 +130,7 @@ def _run_compound_ms2(regtest, temp_folder): data_path= os.path.join(DATA_FOLDER, "test_data_compound.osw") shutil.copy(data_path, temp_folder) - # MS1-level + # MS2-level cmdline = "pyprophet score --in=test_data_compound.osw --level=ms2 --test" # export @@ -149,3 +170,9 @@ def test_compound_0(tmpdir, regtest): def test_compound_1(tmpdir, regtest): _run_compound_ms2(regtest, tmpdir.strpath) + +def test_compound_unscored(tmpdir, regtest): + _run_compound_unscored(regtest, tmpdir.strpath) + +def test_osw_unscored(tmpdir, regtest): + _run_osw_unscored(regtest, tmpdir.strpath)