diff --git a/src/pyOpenMS/pyopenms/dataframes.py b/src/pyOpenMS/pyopenms/dataframes.py index 6405038048f..73f48aadfe9 100644 --- a/src/pyOpenMS/pyopenms/dataframes.py +++ b/src/pyOpenMS/pyopenms/dataframes.py @@ -293,20 +293,26 @@ def get_key(val): cv.loadFromOBO("psims", File.getOpenMSDataPath() + "/CV/psi-ms.obo") clearMVs = [cv.getTerm(m).name if m.startswith("MS:") else m for m in decodedMVs] #cols = ["id", "RT", "mz", "score", "charge"] + decodedMVs - clearcols = ["id", "RT", "mz", mainscorename, "charge"] + clearMVs - coltypes = ['U100', 'f', 'f', 'f', 'i'] + types + clearcols = ["id", "RT", "mz", mainscorename, "charge", "protein_accession", "start", "end"] + clearMVs + coltypes = ['U100', 'f', 'f', 'f', 'i','U1000', 'U1000', 'U1000'] + types dt = list(zip(clearcols, coltypes)) def extract(pep): hits = pep.getHits() if not hits: if export_unidentified: - return tuple(pep.getIdentifier().encode('utf-8'), pep.getRT(), pep.getMZ(), default_missing_values[float], default_missing_values[int], *dmv) + return (pep.getIdentifier().encode('utf-8'), pep.getRT(), pep.getMZ(), default_missing_values[float], default_missing_values[int], + default_missing_values[str], default_missing_values[str], default_missing_values[str], *dmv) else: return besthit = hits[0] - ret = [pep.getIdentifier().encode('utf-8'), pep.getRT(), pep.getMZ(), besthit.getScore(), besthit.getCharge()] + ret = [pep.getIdentifier().encode('utf-8'), pep.getRT(), pep.getMZ(), besthit.getScore(), besthit.getCharge()] + # add accession, start and end positions of peptide evidences as comma separated str (like in mzTab) + evs = besthit.getPeptideEvidences() + ret += [','.join(v) if v else default_missing_values[str] for v in ([e.getProteinAccession() for e in evs], + [str(e.getStart()) for e in evs], + [str(e.getEnd()) for e in evs])] for k in metavals: if besthit.metaValueExists(k): val = besthit.getMetaValue(k) diff --git a/src/pyOpenMS/tests/unittests/test000.py b/src/pyOpenMS/tests/unittests/test000.py index 208bf187c9b..6a10111f1f7 100644 --- a/src/pyOpenMS/tests/unittests/test000.py +++ b/src/pyOpenMS/tests/unittests/test000.py @@ -2255,12 +2255,31 @@ def test_peptide_identifications_to_df(): h.setCharge(2) h.setMetaValue("StringMetaValue", "Value") h.setMetaValue("IntMetaValue", 2) + e1 = pyopenms.PeptideEvidence() + e1.setProteinAccession("sp|Accession1") + e1.setStart(123) + e1.setEnd(141) + e2 = pyopenms.PeptideEvidence() + e2.setProteinAccession("sp|Accession2") + e2.setStart(12) + e2.setEnd(24) + h.setPeptideEvidences([e1, e2]) p.insertHit(h) peps.append(p) - peps.append(p) - assert pyopenms.peptide_identifications_to_df(peps).shape == (2,7) + p1 = pyopenms.PeptideIdentification() + p1.setRT(1243.56) + p1.setMZ(240.0) + p1.setScoreType("ScoreType") + p1.setHigherScoreBetter(False) + p1.setIdentifier("IdentificationRun2") + + peps.append(p1) + + assert pyopenms.peptide_identifications_to_df(peps).shape == (2,10) + assert pyopenms.peptide_identifications_to_df(peps)['protein_accession'][0] == 'sp|Accession1,sp|Accession2' + assert pyopenms.peptide_identifications_to_df(peps, export_unidentified=False).shape == (1,10) @report def testPepXMLFile():