# **Create-MzML**

Create a small `.mzML` file containing only the signals from the precursor `AFVDFLSDEIK2` (peptide `AFVDFLSDEIK`, charge 2).

In [1]:
import pandas as pd
import polars as pl
import pyopenms as po

#### **Load Results**

In [2]:
# Lazily scan the tab-separated PyProphet export, keep only the rows with
# decoy == 0 that belong to the run of interest and have m_score < 0.01,
# then materialise the result as a pandas DataFrame.
# NOTE(review): newer polars releases spell the `sep` keyword `separator` --
# confirm against the installed polars version.
rslts = (
    pl.scan_csv("../osw/pyprophet_export_25pc_50ng.tsv", sep='\t')
    .filter(pl.col('decoy') == 0)
    .filter(pl.col('filename') == '20190816_TIMS05_MA_FlMe_diaPASEF_25pc_50ng_A2_1_26.d_mergeFiles.mzML')
    .filter(pl.col('m_score') < 0.01)
    .collect()
    .to_pandas()
)

---

## **Create Test File**

Create a test file for the precursor `AFVDFLSDEIK2`.

In [3]:
# Handle for the raw acquisition; the mzML file is attached to it with
# openFile() in the following cell.
# NOTE(review): presumably reads spectra from disk on demand rather than
# loading the whole file into memory -- confirm in the pyopenms docs.
exp = po.OnDiscMSExperiment()

In [4]:
# Attach the mzML file to the on-disc experiment. The return value is shown as
# the cell output (True in the recorded run; presumably a success flag).
# NOTE(review): the results table names this run
# '..._A2_1_26.d_mergeFiles.mzML' -- confirm both refer to the same acquisition.
exp.openFile("20190816_TIMS05_MA_FlMe_diaPASEF_25pc_50ng_A2_1_26.mzML")

True

Get the peak boundaries so we know where to filter.

In [5]:
# Rows of the target precursor: peptide sequence AFVDFLSDEIK at charge 2.
is_target = (rslts['Charge'] == 2) & (rslts['FullPeptideName'] == 'AFVDFLSDEIK')
# Columns needed to locate the peak: run name, peak boundaries and m/z.
boundary_cols = ['filename', 'leftWidth', 'rightWidth', 'mz']
rslts.loc[is_target, boundary_cols].values

array([['20190816_TIMS05_MA_FlMe_diaPASEF_25pc_50ng_A2_1_26.d_mergeFiles.mzML',
        6235.8486328125, 6248.42822265625, 642.3295],
       ['20190816_TIMS05_MA_FlMe_diaPASEF_25pc_50ng_A2_1_26.d_mergeFiles.mzML',
        6255.64599609375, 6266.51513671875, 642.3295]], dtype=object)

In [6]:
# Metadata view of the experiment; the extraction loop iterates over it to read
# each spectrum's native ID, retention time, MS level and precursor.
# NOTE(review): presumably carries spectrum metadata without the peak data --
# confirm in the pyopenms docs.
exp_meta = exp.getMetaData()

In [7]:
# Collect, into a new in-memory experiment, every MS1 spectrum and every
# fragment spectrum of the target isolation window that falls inside the
# retention-time window around the peak.
expOut = po.MSExperiment()

# Retention-time window (exclusive bounds). The peak boundaries listed above
# (6235.8-6266.5) lie inside it.
rtMin = 6225
rtMax = 6270
# Precursor m/z reported for the isolation window of interest.
targetWindowMz = 625.0  # window is 600-650

for s in exp_meta:
    rt = s.getRT()
    if not (rtMin < rt < rtMax):
        continue

    nativeId = s.getNativeID()
    if s.getMSLevel() == 1:
        print("adding ms1")
        expOut.addSpectrum(exp.getSpectrumByNativeId(nativeId))
        continue

    # Only fragment spectra are asked for a precursor, and only after the RT
    # check: a spectrum without any precursor entry would otherwise raise
    # IndexError on the [0] lookup.
    precursors = s.getPrecursors()
    if not precursors:
        continue

    mz = precursors[0].getMZ()
    if mz == targetWindowMz:
        print("found!!")
        print(rt, mz)
        expOut.addSpectrum(exp.getSpectrumByNativeId(nativeId))

adding ms1
found!!
6225.11081679484 625.0
adding ms1
found!!
6226.89773809107 625.0
adding ms1
found!!
6228.68570364494 625.0
adding ms1
found!!
6230.47519995558 625.0
adding ms1
found!!
6232.26158460719 625.0
adding ms1
found!!
6234.0534788102 625.0
adding ms1
found!!
6235.84861018256 625.0
adding ms1
found!!
6237.64613563505 625.0
adding ms1
found!!
6239.43720522349 625.0
adding ms1
found!!
6241.24574655304 625.0
adding ms1
found!!
6243.03476926748 625.0
adding ms1
found!!
6244.82797253146 625.0
adding ms1
found!!
6246.62867474413 625.0
adding ms1
found!!
6248.42832918019 625.0
adding ms1
found!!
6250.22954838984 625.0
adding ms1
found!!
6252.03127198678 625.0
adding ms1
found!!
6253.83602161421 625.0
adding ms1
found!!
6255.64580924956 625.0
adding ms1
found!!
6257.45004012053 625.0
adding ms1
found!!
6259.2927545687 625.0
adding ms1
found!!
6261.10140645291 625.0
adding ms1
found!!
6262.90909501605 625.0
adding ms1
found!!
6264.71157343443 625.0
adding ms1
found!!
6266.51513569899 625.0

In [8]:
def _numpress_cfg(scheme):
    """Build a NumpressConfig for the given compression scheme.

    `scheme` is the byte-string name passed to setCompression (b"linear" or
    b"slof" in this notebook). The fixed point is estimated and the error
    tolerance is set to -1.0 for every configuration.
    """
    numpress = po.NumpressConfig()
    numpress.estimate_fixed_point = True
    numpress.numpressErrorTolerance = -1.0  # skip check, faster
    numpress.setCompression(scheme)
    return numpress


mzmlOut = po.MzMLFile()
opt = mzmlOut.getOptions()

# Mass/time arrays: linear numpress encoding.
mass_time_cfg = _numpress_cfg(b"linear")
# NOTE(review): -1 presumably leaves the linear fixed-point mass accuracy
# unset -- confirm against the NumpressConfig documentation.
mass_time_cfg.linear_fp_mass_acc = -1
opt.setNumpressConfigurationMassTime(mass_time_cfg)

# Intensity arrays: slof numpress encoding.
opt.setNumpressConfigurationIntensity(_numpress_cfg(b"slof"))
opt.setCompression(True)  # zlib compression

# Float data arrays get the same slof encoding when possible. This is best
# effort: the setter is not enabled in all versions of pyopenms, so a failure
# here is deliberately ignored.
try:
    opt.setNumpressConfigurationFloatDataArray(_numpress_cfg(b"slof"))
except Exception:
    pass

mzmlOut.setOptions(opt)

# Write the extracted spectra to the test file.
mzmlOut.store('ionMobilityTest.mzML', expOut)