In [1]:
import warnings
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter (no category or module given), so it
# also hides deprecation warnings from the libraries used below — consider
# narrowing it if a library/version mismatch is being debugged.
warnings.filterwarnings("ignore")

In [2]:
import sys
from pathlib import Path

from PySide2.QtWidgets import QFileDialog, QApplication
from PySide2.QtCore import Qt

from corems.mass_spectrum.input.massList import ReadMassList
from corems.molecular_id.search.priorityAssignment import OxygenPriorityAssignment


### Get the full path of the file using PySide2 
PySide2 must be installed separately. It is not a CoreMS requirement, so install it with one of:
- pip install PySide2, or
- pip3 install PySide2, or
- python -m pip install PySide2

In [3]:
# Qt allows only one QApplication per process. Reuse the existing instance when
# this cell is re-run in the same kernel; constructing a second one raises
# RuntimeError in PySide2.
app = QApplication.instance() or QApplication(sys.argv)

In [4]:
# Ask the user to pick the input mass list through a Qt file dialog.
file_dialog = QFileDialog()
# Request that the dialog window stays above other windows.
# NOTE(review): getOpenFileName is a static convenience function in Qt, so the
# flags set on this instance may not apply to the dialog it opens — confirm.
file_dialog.setWindowFlags(Qt.WindowStaysOnTopHint)

# getOpenFileName returns a (path, selected_filter) pair; only the path is kept.
file_location = file_dialog.getOpenFileName()[0]
app.quit()

# The path is empty when the dialog is cancelled. Fail here with a clear
# message instead of passing an empty path to the cells below.
if not file_location:
    raise RuntimeError("No file was selected; re-run this cell and choose a mass list file.")

### Load the mass spectrum data from a text mass list in centroid mode (default behaviour)

In [5]:
# Parse the selected mass list file and build the mass spectrum object from it.
# NOTE(review): polarity=-1 presumably selects negative ion mode — confirm
# against the CoreMS documentation.
mass_list_reader = ReadMassList(file_location)
mass_spectrum = mass_list_reader.get_mass_spectrum(polarity=-1)

# Show which spectrum class was created.
print(mass_spectrum)

<corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid object at 0x0000021BF340E948>


### Mass error settings

In [6]:
# Configure the mass error window used by the molecular formula search.
search_settings = mass_spectrum.molecular_search_settings

# The error method is set to the string 'None', not the Python None object.
search_settings.error_method = 'None'

# Accepted m/z error window, lower and upper bound.
# NOTE(review): units are presumably ppm — confirm against the CoreMS docs.
search_settings.min_mz_error = -1
search_settings.max_mz_error = 1

## Oxygen prioritization approach
The oxygen priority assignment algorithm automatically finds the best oxygen series across a broad m/z range (one peak for every 14 Da step in m/z) and uses this series as the reference for a top-down priority assignment. 
The steps are: 
1. Sets Kendrick base to C1H2O1
2. Kendrick filter using DBSCAN
3. Identify the most abundant peak 
4. Search for molecular formula candidates for classes Oo, then OoSs, then OoNn
5. Stop the search on the first candidate found, or store all then select the best candidate based on mass error, or another metric 
6. "Walk" in 14 Da increments in ascending and descending m/z order, picking the most abundant peak at each step, and repeat steps 3, 4, and 5.
7. Sort the identified series by peak height in reverse order
8. Use heteroatom class order for an assignment of all peaks
9. Look for non-oxygen classes, i.e., Nn, NnOo


In [7]:
# Configure the element ranges and ion types for the molecular formula search.
search_settings = mass_spectrum.molecular_search_settings

# Value pair assigned to each element in usedAtoms. N and S are set to (0, 0)
# and Cl to (0, 1).
atom_ranges = {
    'C': (1, 90),
    'H': (4, 200),
    'O': (0, 20),
    'N': (0, 0),
    'S': (0, 0),
    'Cl': (0, 1),
}
for atom_symbol, count_range in atom_ranges.items():
    search_settings.usedAtoms[atom_symbol] = count_range

# Ion-type flags: protonated and adduct are switched on, radical is switched off.
search_settings.isProtonated = True
search_settings.isRadical = False
search_settings.isAdduct = True

In [8]:
# Run the oxygen priority assignment on the configured mass spectrum.
# NOTE(review): the recorded output of this cell ends with
# "TypeError: sort_by_abundance() got an unexpected keyword argument 'reverse'",
# raised inside the library — this looks like a CoreMS version mismatch; confirm
# the installed version before relying on the export below.
oxygen_assignment = OxygenPriorityAssignment(mass_spectrum)
oxygen_assignment.run()


Started Find Oxygen Peaks series
Estimated number of clusters: 9
Estimated number of noise points: 180
199097933.52905455 1147469824.0
min_mz 144.86974130000002
max_mz 680.3056276000001
Finished Find Oxygen Peaks series


TypeError: sort_by_abundance() got an unexpected keyword argument 'reverse'

### Export data to CSV 
There is no need to add a filename suffix: the function "to_csv()" sets the suffix to .csv automatically.

In [14]:
# Build the output path from the input path: drop the input file's extension
# and append a tag describing this assignment run.
# Path.with_suffix("") removes only the final extension of the file name, so it
# works for any extension and is not confused by ".txt" appearing elsewhere in
# the path. as_posix() keeps forward slashes in the resulting string.
input_stem = Path(file_location).with_suffix("").as_posix()

output_file = input_stem + "_Oxygen_Priority_Lowest_Error_no_N"

print(output_file)

# No ".csv" is appended here; to_csv() is expected to set the suffix itself.
mass_spectrum.to_csv(output_file)

C:/Users/eber373/Desktop/Data/Fendorf_DISHR_15_14_A_19_Rep1_14Nov19_AlderInf_IATp1_1_01_49979_Oxygen_Priority_Lowest_Error_no_N
