In [35]:
import numpy as np
import pandas as pd
from metaspace.sm_annotation_utils import SMInstance

In [36]:
# return the unique mass values of the annotations (+isotopes) of a particular METASPACE dataset
# min_annot is the annotation count that a database's results must exceed for them to be used
def retrieve_metaspace_msi_mz(msi_id,min_annot=15,n_isotopes=3):
    """Return the unique annotated m/z values of one METASPACE dataset.

    Every database listed in the dataset's ``database_details`` is queried.
    A database contributes only when its result table has strictly more
    than ``min_annot`` rows. For each contributing row, the ``'mz'`` value
    of the leading ``n_isotopes`` entries of its ``'isotopeImages'`` cell
    is collected and rounded to 4 decimals.

    Relies on the module-level ``sm`` client, which must exist at call time.

    Parameters
    ----------
    msi_id : dataset identifier, passed to ``sm.dataset(id=...)``.
    min_annot : int, default 15
        Row count a database's results must exceed to be used.
    n_isotopes : int or None, default 3
        Number of leading isotope entries read per annotation. Annotations
        with fewer entries contribute what they have; ``None`` reads all.

    Returns
    -------
    numpy.ndarray
        Sorted 1-D float array of unique rounded m/z values (empty when
        nothing qualifies).
    """
    msi = sm.dataset(id=msi_id)
    mz_values = []
    for db in msi.database_details:
        annot_df = msi.results(database=(db['id']))
        n_annot = annot_df.shape[0]
        # Skip empty result tables (they may lack the column altogether)
        # and databases that do not exceed the annotation threshold.
        if n_annot == 0 or n_annot <= min_annot:
            continue
        # Iterate over the cell values directly: the result index is not
        # guaranteed to be 0..n-1, so integer-key lookups on it are unsafe.
        for isotopes in annot_df['isotopeImages']:
            # Slicing (rather than indexing 0..2) tolerates short lists.
            for peak in isotopes[:n_isotopes]:
                mz = peak.get('mz')
                if mz is not None:  # entries without a mass cannot be rounded
                    mz_values.append(mz)

    rounded = np.around(np.asarray(mz_values, dtype=float), decimals=4)
    return np.unique(rounded)

# count, for every unique mass value, how many of the given METASPACE datasets contain it
def unique_mz_over_msi_set(df,min_annot=15):
    """Count in how many datasets each unique m/z value occurs.

    Each entry of ``df.index`` is used as a dataset id and handed to
    ``retrieve_metaspace_msi_mz``; every value in the returned array adds
    one to that value's tally.

    Parameters
    ----------
    df : object with an ``index`` attribute (e.g. a pandas DataFrame)
        Its index entries are the dataset ids to query.
    min_annot : int, default 15
        Forwarded unchanged to ``retrieve_metaspace_msi_mz``.

    Returns
    -------
    dict
        Maps each m/z value to the number of datasets that returned it.
    """
    full_annot_count = {}
    for msi_id in df.index:
        peaks = retrieve_metaspace_msi_mz(msi_id,min_annot=min_annot)
        for mz in peaks:
            # dict.get with a default replaces the explicit "!= None" test
            full_annot_count[mz] = full_annot_count.get(mz, 0) + 1
    return full_annot_count

def return_peaks_list_from_dict(peaks_dict,df,annot_perct=0.05):
    """Select the m/z values that occur in enough datasets.

    A key of ``peaks_dict`` is kept when its count is strictly greater than
    ``len(df.index) * annot_perct``. Keys come back in the dictionary's
    iteration order.

    Parameters
    ----------
    peaks_dict : dict mapping an m/z value to its dataset count.
    df : object whose ``index`` length gives the number of datasets.
    annot_perct : float, default 0.05
        Fraction of the datasets that a count must exceed.

    Returns
    -------
    list of the retained keys.
    """
    return [
        mz
        for mz, n_datasets in peaks_dict.items()
        if n_datasets > len(df.index)*annot_perct
    ]

In [37]:
# Create the METASPACE client. It is bound to the module-level name `sm`,
# which retrieve_metaspace_msi_mz reads as a global when it is called.
sm = SMInstance()
# Fetch the metadata table that the example below filters.
# NOTE(review): presumably one row per dataset, indexed by dataset id
# (unique_mz_over_msi_set treats the index entries as ids) - confirm.
metadata = sm.get_metadata()
metadata.columns # display the metadata fields available for filtering

Index(['Data_Type', 'Sample_Information.Organism',
       'Sample_Information.Organism_Part', 'Sample_Information.Condition',
       'Sample_Information.Sample_Growth_Conditions',
       'Sample_Preparation.Sample_Stabilisation',
       'Sample_Preparation.Tissue_Modification',
       'Sample_Preparation.MALDI_Matrix',
       'Sample_Preparation.MALDI_Matrix_Application',
       'Sample_Preparation.Solvent', 'MS_Analysis.Polarity',
       'MS_Analysis.Ionisation_Source', 'MS_Analysis.Analyzer',
       'MS_Analysis.Detector_Resolving_Power.Resolving_Power',
       'MS_Analysis.Detector_Resolving_Power.mz',
       'MS_Analysis.Pixel_Size.Xaxis', 'MS_Analysis.Pixel_Size.Yaxis',
       'Additional_Information.Supplementary',
       'Submitted_By.Submitter.First_Name', 'Submitted_By.Submitter.Email',
       'Submitted_By.Submitter.Surname',
       'Submitted_By.Principal_Investigator.First_Name',
       'Submitted_By.Principal_Investigator.Email',
       'Submitted_By.Principal_Investigator

In [38]:
# Example: generate the internal calibrating ions from a set of similar public MSI datasets.
# Here the datasets count as similar because they share the acquisition polarity 'Positive' and the organism part 'Brain'.

# Keep the metadata rows that match both criteria (polarity and organ).
msi_subset = metadata[
    (metadata['MS_Analysis.Polarity'] == "Positive")
    & (metadata['Sample_Information.Organism_Part'] == "Brain")
]
# Tally, for each m/z value, the number of selected datasets containing it.
peaks_dict = unique_mz_over_msi_set(msi_subset)

# annot_perct is the fraction of the selected METASPACE datasets that a
# mass value's count must exceed for the value to be kept
peaks_list = return_peaks_list_from_dict(
    peaks_dict,
    msi_subset,
    annot_perct=0.1,
)