In [1]:
#
# Import libraries
#

import os
import pandas as pd
from dotmap import DotMap
import requests
import json
import numpy as np

In [2]:
# Short keys of the four datasets handled by this notebook.
modes = ['cp', 'cn', 'hp', 'hn']
f2i_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metabolomics\PESA\OriginalFiles\RBR_f2i.xlsx"

# Worksheet of the f2i workbook that holds the table of each dataset.
f2i_sheets = {'cp': 'C18P', 'cn': 'C18N', 'hp': 'HILP', 'hn': 'HILN'}

# Parse the workbook a single time: given a list of sheet names, read_excel
# returns a {sheet name: DataFrame} dict, so the file is not reopened and
# re-parsed once per sheet.
f2i_tables = pd.read_excel(f2i_path, sheet_name=list(f2i_sheets.values()))

# f2i.<mode> / f2i[<mode>] -> DataFrame read from the matching worksheet.
f2i = DotMap({mode: f2i_tables[sheet] for mode, sheet in f2i_sheets.items()})

In [3]:
#
# Load the normalised matrix (Xm_norm.tsv), using its 'Seqn' column as index.
# Its columns are matched against the 'fid' values of the f2i tables later on.
#

xm_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metabolomics\PESA\WorkingFiles\Xm_norm.tsv"
xm = pd.read_csv(xm_path, sep='\t')
xm = xm.set_index('Seqn')

# Relabel every row as 'seqn_<original Seqn value>'.
xm.index = [f'seqn_{seqn}' for seqn in xm.index]

In [4]:
#
# Adducts kept when the CMM results are filtered:
# posAdd is applied to the cp/hp tables, negAdd to the cn/hn tables.
#

posAdd = ["M+H", "M+2H", "M+Na", "M+K", "M+H-H2O", "M+H+HCOONa"]
# Water loss is written with the letter O ("H2O"), consistent with posAdd.
# The former spelling "M-H-H20" (digit zero) cannot equal an "H2O" adduct
# label, so the isin() filter would silently discard those annotations.
negAdd = ["M-H", "M-2H", "M+Cl", "M-H-H2O", "M-H+HCOONa", "M+Na-2H", "M+HCOOH-H"]

In [5]:
cmm = DotMap()

uri = "http://ceumass.eps.uspceu.es/mediator/api/v3/batch"


def query_cmm(masses, ion_mode):
    """Send one batch search to the CMM endpoint and return its results.

    Parameters
    ----------
    masses : list
        m/z values to search (sent with masses_mode "mz").
    ion_mode : str
        Value of the "ion_mode" field: "positive" or "negative".

    Returns
    -------
    pandas.DataFrame
        Built from the 'results' entry of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    """
    res = requests.post(
        uri, headers={'Content-Type': 'application/json; charset=utf-8'},
        json={
            "metabolites_type": "all-except-peptides",
            "databases": ["all-except-mine"],
            "masses_mode": "mz",
            "ion_mode": ion_mode,
            "adducts": ['all'],
            "tolerance": 10.0,
            "tolerance_mode": "ppm",
            "masses": masses
        }
    )
    # Fail here with the HTTP status instead of later with an unrelated
    # KeyError / JSON decoding error when the server rejects the request.
    res.raise_for_status()
    return pd.DataFrame(res.json()['results'])


# Ion mode requested for each dataset, listed in the order the requests are sent.
ion_modes = {'cp': 'positive', 'hp': 'positive', 'cn': 'negative', 'hn': 'negative'}

for mode, ion_mode in ion_modes.items():
    features = f2i[mode]
    # Only search the features whose 'fid' is present among the columns of xm.
    in_xm = np.isin(features.fid, xm.columns)
    cmm[mode] = query_cmm(features['Apex m/z'][in_xm].to_list(), ion_mode)

In [6]:
# Keep only the rows whose adduct is in the accepted list of each table:
# posAdd for cp/hp, negAdd for cn/hn.
for mode, accepted in (('cp', posAdd), ('hp', posAdd), ('cn', negAdd), ('hn', negAdd)):
    hits = cmm[mode]
    cmm[mode] = hits[np.isin(hits.adduct, accepted)]

In [7]:
# Rename the CMM result columns to the headers used for TP (TurboPutative).
# Columns not listed here keep their original name.
tp_headers = {
    'identifier': 'Identifier',
    'EM': 'Experimental mass',
    'adduct': 'Adduct',
    'error_ppm': 'mz Error (ppm)',
    'molecular_weight': 'Molecular Weight',
    'name': 'Name',
    'formula': 'Formula',
}

for mode in modes:
    cmm[mode] = cmm[mode].rename(columns=tp_headers)

In [8]:
# Values of the 'Identifier' column whose rows are left out of the
# exported CMM_<mode>.tsv files.
remid = [
    188282,
]

In [9]:
# Write one tab-separated CMM table per dataset, without the rows whose
# Identifier is listed in remid.
for mode in modes:
    table = cmm[mode]
    keep = ~np.isin(table.Identifier, remid)
    table[keep].to_csv(f'CMM_{mode}.tsv', sep='\t', index=False)

In [10]:
# Write one tab-separated feature-info table per dataset.
for mode in modes:
    # Apex m/z and RT of each feature, indexed by feature id.
    feature_meta = f2i[mode].set_index('fid')[['Apex m/z', 'RT [min]']]
    # Inner join: only features that are also columns of xm are kept, and
    # each row of xm becomes an extra column.
    feature_table = feature_meta.join(xm.T, how='inner')
    # The feature id moves from the index to a 'FeatureInfo_Name' column.
    feature_table = feature_table.reset_index(names='FeatureInfo_Name')
    feature_table.to_csv(f'FInfo_{mode}.tsv', sep='\t', index=False)