In [1]:
#
# Import libraries
#

import os
import pandas as pd
from dotmap import DotMap
import requests
import json
import numpy as np
from tqdm import tqdm
import time

In [2]:
# Dataset keys used throughout the notebook; 'cp' is read from sheet 'C18P'
# and 'cn' from sheet 'C18N' of the same workbook.
modes = ['cp', 'cn']
f2i_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metabolomics\ALDH4\OriginalFiles\f2i.xlsx"
sheet_by_mode = dict(zip(modes, ('C18P', 'C18N')))

# One table per key, each indexed by its 'fid' column.
f2i = DotMap({
    mode: pd.read_excel(f2i_path, sheet_name=sheet).set_index('fid')
    for mode, sheet in sheet_by_mode.items()
})

In [3]:
#
# Read Xm_norm.tsv and prefix its row labels with 'seqn_'
#

xm_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Metabolomics\ALDH4\WorkingFiles\Xm_norm.tsv"
xm = pd.read_csv(xm_path, index_col=0, sep='\t')

# Every row label becomes 'seqn_<label>'; the first file column is the index.
xm.index = list(map('seqn_{}'.format, xm.index))

In [4]:
#
# Load the filtered-feature table (first column becomes the index)
#

mfilt_path = r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Data\Analysis\00-FeatureSelection\ALDH4\mfilt.tsv"
mfilt = pd.read_csv(mfilt_path, index_col=0, sep='\t')

In [5]:
#
# Adducts
#
# Adduct labels kept for each ion mode. posAdd is sent with the positive-mode
# query and both lists are used to filter the returned `adduct` column, so each
# label must match the service's spelling exactly.
#

posAdd = ["M+H", "M+2H", "M+Na", "M+K", "M+H-H2O", "M+H+HCOONa", "M+NH4"]
# Water loss is "H2O" with the letter O (as in posAdd). The previous "H20"
# (digit zero) could never match, so those hits were silently filtered out.
negAdd = ["M-H", "M-2H", "M+Cl", "M-H-H2O", "M-H+HCOONa", "M+Na-2H"]

In [6]:
# 'Apex m/z' of every filtered feature that is also present in the 'cp' table.
in_cp = np.isin(mfilt.index, f2i.cp.index)
mz_cp = f2i.cp.loc[mfilt.index[in_cp], 'Apex m/z'].tolist()

# Split the flat list into consecutive chunks of at most n values.
n = 5
mz = [mz_cp[start:start + n] for start in range(0, len(mz_cp), n)]

In [7]:
cmm = DotMap()

uri = "http://ceumass.eps.uspceu.es/mediator/api/v3/batch"

# C18P: post one batch query per chunk of m/z values (positive ion mode,
# restricted to posAdd) and pool the 'results' entries of every response.
res_all = []

for mzi in tqdm(mz):
    res = requests.post(
        uri,
        headers={'Content-Type': 'application/json; charset=utf-8'},
        json={
            "metabolites_type": "all-except-peptides",
            "databases": ["all-except-mine"],
            "masses_mode": "mz",
            "ion_mode": "positive",
            "adducts": posAdd,
            "tolerance": 10.0,
            "tolerance_mode": "ppm",
            "masses": mzi
        },
        # Without a timeout a stalled connection would block the loop forever.
        timeout=120,
    )
    # Fail on HTTP errors here rather than later on an unparsable body or a
    # missing 'results' key.
    res.raise_for_status()
    res_all.extend(res.json()['results'])
    time.sleep(2)  # pause between consecutive requests

# One row per returned result.
cmm.cp = pd.DataFrame(res_all)

100%|██████████| 137/137 [05:16<00:00,  2.31s/it]


In [8]:
# 'Apex m/z' of every filtered feature that is also present in the 'cn' table.
in_cn = np.isin(mfilt.index, f2i.cn.index)
mz_cn = f2i.cn.loc[mfilt.index[in_cn], 'Apex m/z'].tolist()

# Split the flat list into consecutive chunks of at most n values.
n = 2  # chunk size
mz = [mz_cn[start:start + n] for start in range(0, len(mz_cn), n)]

In [9]:
# C18N: post one batch query per chunk of m/z values (negative ion mode, all
# adducts requested) and pool the 'results' entries of every response.
res_all = []

for mzi in tqdm(mz):
    res = requests.post(
        uri,
        headers={'Content-Type': 'application/json; charset=utf-8'},
        json={
            "metabolites_type": "all-except-peptides",
            "databases": ["all-except-mine"],
            "masses_mode": "mz",
            "ion_mode": "negative",
            "adducts": ['all'],
            "tolerance": 10.0,
            "tolerance_mode": "ppm",
            "masses": mzi
        },
        # Without a timeout a stalled connection would block the loop forever.
        timeout=120,
    )
    # Fail on HTTP errors here rather than later on an unparsable body or a
    # missing 'results' key.
    res.raise_for_status()
    res_all.extend(res.json()['results'])
    time.sleep(2)  # pause between consecutive requests

# One row per returned result.
cmm.cn = pd.DataFrame(res_all)

100%|██████████| 154/154 [05:42<00:00,  2.22s/it]


In [10]:
# Keep, for each table, only the rows whose adduct is in that mode's list.
cmmF = DotMap()
for mode, allowed in (('cp', posAdd), ('cn', negAdd)):
    hits = cmm[mode]
    cmmF[mode] = hits[np.isin(hits.adduct, allowed)]

In [11]:
# Rename the query-result columns to the headers TP expects
# (TP presumably refers to TurboPutative; confirm).
tp_columns = {
    'identifier': 'Identifier',
    'EM': 'Experimental mass',
    'adduct': 'Adduct',
    'error_ppm': 'mz Error (ppm)',
    'molecular_weight': 'Molecular Weight',
    'name': 'Name',
    'formula': 'Formula',
}

for mode in modes:
    cmmF[mode] = cmmF[mode].rename(columns=tp_columns)

In [12]:
# Values of the 'Identifier' column whose rows are left out when the
# CMM tables are written to disk.
remid = [
    188282,
]

In [13]:
# Write one tab-separated CMM_<mode>.tsv per mode, leaving out the rows whose
# Identifier is listed in remid.
for mode in modes:
    table = cmmF[mode]
    keep = ~np.isin(table.Identifier, remid)
    table[keep].to_csv(f'CMM_{mode}.tsv', sep='\t', index=False)

In [None]:
# Write one tab-separated FInfo_<mode>.tsv per mode: 'Apex m/z' and 'RT [min]'
# of the filtered features, inner-joined with the transposed xm matrix.
for mode in modes:
    features = f2i[mode]
    selected = mfilt.index[np.isin(mfilt.index, features.index)]
    info = features.loc[selected, ['Apex m/z', 'RT [min]']]
    merged = info.join(xm.T, how='inner')
    # The former index is written as the first column, 'FeatureInfo_Name'.
    merged.reset_index(names='FeatureInfo_Name').to_csv(
        f'FInfo_{mode}.tsv', sep='\t', index=False
    )
