In [61]:
import pandas as pd
import numpy as np
from collections import defaultdict
import ms_pred.magma.fragmentation as fe
from ms_pred.common.plot_utils import *
from ms_pred.dag_pred import joint_model
set_style()
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [62]:
test_smiles = "O=C(O)C=1C(=O)C(O)(CC(=O)C1N)C2OC(COC(=O)C)C(OC(=O)C(N=CS)=CC)C(OC3OC(C)C(O)C(OC)C3)C2O"
test_ionization = "[M+H]+"
inten_ckpt = f"../quickstart/iceberg/models/nist_iceberg_score.ckpt"
gen_ckpt = f"../quickstart/iceberg/models/nist_iceberg_generate.ckpt"

# Load joint model
model = joint_model.JointModel.from_checkpoints(
    inten_checkpoint=inten_ckpt, gen_checkpoint=gen_ckpt
)
outputs = model.predict_mol(
    smi=test_smiles,
    adduct=test_ionization,
    device="cpu",
    max_nodes=100,
    binned_out=False,
    threshold=0,
)
root_inchi = outputs["root_inchi"]
frags = outputs["frags"]

# Generate a fragmentation engine
engine = fe.FragmentEngine(mol_str=root_inchi, mol_str_type="inchi")
# Convert from frags dict into a list of mz, inten
mass_to_obj = defaultdict(lambda: {})
for k, val in frags.items():
    masses, intens, form  = val["mz_charge"], val["intens"], val["form"]
    for m, i in zip(masses, intens):
        if i <= 0:
            continue
        cur_obj = mass_to_obj[m]
        if cur_obj.get("inten", 0) > 0:
            # update
            if cur_obj.get("inten") < i:
                cur_obj["frag_hash"] = k
                cur_obj['form'] = form
            cur_obj["inten"] += i
        else:
            cur_obj["inten"] = i
            cur_obj["frag_hash"] = k
            cur_obj['form'] = form

max_inten = max(*[i["inten"] for i in mass_to_obj.values()], 1e-9)
mass_to_obj = {
    k: dict(inten=v["inten"] / max_inten, frag_hash=v["frag_hash"], 
            form=v['form'])
            
    for k, v in mass_to_obj.items()
}
# Ordenar los fragmentos por intensidad en orden descendente y seleccionar los 10 primeros
top_10_fragments = sorted(mass_to_obj.items(), key=lambda x: x[1]["inten"], reverse=True)[:10]

# Imprimir los 10 fragmentos más intensos
for mz, fragment_info in top_10_fragments:
    inten = fragment_info['inten']
    frag_hash = fragment_info['frag_hash']
    form = fragment_info['form']
    print(f"MZ: {mz}, Intensidad: {inten}, Fragmento Hash: {frag_hash}, Fórmula: {form}")
    # Obtener los MZ e intensidades de los fragmentos en listas separadas
fragment_mz = [item[0] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:10]]
fragment_intensidades = [item[1]['inten'] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:10]]

MZ: 661.1909159040001, Intensidad: 1.0, Fragmento Hash: 1786400d125fe351277672db0d9a0b3f1a418065f5786271cdf7e5d92fea1943, Fórmula: C27N2O15SH36
MZ: 128.016461232, Intensidad: 0.9612280350148291, Fragmento Hash: 4130b7945190d699c8d6023758a69bdcb4ff9bf335358e9656e4c4f10698cbad, Fórmula: C5NOSH6
MZ: 113.059705948, Intensidad: 0.8516001011997724, Fragmento Hash: 2edbae9ce2d8641b07443af587b093d3e80b08568e0f01ba7992ec0026b7eae6, Fórmula: C6O2H10
MZ: 390.1030868799998, Intensidad: 0.703631281295138, Fragmento Hash: b4ba4a594f3002505e679f67406d0191b5b97c8c3c9baeed7ef99181fb5bdbcb, Fórmula: C15NO11H17
MZ: 95.049141264, Intensidad: 0.6561164423033794, Fragmento Hash: d6e8b6ee2bd2d5db090d546a6a7497f7a349b8e37598a76a1211d7dba3593df6, Fórmula: C6OH9
MZ: 336.07139282799983, Intensidad: 0.6229825518717302, Fragmento Hash: 9a473d060ad391304e993513d5c32040b5fd3ccd0e66230c973a0f575e6131cd, Fórmula: C15NO8H16
MZ: 370.07687213199983, Intensidad: 0.575562506835458, Fragmento Hash: d30aeb3486e457959ab970d23

In [63]:

ruta_csv = "/data/home/javier_rodriguez/ms2net/quimicaR/pruebas_Rmd/coconut_df.csv"


df = pd.read_csv(ruta_csv)


In [64]:
column_names = df.columns
print(column_names)
inten_ckpt = f"../quickstart/iceberg/models/nist_iceberg_score.ckpt"
gen_ckpt = f"../quickstart/iceberg/models/nist_iceberg_generate.ckpt"
# inten_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_score.ckpt"
# gen_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_generate.ckpt"

# Load joint model
model = joint_model.JointModel.from_checkpoints(
    inten_checkpoint=inten_ckpt, gen_checkpoint=gen_ckpt
)

Index(['coconut_id', 'inchi', 'inchikey', 'SMILES', 'sugar_free_smiles',
       'molecular_formula', 'molecular_weight', 'citationDOI', 'textTaxa',
       'name', 'synonyms', 'NPL_score', 'number_of_carbons',
       'number_of_nitrogens', 'number_of_oxygens', 'number_of_rings',
       'total_atom_number', 'bond_count', 'found_in_databases',
       'murko_framework', 'alogp', 'apol', 'topoPSA'],
      dtype='object')


In [65]:
df = df.drop(df.index[0])
df = df[df['molecular_weight'] <= 800]
df.columns = ['smiles' if col == 'SMILES' else col for col in df.columns]


In [66]:
print(df.loc[1, 'smiles'])


O=C(O)C=1C(=O)C(O)(CC(=O)C1N)C2OC(COC(=O)C)C(OC(=O)C(N=CS)=CC)C(OC3OC(C)C(O)C(OC)C3)C2O


In [None]:
from collections import defaultdict

def iceberg(test_smiles, test_ionization):
    try:
        inten_ckpt = f"/home/javier_rodriguez/ms2net/mspred/ms-pred/quickstart/iceberg/models/nist_iceberg_score.ckpt"
        gen_ckpt = f"/home/javier_rodriguez/ms2net/mspred/ms-pred/quickstart/iceberg/models/nist_iceberg_generate.ckpt"
        # inten_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_score.ckpt"
        # gen_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_generate.ckpt"
        model = joint_model.JointModel.from_checkpoints(
            inten_checkpoint=inten_ckpt, gen_checkpoint=gen_ckpt)
        outputs = model.predict_mol(
            smi=test_smiles,
            adduct=test_ionization,
            device="cpu",
            max_nodes=100,
            binned_out=False,
            threshold=0,
        )
        root_inchi = outputs["root_inchi"]
        frags = outputs["frags"]
        # Generate a fragmentation engine
        engine = fe.FragmentEngine(mol_str=root_inchi, mol_str_type="inchi")

        # Convert from frags dict into a list of mz, inten
        mass_to_obj = defaultdict(lambda: {})
        for k, val in frags.items():
            masses, intens, form  = val["mz_charge"], val["intens"], val["form"]
            for m, i in zip(masses, intens):
                if i <= 0:
                    continue
                cur_obj = mass_to_obj[m]
                if cur_obj.get("inten", 0) > 0:
                    # update
                    if cur_obj.get("inten") < i:
                        cur_obj["frag_hash"] = k
                        cur_obj['form'] = form
                    cur_obj["inten"] += i
                else:
                    cur_obj["inten"] = i
                    cur_obj["frag_hash"] = k
                    cur_obj['form'] = form

        max_inten = max([i["inten"] for i in mass_to_obj.values()] + [1e-9])
        mass_to_obj = {
            k: dict(inten=v["inten"] / max_inten, frag_hash=v["frag_hash"], 
                    form=v['form'])
                    
            for k, v in mass_to_obj.items()
        }

        # Ordenar los fragmentos por intensidad en orden descendente y seleccionar los 10 primeros
        top_10_fragments = sorted(mass_to_obj.items(), key=lambda x: x[1]["inten"], reverse=True)[:10]

        # Obtener los MZ e intensidades de los fragmentos en listas separadas
        fragment_mz = [item[0] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:]]
        fragment_intensidades = [item[1]['inten'] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:]]
        return fragment_mz, fragment_intensidades
    except Exception as e:
        # Manejo del error, por ejemplo, imprimir un mensaje y devolver 'NA'
        print(f"Error: {e}")
        return 'NA', 'NA'


In [None]:
# df['smiles'] = df['smiles'].str.replace(': ', '=')


resultados = []

for index, row in df.iterrows():
    test_smiles = row['smiles']  # Asegúrate de tener una columna "SMILES" en tu DataFrame
    # print(test_smiles)
    # test_ionization = row['Precursor_type']
    test_ionization = row['PRECURSOR_TYPE']

    # Inicializa variables para los resultados
    fragment_mz = None
    fragment_intensidades = None

    # Verificar si test_smiles es una cadena antes de llamar a iceberg
    if isinstance(test_smiles, str):
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, test_ionization)
        except KeyError as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            print(f"Error: {e}")

    # Agregar los resultados al DataFrame resultados_df
    resultados.append({
        'SMILES': test_smiles,
        'FragmentMZ': fragment_mz,
        'FragmentIntensidades': fragment_intensidades
    })

# Convierte la lista de resultados en un nuevo DataFrame
resultados_df = pd.DataFrame(resultados)

# Añadir la columna de resultados al DataFrame original 'df'
df['FragmentMZ'] = resultados_df['FragmentMZ']
df['FragmentIntensidades'] = resultados_df['FragmentIntensidades']

In [None]:
sddg

In [67]:
def iceberg(test_smiles, test_ionization):
    inten_ckpt = f"/home/javier_rodriguez/ms2net/mspred/ms-pred/quickstart/iceberg/models/nist_iceberg_score.ckpt"
    gen_ckpt = f"/home/javier_rodriguez/ms2net/mspred/ms-pred/quickstart/iceberg/models/nist_iceberg_generate.ckpt"
    # inten_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_score.ckpt"
    # gen_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_generate.ckpt"
    model = joint_model.JointModel.from_checkpoints(
        inten_checkpoint=inten_ckpt, gen_checkpoint=gen_ckpt)
    outputs = model.predict_mol(
    smi=test_smiles,
    adduct=test_ionization,
    device="cpu",
    max_nodes=100,
    binned_out=False,
    threshold=0,
    )
    root_inchi = outputs["root_inchi"]
    frags = outputs["frags"]
    # Generate a fragmentation engine
    engine = fe.FragmentEngine(mol_str=root_inchi, mol_str_type="inchi")

    # Convert from frags dict into a list of mz, inten
    mass_to_obj = defaultdict(lambda: {})
    for k, val in frags.items():
        masses, intens, form  = val["mz_charge"], val["intens"], val["form"]
        for m, i in zip(masses, intens):
            if i <= 0:
                continue
            cur_obj = mass_to_obj[m]
            if cur_obj.get("inten", 0) > 0:
                # update
                if cur_obj.get("inten") < i:
                    cur_obj["frag_hash"] = k
                    cur_obj['form'] = form
                cur_obj["inten"] += i
            else:
                cur_obj["inten"] = i
                cur_obj["frag_hash"] = k
                cur_obj['form'] = form

    max_inten = max(*[i["inten"] for i in mass_to_obj.values()], 1e-9)
    mass_to_obj = {
        k: dict(inten=v["inten"] / max_inten, frag_hash=v["frag_hash"], 
                form=v['form'])
                
        for k, v in mass_to_obj.items()
    }

    # Ordenar los fragmentos por intensidad en orden descendente y seleccionar los 10 primeros
    top_10_fragments = sorted(mass_to_obj.items(), key=lambda x: x[1]["inten"], reverse=True)[:10]


    # Obtener los MZ e intensidades de los fragmentos en listas separadas
    fragment_mz = [item[0] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:]]
    fragment_intensidades = [item[1]['inten'] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:]]
    return fragment_mz, fragment_intensidades


In [69]:
from collections import defaultdict

def iceberg(test_smiles, test_ionization):
    try:
        # inten_ckpt = f"/home/javier_rodriguez/ms2net/mspred/ms-pred/quickstart/iceberg/models/nist_iceberg_score.ckpt"
        # gen_ckpt = f"/home/javier_rodriguez/ms2net/mspred/ms-pred/quickstart/iceberg/models/nist_iceberg_generate.ckpt"
        inten_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_score.ckpt"
        gen_ckpt = f"../quickstart/iceberg/models/canopus_iceberg_generate.ckpt"
        model = joint_model.JointModel.from_checkpoints(
            inten_checkpoint=inten_ckpt, gen_checkpoint=gen_ckpt)
        outputs = model.predict_mol(
            smi=test_smiles,
            adduct=test_ionization,
            device="cpu",
            max_nodes=100,
            binned_out=False,
            threshold=0,
        )
        root_inchi = outputs["root_inchi"]
        frags = outputs["frags"]
    except Exception as e:
        # Manejo del error, por ejemplo, imprimir un mensaje y asignar valores predeterminados
        print(f"Error: {e}")
        root_inchi = "Default_Root_Inchi"
        frags = {"Default_Frags": {"mz_charge": [0], "intens": [0], "form": ["Default_Form"]}}

    # Procesamiento de Resultados
    mass_to_obj = defaultdict(lambda: {})
    for k, val in frags.items():
        masses, intens, form = val["mz_charge"], val["intens"], val["form"]
        for m, i in zip(masses, intens):
            if i <= 0:
                continue
            cur_obj = mass_to_obj[m]
            if cur_obj.get("inten", 0) > 0:
                # Actualizar
                if cur_obj.get("inten") < i:
                    cur_obj["frag_hash"] = k
                    cur_obj['form'] = form
                cur_obj["inten"] += i
            else:
                cur_obj["inten"] = i
                cur_obj["frag_hash"] = k
                cur_obj['form'] = form


    print
    max_inten = max(*[i["inten"] for i in mass_to_obj.values()], 1e-9)
    mass_to_obj = {
        k: dict(inten=v["inten"] / max_inten, frag_hash=v["frag_hash"],
                form=v['form'])

        for k, v in mass_to_obj.items()
    }

    # Ordenar los fragmentos por intensidad en orden descendente y seleccionar los 10 primeros
    top_10_fragments = sorted(mass_to_obj.items(), key=lambda x: x[1]["inten"], reverse=True)[:10]

    # Obtener los MZ e intensidades de los fragmentos en listas separadas
    fragment_mz = [item[0] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:]]
    fragment_intensidades = [item[1]['inten'] for item in sorted(list(mass_to_obj.items()), key=lambda x: x[1]["inten"], reverse=True)[:]]
    
    return fragment_mz, fragment_intensidades


In [71]:

resultados = []

for index, row in df.iterrows():
    test_smiles = row['smiles']  # Asegúrate de tener una columna "SMILES" en tu DataFrame
    # print(test_smiles)
    # test_ionization = row['Precursor_type']
    test_ionization = "[M+H]+"

    # Inicializa variables para los resultados
    
    fragment_mz = None
    fragment_intensidades = None

    # Verificar si test_smiles es una cadena antes de llamar a iceberg
    if isinstance(test_smiles, str):
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, test_ionization)
        except KeyError as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            # print(f"Error: {e}")
            print(f"Error al procesar SMILES '{test_smiles}': {e}")
            continue 

    # Agregar los resultados al DataFrame resultados_df
    resultados.append({
        'SMILES': test_smiles,
        'FragmentMZ': fragment_mz,
        'FragmentIntensidades': fragment_intensidades
    })

# Convierte la lista de resultados en un nuevo DataFrame
resultados_df = pd.DataFrame(resultados)

# Añadir la columna de resultados al DataFrame original 'df'
df['FragmentMZ'] = resultados_df['FragmentMZ']
df['FragmentIntensidades'] = resultados_df['FragmentIntensidades']


Omitted undefined stereo
mitted undefined stereo
efined stereo
itted undefined stereo
tted undefined stereo
ted undefined stereo
ned stereo

 Omitted undefined stereo
ed undefined stereo
ed stereo
Omitted undefined stereo
d undefined stereo
 undefined stereo
undefined stereo
ndefined stereo
defined stereo
efined stereo
G: Omitted undefined stereo
 undefined stereo
 stereo
undefined stereo
stereo
fined stereo
ed stereo
d stereo
mitted undefined stereo
efined stereo
 stereo
 stereo
ARNING: Omitted undefined stereo
itted undefined stereo
fined stereo


Error: 'FragmentEngine' object has no attribute 'max_tree_depth'


[14:35:50] Explicit valence for atom # 3 B, 4, is greater than permitted
RDKit ERROR: [14:35:50] Explicit valence for atom # 3 B, 4, is greater than permitted
[14:35:50] Explicit valence for atom # 3 B, 4, is greater than permitted


TypeError: 'float' object is not iterable

In [54]:
import pandas as pd

# Asumiendo que tienes una función iceberg definida previamente
def iceberg(smiles, ionization):
    # Tu lógica para calcular fragmentos aquí
    pass

# df_prueba = df.head(10)
df = df[df['molecular_weight'] <= 800]
df_prueba = df.copy()  # Usar copy() para crear una copia del DataFrame
resultados = []
precursor_types = ["[M+H]+", "[M+Na]+", "[M+K]+", "[M+H-H2O]+", "[M+NH4]+", "[M+H-2H2O]+"]

for index, row in df_prueba.iterrows():
    test_smiles = row['SMILES']  # Asegúrate de tener una columna "SMILES" en tu DataFrame

    test_ionization = "[M+H]+"

    # Inicializa variables para los resultados
    fragment_mz = None
    fragment_intensidades = None

    # Verificar si test_smiles es una cadena antes de llamar a iceberg
    if isinstance(test_smiles, str):
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, test_ionization)
        except Exception as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            print(f"Error al procesar SMILES '{test_smiles}': {e}")
            continue  # Continuar con la siguiente iteración

    # Agregar los resultados al DataFrame resultados_df
    resultados.append({
        'SMILES': test_smiles,
        'FragmentMZ': fragment_mz,
        'FragmentIntensidades': fragment_intensidades
    })

# Convierte la lista de resultados en un nuevo DataFrame
resultados_df = pd.DataFrame(resultados)

# Añadir la columna de resultados al DataFrame original 'df'
df_prueba['FragmentMZ'] = resultados_df['FragmentMZ']
df_prueba['FragmentIntensidades'] = resultados_df['FragmentIntensidades']
df_prueba['PRECURSOR_TYPE'] = "[M+H]+"


KeyError: 'SMILES'

In [37]:
# df_prueba = df.head(10)
df = df[df['molecular_weight'] <= 1500]
df_prueba = df
resultados = []
precursor_types = ["[M+H]+", "[M+Na]+", "[M+K]+", "[M+H-H2O]+", "[M+NH4]+", "[M+H-2H2O]+"]

for index, row in df_prueba.iterrows():
    test_smiles = row['SMILES']  # Asegúrate de tener una columna "SMILES" en tu DataFrame

    test_ionization = "[M+H]+"

    # Inicializa variables para los resultados
    fragment_mz = None
    fragment_intensidades = None

    # Verificar si test_smiles es una cadena antes de llamar a iceberg
    if isinstance(test_smiles, str):
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, test_ionization)
        except KeyError as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            print(f"Error: {e}")

    # Agregar los resultados al DataFrame resultados_df
    resultados.append({
        'SMILES': test_smiles,
        'FragmentMZ': fragment_mz,
        'FragmentIntensidades': fragment_intensidades
    })

# Convierte la lista de resultados en un nuevo DataFrame
resultados_df = pd.DataFrame(resultados)

# Añadir la columna de resultados al DataFrame original 'df'
df_prueba['FragmentMZ'] = resultados_df['FragmentMZ']
df_prueba['FragmentIntensidades'] = resultados_df['FragmentIntensidades']
df_prueba['PRECURSOR_TYPE']="[M+H]+"




ARNING: Omitted undefined stereo
RNING: Omitted undefined stereo
NING: Omitted undefined stereo
ING: Omitted undefined stereo
NG: Omitted undefined stereo
G: Omitted undefined stereo
 undefined stereo
undefined stereo
stereo
 Omitted undefined stereo
Omitted undefined stereo
mitted undefined stereo
ARNING: Omitted undefined stereo
itted undefined stereo
stereo
o
ned stereo

ed stereo
d stereo
 stereo
ARNING: Omitted undefined stereo
stereo
tereo
NING: Omitted undefined stereo
ted undefined stereo
ereo
NG: Omitted undefined stereo
d undefined stereo
 Omitted undefined stereo
ndefined stereo
Omitted undefined stereo
defined stereo
mitted undefined stereo
efined stereo
mitted undefined stereo
efined stereo
itted undefined stereo
fined stereo
[12:05:37] Explicit valence for atom # 3 B, 4, is greater than permitted
RDKit ERROR: [12:05:37] Explicit valence for atom # 3 B, 4, is greater than permitted
[12:05:37] Explicit valence for atom # 3 B, 4, is greater than permitted


AttributeError: 'FragmentEngine' object has no attribute 'max_tree_depth'

In [None]:
df_prueba.to_csv('coconut_MH.csv', index=False)

In [18]:
import pandas as pd
from collections import defaultdict

# Lista de aductos o precursor types
precursor_types = ["[M+H]+", "[M+Na]+", "[M+K]+", "[M+H-H2O]+", "[M+NH4]+", "[M+H-2H2O]+"]

# Inicializa listas para las masas de aductos y masa del precursor
masas_aductos = {aducto: [] for aducto in precursor_types}
masa_precursor = []

# Calcula las masas de aductos y la masa del precursor para cada molécula
for index, row in df.iterrows():
    test_smiles = row['SMILES']

    # Llama a la función iceberg para cada tipo de aducto o precursor type
    for precursor_type in precursor_types:
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, precursor_type)
        except KeyError as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            print(f"Error: {e}")
            fragment_mz, fragment_intensidades = None, None

        # Añade las masas de fragmentos y la masa del precursor a las listas
        masas_aductos[precursor_type].append(fragment_mz[0] if fragment_mz else None)

# Añade las columnas al DataFrame original
for precursor_type in precursor_types:
    df[f'Masa{precursor_type}'] = masas_aductos[precursor_type]

# Guarda el DataFrame con las nuevas columnas
df.to_csv('datos_actualizados_con_masas.csv', index=False)




defined stereo
ereo
efined stereo
reo
fined stereo
eo
ined stereo
o
ned stereo

ed stereo
d stereo
 stereo
stereo
RNING: Omitted undefined stereo
tereo
NING: Omitted undefined stereo
ted undefined stereo
ned stereo
ereo
ed undefined stereo
ed stereo
reo
eo
o

ARNING: Omitted undefined stereo
itted undefined stereo
RNING: Omitted undefined stereo
NING: Omitted undefined stereo
ING: Omitted undefined stereo
NG: Omitted undefined stereo
G: Omitted undefined stereo
: Omitted undefined stereo
 Omitted undefined stereo
Omitted undefined stereo
mitted undefined stereo
ARNING: Omitted undefined stereo
itted undefined stereo
RNING: Omitted undefined stereo
tted undefined stereo
NING: Omitted undefined stereo
ted undefined stereo
ING: Omitted undefined stereo
ed undefined stereo
NG: Omitted undefined stereo
d undefined stereo
G: Omitted undefined stereo
 undefined stereo
: Omitted undefined stereo
undefined stereo
 Omitted undefined stereo
ndefined stereo
Omitted undefined stereo
defined stereo


KeyboardInterrupt: 

In [21]:
import pandas as pd

# Obtén las primeras 10 filas del DataFrame
df_prueba = df.head(10)

# Lista de aductos o precursor types
precursor_types = ["[M+H]+", "[M+Na]+", "[M+K]+", "[M+H-H2O]+", "[M+NH4]+", "[M+H-2H2O]+"]

resultados = []

for index, row in df_prueba.iterrows():
    test_smiles = row['SMILES']

    # Inicializa listas para las masas de aductos y masa del precursor
    masas_aductos = {aducto: [] for aducto in precursor_types}
    
    # Llama a la función iceberg para cada tipo de aducto o precursor type
    for precursor_type in precursor_types:
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, precursor_type)
        except KeyError as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            print(f"Error: {e}")
            fragment_mz, fragment_intensidades = None, None

        # Añade las masas de fragmentos y la masa del precursor a las listas
        masas_aductos[precursor_type] = fragment_mz if fragment_mz else None

    # Agregar los resultados al DataFrame resultados_df
    resultados.append({
        'SMILES': test_smiles,
        'Masa[M+H]+': masas_aductos["[M+H]+"],
        'Masa[M+Na]+': masas_aductos["[M+Na]+"],
        'Masa[M+K]+': masas_aductos["[M+K]+"],
        'Masa[M+H-H2O]+': masas_aductos["[M+H-H2O]+"],
        'Masa[M+NH4]+': masas_aductos["[M+NH4]+"],
        'Masa[M+H-2H2O]+': masas_aductos["[M+H-2H2O]+"],
    })

# Convierte la lista de resultados en un nuevo DataFrame
resultados_df = pd.DataFrame(resultados)

# Añadir la columna de resultados al DataFrame original 'df_prueba'
df_prueba = pd.concat([df_prueba, resultados_df], axis=1)

# Guarda el DataFrame con las nuevas columnas
df_prueba.to_csv('datos_prueba_con_masas.csv', index=False)


: Omitted undefined stereo
 Omitted undefined stereo
[09:12:28]

In [27]:
df_prueba = df.head(10)
resultados = []
precursor_types = ["[M+H]+", "[M+Na]+", "[M+K]+", "[M+H-H2O]+", "[M+NH4]+", "[M+H-2H2O]+"]

for index, row in df_prueba.iterrows():
    test_smiles = row['SMILES']  # Asegúrate de tener una columna "SMILES" en tu DataFrame
    # print(test_smiles)
    # test_ionization = row['Precursor_type']
        


    test_ionization = "[M+H]+"

    # Inicializa variables para los resultados
    fragment_mz = None
    fragment_intensidades = None

    # Verificar si test_smiles es una cadena antes de llamar a iceberg
    if isinstance(test_smiles, str):
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, test_ionization)
        except KeyError as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            print(f"Error: {e}")

    # Agregar los resultados al DataFrame resultados_df
    resultados.append({
        'SMILES': test_smiles,
        'FragmentMZ': fragment_mz,
        'FragmentIntensidades': fragment_intensidades
    })

# Convierte la lista de resultados en un nuevo DataFrame
resultados_df = pd.DataFrame(resultados)

# Añadir la columna de resultados al DataFrame original 'df'
df_prueba['FragmentMZ'] = resultados_df['FragmentMZ']
df_prueba['FragmentIntensidades'] = resultados_df['FragmentIntensidades']
df_prueba['PRECURSOR_TYPE']="[M+H]+"
df_prueba['PRECURSOR_TYPE']="[M+H]+"
1.00728 
df['molecular_weight']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prueba['FragmentMZ'] = resultados_df['FragmentMZ']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prueba['FragmentIntensidades'] = resultados_df['FragmentIntensidades']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prueba['PRECURSOR_TYPE']="[M+H]+"


In [25]:
import pandas as pd

# Obtén las primeras 10 filas del DataFrame
df_prueba = df.head(10)

# Lista de aductos o precursor types
precursor_types = ["[M+H]+", "[M+Na]+", "[M+K]+", "[M+H-H2O]+", "[M+NH4]+", "[M+H-2H2O]+"]

# Añade las columnas para los resultados
df_prueba['FragmentMZ'] = None
df_prueba['FragmentIntensidades'] = None
df_prueba['PRECURSOR_TYPE'] = None  # Nueva columna para el precursor type

# Itera sobre las filas y los aductos
for index, row in df_prueba.iterrows():
    test_smiles = row['SMILES']
    
    # Itera sobre los aductos
    for precursor_type in precursor_types:
        try:
            fragment_mz, fragment_intensidades = iceberg(test_smiles, [precursor_type])
        except KeyError as e:
            # Manejar la excepción (puedes imprimir un mensaje, omitir la fila, etc.)
            print(f"Error: {e}")
            fragment_mz, fragment_intensidades = None, None

        # Actualiza las columnas con los resultados
        df_prueba.at[index, 'FragmentMZ'] = fragment_mz
        df_prueba.at[index, 'FragmentIntensidades'] = fragment_intensidades
        df_prueba.at[index, 'PRECURSOR_TYPE'] = precursor_type

# Guarda el DataFrame con las nuevas columnas
df_prueba.to_csv('datos_prueba_con_masas.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prueba['FragmentMZ'] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prueba['FragmentIntensidades'] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_prueba['PRECURSOR_TYPE'] = None  # Nueva columna para el precursor type


TypeError: unhashable type: 'list'

In [28]:
# Guarda el DataFrame de prueba con las nuevas columnas en un archivo CSV
df_prueba.to_csv('resultado_prueba.csv', index=False)
