In [1]:
import glob
import json
import os
import re
import time
import wget
import urllib.parse
import argparse


import numpy as np
import pandas as pd
import pubchempy as pcp


from pybatchclassyfire import *
from pandas import json_normalize
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem
from rdkit.Chem import rdFMCS
from rdkit.Chem import PandasTools

import plotly.express as px

def isNaN(string):
    return string != string

INFO:rdkit:Enabling RDKit 2021.09.4 jupyter extensions


In [2]:
def sirius_postproc(ms1data, sirius_candidate_json, file_id, db):
    
    msp = pd.read_csv(ms1data)
    msp["SIRIUSCSV"] = np.nan
    if not os.path.isdir(file_id):
        os.mkdir(file_id)
    # for each mz
    for mz, row in msp.iterrows():

        for file in sirius_candidate_json:

            if str(msp["premz"][mz]) in file:
                #print(file)
                json_dirALL = next(os.walk(file))[1]

                if len(json_dirALL) == 1:
                    #print(json_dirALL)


                    # structure file path

                    sub_sub_dirALL_structure_can = (file
                        + "/"
                        + json_dirALL[0]
                        + "/structure_candidates.tsv"
                    )
                    #print(sub_sub_dirALL_structure_can)
                    # formula file path
                    sub_sub_dirALL_formula_can = (file
                        + "/"
                        + json_dirALL[0]
                        + "/formula_candidates.tsv"
                    )
                    #print(sub_sub_dirALL_formula_can)
                    # class file path
                    ALL_Canopus_csv = file + "/canopus_compound_summary.tsv"
                    #print(ALL_Canopus_csv)
                    # if both structure files exist
                    if (
                        os.path.exists(sub_sub_dirALL_structure_can)
                        and len(pd.read_csv(sub_sub_dirALL_structure_can, sep="\t")) > 0
                    ):
                        if (
                             os.path.exists(sub_sub_dirALL_formula_can)
                            and len(pd.read_csv(sub_sub_dirALL_formula_can, sep="\t"))
                            > 0
                        ):
                            ALL_structure_csv = pd.read_csv(
                                sub_sub_dirALL_structure_can, sep="\t"
                            )
                            ALL_formula_csv = pd.read_csv(
                                sub_sub_dirALL_formula_can, sep="\t"
                            )
                            ALL_Canopus = pd.read_csv(ALL_Canopus_csv, sep="\t")

                            # Add the structure and formula files together
                            for structure, rows in ALL_structure_csv.iterrows():
                                for formula, rows in ALL_formula_csv.iterrows():
                                    if (
                                        ALL_structure_csv["formulaRank"][structure]
                                        == ALL_formula_csv["rank"][formula]
                                    ):
                                        ALL_structure_csv.loc[
                                            structure, "SiriusScore"
                                        ] = ALL_formula_csv["SiriusScore"][formula]
                                        ALL_structure_csv.loc[
                                            structure, "numExplainedPeaks"
                                        ] = ALL_formula_csv["numExplainedPeaks"][
                                            formula
                                        ]
                                        ALL_structure_csv.loc[
                                            structure, "explainedIntensity"
                                        ] = ALL_formula_csv["explainedIntensity"][
                                            formula
                                        ]
                                        # ALL_structure_csv.loc[structure, "SuspectListEntry"] = "FALSE"
                                        if len(ALL_Canopus) > 0:
                                            if (
                                                ALL_formula_csv["molecularFormula"][
                                                    formula
                                                ]
                                                == ALL_Canopus["molecularFormula"][0]
                                            ):                                            
                                                ALL_structure_csv.loc[
                                                    structure, "subclass"
                                                ] = ALL_Canopus["ClassyFire#subclass"][0]
                                                ALL_structure_csv.loc[
                                                    structure, "class"
                                                ] = ALL_Canopus["ClassyFire#class"][0]
                                                ALL_structure_csv.loc[
                                                    structure, "superclass"
                                                ] = ALL_Canopus["ClassyFire#superclass"][0]
                                                ALL_structure_csv.loc[
                                                    structure, "all_classification"
                                                ] = ALL_Canopus["ClassyFire#all classifications"][0]

                            result_sirius_name = (
                                file_id + "/"
                                + db
                                + "_results_for_"
                                + json_dirALL[0].split("_")[-1]
                                + "_"
                                + "structure.csv"
                            )
                            msp.loc[mz, "SIRIUSCSV"] = result_sirius_name

                            ALL_structure_csv.to_csv(result_sirius_name)
                    elif not (
                        os.path.exists(sub_sub_dirALL_structure_can)
                        and len(pd.read_csv(sub_sub_dirALL_structure_can, sep="\t")) == 0
                    ):
                        if (
                            os.path.exists(sub_sub_dirALL_formula_can)
                            and len(pd.read_csv(sub_sub_dirALL_formula_can, sep="\t"))
                            > 0
                        ):
                            ALL_formula_csv = pd.read_csv(
                                sub_sub_dirALL_formula_can, sep="\t"
                            )
                            ALL_Canopus = pd.read_csv(ALL_Canopus_csv, sep="\t")
                            if len(ALL_Canopus) > 0:
                                for formula, rows in ALL_formula_csv.iterrows():
                                    if (
                                        ALL_formula_csv["molecularFormula"][
                                            formula
                                        ]
                                        == ALL_Canopus["molecularFormula"][0]
                                    ):                                            
                                        ALL_structure_csv.loc[
                                            structure, "subclass"
                                        ] = ALL_Canopus["ClassyFire#subclass"][0]
                                        ALL_structure_csv.loc[
                                            structure, "class"
                                        ] = ALL_Canopus["ClassyFire#class"][0]
                                        ALL_structure_csv.loc[
                                            structure, "superclass"
                                        ] = ALL_Canopus["ClassyFire#superclass"][0]
                                        ALL_structure_csv.loc[
                                            structure, "all_classification"
                                        ] = ALL_Canopus["ClassyFire#all classifications"][0]
                            for for_siriusA, row in ALL_formula_csv.iterrows():
                                if (
                                    not ALL_formula_csv["explainedIntensity"][
                                        for_siriusA
                                    ]
                                    >= 0.70
                                ):
                                    ALL_formula_csv = ALL_formula_csv.drop(
                                        for_siriusA, inplace=False
                                    )
                            result_sirius_name = (
                                file_id + "/"
                                + db
                                + "_results_for_"
                                + json_dirALL[0].split("_")[-1]
                                + "_"
                                + "formula.csv"
                            )
                            msp.loc[mz, "SIRIUSCSV"] = result_sirius_name

                            ALL_formula_csv.to_csv(
                                result_sirius_name
                            )

                        else:
                            print("no file for formula")
                    else:
                        print("no file for structure or formula")
    msp.to_csv(file_id + "_" + os.path.basename(ms1data))  
    return msp

In [3]:
def chemMN_CandidateSelection(df, tn_sim=0.85):

    """chemMN_CandidateSelection function is used to generate a Cytoscape readable tsv file.
    This file contains start(starting SMILES) and end(target SMILES) nodes and the tanimoto
    similarity scores between the nodes. User can visualize the structural similarity
    between the given SMILES. It provides an "ALL against ALL" network.

    Parameters:
    df: dataframe that contains "SMILES", "ranks", "Source". This function is specifically for
    candidate selection and so these columns are necessary.


    Returns:
    dataframe: it returns a df with following columns to be loaded into Cytoscape.
    1. Start, starting node/SMILES
    2. End, ending node/SMILES
    3. Tanimoto, Tanimoto between Start and End node
    4. Start_SMILES
    5. End_SMILES
    6. Start_Source
    7. End_Source
    8. MCSS, Maximum Common Substructure between start and end node/SMILES
    9. sorted_row, contains ids of the start and end nodes as a list


    Usage:
    chemMN_CandidateSelection(df)

    """

    # define an empty variable
    # one_df = []
    # define empty variable to save the edges
    dbn = []
    # for each entry in the df
    for i, row in df.iterrows():
        # to compare each element with each other element of the df
        for j, row in df.iterrows():
            try:
                # calcultae tanimoto
                ms = [
                    Chem.MolFromSmiles(df["SMILES"][i]),
                    Chem.MolFromSmiles(df["SMILES"][j]),
                ]
                fps = [
                    AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=2048) for x in ms
                ]
                tn = DataStructs.FingerprintSimilarity(fps[0], fps[1])
                if tn>= tn_sim:
                    if df["SMILES"][i]!= df["SMILES"][j]:
                        # save all entries to a matrix
                        dbn.append(
                            {
                                "Name_i": df["ranks"][i],
                                "Name_j": df["ranks"][j],
                                "i": df["SMILES"][i],
                                "j": df["SMILES"][j],
                                "Source_i": df["Source"][i],
                                "Source_j": df["Source"][j],
                                "Tanimoto": tn,
                            }
                        )

            except Exception:
                # print(e.string)
                pass

    # save chemical similarities
    db_edgenode = pd.DataFrame(dbn)
    
    if not db_edgenode.empty:

        # another empty variable to store the results for final tsv file
        dfe = []

        # heavy atoms for MCSS Calculation
        heavy_atoms = ["C", "N", "P", "O", "S"]

        # for the previous dataframe
        for i, row in db_edgenode.iterrows():
            # if the tanimoto > 0.85 for high similarity
            if db_edgenode["Tanimoto"][i] >= tn_sim:

                # calculate MCSS
                n = [
                    Chem.MolFromSmiles(db_edgenode["i"][i]),
                    Chem.MolFromSmiles(db_edgenode["j"][i]),
                ]
                res = rdFMCS.FindMCS(n, timeout=60)
                sm_res = Chem.MolToSmiles(Chem.MolFromSmarts(res.smartsString))

                # Check if the MCSS has one of the heavy atoms and whether they are
                # more than 3
                elem = [ele for ele in heavy_atoms if (ele in sm_res)]
                if elem and len(sm_res) >= 3:
                    MCSS_SMILES = Chem.MolToSmiles(Chem.MolFromSmarts(res.smartsString))

                # save everything into a dataframe
                dfe.append(
                    {
                        "Start": db_edgenode["Name_i"][i],
                        "End": db_edgenode["Name_j"][i],
                        "Tanimoto": db_edgenode["Tanimoto"][i],
                        "Start_SMILES": db_edgenode["i"][i],
                        "End_SMILES": db_edgenode["j"][i],
                        "Start_Source": db_edgenode["Source_i"][i],
                        "End_Source": db_edgenode["Source_j"][i],
                        "MCSS": MCSS_SMILES,
                    }
                )
        df_edge = pd.DataFrame(dfe)
        # generate a column called sorted_row which contains ids of the start and end nodes as a list
        df_edge["Start"] = df_edge["Start"].astype(str)
        df_edge["End"] = df_edge["End"].astype(str)
        df_edge["sorted_row"] = [sorted([a, b]) for a, b in zip(df_edge.Start, df_edge.End)]
        df_edge["sorted_row"] = df_edge["sorted_row"].astype(str)
        df_edge.drop_duplicates(subset=["sorted_row"], inplace=True)

        return df_edge

In [4]:


def checkSMILES_validity(resultcsv):
    

    """checkSMILES_validity does exactly as the name says, using
    RDKit, whether the SMILES are invalid or have invalid
    chemistry

    Parameters:
    input_dir (str): This is the input directory where all the .mzML
    files and their respective result directories are stored.

    results: df from combine_CuratedR

    Returns:
    dataframe: with valid SMILES
    csv: "MetabolomicsResults/final_curation_with_validSMILES.csv"

    Usage:
    checkSMILES_validity(input_dir = "usr/project/", results)

    """
    results = pd.read_csv(resultcsv)
    # check validity of SMILES
    for i, row in results.iterrows():
        if not isNaN(results["SMILES"][i]):
            m = Chem.MolFromSmiles(results["SMILES"][i], sanitize=False)
            if m is None:
                results["SMILES"][i] = "invalid_SMILES"
            else:
                try:
                    Chem.SanitizeMol(m)
                except Exception:
                    results["SMILES"][i] = "invalid_chemistry"

    return results

In [5]:


# Candidate Selection with SIRIUS
def one_candidate_selection(
    df,
    Source="SGHM",
    tn_ident=0.99,
    sirius_df=None,
    mbank_df=None,
    gnps_df=None,
    hmdb_df=None,
):

    """one_candidate_selection function is used to generate a dataframe that tells,
    for each candidate SMILES, what was the source or how many sources had the same
    candidate. The idea is to merge all candidate SMILES into one list, preserving
    the rank and source, and then checking whether these SMILES come from SIRIUS or
    any spectral DB. If a SMILE is repeated in more sources, its confidence score
    increases and is considered the most likely candidate structure. This function
    is not stand-alone and is part of the function CandidateSelection_SimilarityandIdentity


    Parameters:
    df: dataframe that contains "SMILES", "ranks", "Source". This function is
    specifically for candidate selection and so these columns are necessary.
    Source: this depends on how many sources were used. Possiblilities are:
    1. SGHM (all)
    2. SGM (SIRIUS, GNPS, MassBank)
    3. SHM (SIRIUS, HMDB, MassBank)
    4. SGH (SIRIUS, GNPS, HMDB)
    5. GHM (GNPS, HMDB, MassBank)
    6. SG (SIRIUS, GNPS)
    7. SH (SIRIUS, HMDB)
    8. SM (SIRIUS, MassBank)
    9. GM (GNPS, MassBank)
    10. GH (GNPS, HMDB)
    11. HM (HMDB, MassBank)
    12. S
    13. G
    14. H
    15. M

    Returns:
    dataframe: it returns a df with follwoing columns which can be used to
    prioritize a database for the final candidate selection.
    1. Source, contains name of the source (SIRIUS, GNPS, HMDB or MassBank)
    2. ranks, contains first letter of the source and a rank number seperated
    by _ e.g: G_1(GNPS, 1st rank)
    3. SMILES
    4. SIRIUS, the rank again but only when the corresponding row SMILES is
    also part of SIRIUS results
    5. GNPS , same as SIRIUS but for GNPS
    5. MassBank
    6. HMDB


    Usage:
    chemMN_CandidateSelection(df, Source = "SGHM")

    """

    # define empty columns for each Source to only fill if the corresponding
    # SMILES is also present in the source

    df["SIRIUS"] = np.nan
    df["GNPS"] = np.nan
    df["MassBank"] = np.nan
    df["HMDB"] = np.nan

    # for each SMILES in df
    for smiles, rows in df.iterrows():

        # If the source contains SIRIUS
        if (
            Source == "SGHM"
            or Source == "SGM"
            or Source == "SGH"
            or Source == "SHM"
            or Source == "SG"
            or Source == "SM"
            or Source == "SH"
            or Source == "S"
        ):
            # sirius_df comes from within the function CandidateSelection_SimilarityandIdentity
            for sirius_i, row in sirius_df.iterrows():
                # calculate tanimoto
                try:
                    ms = [
                        Chem.MolFromSmiles(df["SMILES"][smiles]),
                        Chem.MolFromSmiles(sirius_df["smiles"][sirius_i]),
                    ]
                    fps = [
                        AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=2048)
                        for x in ms
                    ]
                    tn = DataStructs.FingerprintSimilarity(fps[0], fps[1])
                    # since we are dealing with idenity here so tanimoto of 0.99 is appropriate
                    if tn >= tn_ident:

                        # if SIRIUS is blank, add the SIRIUS id
                        if isNaN(df["SIRIUS"][smiles]):

                            df.loc[smiles, "SIRIUS"] = sirius_df["rank_ids"][sirius_i]
                        # if not empty, add SIRIUS id, with a comma
                        else:
                            df.loc[smiles, "SIRIUS"] = (
                                str(df["SIRIUS"][smiles])
                                + ", "
                                + sirius_df["rank_ids"][sirius_i]
                            )

                except Exception:
                    # print(e.string)
                    pass

        # If the Source contains GNPS
        if (
            Source == "SGHM"
            or Source == "SGM"
            or Source == "SGH"
            or Source == "GHM"
            or Source == "SG"
            or Source == "GM"
            or Source == "GH"
            or Source == "G"
        ):

            # gnps_df comes from within the function CandidateSelection_SimilarityandIdentity
            for gnps_i, row in gnps_df.iterrows():
                try:
                    # calculate tanimoto
                    ms = [
                        Chem.MolFromSmiles(df["SMILES"][smiles]),
                        Chem.MolFromSmiles(gnps_df["GNPSSMILES"][gnps_i]),
                    ]
                    fps = [
                        AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=2048)
                        for x in ms
                    ]
                    tn = DataStructs.FingerprintSimilarity(fps[0], fps[1])

                    # since we are dealing with idenity here so tanimoto of 0.99 is appropriate
                    if tn >= tn_ident:

                        # if GNPS is blank, add the GNPS id
                        if isNaN(df["GNPS"][smiles]):

                            df.loc[smiles, "GNPS"] = gnps_df["rank_ids"][gnps_i]
                        # if not empty, add GNPS id, with a comma
                        else:
                            df.loc[smiles, "GNPS"] = (
                                str(df["GNPS"][smiles])
                                + ", "
                                + gnps_df["rank_ids"][gnps_i]
                            )

                except Exception:
                    # print(e.string)
                    pass

        # If the source contains HMDB
        if (
            Source == "SGHM"
            or Source == "SGH"
            or Source == "SHM"
            or Source == "GHM"
            or Source == "SH"
            or Source == "GH"
            or Source == "HM"
            or Source == "H"
        ):
            for hmdb_i, row in hmdb_df.iterrows():
                try:
                    # calculate tanimoto
                    ms = [
                        Chem.MolFromSmiles(df["SMILES"][smiles]),
                        Chem.MolFromSmiles(hmdb_df["HMDBSMILES"][hmdb_i]),
                    ]
                    fps = [
                        AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=2048)
                        for x in ms
                    ]
                    tn = DataStructs.FingerprintSimilarity(fps[0], fps[1])

                    # since we are dealing with idenity here so tanimoto of 0.99 is appropriate
                    if tn >= tn_ident:

                        # if HMDB is blank, add the HMDB id
                        if isNaN(df["HMDB"][smiles]):

                            df.loc[smiles, "HMDB"] = hmdb_df["rank_ids"][hmdb_i]
                        # if not empty, add HMDB id, with a comma
                        else:
                            df.loc[smiles, "HMDB"] = (
                                str(df["HMDB"][smiles])
                                + ", "
                                + hmdb_df["rank_ids"][hmdb_i]
                            )

                except Exception:
                    # print(e.string)
                    pass


        # If the source contains MassBank
        if (
            Source == "SGHM"
            or Source == "SGM"
            or Source == "SHM"
            or Source == "GHM"
            or Source == "SM"
            or Source == "GM"
            or Source == "HM"
            or Source == "M"
        ):
            # mbank_df comes from within the function CandidateSelection_SimilarityandIdentity
            for mbank_i, row in mbank_df.iterrows():
                try:
                    # calculate tanimoto
                    ms = [
                        Chem.MolFromSmiles(df["SMILES"][smiles]),
                        Chem.MolFromSmiles(mbank_df["MBSMILES"][mbank_i]),
                    ]
                    fps = [
                        AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=2048)
                        for x in ms
                    ]
                    tn = DataStructs.FingerprintSimilarity(fps[0], fps[1])

                    # since we are dealing with idenity here so tanimoto of 0.99 is appropriate
                    if tn >= tn_ident:

                        # if MassBank is blank, add the MassBank id
                        if isNaN(df["MassBank"][smiles]):

                            df.loc[smiles, "MassBank"] = mbank_df["rank_ids"][mbank_i]
                        # if not empty, add MassBank id, with a comma
                        else:
                            df.loc[smiles, "MassBank"] = (
                                str(df["MassBank"][smiles])
                                + ", "
                                + mbank_df["rank_ids"][mbank_i]
                            )

                except Exception:
                    # print(e.string)
                    pass
    return df

In [6]:


def add_count_column(df_one_candidate):
    df_one_candidate = df_one_candidate.dropna(axis=0, how="all", subset = ['SIRIUS', 'GNPS', 'MassBank', 'HMDB'])
    # create new df only with the Sources column
    df = pd.DataFrame(
        {
            "SIRIUS": df_one_candidate["SIRIUS"],
            "GNPS": df_one_candidate["GNPS"],
            "MassBank": df_one_candidate["MassBank"],
            "HMDB": df_one_candidate["HMDB"],
        }
    )

    # df_one_candidate = df_one_candidate.dropna(subset=["SIRIUS", "GNPS", "HMDB", "MassBank"], how='all', inplace=True)

    index_SIRIUS = [x for x, row in df.iterrows() if not isNaN(df["SIRIUS"][x])]
    index_GNPS = [x for x, row in df.iterrows() if not isNaN(df["GNPS"][x])]
    index_MassBank = [x for x, row in df.iterrows() if not isNaN(df["MassBank"][x])]
    index_HMDB = [x for x, row in df.iterrows() if not isNaN(df["HMDB"][x])]

    # make a list of the rows
    list_of_indices = [index_SIRIUS] + [index_GNPS] + [index_MassBank] + [index_HMDB]
    length_of_list = len([idx for idx, x in enumerate(list_of_indices)  if x])
    if length_of_list == 1:
        #print("1 is correct")
        # change number of counts, since its 1 source, so all counts should be 1
        df_one_candidate["Count"] = 1
        # extract ranks numbers from ranks
        df_one_candidate["rank_num"] = [counts.split("_")[1] for counts in df_one_candidate["ranks"]]
        # convert any str to int
        df_one_candidate["rank_num"] = [int(x) for x in df_one_candidate["rank_num"]]
        #Sort by ranknum
        df_one_candidate = df_one_candidate.sort_values(
            by="rank_num", ascending=False
        )
        for r, rows in df_one_candidate.iterrows():
            if df_one_candidate["Source"][r]=="GNPS":
                df_one_candidate.loc[r, "rank_db"] = 1
            if df_one_candidate["Source"][r]=="SIRIUS":
                df_one_candidate.loc[r, "rank_db"] = 2
            if df_one_candidate["Source"][r]=="MassBank":
                df_one_candidate.loc[r, "rank_db"] = 3
            if df_one_candidate["Source"][r]=="HMDB":
                df_one_candidate.loc[r, "rank_db"] = 4
        df_one_candidate.sort_values(
            by=["rank_num", "rank_db"], ascending=[True, True],
            inplace=True)

        return df_one_candidate
    elif length_of_list > 1:
        # make a list of the rows
        list_of_indices = index_SIRIUS + index_GNPS + index_MassBank + index_HMDB
        # count how many times one of the rows is appearing and add count
        count_list = [[x, list_of_indices.count(x)] for x in set(list_of_indices)]
        # add this info to one_can
        df_one_candidate["Count"] = [count_list[x][1] for x in range(len(count_list))]
        # sort the list by count in descending order
        sorted_count_one_candidate = df_one_candidate.sort_values(
            by="Count", ascending=False
        )
        sorted_count_one_candidate["rank_num"] = [counts.split("_")[1] for counts in sorted_count_one_candidate["ranks"]]
        sorted_count_one_candidate["rank_num"] = [int(x) for x in sorted_count_one_candidate["rank_num"]]
        for r, rows in sorted_count_one_candidate.iterrows():
            if sorted_count_one_candidate["Source"][r]=="GNPS":
                sorted_count_one_candidate.loc[r, "rank_db"] = 1
            if sorted_count_one_candidate["Source"][r]=="SIRIUS":
                sorted_count_one_candidate.loc[r, "rank_db"] = 2
            if sorted_count_one_candidate["Source"][r]=="MassBank":
                sorted_count_one_candidate.loc[r, "rank_db"] = 3
            if sorted_count_one_candidate["Source"][r]=="HMDB":
                sorted_count_one_candidate.loc[r, "rank_db"] = 4

        sorted_count_one_candidate.sort_values(
            by=["Count", "rank_num", "rank_db"], ascending=[False, True, True],
            inplace=True)
        return sorted_count_one_candidate

In [7]:


def sources_1(candidates_with_counts, merged_df, mer, sirius_df):
    """if only 1 source has confirmed the presence of a certain SMILES.
    This holds true when each candidate SMILES has only one source. The
    function selects the best candidate

    Parameters:
    candidates_with_counts: this is the result from the function add_count_column
    and contains a ordered dataframe, with the most sourced SMILES at top.
    merged_df: dataframe that contains all features from the input mzML file

    Returns:
    merged_df: with added top SMILES, Annotation Sources, Annotation Count, and
    MSI-Level

    Usage:
    sources_1(candidates_with_counts, merged_df)

    """

    df_count_1 = candidates_with_counts[candidates_with_counts["Count"] == 1]


    df_count_1 = df_count_1[df_count_1["rank_num"] == min(df_count_1["rank_num"])]

    df_count_1["count_min"] = [
        str(df_count_1["SIRIUS"][x])
        + str(df_count_1["GNPS"][x])
        + str(df_count_1["MassBank"][x])
        + str(df_count_1["HMDB"][x])
        for x, row in df_count_1.iterrows()
    ]

    df_count_1["count_max"] = [x.count("_") for x in df_count_1["count_min"]]

    df_count_1 = df_count_1.sort_values(by="count_max", ascending=False)

    df_count_1.reset_index(drop=True, inplace=True)

    merged_df.loc[mer, "AnnotationCount"] = df_count_1["Count"][0]

    gnps_indices = list(df_count_1[(df_count_1["GNPS"].notnull())].index)
    mbank_indices = list(df_count_1[(df_count_1["MassBank"].notnull())].index)
    hmdb_indices = list(df_count_1[(df_count_1["HMDB"].notnull())].index)
    sirius_indices = list(df_count_1[(df_count_1["SIRIUS"].notnull())].index)

    if 0 in sirius_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|SIRIUS"
        )
    if 0 in mbank_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|MassBank"
        )
        # print("mbank")
    if 0 in hmdb_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|HMDB"
        )
        # print("hmdb")
    if 0 in gnps_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|GNPS"
        )
        # print("gnps")

    

    merged_df["AnnotationSources"][mer] = merged_df["AnnotationSources"][
        mer
    ].replace("nan|", "")
    
    merged_df.loc[mer, "SMILES"] = df_count_1["SMILES"][0]
    if df_count_1["SMILES"][0] != "CC[S](=O)(O)O":
        comp = pcp.get_compounds(df_count_1["SMILES"][0], 'smiles')
        try:
            if comp:
                for c in comp:
                    if c.cid:
                        merged_df["synonyms"][mer] = c.synonyms
                        merged_df.loc[mer, "IUPAC"] = c.iupac_name
                        merged_df.loc[mer, "Formula"] = c.molecular_formula
                        merged_df.loc[mer, "PubChemID"] = c.cid
                    else:
                        merged_df.loc[mer, "Formula"] = sirius_df["molecularFormula"][0]
                        merged_df.loc[mer, "PubChemID"] = sirius_df["pubchemids"][0]

                
        except Exception:
            if "SIRIUS" == merged_df["AnnotationSources"][mer]:

                comp = pcp.get_compounds(sirius_df["pubchemids"][0], 'cid')
                try:
                    if comp:
                        for c in comp:
                            if c.cid:
                                merged_df["synonyms"][mer] = c.synonyms
                                merged_df.loc[mer, "IUPAC"] = c.iupac_name
                                merged_df.loc[mer, "Formula"] = sirius_df["molecularFormula"][0]
                                merged_df.loc[mer, "PubChemID"] = sirius_df["pubchemids"][0]
                            else:

                                merged_df.loc[mer, "Formula"] = sirius_df["molecularFormula"][0]
                                merged_df.loc[mer, "PubChemID"] = sirius_df["pubchemids"][0]
                    else:
                        merged_df.loc[mer, "Formula"] = sirius_df["molecularFormula"][0]
                        merged_df["synonyms"][mer] = sirius_df["name"][0]
                        merged_df.loc[mer, "PubChemID"] = sirius_df["pubchemids"][0]
                except Exception:
                    merged_df.loc[mer, "Formula"] = sirius_df["molecularFormula"][0]
                    merged_df["synonyms"][mer] = sirius_df["name"][0]
                    merged_df.loc[mer, "PubChemID"] = sirius_df["pubchemids"][0]
            else:
                pass
    if "SIRIUS" not in merged_df["AnnotationSources"][mer]:
        merged_df["superclass"][mer] = np.nan
        merged_df["class"][mer] = np.nan
        merged_df["subclass"][mer] = np.nan
        merged_df["ClassificationSource"][mer] = np.nan

    if (
        "HMDB" in merged_df["AnnotationSources"][mer]
        or "GNPS" in merged_df["AnnotationSources"][mer]
        or "MassBank" in merged_df["AnnotationSources"][mer]
    ):
        merged_df.loc[mer, "MSILevel"] = 2
    elif "SIRIUS" == merged_df["AnnotationSources"][mer]:
        merged_df.loc[mer, "MSILevel"] = 3
            
    return merged_df

In [8]:





def sources_2(candidates_with_counts, merged_df, mer, sirius_df):

    """if only 2 sources have confirmed the presence of a certain SMILES.
    This holds true when each candidate SMILES has only two sources. The
    function selects the best candidate and adds the two sources as
    annotation sources

    Parameters:
    candidates_with_counts: this is the result from the function add_count_column
    and contains a ordered dataframe, with the most sourced SMILES at top.
    merged_df: dataframe that contains all features from the input mzML file

    Returns:
    merged_df: with added top SMILES, Annotation Sources, Annotation Count, and
    MSI-Level

    Usage:
    sources_2(candidates_with_counts, merged_df, mer)

    """

    df_count_2 = candidates_with_counts[candidates_with_counts["Count"] == 2]

    df_countnew = df_count_2[df_count_2["rank_num"] == min(df_count_2["rank_num"])]
    df_countnew["count_min"] = [
        str(df_countnew["SIRIUS"][x])
        + str(df_countnew["GNPS"][x])
        + str(df_countnew["MassBank"][x])
        + str(df_countnew["HMDB"][x])
        for x, row in df_countnew.iterrows()
    ]
    df_countnew["count_max"] = [x.count("_") for x in df_countnew["count_min"]]
    df_countnew = df_countnew.sort_values(by="count_max", ascending=False)

    df_countnew.reset_index(drop=True, inplace=True)

    gnps_indices = list(df_countnew[(df_countnew["GNPS"].notnull())].index)
    mbank_indices = list(df_countnew[(df_countnew["MassBank"].notnull())].index)
    hmdb_indices = list(df_countnew[(df_countnew["HMDB"].notnull())].index)
    sirius_indices = list(df_countnew[(df_countnew["SIRIUS"].notnull())].index)

    if 0 in sirius_indices:
        # print("sirius")
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|SIRIUS"
        )
    if 0 in mbank_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|MassBank"
        )
        # print("mbank")
    if 0 in hmdb_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|HMDB"
        )
        # print("hmdb")
    if 0 in gnps_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|GNPS"
        )

    merged_df.loc[mer, "MSILevel"] = 2


    merged_df["AnnotationSources"][mer] = merged_df["AnnotationSources"][mer].replace(
        "nan|", ""
    )
    merged_df.loc[mer, "AnnotationCount"] = df_countnew["Count"][0]
    
    merged_df.loc[mer, "SMILES"] = df_countnew["SMILES"][0]
    try:
        comp = pcp.get_compounds(df_countnew["SMILES"][0], 'smiles')
        try:
            if comp:
                for c in comp:
                    merged_df["synonyms"][mer] = c.synonyms
                    merged_df.loc[mer, "IUPAC"] = c.iupac_name
                    merged_df.loc[mer, "Formula"] = c.molecular_formula
                    merged_df.loc[mer, "PubChemID"] = c.cid
        except Exception:
            pass
    except:
        pass
    if "SIRIUS" not in merged_df["AnnotationSources"][mer]:
        merged_df["superclass"][mer] = np.nan
        merged_df["class"][mer] = np.nan
        merged_df["subclass"][mer] = np.nan
        merged_df["ClassificationSource"][mer] = np.nan
    
    
    return merged_df

In [9]:


def sources_3(candidates_with_counts, merged_df, mer, sirius_df):

    """if only 3 sources have confirmed the presence of a certain SMILES.
    This holds true when each candidate SMILES has only 3 sources. The
    function selects the best candidate and adds the 3 sources as
    annotation sources

    Parameters:
    candidates_with_counts: this is the result from the function add_count_column
    and contains a ordered dataframe, with the most sourced SMILES at top.
    merged_df: dataframe that contains all features from the input mzML file

    Returns:
    merged_df: with added top SMILES, Annotation Sources, Annotation Count, and
    MSI-Level

    Usage:
    sources_2(candidates_with_counts, merged_df, mer)

    """
    # if the count is 3
    df_count_3 = candidates_with_counts[candidates_with_counts["Count"] == 3]
    # extracts the ranks again
    #df_count_3["rank_num"] = [counts.split("_")[1] for counts in df_count_3["ranks"]]
    #df_count_3["rank_num"] = [int(x) for x in df_count_3["rank_num"]]

    #df_count_3 = df_count_3.sort_values(by="rank_num")
    df_count_3 = df_count_3[df_count_3["rank_num"] == min(df_count_3["rank_num"])]
    df_count_3["count_min"] = [
        str(df_count_3["SIRIUS"][x])
        + str(df_count_3["GNPS"][x])
        + str(df_count_3["MassBank"][x])
        + str(df_count_3["HMDB"][x])
        for x, row in df_count_3.iterrows()
    ]

    df_count_3["count_max"] = [x.count("_") for x in df_count_3["count_min"]]
    df_count_3 = df_count_3.sort_values(by="count_max", ascending=False)

    df_count_3.reset_index(drop=True, inplace=True)

    merged_df.loc[mer, "AnnotationCount"] = df_count_3["Count"][0]
    
    heavy_atoms = ["C", "N", "P", "O", "S"]

    Mol = []

    for j in list(df_count_3["SMILES"]):
        if not isNaN(j):
            # print(type(j))
            mol2 = Chem.MolFromSmiles(j)
            Mol.append(mol2)

    if len(Mol) >= 2:
        res = rdFMCS.FindMCS(Mol, timeout=60)
        sm_res = Chem.MolToSmiles(Chem.MolFromSmarts(res.smartsString))
        # if there are atleast 3 heavy atoms in the MCSS, then add it to the result file
        elem = [ele for ele in heavy_atoms if (ele in sm_res)]
        if elem and len(sm_res) >= 3:
            merged_df.loc[mer, "MCSS"] = Chem.MolToSmiles(
                Chem.MolFromSmarts(res.smartsString)
            )

    gnps_indices = list(df_count_3[(df_count_3["GNPS"].notnull())].index)
    mbank_indices = list(df_count_3[(df_count_3["MassBank"].notnull())].index)
    hmdb_indices = list(df_count_3[(df_count_3["HMDB"].notnull())].index)
    sirius_indices = list(df_count_3[(df_count_3["SIRIUS"].notnull())].index)

    if 0 in sirius_indices:
        # print("sirius")
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|SIRIUS"
        )
    if 0 in mbank_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|MassBank"
        )
        # print("mbank")
    if 0 in hmdb_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|HMDB"
        )
        # print("hmdb")
    if 0 in gnps_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|GNPS"
        )
        # print("gnps")
    if "nan|SIRIUS" == merged_df["AnnotationSources"][mer]:
        merged_df.loc[mer, "MSILevel"] = 3

    merged_df["AnnotationSources"][mer] = merged_df["AnnotationSources"][mer].replace(
        "nan|", ""
    )

    merged_df.loc[mer, "MSILevel"] = 2
    merged_df.loc[mer, "SMILES"] = df_count_3["SMILES"][0]
    try:
        
        comp = pcp.get_compounds(df_count_3["SMILES"][0], 'smiles')
        try:
            if comp:
                for c in comp:
                    merged_df["synonyms"][mer] = c.synonyms
                    merged_df.loc[mer, "IUPAC"] = c.iupac_name
                    merged_df.loc[mer, "Formula"] = c.molecular_formula
                    merged_df.loc[mer, "PubChemID"] = c.cid
        except Exception:
            pass
    except:
        pass
    if "SIRIUS" not in merged_df["AnnotationSources"][mer]:
        merged_df["superclass"][mer] = np.nan
        merged_df["class"][mer] = np.nan
        merged_df["subclass"][mer] = np.nan
        merged_df["ClassificationSource"][mer] = np.nan
       

    
    return merged_df

In [10]:


def sources_4(candidates_with_counts, merged_df, mer, sirius_df):

    """if only 3 sources have confirmed the presence of a certain SMILES.
    This holds true when each candidate SMILES has only 3 sources. The
    function selects the best candidate and adds the 3 sources as
    annotation sources

    Parameters:
    candidates_with_counts: this is the result from the function add_count_column
    and contains a ordered dataframe, with the most sourced SMILES at top.
    merged_df: dataframe that contains all features from the input mzML file

    Returns:
    merged_df: with added top SMILES, Annotation Sources, Annotation Count, and
    MSI-Level

    Usage:
    sources_2(candidates_with_counts, merged_df, mer)

    """

    df_count_4 = candidates_with_counts[candidates_with_counts["Count"] == 4]
    #df_count_4["rank_num"] = [counts.split("_")[1] for counts in df_count_4["ranks"]]
    #df_count_4["rank_num"] = [int(x) for x in df_count_4["rank_num"]]
    #df_count_4 = df_count_4.sort_values(by="rank_num")
    df_count_4 = df_count_4[df_count_4["rank_num"] == min(df_count_4["rank_num"])]
    df_count_4["count_min"] = [
        str(df_count_4["SIRIUS"][x])
        + str(df_count_4["GNPS"][x])
        + str(df_count_4["MassBank"][x])
        + str(df_count_4["HMDB"][x])
        for x, row in df_count_4.iterrows()
    ]
    df_count_4["count_max"] = [x.count("_") for x in df_count_4["count_min"]]
    df_count_4 = df_count_4.sort_values(by="count_max", ascending=False)

    df_count_4.reset_index(drop=True, inplace=True)

    merged_df.loc[mer, "AnnotationCount"] = df_count_4["Count"][0]
    
    heavy_atoms = ["C", "N", "P", "O", "S"]
    
    Mol = []

    for j in list(df_count_4["SMILES"]):
        if not isNaN(j):
            # print(type(j))
            mol2 = Chem.MolFromSmiles(j)
            Mol.append(mol2)

    if len(Mol) >= 2:
        res = rdFMCS.FindMCS(Mol, timeout=60)
        sm_res = Chem.MolToSmiles(Chem.MolFromSmarts(res.smartsString))
        # if there are atleast 3 heavy atoms in the MCSS, then add it to the result file
        elem = [ele for ele in heavy_atoms if (ele in sm_res)]
        if elem and len(sm_res) >= 3:
            merged_df.loc[mer, "MCSS"] = Chem.MolToSmiles(
                Chem.MolFromSmarts(res.smartsString)
            )

    gnps_indices = list(df_count_4[(df_count_4["GNPS"].notnull())].index)
    mbank_indices = list(df_count_4[(df_count_4["MassBank"].notnull())].index)
    hmdb_indices = list(df_count_4[(df_count_4["HMDB"].notnull())].index)
    sirius_indices = list(df_count_4[(df_count_4["SIRIUS"].notnull())].index)

    if 0 in sirius_indices:
        # print("sirius")
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|SIRIUS"
        )
    if 0 in mbank_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|Massbank"
        )
        # print("mbank")
    if 0 in hmdb_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|HMDB"
        )
        # print("hmdb")
    if 0 in gnps_indices:
        merged_df.loc[mer, "AnnotationSources"] = (
            str(merged_df["AnnotationSources"][mer]) + "|GNPS"
        )
        # print("gnps")
    
    if "nan|SIRIUS" == merged_df["AnnotationSources"][mer]:
        merged_df.loc[mer, "MSILevel"] = 3

    merged_df["AnnotationSources"][mer] = merged_df["AnnotationSources"][mer].replace(
        "nan|", ""
    )
    
    merged_df.loc[mer, "MSILevel"] = 2
    
    merged_df.loc[mer, "SMILES"] = df_count_4["SMILES"][0]
    comp = pcp.get_compounds(df_count_4["SMILES"][0], 'smiles')
    try:
        if comp:
            for c in comp:
                merged_df["synonyms"][mer] = c.synonyms
                merged_df.loc[mer, "IUPAC"] = c.iupac_name
                merged_df.loc[mer, "Formula"] = c.molecular_formula
                merged_df.loc[mer, "PubChemID"] = c.cid
    except Exception:
        pass
    if "SIRIUS" not in merged_df["AnnotationSources"][mer]:
        merged_df["superclass"][mer] = np.nan
        merged_df["class"][mer] = np.nan
        merged_df["subclass"][mer] = np.nan
        merged_df["ClassificationSource"][mer] = np.nan  
     
    return merged_df

In [11]:


#@p.provenance()
def CandidateSelection_SimilarityandIdentity(file_id, msp_file, ms1data, standards = False):
    spec_msv = pd.read_csv(msp_file)
    sir_msv = pd.read_csv(ms1data)
#     spec_msv = msp_file
#     sir_msv = ms1data

    merged_df = sir_msv.merge(
        spec_msv,
        how="inner",
        left_on=["premz", "rtmed", "rtmean", "int", "col_eng", "pol"],
        right_on=["premz", "rtmed", "rtmean", "int", "col_eng", "pol"],
    )
    merged_df["Formula"] = np.nan
    merged_df["SMILES"] = np.nan
    merged_df["PubChemID"] = np.nan
    merged_df["IUPAC"] = np.nan
    merged_df["synonyms"] = np.nan
    merged_df["AnnotationSources"] = np.nan
    merged_df["AnnotationCount"] = np.nan
    merged_df["MSILevel"] = np.nan
    merged_df["MCSS"] = np.nan
    merged_df["candidate_list"] = np.nan # new
    merged_df["superclass"] = np.nan
    merged_df["class"] = np.nan
    merged_df["subclass"] = np.nan
    merged_df["ClassificationSource"] = np.nan

    for_only_formula = []
    for_formula_canopus = []

    can_selec_dir = file_id + "_Candidate_Selection"
    if not os.path.isdir(can_selec_dir):
        os.mkdir(can_selec_dir)

    for mer, rows in merged_df.iterrows():

        if not isNaN(merged_df["SIRIUSCSV"][mer]):
            sirius_csv = merged_df["SIRIUSCSV"][mer]
            # print(sirius_csv)
        else:
            df = pd.DataFrame(list())
            # print(df)
            df.to_csv("./empty_csv.csv")
            sirius_csv = "./empty_csv.csv"

        mbank_csv = merged_df["mbank_results_csv"][mer]
        gnps_csv = merged_df["gnps_results_csv"][mer]
        hmdb_csv = merged_df["hmdb_results_csv"][mer]
        if (
            os.path.exists(sirius_csv)
            and os.path.exists(gnps_csv)
            and os.path.exists(mbank_csv)
            and os.path.exists(hmdb_csv)
        ):
            sirius_df = pd.read_csv(sirius_csv)
            if "formula" in sirius_csv:

                if len(sirius_df) > 0:

                    if (
                        "smiles" not in sirius_df.columns
                        and "molecularFormula" in sirius_df.columns
                    ):

                        # merged_df.loc[mer, "Formula"] = sirius_df["molecularFormula"][0]
                        # merged_df["AnnotationSources"][mer] = "SIRIUS-Formula"

                        index = mer
                        Formula = sirius_df["molecularFormula"][0]
                        for_only_formula.append(
                            {
                                "index": index,
                                "Formula": Formula,
                                "AnnotationSources": "SIRIUS-Formula",
                            }
                        )

                        if "class" in sirius_df.columns:
                            # merged_df.loc[mer, "superclass"] = sirius_df["superclass"][0]
                            # merged_df.loc[mer, "class"] = sirius_df["class"][0]
                            # merged_df.loc[mer, "subclass"] = sirius_df["subclass"][0]
                            # merged_df.loc[mer, "ClassificationSource"] = "CANOPUS"
                            # merged_df.loc[mer, "AnnotationSources"] = "SIRIUS-Formula|CANOPUS"
                            index = mer
                            Formula = sirius_df["molecularFormula"][0]
                            superclass = sirius_df["superclass"][0]
                            classes = sirius_df["class"][0]
                            subclass = sirius_df["subclass"][0]
                            for_formula_canopus.append(
                                {
                                    "index": index,
                                    "Formula": Formula,
                                    "superclass": superclass,
                                    "class": classes,
                                    "ClassificationSource": "CANOPUS",
                                    "AnnotationSources": "SIRIUS-Formula|CANOPUS",
                                }
                            )

                        sirius_df = []

            if "structure" in sirius_csv:

                if len(sirius_df) > 0:

                    if len(sirius_df) > 50:
                        sirius_df = sirius_df[0:50]
                    if "smiles" in sirius_df.columns:

                        sirius_df = sirius_df.drop_duplicates("smiles")
                        sirius_df = sirius_df.dropna(subset=["smiles"])
                        merged_df["Formula"][mer] = sirius_df["molecularFormula"][0]
                        merged_df.loc[mer, "PubChemID"] = sirius_df["pubchemids"][0]
                        if "class" in sirius_df.columns:
                            merged_df["superclass"][mer] = sirius_df["superclass"][0]
                            merged_df["class"][mer] = sirius_df["class"][0]
                            merged_df["subclass"][mer] = sirius_df["subclass"][0]
                            merged_df["ClassificationSource"][mer] = "CANOPUS"

            elif len(sirius_df) == 0:
                # print("NO Structures")
                merged_df["Formula"][mer] = np.nan
                merged_df["superclass"][mer] = np.nan
                merged_df["class"][mer] = np.nan
                merged_df["subclass"][mer] = np.nan
                merged_df["ClassificationSource"][mer] = np.nan

            mbank_df = pd.read_csv(mbank_csv)
            if len(mbank_df) > 0:
                mbank_df = mbank_df.drop_duplicates("MBSMILES")
                mbank_df = mbank_df.dropna(subset=["MBSMILES"])

            gnps_df = pd.read_csv(gnps_csv)
            if len(gnps_df) > 0:
                gnps_df = gnps_df.drop_duplicates("GNPSSMILES")
                gnps_df = gnps_df.dropna(subset=["GNPSSMILES"])

            hmdb_df = pd.read_csv(hmdb_csv)
            # print(hmdb_df)
            if len(hmdb_df) > 0:
                hmdb_df = hmdb_df.drop_duplicates("HMDBSMILES")
                hmdb_df = hmdb_df.dropna(subset=["HMDBSMILES"])

            # 1 SGHM
            if (
                len(sirius_df) > 0
                and len(gnps_df) > 0
                and len(mbank_df) > 0
                and len(hmdb_df) > 0
            ):
                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(sirius_df["Source"])),
                    *(list(gnps_df["Source"])),
                    *(list(mbank_df["Source"])),
                    *(list(hmdb_df["Source"])),
                ]

                rank_l2 = [
                    *(list(sirius_df["rank_ids"])),
                    *(list(gnps_df["rank_ids"])),
                    *(list(mbank_df["rank_ids"])),
                    *(list(hmdb_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(sirius_df["smiles"])),
                    *(list(gnps_df["GNPSSMILES"])),
                    *(list(mbank_df["MBSMILES"])),
                    *(list(hmdb_df["HMDBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )
                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    mbank_df=mbank_df,
                    gnps_df=gnps_df,
                    hmdb_df=hmdb_df,
                    Source="SGHM",
                )

                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/"+str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 4:
                    sources_4(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 3:
                    sources_3(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)

            # 2 SGM
            elif (
                len(sirius_df) > 0
                and len(gnps_df) > 0
                and len(mbank_df) > 0
                and len(hmdb_df) == 0
            ):

                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(sirius_df["Source"])),
                    *(list(gnps_df["Source"])),
                    *(list(mbank_df["Source"])),
                ]

                rank_l2 = [
                    *(list(sirius_df["rank_ids"])),
                    *(list(gnps_df["rank_ids"])),
                    *(list(mbank_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(sirius_df["smiles"])),
                    *(list(gnps_df["GNPSSMILES"])),
                    *(list(mbank_df["MBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )
                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    mbank_df=mbank_df,
                    gnps_df=gnps_df,
                    # hmdb_df = hmdb_df,
                    Source="SGM",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 3:
                    sources_3(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)

            # 3 SHM
            elif (
                len(sirius_df) > 0
                and len(gnps_df) == 0
                and len(mbank_df) > 0
                and len(hmdb_df) > 0
            ):

                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                # gnps_df["rank_ids"] = ["G_" + str(s+1) for s in range(len(gnps_df))]

                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(sirius_df["Source"])),
                    *(list(mbank_df["Source"])),
                    *(list(hmdb_df["Source"])),
                ]

                rank_l2 = [
                    *(list(sirius_df["rank_ids"])),
                    *(list(mbank_df["rank_ids"])),
                    *(list(hmdb_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(sirius_df["smiles"])),
                    *(list(mbank_df["MBSMILES"])),
                    *(list(hmdb_df["HMDBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                       can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )
                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    mbank_df=mbank_df,
                    # gnps_df = gnps_df ,
                    hmdb_df=hmdb_df,
                    Source="SHM",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 3:
                    sources_3(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)

            # 4 SGH
            elif (
                len(sirius_df) > 0
                and len(gnps_df) > 0
                and len(mbank_df) == 0
                and len(hmdb_df) > 0
            ):
                # mbank_df["rank_ids"] = ["M_" + str(s+1) for s in range(len(mbank_df))]

                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(sirius_df["Source"])),
                    *(list(gnps_df["Source"])),
                    *(list(hmdb_df["Source"])),
                ]

                rank_l2 = [
                    *(list(sirius_df["rank_ids"])),
                    *(list(gnps_df["rank_ids"])),
                    *(list(hmdb_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(sirius_df["smiles"])),
                    *(list(gnps_df["GNPSSMILES"])),
                    *(list(hmdb_df["HMDBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )
                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    # mbank_df = mbank_df,
                    gnps_df=gnps_df,
                    hmdb_df=hmdb_df,
                    Source="SGH",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 3:
                    sources_3(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)

            # 5 GHM
            elif (
                len(sirius_df) == 0
                and len(gnps_df) > 0
                and len(mbank_df) > 0
                and len(hmdb_df) > 0
            ):
                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                # sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                # sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(gnps_df["Source"])),
                    *(list(mbank_df["Source"])),
                    *(list(hmdb_df["Source"])),
                ]

                rank_l2 = [
                    *(list(gnps_df["rank_ids"])),
                    *(list(mbank_df["rank_ids"])),
                    *(list(hmdb_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(gnps_df["GNPSSMILES"])),
                    *(list(mbank_df["MBSMILES"])),
                    *(list(hmdb_df["HMDBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )
                one_candidate = one_candidate_selection(
                    sm,
                    # sirius_df = sirius_df,
                    mbank_df=mbank_df,
                    gnps_df=gnps_df,
                    hmdb_df=hmdb_df,
                    Source="GHM",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 3:
                    sources_3(candidates_with_counts, merged_df, mer, sirius_df=None)
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df=None)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df=None)

            # 6 SG
            elif (
                len(sirius_df) > 0
                and len(gnps_df) > 0
                and len(mbank_df) == 0
                and len(hmdb_df) == 0
            ):
                # mbank_df["rank_ids"] = ["M_" + str(s+1) for s in range(len(mbank_df))]

                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(sirius_df["Source"])),
                    *(list(gnps_df["Source"])),
                ]
                # ,*(list(mbank_df["Source"]))]

                rank_l2 = [
                    *(list(sirius_df["rank_ids"])),
                    *(list(gnps_df["rank_ids"])),
                ]
                # ,*(list(mbank_df["rank_ids"]))]

                smiles_l3 = [
                    *(list(sirius_df["smiles"])),
                    *(list(gnps_df["GNPSSMILES"])),
                ]
                # ,*(list(mbank_df["MBSMILES"]))]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    # mbank_df = mbank_df,
                    gnps_df=gnps_df,
                    # hmdb_df = hmdb_df,
                    Source="SG",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)

            # 7 SH
            elif (
                len(sirius_df) > 0
                and len(gnps_df) == 0
                and len(mbank_df) == 0
                and len(hmdb_df) > 0
            ):
                # mbank_df["rank_ids"] = ["M_" + str(s+1) for s in range(len(mbank_df))]

                # gnps_df["rank_ids"] = ["G_" + str(s+1) for s in range(len(gnps_df))]

                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(sirius_df["Source"])),
                    *(list(hmdb_df["Source"])),
                ]

                rank_l2 = [
                    *(list(sirius_df["rank_ids"])),
                    *(list(hmdb_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(sirius_df["smiles"])),
                    *(list(hmdb_df["HMDBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )
                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    # mbank_df = mbank_df,
                    # gnps_df = gnps_df ,
                    hmdb_df=hmdb_df,
                    Source="SH",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)

            # 8 SM
            elif (
                len(sirius_df) > 0
                and len(gnps_df) == 0
                and len(mbank_df) > 0
                and len(hmdb_df) == 0
            ):
                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                # gnps_df["rank_ids"] = ["G_" + str(s+1) for s in range(len(gnps_df))]

                # hmdb_df["rank_ids"] = ["H_" + str(s+1) for s in range(len(hmdb_df))]

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(sirius_df["Source"])),
                    *(list(mbank_df["Source"])),
                ]

                rank_l2 = [
                    *(list(sirius_df["rank_ids"])),
                    *(list(mbank_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(sirius_df["smiles"])),
                    *(list(mbank_df["MBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    mbank_df=mbank_df,
                    # gnps_df = gnps_df ,
                    # hmdb_df = hmdb_df,
                    Source="SM",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)

            # 9 GM
            elif (
                len(sirius_df) == 0
                and len(gnps_df) > 0
                and len(mbank_df) > 0
                and len(hmdb_df) == 0
            ):
                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                # hmdb_df["rank_ids"] = ["H_" + str(s+1) for s in range(len(hmdb_df))]

                # sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                # sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(mbank_df["Source"])),
                    *(list(gnps_df["Source"])),
                ]

                rank_l2 = [
                    *(list(mbank_df["rank_ids"])),
                    *(list(gnps_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(mbank_df["MBSMILES"])),
                    *(list(gnps_df["GNPSSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    # sirius_df = sirius_df,
                    mbank_df=mbank_df,
                    gnps_df=gnps_df,
                    # hmdb_df = hmdb_df,
                    Source="GM",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df=None)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df=None)

            # 10 GH
            elif (
                len(sirius_df) == 0
                and len(gnps_df) > 0
                and len(mbank_df) == 0
                and len(hmdb_df) > 0
            ):
                # mbank_df["rank_ids"] = ["M_" + str(s+1) for s in range(len(mbank_df))]

                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                # sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                # sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(gnps_df["Source"])),
                    *(list(hmdb_df["Source"])),
                ]

                rank_l2 = [
                    *(list(gnps_df["rank_ids"])),
                    *(list(hmdb_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(gnps_df["GNPSSMILES"])),
                    *(list(hmdb_df["HMDBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )
                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )
                one_candidate = one_candidate_selection(
                    sm,
                    # sirius_df = sirius_df,
                    # mbank_df = mbank_df,
                    gnps_df=gnps_df,
                    hmdb_df=hmdb_df,
                    Source="GH",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df=None)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df=None)
            # 11 HM
            elif (
                len(sirius_df) == 0
                and len(gnps_df) == 0
                and len(mbank_df) > 0
                and len(hmdb_df) > 0
            ):
                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                # gnps_df["rank_ids"] = ["G_" + str(s+1) for s in range(len(gnps_df))]

                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                # sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                # sirius_df["Source"] = "SIRIUS"

                source_l1 = [
                    *(list(mbank_df["Source"])),
                    *(list(hmdb_df["Source"])),
                ]

                rank_l2 = [
                    *(list(mbank_df["rank_ids"])),
                    *(list(hmdb_df["rank_ids"])),
                ]

                smiles_l3 = [
                    *(list(mbank_df["MBSMILES"])),
                    *(list(hmdb_df["HMDBSMILES"])),
                ]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )
                one_candidate = one_candidate_selection(
                    sm,
                    # sirius_df = sirius_df,
                    mbank_df=mbank_df,
                    # gnps_df = gnps_df ,
                    hmdb_df=hmdb_df,
                    Source="HM",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 2:
                    sources_2(candidates_with_counts, merged_df, mer, sirius_df=None)
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df=None)

            # S
            elif (
                len(sirius_df) > 0
                and len(gnps_df) == 0
                and len(mbank_df) == 0
                and len(hmdb_df) == 0
            ):

                sirius_df["rank_ids"] = ["S_" + str(s) for s in sirius_df["rank"]]
                sirius_df["Source"] = "SIRIUS"

                source_l1 = [*(list(sirius_df["Source"]))]

                rank_l2 = [*(list(sirius_df["rank_ids"]))]

                smiles_l3 = [*(list(sirius_df["smiles"]))]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)

                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    sirius_df=sirius_df,
                    # mbank_df = mbank_df,
                    # gnps_df = gnps_df ,
                    # hmdb_df = hmdb_df,
                    Source="S",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df)
            # G
            elif (
                len(sirius_df) == 0
                and len(gnps_df) > 0
                and len(mbank_df) == 0
                and len(hmdb_df) == 0
            ):
                gnps_df["rank_ids"] = ["G_" + str(s + 1) for s in range(len(gnps_df))]

                source_l1 = [*(list(gnps_df["Source"]))]

                rank_l2 = [*(list(gnps_df["rank_ids"]))]

                smiles_l3 = [*(list(gnps_df["GNPSSMILES"]))]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    # sirius_df = sirius_df,
                    # mbank_df = mbank_df,
                    gnps_df=gnps_df,
                    # hmdb_df = hmdb_df,
                    Source="G",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df=None)
            # M
            elif (
                len(sirius_df) == 0
                and len(gnps_df) == 0
                and len(mbank_df) > 0
                and len(hmdb_df) == 0
            ):
                mbank_df["rank_ids"] = ["M_" + str(s + 1) for s in range(len(mbank_df))]

                source_l1 = [*(list(mbank_df["Source"]))]

                rank_l2 = [*(list(mbank_df["rank_ids"]))]

                smiles_l3 = [*(list(mbank_df["MBSMILES"]))]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    # sirius_df = sirius_df,
                    mbank_df=mbank_df,
                    # gnps_df = gnps_df ,
                    # hmdb_df = hmdb_df,
                    Source="M",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df=None)
            # H
            elif (
                len(sirius_df) == 0
                and len(gnps_df) == 0
                and len(mbank_df) == 0
                and len(hmdb_df) > 0
            ):
                hmdb_df["rank_ids"] = ["H_" + str(s + 1) for s in range(len(hmdb_df))]

                source_l1 = [*(list(hmdb_df["Source"]))]

                rank_l2 = [*(list(hmdb_df["rank_ids"]))]

                smiles_l3 = [*(list(hmdb_df["HMDBSMILES"]))]

                sm = pd.DataFrame(
                    list(zip(source_l1, rank_l2, smiles_l3)),
                    columns=["Source", "ranks", "SMILES"],
                )

                df_edge = chemMN_CandidateSelection(sm)
                if df_edge is not None:

                    df_edge.to_csv(
                        can_selec_dir
                        + "/"
                        + str(merged_df["premz"][mer])
                        + "_ChemMNedges.tsv",
                        sep="\t"
                    )

                one_candidate = one_candidate_selection(
                    sm,
                    #                                                                         sirius_df = sirius_df,
                    #                                                                         mbank_df = mbank_df,
                    #                                                                         gnps_df = gnps_df ,
                    hmdb_df=hmdb_df,
                    Source="H",
                )
                candidates_with_counts = add_count_column(one_candidate)
                candidates_with_counts.to_csv(
                    can_selec_dir
                    + "/"
                    + str(merged_df["premz"][mer])
                    + "sorted_candidate_list.tsv",
                    sep="\t"
                )
                merged_df["candidate_list"][mer] = can_selec_dir + "/" + str(merged_df["premz"][mer])+"sorted_candidate_list.tsv"
                if max(candidates_with_counts["Count"]) == 1:
                    sources_1(candidates_with_counts, merged_df, mer, sirius_df=None)



        if standards:
            if not isNaN(merged_df["SMILES"][mer]):

                merged_df.loc[mer, "MSILevel"] = 1



    #### code for formula and canopus classes
    df_for_formula=pd.DataFrame(for_only_formula)
    df_for_formula_n_canopus = pd.DataFrame(for_formula_canopus)
    for m, row in merged_df.iterrows():
        for f, row in df_for_formula.iterrows():
            if m == df_for_formula["index"][f]:
                merged_df.loc[m, "Formula"] = df_for_formula["Formula"][f]
                merged_df.loc[m, "AnnotationSources"] = df_for_formula["AnnotationSources"][f]

        for c, row in df_for_formula_n_canopus.iterrows():
            if m == df_for_formula_n_canopus["index"][f]:
                merged_df.loc[m, "Formula"] = df_for_formula_n_canopus["Formula"][f]
                merged_df.loc[m, "subclass"] = df_for_formula_n_canopus["subclass"][f]
                merged_df.loc[m, "class"] = df_for_formula_n_canopus["class"][f]
                merged_df.loc[m, "superclass"] = df_for_formula_n_canopus["superclass"][f]
                merged_df.loc[m, "ClassificationSource"] = df_for_formula_n_canopus["ClassificationSource"][f]
                merged_df.loc[m, "Formula"] = df_for_formula_n_canopus["Formula"][f]
                merged_df.loc[m, "AnnotationSources"] = df_for_formula_n_canopus["AnnotationSources"][f]
    merged_df.to_csv(file_id + "_mergedResults-with-one-Candidates.csv")
    #print(merged_df)
    merged_df = checkSMILES_validity(resultcsv = file_id + "_mergedResults-with-one-Candidates.csv")
    merged_df.to_csv(file_id + "_mergedResults-with-one-Candidates.csv")
    return merged_df


In [24]:
def classification(resultcsv):
   

    """classification function uses ClassyFire ChemONT

    Parameters:
    input_dir (str): This is the input directory where all the .mzML
    files and their respective result directories are stored.

    resultcsv: csv of df from combine_CuratedR or checkSMILES_validity

    Returns:
    dataframe: with classification
    csv: "MetabolomicsResults/final_curationList.csv"

    Usage:
    checkSMILES_validity(input_dir = "usr/project/", frame)

    """
    
    frame = pd.read_csv(resultcsv)
    inchis = []
    for i, row in frame.iterrows():
        if not isNaN(frame["SMILES"][i]):
            if "SIRIUS" not in frame["AnnotationSources"][i]:
                try:
                    InChI = Chem.MolToInchi(Chem.MolFromSmiles(frame["SMILES"][i]))
                    InChIKey = Chem.inchi.InchiToInchiKey(InChI)
                    inchis.append(
                        {
                            "index": i,
                            "smiles": frame["SMILES"][i],
                            "inchi": InChI,
                            "inchikey": InChIKey,
                        }
                    )
                except Exception:
                    pass
            elif "SIRIUS" in frame["AnnotationSources"][i]:
                if isNaN(frame["superclass"][i]):
                    try:
                        InChI = Chem.MolToInchi(Chem.MolFromSmiles(frame["SMILES"][i]))
                        InChIKey = Chem.inchi.InchiToInchiKey(InChI)
                        inchis.append(
                            {
                                "index": i,
                                "smiles": frame["SMILES"][i],
                                "inchi": InChI,
                                "inchikey": InChIKey,
                            }
                        )
                    except Exception:
                        pass
    inchis = pd.DataFrame(inchis)
    if len(inchis):
        inchis = inchis.loc[-isNaN(inchis["inchikey"])]
        # Retrieve ClassyFire classifications

        # This first step is done using inchikey and interrogation of the gnps classified structures
        """
        gnps_proxy = True
        url = "http://classyfire.wishartlab.com"
        proxy_url = "https://gnps-classyfire.ucsd.edu"
        chunk_size = 1000
        sleep_interval = 12
        """

        all_inchi_keys = list(inchis["inchikey"].drop_duplicates())

        resolved_ik_number_list = [0, 0]
        # total_inchikey_number = len(all_inchi_keys)

        while True:

            # start_time = time.time()

            # print('%s inchikey to resolve' % total_inchikey_number )
            get_classifications_cf_mod(all_inchi_keys, par_level=6)

            cleanse("all_json.json", "all_json.json")

            with open("all_json.json") as tweetfile:
                jsondic = json.loads(tweetfile.read())

            df = json_normalize(jsondic)
            df = df.drop_duplicates("inchikey")
            resolved_ik_number = len(df.drop_duplicates("inchikey").inchikey)
            resolved_ik_number_list.append(resolved_ik_number)
            # print('%s resolved inchikeys' % resolved_ik_number )
            # print("done in --- %s seconds ---" % (time.time() - start_time))

            if (
                resolved_ik_number_list[-1] < resolved_ik_number_list[-2]
                or resolved_ik_number_list[-1] == resolved_ik_number_list[-3]
            ):
                break
            cleanse("all_json.json", "all_json_cleaned.json")

            with open("all_json_cleaned.json") as tweetfile:
                jsondic = json.loads(tweetfile.read())

        flattened_classified_json = json_normalize(jsondic)
        flattened_df = flattened_classified_json.drop_duplicates("inchikey")
        flattened_df["inchikey"] = flattened_df["inchikey"].str.replace(
            r"InChIKey=", ""
        )
        df_merged = pd.merge(
            inchis, flattened_df, left_on="inchikey", right_on="inchikey", how="left"
        )
        #df_merged.to_csv("check.csv")
        for p, rowp in df_merged.iterrows():
            for q, rowq in frame.iterrows():
                if df_merged["smiles_x"][p] is frame["SMILES"][q]:
                    if "subclass.name" in df_merged.columns:
                        frame.loc[q, "subclass"] = df_merged["subclass.name"][p]
                    frame.loc[q, "class"] = df_merged["class.name"][p]
                    frame.loc[q, "superclass"] = df_merged["superclass.name"][p]
                    frame.loc[q, "ClassificationSource"] = "ClassyFire"

        frame.to_csv(resultcsv)
        return frame


In [13]:
os.chdir('/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl')

In [None]:
# EXO_NEG

In [172]:

ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_EXOneg/insilico/MS1DATA.csv"
file_id = "exo_neg_sirius"
sirius_candidate_json_path = r'/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_EXOneg/insilico/SIRIUS/no_isotope/*.json'
sirius_candidate_json = glob.glob(sirius_candidate_json_path)
sirius_candidate_json
db = "coconut"

sirius_postproc(ms1data, sirius_candidate_json, file_id, db)

no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file fo

Unnamed: 0.1,Unnamed: 0,X,id_X,premz,rtmed,rtmean,int,col_eng,pol,ms2Peaks,ms1Peaks,neutral_mass,SIRIUSCSV
0,1,1,endo_negM115R35ID1,114.988480,35.436139,35.436139,4.275046e+06,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,
1,2,2,endo_negM179R39ID2,178.864761,39.000852,39.563980,1.103407e+06,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,
2,3,3,endo_negM315R40ID3,314.793579,39.814096,39.814096,3.908569e+04,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,
3,4,4,endo_negM214R44ID4,214.048172,43.542205,43.542205,9.322293e+05,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_neg_sirius/coconut_results_for_negM214R44I...
4,5,5,endo_negM267R45ID5,267.065094,45.102194,45.102194,1.534127e+06,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_neg_sirius/coconut_results_for_negM267R45I...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,340,340,endo_negM323R436ID340,323.222198,436.122600,436.121664,4.768455e+05,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,
340,341,341,endo_negM309R439ID341,309.167816,439.194264,438.937642,5.906030e+05,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_neg_sirius/coconut_results_for_negM309R439...
341,342,342,endo_negM637R455ID342,637.394165,455.026518,455.513512,4.944261e+05,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_neg_sirius/coconut_results_for_negM637R455...
342,343,343,endo_negM365R463ID343,365.268768,462.500460,462.004048,1.019026e+06,30,neg,ms2_spectra_EXOneg/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,


In [186]:
ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/exo_neg_sirius_MS1DATA.csv"
file_id = "exo_neg_sirius"
msp_file = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/exo_neg_spectral_results.csv"
standards = False

CandidateSelection_SimilarityandIdentity(file_id = file_id, msp_file = msp_file, 
ms1data = ms1data, standards = False)

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DCC%28%3DCN%3DC1%29C%28%3DO%29C2%3DC%28C%3DC%28C%3DC2%29O%29O'
DEBUG:pubchempy:Created Compound(14386591)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=14386591'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCOC%28%3DO%29C1CC1S%28%3DO%29%28%3DO%29C2%3DCC%3DC%28C%3DC2%29C'
DEBUG:pubchempy:Created Compound(59253831)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=59253831'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CS%28%3DO%29CCCCCC%23N'
DEBUG:pubchempy:Created Compound(85993299)
DEBUG:pubchempy:Request URL: http

DEBUG:pubchempy:Request data: b'cid=3056186'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCOC1%3DC%28C%3DC%28C%3DC1%29CC2C3%3DCC%28%3DC%28C%3DC3CCN2C%28%3DO%29CN4CCCC4%29OCC%29OCC%29OCC'
DEBUG:pubchempy:Created Compound(4586121)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4586121'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%28C%29CC%28C%29C1C%28C%3DC%28C%28C%28C%3DC%28C%28C%28C%3DC%28C%28%3DO%29NC%28C%28%3DO%29O1%29CC2%3DCC%3DCC%3DC2%29C%29C%29O%29C%29C%29OC%28%3DO%29C%29C%29C'
DEBUG:pubchempy:Created Compound(76513446)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=76513446'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL:

DEBUG:pubchempy:Request data: b'cid=14208647'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%28%3DO%29CC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(439684)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=439684'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1C%28C%28OP%28%3DO%29%28O1%29O%29C%28%3DO%29C2%3DCN%3DC3C%28%3DN2%29C%28%3DO%29NC%28%3DN3%29N%29O'
DEBUG:pubchempy:Created Compound(135481187)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=135481187'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1C%28C%28OC2%3DCC%28%3DCC%28%3DC21%29O%29O%29C3%3DC%28C%28%3

DEBUG:pubchempy:Created Compound(3661011)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3661011'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCCCCCCCC%28%3DO%29OCC1C2C%28C3C%28O1%29OC%28O3%29%28C%29C%29OC%28O2%29%28C%29C'
DEBUG:pubchempy:Created Compound(3115265)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3115265'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DCC2%3DC%28C%28%3DC1%29OP%28%3DO%29%28O%29O%29N%3DC%28C%3DC2%29C%23N'
DEBUG:pubchempy:Created Compound(447532)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'c

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=2786994'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DCC%28%3DO%29OC1C%28C%28C%28C%28O1%29CO%29O%29O%29O%29C'
DEBUG:pubchempy:Created Compound(162997451)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162997451'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%28C%28OC%28C%28C1NC%29O%29OC2C%28CC%28C%28C2O%29OC3C%28CCC%28O3%29CN%29O%29N%29N%29CO%29O'
DEBUG:pubchempy:Created Compound(13020912)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=13020912'
INFO:pubchempy:'PUGREST.No

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29COC%28C%29%28C1CCC2%28C1%28CCC3C2%3DCC%28%3DO%29C4%28C3%28CC%28C%28C4%29O%29O%29C%29CC5%3DCC%3DC%28C%3DC5%29O%29C%29O%29C6C%28O6%29C%28C%29C%28C%29C'
DEBUG:pubchempy:Created Compound(162834837)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162834837'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCCCCCCCC%28%3DO%29OC%28C%29C1%3DC2C%28%3DCC%3DC1%29N%3DC3C%28%3DN2%29C%3DCC%3DC3C%28%3DO%29O'
DEBUG:pubchempy:Created Compound(537492)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=537492'


Unnamed: 0.1,Unnamed: 0,Unnamed: 0.1_x,Unnamed: 0_x,X,id_X_x,premz,rtmed,rtmean,int,col_eng,...,synonyms,AnnotationSources,AnnotationCount,MSILevel,MCSS,candidate_list,superclass,class,subclass,ClassificationSource
0,0,0,1,1,endo_negM115R35ID1,114.988480,35.436139,35.436139,4.275046e+06,30,...,,,,,,,,,,
1,1,1,2,2,endo_negM179R39ID2,178.864761,39.000852,39.563980,1.103407e+06,30,...,,,,,,,,,,
2,2,2,3,3,endo_negM315R40ID3,314.793579,39.814096,39.814096,3.908569e+04,30,...,,,,,,,,,,
3,3,3,4,4,endo_negM214R44ID4,214.048172,43.542205,43.542205,9.322293e+05,30,...,"['(2,4-dihydroxyphenyl)(pyridin-3-yl)methanone...",SIRIUS,1.0,3.0,,exo_neg_sirius_Candidate_Selection/214.0481719...,Benzenoids,Phenols,1-hydroxy-2-unsubstituted benzenoids,CANOPUS
4,4,4,5,5,endo_negM267R45ID5,267.065094,45.102194,45.102194,1.534127e+06,30,...,['SCHEMBL2865588'],SIRIUS,1.0,3.0,,exo_neg_sirius_Candidate_Selection/267.0650939...,,,,CANOPUS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,339,339,340,340,endo_negM323R436ID340,323.222198,436.122600,436.121664,4.768455e+05,30,...,,,,,,,,,,
340,340,340,341,341,endo_negM309R439ID341,309.167816,439.194264,438.937642,5.906030e+05,30,...,"['roxithromycin', 'MLS002154033', 'SMR00123336...",GNPS,1.0,2.0,,exo_neg_sirius_Candidate_Selection/309.1678161...,,,,
341,341,341,342,342,endo_negM637R455ID342,637.394165,455.026518,455.513512,4.944261e+05,30,...,[],SIRIUS,1.0,3.0,,exo_neg_sirius_Candidate_Selection/637.3941650...,Lipids and lipid-like molecules,Prenol lipids,Triterpenoids,CANOPUS
342,342,342,343,343,endo_negM365R463ID343,365.268768,462.500460,462.004048,1.019026e+06,30,...,,,,,,,,,,


In [188]:
classification(resultcsv = file_id + "_mergedResults-with-one-Candidates.csv")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/DJAMWOXJYSHIFD-UHFFFAOYSA-N.json HTTP/1.1" 200 5477
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/TYEYAUURGXCDJR-UHFFFAOYSA-N.json HTTP/1.1" 200 11456
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/NUDVEHBHDBJSMD-UHFFFAOYSA-N.json HTTP/1.1" 200 10718
DEBUG:urllib3.connectionpool:https://gnps

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Unnamed: 0.1_x,Unnamed: 0_x,X,id_X_x,premz,rtmed,rtmean,int,...,synonyms,AnnotationSources,AnnotationCount,MSILevel,MCSS,candidate_list,superclass,class,subclass,ClassificationSource
0,0,0,0,1,1,endo_negM115R35ID1,114.988480,35.436139,35.436139,4.275046e+06,...,,,,,,,,,,
1,1,1,1,2,2,endo_negM179R39ID2,178.864761,39.000852,39.563980,1.103407e+06,...,,,,,,,,,,
2,2,2,2,3,3,endo_negM315R40ID3,314.793579,39.814096,39.814096,3.908569e+04,...,,,,,,,,,,
3,3,3,3,4,4,endo_negM214R44ID4,214.048172,43.542205,43.542205,9.322293e+05,...,"['(2,4-dihydroxyphenyl)(pyridin-3-yl)methanone...",SIRIUS,1.0,3.0,,exo_neg_sirius_Candidate_Selection/214.0481719...,Benzenoids,Phenols,1-hydroxy-2-unsubstituted benzenoids,CANOPUS
4,4,4,4,5,5,endo_negM267R45ID5,267.065094,45.102194,45.102194,1.534127e+06,...,['SCHEMBL2865588'],SIRIUS,1.0,3.0,,exo_neg_sirius_Candidate_Selection/267.0650939...,Benzenoids,Benzene and substituted derivatives,Toluenes,ClassyFire
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,339,339,339,340,340,endo_negM323R436ID340,323.222198,436.122600,436.121664,4.768455e+05,...,,,,,,,,,,
340,340,340,340,341,341,endo_negM309R439ID341,309.167816,439.194264,438.937642,5.906030e+05,...,"['roxithromycin', 'MLS002154033', 'SMR00123336...",GNPS,1.0,2.0,,exo_neg_sirius_Candidate_Selection/309.1678161...,Organic oxygen compounds,Organooxygen compounds,Carbohydrates and carbohydrate conjugates,ClassyFire
341,341,341,341,342,342,endo_negM637R455ID342,637.394165,455.026518,455.513512,4.944261e+05,...,[],SIRIUS,1.0,3.0,,exo_neg_sirius_Candidate_Selection/637.3941650...,Lipids and lipid-like molecules,Prenol lipids,Triterpenoids,CANOPUS
342,342,342,342,343,343,endo_negM365R463ID343,365.268768,462.500460,462.004048,1.019026e+06,...,,,,,,,,,,


In [None]:
#### ENDO_neg

In [189]:

ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_ENDOneg/insilico/MS1DATA.csv"
file_id = "endo_neg_sirius"
sirius_candidate_json_path = r'/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_ENDOneg/insilico/SIRIUS/no_isotope/*.json'
sirius_candidate_json = glob.glob(sirius_candidate_json_path)
sirius_candidate_json
db = "coconut"

sirius_postproc(ms1data, sirius_candidate_json, file_id, db)

no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file fo

Unnamed: 0.1,Unnamed: 0,X,id_X,premz,rtmed,rtmean,int,col_eng,pol,ms2Peaks,ms1Peaks,neutral_mass,SIRIUSCSV
0,1,1,endo_negM153R40ID1,152.995453,40.053931,40.032465,9.387488e+06,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_neg_sirius/coconut_results_for_negM153R40...
1,2,2,endo_negM539R46ID2,539.137329,45.773764,45.957333,1.143601e+06,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_neg_sirius/coconut_results_for_negM539R46...
2,3,3,endo_negM125R52ID3,124.991096,51.880845,51.932816,4.379691e+05,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_neg_sirius/coconut_results_for_negM125R52...
3,4,4,endo_negM161R74ID4,161.045288,73.512243,73.565651,3.634138e+06,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,
4,5,5,endo_negM145R77ID5,145.050415,76.683300,76.329839,6.536514e+05,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_neg_sirius/coconut_results_for_negM145R77...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,184,184,endo_negM976R545ID184,975.529907,545.467350,545.428659,1.394281e+06,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_neg_sirius/coconut_results_for_negM976R54...
184,185,185,endo_negM307R565ID185,307.263733,565.256445,565.220976,1.989031e+05,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_neg_sirius/coconut_results_for_negM307R56...
185,186,186,endo_negM883R568ID186,883.391663,568.353864,568.029988,6.901780e+05,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,
186,187,187,endo_negM813R580ID187,813.477356,580.209147,580.202772,3.209911e+06,30,neg,ms2_spectra_ENDOneg/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_neg_sirius/coconut_results_for_negM813R58...


In [190]:
ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/endo_neg_sirius_MS1DATA.csv"
file_id = "endo_neg_sirius"
msp_file = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/endo_neg_spectral_results.csv"
standards = False

CandidateSelection_SimilarityandIdentity(file_id = file_id, msp_file = msp_file, 
ms1data = ms1data, standards = False)

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DCC%3DC%28C%28%3DC1%29C%28%3DO%29O%29S'
DEBUG:pubchempy:Created Compound(5443)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5443'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28CSCC%28C%28%3DO%29O%29NC%28%3DO%29CCC%28C%28%3DO%29O%29N%29SCC%28C%28%3DO%29O%29NC%28%3DO%29CCC%28C%28%3DO%29O%29N'
DEBUG:pubchempy:Created Compound(131752418)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=131752418'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DC%28N%28NN1%5BN%2B%5D%28%3DO%29%5BO-%5D%29N%29N'
DEBUG:pubchempy:Crea

INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC1C%28C%3DC%28C%3DCC%28%3DO%29C%28CC%28C%28C%28C%28CC%28%3DO%29O1%29OC%28%3DO%29C%29C%29OC2C%28C%28CC%28O2%29C%29N%28C%29C%29O%29CC%3DO%29C%29C%29C'
DEBUG:pubchempy:Created Compound(163052400)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163052400'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29C%28C%28%3DO%29O%29NC%28%3DO%29C1%3DCC%3DCC%3DC1NC%28%3DO%29C2%3DCC%3DC%28C%3DC2%29OC'
DEBUG:pubchempy:Created Compound(2868336)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=2868336'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi

DEBUG:pubchempy:Created Compound(78052528)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=78052528'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCN%28CC%29CC1CN2CCC1CC2CNC%28%3DO%29NC3%3DCSC%3DC3'
DEBUG:pubchempy:Created Compound(74450972)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=74450972'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1C%28NC%28%3DO%29C%28NC%28%3DO%29C%28NC%28%3DO%29C%28NC1%3DO%29C%28C%29C%29C%29CC2%3DCC%3DC%28C%3DC2%29O%29CC%3DCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(163051691)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/p

DEBUG:pubchempy:Created Compound(78145485)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=78145485'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1CC%28N2CC1N%28C2%3DO%29OS%28%3DO%29%28%3DO%29O%29C%28%3DO%29NC3CCNCC3'
DEBUG:pubchempy:Created Compound(56971685)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=56971685'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DCC%3DC%28C%3DC1%29C%28%3DO%29SCC%28%3DO%29NCC%28%3DO%29NCC%28%3DO%29NCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(185457)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=185457'
DEBUG:pubchempy:Req

NUDVEHBHDBJSMD-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/NUDVEHBHDBJSMD-UHFFFAOYSA-N.json
RQHFECMMNCUDGZ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/RQHFECMMNCUDGZ-UHFFFAOYSA-N.json
WJRALRAIXBIBCJ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/WJRALRAIXBIBCJ-UHFFFAOYSA-N.json
RQHFECMMNCUDGZ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/RQHFECMMNCUDGZ-UHFFFAOYSA-N.json
IUFQFQNFCIGWHJ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/IUFQFQNFCIGWHJ-UHFFFAOYSA-N.json
UQMUNHWYRYBNHU-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/UQMUNHWYRYBNHU-UHFFFAOYSA-N.json
RXZBMPWDPOLZGW-YVNHRGETSA-N
https://gnps-classyfire.ucsd.edu/entities/RXZBMPWDPOLZGW-YVNHRGETSA-N.json
SMOBCLHAZXOKDQ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/SMOBCLHAZXOKDQ-UHFFFAOYSA-N.json
DJAMWOXJYSHIFD-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/DJAMWOXJYSHIFD-UHFFFAOYSA-N.json
RXZBMPWDPOLZGW-YVNHRGETSA-N
https://gnps-classyfire.ucsd.edu/entities/RXZ

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC2%3DC%28C%28C3%3DC%28C2%29C%28%3DC%28C%3DC3O%29OC%29C4%3DC%28C5%3DC%28C%3DC4%29C%28C6%3DC%28C5%3DO%29C%28%3DCC%28%3DC6%29C%29O%29C7C%28C%28C%28C%28O7%29CO%29O%29O%29O%29O%29O%29C%28%3DC1%29O'
DEBUG:pubchempy:Created Compound(163005086)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163005086'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1C%28C%28OC1%3DO%29C%3DC%28C%29C2CNC%28C2CC%28%3DO%29O%29C%28%3DO%29O%29O'
DEBUG:pubchempy:Created Compound(73999944)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73999944'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:R

Unnamed: 0.1,Unnamed: 0,Unnamed: 0.1_x,Unnamed: 0_x,X,id_X_x,premz,rtmed,rtmean,int,col_eng,...,synonyms,AnnotationSources,AnnotationCount,MSILevel,MCSS,candidate_list,superclass,class,subclass,ClassificationSource
0,0,0,1,1,endo_negM153R40ID1,152.995453,40.053931,40.032465,9.387488e+06,30,...,"['Thiosalicylic acid', '2-Mercaptobenzoic acid...",SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/152.995452...,Organoheterocyclic compounds,,,CANOPUS
1,1,1,2,2,endo_negM539R46ID2,539.137329,45.773764,45.957333,1.143601e+06,30,...,"[""N,N'-Bis(g-glutamyl)-3,3'-(1,2-propylenedith...",SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/539.137329...,Organic oxygen compounds,Organooxygen compounds,Carbohydrates and carbohydrate conjugates,CANOPUS
2,2,2,3,3,endo_negM125R52ID3,124.991096,51.880845,51.932816,4.379691e+05,30,...,,,,,,,,,,
3,3,3,4,4,endo_negM161R74ID4,161.045288,73.512243,73.565651,3.634138e+06,30,...,,,,,,,,,,
4,4,4,5,5,endo_negM145R77ID5,145.050415,76.683300,76.329839,6.536514e+05,30,...,[],SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/145.050415...,Organoheterocyclic compounds,Azoles,Tetrazoles,CANOPUS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,183,183,184,184,endo_negM976R545ID184,975.529907,545.467350,545.428659,1.394281e+06,30,...,[],SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/975.529907...,Organic acids and derivatives,Carboxylic acids and derivatives,"Amino acids, peptides, and analogues",CANOPUS
184,184,184,185,185,endo_negM307R565ID185,307.263733,565.256445,565.220976,1.989031e+05,30,...,,,,,,,,,,
185,185,185,186,186,endo_negM883R568ID186,883.391663,568.353864,568.029988,6.901780e+05,30,...,,,,,,,,,,
186,186,186,187,187,endo_negM813R580ID187,813.477356,580.209147,580.202772,3.209911e+06,30,...,[],SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/813.477355...,Organic acids and derivatives,Carboxylic acids and derivatives,"Amino acids, peptides, and analogues",CANOPUS


In [191]:
classification(resultcsv = file_id + "_mergedResults-with-one-Candidates.csv")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ICNWLOCYNRZUJT-UHFFFAOYSA-N.json HTTP/1.1" 200 9595
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/VFYGRCPWSYWISD-UHFFFAOYSA-N.json HTTP/1.1" 200 8157
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/UESKGPZDXYFMJF-UHFFFAOYSA-N.json HTTP/1.1" 200 9526
DEBUG:urllib3.connectionpool:https://gnps-c

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Unnamed: 0.1_x,Unnamed: 0_x,X,id_X_x,premz,rtmed,rtmean,int,...,synonyms,AnnotationSources,AnnotationCount,MSILevel,MCSS,candidate_list,superclass,class,subclass,ClassificationSource
0,0,0,0,1,1,endo_negM153R40ID1,152.995453,40.053931,40.032465,9.387488e+06,...,"['Thiosalicylic acid', '2-Mercaptobenzoic acid...",SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/152.995452...,Organoheterocyclic compounds,,,CANOPUS
1,1,1,1,2,2,endo_negM539R46ID2,539.137329,45.773764,45.957333,1.143601e+06,...,"[""N,N'-Bis(g-glutamyl)-3,3'-(1,2-propylenedith...",SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/539.137329...,Organic oxygen compounds,Organooxygen compounds,Carbohydrates and carbohydrate conjugates,CANOPUS
2,2,2,2,3,3,endo_negM125R52ID3,124.991096,51.880845,51.932816,4.379691e+05,...,,,,,,,,,,
3,3,3,3,4,4,endo_negM161R74ID4,161.045288,73.512243,73.565651,3.634138e+06,...,,,,,,,,,,
4,4,4,4,5,5,endo_negM145R77ID5,145.050415,76.683300,76.329839,6.536514e+05,...,[],SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/145.050415...,Organoheterocyclic compounds,Azoles,Tetrazoles,CANOPUS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,183,183,183,184,184,endo_negM976R545ID184,975.529907,545.467350,545.428659,1.394281e+06,...,[],SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/975.529907...,Organic acids and derivatives,Carboxylic acids and derivatives,"Amino acids, peptides, and analogues",CANOPUS
184,184,184,184,185,185,endo_negM307R565ID185,307.263733,565.256445,565.220976,1.989031e+05,...,,,,,,,,,,
185,185,185,185,186,186,endo_negM883R568ID186,883.391663,568.353864,568.029988,6.901780e+05,...,,,,,,,,,,
186,186,186,186,187,187,endo_negM813R580ID187,813.477356,580.209147,580.202772,3.209911e+06,...,[],SIRIUS,1.0,3.0,,endo_neg_sirius_Candidate_Selection/813.477355...,Organic acids and derivatives,Carboxylic acids and derivatives,"Amino acids, peptides, and analogues",CANOPUS


ICNWLOCYNRZUJT-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ICNWLOCYNRZUJT-UHFFFAOYSA-N.json
AHHMIENJLSUKDM-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/AHHMIENJLSUKDM-UHFFFAOYSA-N.json
ALJUYAAJNSNTHU-OBEQGSJMSA-N
https://gnps-classyfire.ucsd.edu/entities/ALJUYAAJNSNTHU-OBEQGSJMSA-N.json
ZKAVXQOZZXRJGL-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ZKAVXQOZZXRJGL-UHFFFAOYSA-N.json
ICNWLOCYNRZUJT-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ICNWLOCYNRZUJT-UHFFFAOYSA-N.json
NSTAZJPMVCIULM-UHFFFAOYSA-M
https://gnps-classyfire.ucsd.edu/entities/NSTAZJPMVCIULM-UHFFFAOYSA-M.json
VFYGRCPWSYWISD-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/VFYGRCPWSYWISD-UHFFFAOYSA-N.json
UFIWZSNSJFCLAC-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/UFIWZSNSJFCLAC-UHFFFAOYSA-N.json
ICNWLOCYNRZUJT-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ICNWLOCYNRZUJT-UHFFFAOYSA-N.json
SMOBCLHAZXOKDQ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/SMO

In [None]:
# ENDO pos

In [14]:

ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_ENDOpos/insilico/MS1DATA.csv"
file_id = "endo_pos_sirius"
sirius_candidate_json_path = r'/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_ENDOpos/insilico/SIRIUS/no_isotope/*.json'
sirius_candidate_json = glob.glob(sirius_candidate_json_path)
sirius_candidate_json
db = "coconut"

sirius_postproc(ms1data, sirius_candidate_json, file_id, db)

no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file fo

Unnamed: 0.1,Unnamed: 0,X,id_X,premz,rtmed,rtmean,int,col_eng,pol,ms2Peaks,ms1Peaks,neutral_mass,SIRIUSCSV
0,1,1,endo_posM160R32ID1,160.180511,32.058754,32.058754,6.466272e+07,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM160R32...
1,2,2,endo_posM217R34ID2,217.067947,34.428924,34.428924,9.586514e+06,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM217R34...
2,3,3,endo_posM203R35ID3,203.052368,35.186543,35.186543,0.000000e+00,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,
3,4,4,endo_posM252R38ID4,252.143799,38.253020,38.253020,2.773368e+07,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM252R38...
4,5,5,endo_posM258R40ID5,258.109833,39.780506,39.780506,4.214060e+06,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM258R40...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
521,522,522,endo_posM763R587ID522,762.513306,586.948800,587.523008,9.368186e+04,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM763R58...
522,523,523,endo_posM789R594ID523,788.528015,593.836386,593.881798,8.136451e+04,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM789R59...
523,524,524,endo_posM333R593ID524,333.241608,592.974705,592.974705,0.000000e+00,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM333R59...
524,525,525,endo_posM651R600ID525,651.315918,600.063657,600.063657,8.092335e+03,30,pos,ms2_spectra_ENDOpos/insilico/peakfiles_ms2/Pea...,no ms1 peaks in QC,no mass from CAMERA,endo_pos_sirius/coconut_results_for_posM651R60...


In [16]:
ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/endo_pos_sirius_MS1DATA.csv"
file_id = "endo_pos_sirius"
msp_file = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/endo_pos_spectral_results.csv"
standards = False

CandidateSelection_SimilarityandIdentity(file_id = file_id, msp_file = msp_file, 
ms1data = ms1data, standards = False)

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%28CCNCCCCN%29CN'
DEBUG:pubchempy:Created Compound(368)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=368'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29N%28C%29CCCCNC%28%3DO%29N%3DC%28N%29N'
DEBUG:pubchempy:Created Compound(14731319)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=14731319'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCC%28C1C%28C%28C%28N1%29CO%29O%29O%29O'
DEBUG:pubchempy:Created Compound(85144910)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=85142149'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCCC%28CC1%29C%28%3DO%29C'
DEBUG:pubchempy:Created Compound(93019)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=93019'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCC1CC%28%3DO%29CC1%3DO'
DEBUG:pubchempy:Created Compound(14391671)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=14391671'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=

DEBUG:pubchempy:Request data: b'cid=5156'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC1C%28C%3DCC%28%3DO%29C%28CC%28C%28C%28C%28%3DO%29C%28C%28%3DO%29O1%29C%29C%29OC2C%28C%28CC%28O2%29C%29N%28C%29C%29O%29C%29C%29C'
DEBUG:pubchempy:Created Compound(74340065)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=74340065'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1CC2C%28CC%3DC1CCC%28%3DO%29C%29C%28%3DC%29C%28%3DO%29O2'
DEBUG:pubchempy:Created Compound(540288)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=540288'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pu

DEBUG:pubchempy:Request data: b'smiles=CCC%28C%28CC%3DCCC%3DCCCCCCCCC%28%3DO%29O%29O%29OC'
DEBUG:pubchempy:Created Compound(156603023)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=156603023'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC1%28CCC%28CC%28C%28CC%23C1%29CC2%3DCC%28%3DC%28C%3DC2%29O%29OC3CCNC%28C3%29CNC%29OC%28%3DO%29C%29O%29CNC%28%3DNC%29N'
DEBUG:pubchempy:Created Compound(163082755)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163082755'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCC%28%3DO%29C1CCC%28%3DCC1%29CCC%3DC%28C%29C'
DEBUG:pubchempy:Created Compound

DEBUG:pubchempy:Created Compound(4487554)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4487554'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCCCCCC%28CC%28%3DO%29OC1C%3DC%28CC%28C1OC%28%3DO%29C%29OC%28%3DO%29C%29C%28%3DO%29O%29OC%28%3DO%29C'
DEBUG:pubchempy:Created Compound(163037617)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163037617'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29CC1C%28%3DO%29NC%28C%28%3DO%29NC%28C%28%3DO%29NC%28C%28%3DO%29NC%28C%28%3DO%29N1%29CC2%3DCC%3DCC%3DC2%29CC%28C%29C%29CC%28C%29C%29CC%28C%29C'
DEBUG:pubchempy:Created Compound(163063235)
DEBUG:pubchempy:Request UR

DEBUG:pubchempy:Created Compound(101122)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=101122'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29%28C%29C1%3DNC%28%3DNC%3DC1%29NC2COC3C2OCC3NS%28%3DO%29%28%3DO%29C'
DEBUG:pubchempy:Created Compound(73139668)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73139668'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DC%28NC%28%3DO%29N%3DC1%29N'
DEBUG:pubchempy:Created Compound(597)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=597'
DEBUG:pubchempy:Request URL: https://pubchem.nc

DEBUG:pubchempy:Created Compound(5312942)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5312942'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29C1%28CCC2%28C1%28C%28CC3C2%28CC%3DC4C3%28CCC%28C4%29O%29C%29O%29O%29C%29O%29O'
DEBUG:pubchempy:Created Compound(12311275)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=12311275'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1CC%3DC%28C1%29C%28%3DO%29CCCCCCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(46214743)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=46214743'
DEBU

DEBUG:pubchempy:Created Compound(4982)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4982'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DC%28C%28C%28%3DC%28N1%29C%29C%28%3DO%29OCCCN2CCC%28CC2%29%28C3%3DCC%3DCC%3DC3%29C4%3DCC%3DCC%3DC4%29C5%3DCC%28%3DCC%3DC5%29N%3DO%29C%28%3DO%29OC'
DEBUG:pubchempy:Created Compound(88560733)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=88560733'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCCCCCC%28%3DO%29OCC%28COP%28%3DO%29%28O%29OCCN%29OC%28%3DO%29CCCCCCCCCCCCC'
DEBUG:pubchempy:Created Compound(114944)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JS

DEBUG:pubchempy:Created Compound(72737870)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=72737870'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29CC1C%28%3DO%29NC%28C%28%3DO%29NC%28C%28%3DO%29NC%28C%28%3DO%29NC%28C%28%3DO%29N1%29CC2%3DCC%3DCC%3DC2%29CC%28C%29C%29CC%28C%29C%29CC%28C%29C'
DEBUG:pubchempy:Created Compound(163063235)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163063235'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%28C%29C%28%3DO%29OC1C%28C%28C%28C2%28C1OC34CC%28%3DO%29OC%28C3%28CC2C%28%3DO%29C4%3DC%29C%29C5%3DCOC%3DC5%29C%29C%28C%28%3DO%29OC%29O%29%28

DEBUG:pubchempy:Request data: b'smiles=CCCCCC1CC%28C%28C%3DC1%29CCCCCC%28C%28CCC2%28CC%28CC2CC3%3DCC%3DC%28N3%29C4%3DCC%28%3DCC%28%3DC4%29%5BO-%5D%29O%29C5%28CCCC5%29C6%3DCC%28%3DNC%3DC6%29N%29O%29O%29C%28%3DO%29O%29O'
DEBUG:pubchempy:Created Compound(162794111)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162794111'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29NC1CCC2%3DCC%28%3DC%28C%28%3DC2C3%3DCC%3DC%28C%28%3DO%29C%3DC13%29NCCCCCC%28%3DO%29NCCCN%28C%29C%29OC%29OC%29OC'
DEBUG:pubchempy:Created Compound(162968505)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162968505'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound

RDKit ERROR: [13:23:47] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:23:47] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:23:47] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:23:47] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:23:47] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:23:47] SMILES Parse Error: syntax error while parsing: 
[13:23:47] SMILES Parse Error: syntax error while parsing: 
[13:23:47] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:23:47] SMILES Parse Error: syntax error while parsing: 
[13:23:47] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:23:47] SMILES Parse Error: syntax error while parsing: 
[13:23:47] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:23:47] SMILES Parse Error: syntax error while parsing: 
[13:23:47] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' 

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%5BC%40H%5D%28C%29%5BC%40%40H%5D%28C%28%3DO%29O%29NC%28%3DO%29%5BC%40H%5D%28CCCN%3DC%28N%29N%29N'
DEBUG:pubchempy:Created Compound(7009553)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=7009553'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DCNC%28%3DO%29NC1%3DO'
DEBUG:pubchempy:Created Compound(1174)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=1174'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DCC%28%3DCC%3DC1C%5BC%40%40H%5D%28C%28%3DO%29O%29N%29O'
DEBUG:pubchempy:Created Compound(6057)
DEBUG:pubchempy:R

DEBUG:pubchempy:Request data: b'smiles=C1CC%28C%3DC1%29CCCCCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(5282854)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5282854'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%28C%28N%28CS1%29C%28%3DO%29OC%28C%29%28C%29C%29C%28%3DO%29O%29C'
DEBUG:pubchempy:Created Compound(4712503)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4712503'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%3DCCC%3DCCC%3DCCC%3DCCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(54154481)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5415

DEBUG:pubchempy:Request data: b'cid=2870263'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCCCCCCCCOCC%28COP%28%3DO%29%28%5BO-%5D%29OCC%5BN%2B%5D%28C%29%28C%29C%29OC%28%3DO%29%2FC%3DC%2FC'
DEBUG:pubchempy:Created Compound(45138381)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=45138381'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCC%28%3DO%29C%3DCC%3DO'
DEBUG:pubchempy:Created Compound(216297)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=216297'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCC%2FC%3DC%5CCCCCCCC

DEBUG:pubchempy:Request data: b'smiles=CCC1CCCCC%28C%28CC%23C1%29C%28CCC2%3DCC%28%3DC%28C%3DC2%29O%29OC%28CC%28C3%3DCC%28%3DNC%3DC3%29N%29C4%28CCCCC4%29C5%3DCC%3DC%5BN-%5D5%29CO%29O%29O'
DEBUG:pubchempy:Created Compound(162973866)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162973866'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC1%3DC%28C%28%3DO%29NC1%3DCC2%3DC%28C%28%3DC%28N2%29C%3DC3C%28%3DC%28C%28%3DCC4%3DNC%28%3DO%29C%28%3DC4CC%29C%29N3%29CCC%28%3DO%29O%29C%29C%29CCC%28%3DO%29O%29C'
DEBUG:pubchempy:Created Compound(4478237)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4478237'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Re

DEBUG:pubchempy:Created Compound(74000086)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=74000086'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCC%28CC%28C1C%28O1%29%28C%29C2CCCC2C3%3DCC%28%3DCC%3DC3%29N%29O%29C4%3DC5CCC6C%28C5%28CC4%3DO%29C%29%28CCC7C6%28CC%28C%28%3DO%29C7%28C%29C%29C8%3DCC%28%3DCC%3DC8%29O%29C%29C'
DEBUG:pubchempy:Created Compound(162831547)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162831547'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%28CCCCCCCCCCCC%28%3DO%29OCC%28CO%29O%29CCCCCCCCCCCO'
DEBUG:pubchempy:Created Compound(10072879)
DEBUG:pubchempy:Req

RDKit ERROR: [13:48:11] SMILES Parse Error: syntax error while parsing: 
[13:48:11] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:11] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:11] SMILES Parse Error: syntax error while parsing: 
[13:48:11] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:11] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:11] SMILES Parse Error: syntax error while parsing: 
[13:48:11] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:11] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:11] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:11] SMILES Parse Error: syntax error while parsing: 
[13:48:11] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:11] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:11] SMILES Parse Error: Failed parsin

RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:12] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:12] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' 

[13:48:12] SMILES Parse RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
Error: Failed parsing SMILES ' ' for input: ' '
[13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error w

RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:12] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: syntax error while parsing: 
[13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:12] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:12] SMILES Parse Error: Failed parsing SMILES ' ' 

RDKit ERROR: [13:48:13] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:13] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:13] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:13] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:13] SMILES Parse Error: syntax error while parsing: 
[13:48:13] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:13] SMILES Parse Error: syntax error while parsing: 
[13:48:13] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:13] SMILES Parse Error: syntax error while parsing: 
[13:48:13] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [13:48:13] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[13:48:13] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [13:48:13] SMILES Parse Error: syntax error while parsing: 
[13:48:13] SMILES Parse Error: syntax error while parsing:

DEBUG:pubchempy:Request data: b'cid=5229250'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC12CCCC%28C1CCC3%28C2CCC%28%3DC3%29C%28C%29%28C%29O%29O%29%28C%29C%28%3DO%29O'
DEBUG:pubchempy:Created Compound(162946291)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162946291'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DCCOC1%3DCC2%3DC%28C%3DC1%29C%28%3DCC%28%3DO%29O2%29C3%3DCC%3DC%28C%3DC3%29OC%29C'
DEBUG:pubchempy:Created Compound(799534)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=799534'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request d

DEBUG:pubchempy:Created Compound(14345441)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=14345441'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1CCCC%28CC1%29CCCCCCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(54175293)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=54175293'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%28C%28CC2%28C1C%28OC%3DC2C%28%3DO%29OC%29OC3C%28C%28C%28C%28O3%29CO%29O%29O%29O%29O%29OC%28%3DO%29C%3DCC4%3DCC%28%3DC%28C%3DC4%29OC%29OC%29O'
DEBUG:pubchempy:Created Compound(74191793)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG

INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29C%28%3DC1CCCC2%28C1%29CCCC2%29CC%28C%28C%29C3CCC4%28C3%28CCC5C4%3DCC%28%3DO%29C6C5%28CC%28C%28C6%29O%29O%29C%29C%29O%29O'
DEBUG:pubchempy:Created Compound(162830197)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162830197'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DC2C%28C%28%3DO%29C3%28CC%28%3DCC%28C%28C2%28C%29C%29CC1OC%28%3DO%29C%29OC%28%3DO%29C%29C%28CC3OC%28%3DO%29C%29OC%28%3DO%29C%3DCC4%3DCC%3DCC%3DC4%29C%29O'
DEBUG:pubchempy:Created Compound(78385556)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=78385556'
DEB

DEBUG:pubchempy:Request data: b'smiles=CC1CC%28C%28C2%28C13C%28C%28CC2OC%28%3DO%29C%3DCC4%3DCC%3DCC%3DC4%29C%28O3%29%28C%29C%29OC%28%3DO%29C%29C%29OC%28%3DO%29C%29OC%28%3DO%29C5%3DCC%3DCC%3DC5'
DEBUG:pubchempy:Created Compound(162819247)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162819247'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCCCCCCCCCCN1C2%3DCC%3DCC%3DC2C%28C1%3DCN%3DC3C4%3DC%28C%3DCC3%3DO%29N%3DCC%3DC4%29%28C%29C'
DEBUG:pubchempy:Created Compound(4664749)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4664749'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29%28C%29CC%28%3DO%29N1CCNC%28C1%29C%28%3DO%29N'
DEBUG:pubchempy:Created Compound(78441550)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=78441550'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%28CCC%28%3DO%29OC%28COP%28OCCN%29%28O%29%3DO%29COCCCCCCCCCCCCCCCC%29%3DCCC%3DCCC%3DCCC%3DCCCCCCCCC'
DEBUG:pubchempy:Created Compound(132993600)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=132993600'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28

DEBUG:pubchempy:Created Compound(10392369)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=10392369'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCCC%3DCCC%3DCCC%3DCCCCCC%28%3DO%29OCC%28CO%29O'
DEBUG:pubchempy:Created Compound(76043349)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=76043349'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCCC%3DCCC%3DCCCCCCC%3DO'
DEBUG:pubchempy:Created Compound(556280)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=556280'
DEBUG:pubchempy:Request URL:

DEBUG:pubchempy:Request data: b'cid=74735420'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCCC%3DCCC%3DCCCCCCCCC%3DO'
DEBUG:pubchempy:Created Compound(33532)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=33532'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1C%28%3DO%29N%28C%28C%28%3DO%29OC%28C%28%3DO%29NC%28C%28%3DO%29OC%28C%28%3DO%29N%28C%28C%28%3DO%29N1%29CC2%3DCC%3DCC%3DC2%29C%29CC%28C%29C%29C%29C%28C%29%28C%29O%29C%29C'
DEBUG:pubchempy:Created Compound(73172832)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73172832'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.

DEBUG:pubchempy:Request data: b'cid=636130'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCC%23CC%23CC%28CCCCC%28%3DO%29OC%29O'
DEBUG:pubchempy:Created Compound(14309417)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=14309417'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29OCC1CC2%28C%28CCC3%28C2CC%3DC4C3%28CCC5%28C4C%28C%28CC5%29%28C%29C%29CO%29C%29C%29C%29C%28C1COC%28%3DO%29C%29%28C%29C%29C'
DEBUG:pubchempy:Created Compound(163066546)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163066546'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https:/

Unnamed: 0.1,Unnamed: 0,Unnamed: 0.1_x,Unnamed: 0_x,X,id_X_x,premz,rtmed,rtmean,int,col_eng,...,synonyms,AnnotationSources,AnnotationCount,MSILevel,MCSS,candidate_list,superclass,class,subclass,ClassificationSource
0,0,0,1,1,endo_posM160R32ID1,160.180511,32.058754,32.058754,6.466272e+07,30,...,"['sym-homospermidine', '4427-76-3', 'bis(4-ami...",SIRIUS,1.0,3.0,,endo_pos_sirius_Candidate_Selection/160.180511...,Organic nitrogen compounds,Organonitrogen compounds,Amines,CANOPUS
1,1,1,2,2,endo_posM217R34ID2,217.067947,34.428924,34.428924,9.586514e+06,30,...,,SIRIUS-Formula,,,,,,,,
2,2,2,3,3,endo_posM203R35ID3,203.052368,35.186543,35.186543,0.000000e+00,30,...,,,,,,,,,,
3,3,3,4,4,endo_posM252R38ID4,252.143799,38.253020,38.253020,2.773368e+07,30,...,[],SIRIUS,1.0,3.0,,endo_pos_sirius_Candidate_Selection/252.143798...,,,,CANOPUS
4,4,4,5,5,endo_posM258R40ID5,258.109833,39.780506,39.780506,4.214060e+06,30,...,"['2-Hydroxymethyl-5-(1-hydroxypentyl)-3,4-pyrr...",SIRIUS,1.0,3.0,,endo_pos_sirius_Candidate_Selection/258.109832...,Organic nitrogen compounds,Organonitrogen compounds,Quaternary ammonium salts,CANOPUS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
521,521,521,522,522,endo_posM763R587ID522,762.513306,586.948800,587.523008,9.368186e+04,30,...,[],SIRIUS,1.0,3.0,,endo_pos_sirius_Candidate_Selection/762.513305...,Lipids and lipid-like molecules,Fatty Acyls,Fatty acyl glycosides,CANOPUS
522,522,522,523,523,endo_posM789R594ID523,788.528015,593.836386,593.881798,8.136451e+04,30,...,[],SIRIUS,1.0,3.0,,endo_pos_sirius_Candidate_Selection/788.528015...,Organic acids and derivatives,Carboxylic acids and derivatives,"Amino acids, peptides, and analogues",CANOPUS
523,523,523,524,524,endo_posM333R593ID524,333.241608,592.974705,592.974705,0.000000e+00,30,...,[],SIRIUS,1.0,3.0,,endo_pos_sirius_Candidate_Selection/333.241607...,,,,CANOPUS
524,524,524,525,525,endo_posM651R600ID525,651.315918,600.063657,600.063657,8.092335e+03,30,...,[],SIRIUS,1.0,3.0,,endo_pos_sirius_Candidate_Selection/651.315917...,,,,CANOPUS


In [20]:
classification(resultcsv = file_id + "_mergedResults-with-one-Candidates.csv")

undefined stereo
stereo
tted undefined stereo
ted undefined stereo


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/VSNFQQXVMPSASB-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/FGCBPQOUONXDAL-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WPFZNRDJPZPUPO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/COLNVLDHVKWLRT-QMMMGPOBSA-N.json HTTP/1.1" 502 150
DEBUG:u

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OIEYMKBOTBEREC-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QBJHVUJMMMSLBM-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/MMVNLSXYHISWMN-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WSULKHZETMBVQK-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/PVMOACIZALDGDN-WXVHOTQCSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/GOTHVZAFXHJLBT-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/DFUFEXRFQYXOMO-UHFFFAOYSA-O.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/XYOFGSQBRNUECO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/JOTZRWWNBQVGNQ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/PCEGJEYPXPLDDI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/NEZDNQCXEZDCBI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/IKYFJEUICLQXDZ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/PFFBRXQUYGIQAO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/PCEGJEYPXPLDDI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QXYRWIMCFAKSSQ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HECCMWNGTYQREI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ACHDMUPTZYZIGR-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/MFXMPUIXRGTKOU-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OWYNLPMPYBYKJP-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CAVJWTNXPCMJIR-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HECCMWNGTYQREI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ISWSIDIOOBJBQZ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CMOYCZQBKLDPNH-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/YXBWZJTYDIPDDB-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/YDOSHXAMLRSCKS-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ATIPDCIQTUXABX-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/SIFBPUMCNGQJGE-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WHWHVMRMOGUQKX-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/MYFMARDICOWMQP-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/FMBGXANJOQLLNA-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/RRZQRPDFHNQSJD-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QAFYGHBGWCPRCI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QGERVCWHOOSNLS-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/JQBOIGWHYPDAGZ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/LGZPAQOAFXJDTR-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

AttributeError: 'DataFrame' object has no attribute 'inchikey'

COLNVLDHVKWLRT-QMMMGPOBSA-N
https://gnps-classyfire.ucsd.edu/entities/COLNVLDHVKWLRT-QMMMGPOBSA-N.json
https://gnps-classyfire.ucsd.edu/entities/COLNVLDHVKWLRT-UHFFFAOYSA-N.json
QRFMLXHMTWCHKI-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/QRFMLXHMTWCHKI-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/QRFMLXHMTWCHKI-UHFFFAOYSA-N.json
SORYERHBQFTRIK-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/SORYERHBQFTRIK-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/SORYERHBQFTRIK-UHFFFAOYSA-N.json
ZPAJZAMPZXISSE-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ZPAJZAMPZXISSE-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/ZPAJZAMPZXISSE-UHFFFAOYSA-N.json
NOSOQGDGCLYEEI-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/NOSOQGDGCLYEEI-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/NOSOQGDGCLYEEI-UHFFFAOYSA-N.json
GOTHVZAFXHJLBT-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/GOTHVZAFXHJLBT-UHFFFAOYSA-N.json
https:/

WPFZNRDJPZPUPO-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/WPFZNRDJPZPUPO-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/WPFZNRDJPZPUPO-UHFFFAOYSA-N.json
XVMHOOYMJFUEQW-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/XVMHOOYMJFUEQW-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/XVMHOOYMJFUEQW-UHFFFAOYSA-N.json
KIIIPQXXLVCCQP-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/KIIIPQXXLVCCQP-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/KIIIPQXXLVCCQP-UHFFFAOYSA-N.json
OXXJZDJLYSMGIQ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/OXXJZDJLYSMGIQ-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/OXXJZDJLYSMGIQ-UHFFFAOYSA-N.json
ZDDFOEZPFDWEQS-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ZDDFOEZPFDWEQS-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/ZDDFOEZPFDWEQS-UHFFFAOYSA-N.json
MMVNLSXYHISWMN-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/MMVNLSXYHISWMN-UHFFFAOYSA-N.json
https:/

In [None]:
# EXO POS

In [21]:

ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_EXOpos/insilico/MS1DATA.csv"
file_id = "exo_pos_sirius"
sirius_candidate_json_path = r'/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/ms2_spectra_EXOpos/insilico/SIRIUS/no_isotope/*.json'
sirius_candidate_json = glob.glob(sirius_candidate_json_path)
sirius_candidate_json
db = "coconut"

sirius_postproc(ms1data, sirius_candidate_json, file_id, db)

no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file for structure or formula
no file fo

Unnamed: 0.1,Unnamed: 0,X,id_X,premz,rtmed,rtmean,int,col_eng,pol,ms2Peaks,ms1Peaks,neutral_mass,SIRIUSCSV
0,1,1,endo_negM83R36ID1,82.537003,35.632626,35.632626,7.722125e+04,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,
1,2,2,endo_negM101R37ID2,100.956718,37.186119,37.186119,2.867520e+05,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_pos_sirius/coconut_results_for_negM101R37I...
2,3,3,endo_negM112R39ID3,112.012627,39.483221,39.483221,3.044286e+07,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_pos_sirius/coconut_results_for_negM112R39I...
3,4,4,endo_negM411R37ID4,410.698517,37.170494,37.170494,7.507285e+05,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_pos_sirius/coconut_results_for_negM411R37I...
4,5,5,endo_negM104R42ID5,104.106888,41.786335,41.786335,7.744113e+05,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_pos_sirius/coconut_results_for_negM104R42I...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036,1037,1037,endo_negM369R470ID1037,369.123383,469.585845,469.604187,7.053756e+05,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_pos_sirius/coconut_results_for_negM369R470...
1037,1038,1038,endo_negM181R476ID1038,181.121964,476.028294,475.067651,2.449993e+05,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_pos_sirius/coconut_results_for_negM181R476...
1038,1039,1039,endo_negM605R570ID1039,605.419373,569.768352,569.816243,3.825896e+04,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,exo_pos_sirius/coconut_results_for_negM605R570...
1039,1040,1040,endo_negM791R582ID1040,791.469666,581.898384,581.948596,2.620681e+04,30,pos,ms2_spectra_EXOpos/insilico/peakfiles_ms2/Peak...,no ms1 peaks in QC,no mass from CAMERA,


In [23]:
ms1data = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/exo_pos_sirius_MS1DATA.csv"
file_id = "exo_pos_sirius"
msp_file = "/Users/mahnoorzulfiqar/OneDriveUNI/GitHub-Repos/MAW/cwl/exo_pos_spectral_results.csv"
standards = False

CandidateSelection_SimilarityandIdentity(file_id = file_id, msp_file = msp_file, 
ms1data = ms1data, standards = False)

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%5BN%2B%5D%28C%29%28C%29CCO'
DEBUG:pubchempy:Created Compound(305)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=305'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%5BN%2B%5D%28C%29%28C%29CCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(134)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=134'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1%3DCSC%28%3DC1%29CC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(15970)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pub

DEBUG:pubchempy:Created Compound(163080574)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163080574'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1CCC%28%3DO%29NCCCCCC%28%3DO%29NCCCCCC%28%3DO%29NCC1'
DEBUG:pubchempy:Created Compound(9949800)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=9949800'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29CC%28%3DO%29C%28CC1%3DCC%3DCC%3DC1%29O'
DEBUG:pubchempy:Created Compound(11820298)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=11820298'
DEBUG:pubchempy:Request URL: https://pu

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=13442364'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28CCC%3DC%28C%29C%29C1CCC%28%3DCC1O%29C%28%3DO%29O'
DEBUG:pubchempy:Created Compound(162944174)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162944174'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=COC1%3DCC%3DC%28C%3DC1%29C%28%3DNN%29C2%3DCC%3DC%28C%3DC2%29OC'
DEBUG:pubchempy:Created Compound(259399)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=259399'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pu

DEBUG:pubchempy:Created Compound(132993910)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=132993910'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCC%28CCCCCCCC%28CC%28%3DO%29O%29O%29O'
DEBUG:pubchempy:Created Compound(5282923)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5282923'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCC%28C%23CC%28C%3DCCCCCCC%28%3DO%29O%29O%29O'
DEBUG:pubchempy:Created Compound(74318834)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=74318834'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Req

DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC%28%3DO%29OC2%3DC1C%3DCC%28%3DC2%29OCCSC3%3DNN%3DC%28O3%29C4%3DCC%3DCC%3DC4'
DEBUG:pubchempy:Created Compound(1973636)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=1973636'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1CN2CC%28CC2C1C%28%3DO%29O%29O'
DEBUG:pubchempy:Created Compound(78385755)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=78385755'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=O%3DC%28NCCCC%28%3DO%29O%29C'
DEBUG:pubchempy:Created Compound(18189)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:p

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3565645'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCCCC%28%3DCC2C%28CC1%29C%28%3DC%29C%28%3DO%29O2%29C%28%3DO%29O'
DEBUG:pubchempy:Created Compound(73312948)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73312948'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC12CCC%28%3DO%29C%3DC1CCC3C2C%28CC4%28C3CCC4%28C%28%3DO%29COC%28%3DO%29CCC%28%3DO%29NCC%28C%29%28C%29C%28C%28%3DO%29O%29O%29O%29C%29O'
DEBUG:pubchempy:Created Compound(3460420)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data

DEBUG:pubchempy:Request data: b'smiles=CC1CC%3DC%28C%28%3DO%29C1%29C%28C%29C'
DEBUG:pubchempy:Created Compound(107372)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=107372'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28CCC%28C%28C%29%28C%29O%29O%29C%28CO%29O'
DEBUG:pubchempy:Created Compound(10655888)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=10655888'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=COCCN%28CC1CCCN2C1CCCC2%29C%28%3DO%29CN3C%3DCC4%3DCC%3DCC%3DC43'
DEBUG:pubchempy:Created Compound(163082813)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=1

DEBUG:pubchempy:Created Compound(162943)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162943'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCC1C%28CCC1%3DO%29CC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(107126)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=107126'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCC%3DCCCCCCCCC%28%3DO%29NCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(123806906)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=123806906'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/

DEBUG:pubchempy:Request data: b'smiles=CC1%28OC%28%3DO%29C%28%3DC2NC%28%3DO%29N%28S2%29CC3%3DCC%3DCC%3DC3%29C%28%3DO%29O1%29C'
DEBUG:pubchempy:Created Compound(5704741)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5704741'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1COCCN1C%28%3DO%29NC2COC3C2OCC3NC4%3DNC%3DCC%28%3DN4%29C5%3DCC%3DCC%28%3DC5%29C%23N'
DEBUG:pubchempy:Created Compound(73148330)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73148330'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=COC%28%3DO%29CCCCCCCCC%28CNO%29O'
DEBUG:pubchempy:Created Compound(4836391)
DEBUG:pubchempy:Request URL: https://pubche

INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1C%28CC%28C%28O1%29OC%28C%29CCCCCC%28%3DO%29O%29O%29O'
DEBUG:pubchempy:Created Compound(78099881)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=78099881'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%28C%28%3DO%29NC%28CC1%3DCC%3DCC%3DC1%29C%28%3DO%29NC%28CCCCN%29C%28%3DO%29O%29N%29O'
DEBUG:pubchempy:Created Compound(18224434)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=18224434'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC%28%3DO%

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CNC%28%3DO%29Oc%28c3%29cc%28c%28c3%291%29C%28C%29%28C2%29C%28N%28C%29C2%29N%28C%291'
DEBUG:pubchempy:Created Compound(4811)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4811'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=O%3DC1C2%3DCC%3DC%28O%29C%3DC2OC%3DC1C%3D3C%3DCC%28OC%29%3DC%28O%29C3'
DEBUG:pubchempy:Created Compound(5280448)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5280448'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCC%3DC%28C%29C%28C%28C%29C%28%3DO%29N1CCCC1C%28%3DO%29O%29O'
DEBUG:pubchempy:Crea

DEBUG:pubchempy:Created Compound(51052247)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=51052247'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCCC%3DCCC%3DCCC%23CCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(162943)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162943'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1C%28C%28CC%28O1%29OC2CCC3%28C4CCC5%28C%28CCC5%28C4CCC3%28C2%29O%29O%29C6%3DCC%28%3DO%29OC6%29C%29C%3DNCCCO%29O%29O'
DEBUG:pubchempy:Created Compound(4256513)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4256513'
DEBUG:pubchempy:Reque

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3461696'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29OC1CC%28C2%28C3CCC4%28C%28OC%28%3DO%29C5C4%28C3%28C%28C6C2C1%28CO6%29C%29O%29C%29O5%29C7%3DCOC%3DC7%29C%29C%29OC%28%3DO%29C'
DEBUG:pubchempy:Created Compound(75254066)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=75254066'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCN%28CC%29CCN1C%3DNC2%3DC1C%28%3DO%29N%28C%28%3DO%29N2C%29C'
DEBUG:pubchempy:Created Compound(28329)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEB

DEBUG:pubchempy:Request data: b'smiles=CCOC%28%3DO%29C1C%28O1%29%28C%29C2CCCC2C%28%3DO%29OC'
DEBUG:pubchempy:Created Compound(162788323)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162788323'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC%28%3DO%29CC%28C1CCC%28%3DO%29C%29%28C%29C'
DEBUG:pubchempy:Created Compound(592057)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=592057'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28CCC%28%3DO%29OC1CC2C3CC%3DC4CC%28CCC4%28C3CCC2%28C1NC%28%3DO%29C%29C%29C%29OC%28%3DO%29C%29COC%28%3DO%29C'
DEBUG:pubchempy:Created Compound(3732739)
DEBUG:pubchempy:Request URL: http

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCOC%28%3DO%29CCC1%3DC%28C%28%3DC2C%28%3DC1%29C%3DCO2%29OCC%5BNH2%2B%5DC3CCCC3%29OC4C5%28C6C%28CCC%28C6CO%29O%29CC%28C5O%29%28C%28O4%29CO%29O%29O'
DEBUG:pubchempy:Created Compound()
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%5BN%2B%5D%28C%29%28C%29C1CCCCC%28%3DO%29C1'
DEBUG:pubchempy:Created Compound()
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=COCC%28%3DO%29NC1COC2C1OCC2OC%28%3DO%29NC3%3DCC%3DC%28C%3DC3%29OC'
DEBUG:pubchempy:Created Compound(73133196)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73133196'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.ni

INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28CCC1%3DCC%28%3DO%29CC2C1%28CCCC2%28C%29C%29C%29CC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(162908135)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162908135'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC%3DC%28C%28CC%28%3DCCCC%28%3DC%29C%28CC1%29OC%28%3DO%29C%29C%29OC%28%3DO%29C%29C%28C%29C'
DEBUG:pubchempy:Created Compound(72730092)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=72730092'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEB

DEBUG:pubchempy:Created Compound(675)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=675'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CN1C2%3DC%28C%28%3DO%29N%28C1%3DO%29CCCN%28C%29C%29N%28C%3DN2%29CC%28CO%29O'
DEBUG:pubchempy:Created Compound(3812059)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3812059'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29C1%3DCNC2%3DCC%3DCC%3DC21'
DEBUG:pubchempy:Created Compound(12802)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=12802'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound

DEBUG:pubchempy:Request data: b'cid=24013836'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCC%28CCCC%28%3DO%29OCC%28CO%29O%29O'
DEBUG:pubchempy:Created Compound(5463954)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5463954'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DC%29C1%3DCC2%3DC%28C%3DC1%29C3%28CCCC%28C3CC2%29%28C%29C%28%3DO%29OC%29C'
DEBUG:pubchempy:Created Compound(15609155)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=15609155'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DNC2%3DCC%3DCC%3DC2C%28%3DO%29N1CCOC%28%3DO%29C3%3DCC%3DC%28C

DEBUG:pubchempy:Request data: b'smiles=CC12CCCC%28C1C%28%3DO%29OC2%3DO%29C3CCC4CC3%28CC4%3DC%29C%3DO'
DEBUG:pubchempy:Created Compound(163002603)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163002603'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC12CCCC3%28C1C%28C45C3CCC%28C4%29%28C%28%3DC%29C5%29O%29C%28%3DO%29O%29OC2%3DO'
DEBUG:pubchempy:Created Compound(4632016)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4632016'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCC%3DCCCCCCCCC%28%3DO%29OCC%28CO%29O'
DEBUG:pubchempy:Created Compound(3377134)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/

DEBUG:pubchempy:Request data: b'cid=530333'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCC%3DCC%23CC%23CCCCCCCCC%28%3DO%29OC'
DEBUG:pubchempy:Created Compound(162881089)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162881089'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DCC%3DCC%3DC%28C%29C%3DCC%3DC%28C%29C%3DCC12C%28CC%28CC1%28O2%29C%29O%29%28C%29C%29C%3DCC%3DC%28C%29C%3DCC3%28C%28CC%28CC3%28C%29O%29O%29%28C%29C%29O'
DEBUG:pubchempy:Created Compound(162892430)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162892430'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=OC%3D1C%3DC%28O%29C2%3DC%28OC3%28OC4%3DCC%28O%29%3DC5C%28OC%28C6%3DCC%3DC%28O%29C%28O%29%3DC6%29C%28O%29C5%29%3DC4C2C3O%29C7%3DCC%3DC%28O%29C%28O%29%3DC7%29C1'
DEBUG:pubchempy:Created Compound(5089889)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5089889'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%23CCO'
DEBUG:pubchempy:Created Compound(80421)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=80421'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC2%3DCC%28%3DO%29CCC2CC1'
DEBUG:pubchempy:Created Compound(6193

DEBUG:pubchempy:Request data: b'cid=16781789'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCCC%28%3DO%29C%28%3DCC2C%28CC1%29C%28%3DC%29C%28%3DO%29O2%29C'
DEBUG:pubchempy:Created Compound(162996081)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162996081'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DC%5BCH%2B%5DC%28%3DO%29%5BN-%5DCC%28CC%28CC%28%3DO%29NCCC%28%3DN%29N%29N%29O'
DEBUG:pubchempy:Created Compound(162851908)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162851908'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
D

DEBUG:pubchempy:Created Compound(163023984)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163023984'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCCC%3DCCC%3DCCC%3DCCCCCC%28%3DO%29OCC%28CO%29O'
DEBUG:pubchempy:Created Compound(76043349)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=76043349'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28CCC%28CCCO%29C%28CC%28C1%28CCNCC1%29CC2%3DC3CCCOC3%3DC%28C%3DC2%29O%29OC%28%3DO%29C%28CNC%29NC%29OC%28%3DO%29C%29O'
DEBUG:pubchempy:Created Compound(163083090)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/comp

DEBUG:pubchempy:Request data: b'smiles=C%5BN%2B%5D1%28CCCC1C%28%3DO%29O%29C'
DEBUG:pubchempy:Created Compound(555)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=555'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1C2C%28C%28S1%29CCCCC%28%3DO%29NN%29NC%28%3DO%29N2'
DEBUG:pubchempy:Created Compound(2832898)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=2832898'
[10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMI

RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: 

RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
RDKit ERROR: [10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: syntax error while parsing: 
[10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' '
[10:17:29] SMILES Parse Error: syntax error while parsing: 
RDKit ERROR: [10:17:29] SMILES Parse Error: Failed parsing SMILES ' ' for input: ' 

DEBUG:pubchempy:Request data: b'cid=163058635'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC1CN%28CCC1CC%28%3DO%29NCCOC%29C%28%3DO%29N2CCOCC2'
DEBUG:pubchempy:Created Compound(73138881)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73138881'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCC%3DO'
DEBUG:pubchempy:Created Compound(8063)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=8063'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%28C%23CC%23CC%28CO%29O%29O'
DEBUG:pubchempy:Created Compo

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=14162697'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%28%3DO%29N%29NC%28%3DO%29C1CN%28CCN1C%28%3DO%29NC2CCCCC2%29C%28%3DO%29C'
DEBUG:pubchempy:Created Compound(163153972)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163153972'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC12CCC3C%28C1CCC2%28C%29O%29CCC4C3%28CC%28C%28C4%29O%29CO%29C'
DEBUG:pubchempy:Created Compound(3513093)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3513093'
DEBUG:pubchempy:Request URL: https://pubch

DEBUG:pubchempy:Created Compound(162888726)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162888726'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=COC1%3DCC%28%3DCC%28%3DC1O%29OC%29C2%3DCC%28%3DC%28C%28%3DC2%29OC%29O%29OC'
DEBUG:pubchempy:Created Compound(256604)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=256604'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCC%3DCCCCCCCCC%28%3DO%29OCC%28CO%29O'
DEBUG:pubchempy:Created Compound(33022)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=33022'
DEBUG:pubchempy:Request URL: h

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=8058'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29CC%28C%28%3DO%29NCCCCN%3DC%28N%29N%29NC%28%3DO%29C1C%28O1%29C%28%3DO%29O'
DEBUG:pubchempy:Created Compound(4641099)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=4641099'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCCCCC%28C%28%3DO%29NN%29OC1%3DCC%3DC%28C%3DC1%29%5BN%2B%5D%28%3DO%29%5BO-%5D'
DEBUG:pubchempy:Created Compound(2753168)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=2753168'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.g

DEBUG:pubchempy:Created Compound(18408254)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=18408254'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28CC%28%3DO%29OC%28C%29CC%28%3DO%29O%29O'
DEBUG:pubchempy:Created Compound(3375153)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3375153'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCC%28C%3DCCC%28C%3DC%29O%29O'
DEBUG:pubchempy:Created Compound(162821746)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162821746'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1C%28CCC%28%3DC%29C2CC%28C2CC1%3DO%29%28C%29C%29O'
DEBUG:pubchempy:Created Compound(85521472)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=85521472'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29C1%3DNOC%28%3DN1%29C2CC%28CN2C%29NC%28%3DO%29NC3CCCCC3'
DEBUG:pubchempy:Created Compound(74577547)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=74577547'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=COC1%3DC%28C%3DC2C%28%3DC1%29C%28%3DO%29C%3DC%2

DEBUG:pubchempy:Request data: b'smiles=CCCCN%28CCCC%29CC1%3DC%28C%3DCC2%3DC1OC%28%3DCC3%3DCC%28%3DC%28C%3DC3OC%29OC%29OC%29C2%3DO%29O'
DEBUG:pubchempy:Created Compound(72170092)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=72170092'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%28C%28N2C%28S1%29C3%3DCC%3DCC%3DC3C2%3DO%29C%28%3DO%29NCC4%3DCC%3DCC%3DC4OC%29C'
DEBUG:pubchempy:Created Compound(3835261)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3835261'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCC%3DCCCCCCCCC%28%3DO%29OCC%28COP%28%3DO%29%28O%29OCC%5BN%2B%5D%28C%29%

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=74735124'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1CC1NC%28%3DO%29CC2CC3C%28O2%29C%28C%28O3%29CN%29O'
DEBUG:pubchempy:Created Compound(162790749)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162790749'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DNC%3DC%28C%3DC1%29O'
DEBUG:pubchempy:Created Compound(14275)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=14275'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compo

DEBUG:pubchempy:Created Compound(162874780)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162874780'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=COC1%3DCC%3DC%28NC%28%3DO%29CC%28C%29%3DO%29C%3DC1'
DEBUG:pubchempy:Created Compound(21576)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=21576'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29NC1%3DCC%28%3DNC%3DN1%29N%28C%29C%28%3DO%29C'
DEBUG:pubchempy:Created Compound(2755866)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=2755866'
DEBUG:pubchempy:Request URL: https://pu

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162790287'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1CCC%28CC1O%29C%28C%29C'
DEBUG:pubchempy:Created Compound(86850)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=86850'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCCC1C%28CCC1%3DO%29CC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(557758)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=557758'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smil

DEBUG:pubchempy:Request data: b'smiles=CC%28C1%28CCC%28CC%28C2CC3%3DCC%28%3DC%28C%3DC3CO%29O%29OC4CCNC%28C4%29%28CCCC2C%23C1%29CNC%29OC%28%3DO%29C%29O%29CNC%28%3DNC%29N%29O'
DEBUG:pubchempy:Created Compound(163039322)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163039322'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DC%28C%28CC%28C1%3DO%29OC%28%3DO%29C%29%28C%29C%29C%3DCC%28%3DCC%3DCC%28%3DCC%3DCC%3DC%28C%29C%3DCC%3DC%28C%29C%3DCC2%3DC%28C%28%3DO%29C%28CC2%28C%29C%29OC%28%3DO%29C%29C%29C%29C'
DEBUG:pubchempy:Created Compound(73835373)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73835373'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest

INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29CCN1C2%3DC%28N%3DC1NCCO%29N%28C%28%3DO%29NC2%3DO%29C'
DEBUG:pubchempy:Created Compound(3145100)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3145100'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCOC%28C2C1%28C%28C3C2%28O3%29C%29O%29O%29OC4C%28C%28C%28C%28O4%29CO%29O%29O%29O'
DEBUG:pubchempy:Created Compound(162991634)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162991634'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC%28CC%2

DEBUG:pubchempy:Created Compound(5326566)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5326566'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1CN2CC%28C1CC2CNC%28%3DO%29C3%3DCC%3DCC%3DC3%29CN4CCOCC4'
DEBUG:pubchempy:Created Compound(74579456)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=74579456'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCOC1C%28C%28C%28C%28O1%29C%29O%29O%29O'
DEBUG:pubchempy:Created Compound(13059942)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=13059942'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pub

DEBUG:pubchempy:Created Compound(86146923)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=86146923'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCC%23CC%23CC%28%3DO%29CCCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(5312942)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=5312942'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%3DCCC%3DCCCC1CCC%28%3DO%29O1'
DEBUG:pubchempy:Created Compound(73158272)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=73158272'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: htt

DEBUG:pubchempy:Created Compound(625291)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=625291'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCOC%28%3DO%29C1%3DCC%3DCC%3DC1C%28%3DO%29OCCCCCCCC'
DEBUG:pubchempy:Created Compound(8346)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=8346'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29OCC1%28CCC2%28CCC3%28C%28%3DCCC4C3%28CCC5C4%28CC%28C%28C5%28C%29CO%29O%29O%29C%29C%29C2C1%29C%29C%29C'
DEBUG:pubchempy:Created Compound(162846545)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=162846545'
INFO:pubchemp

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCC%28C%28%3DO%29O%29NC%28%3DO%29CCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(22259938)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=22259938'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29OC1%3DCC%3DCC%28%3DC1%29N%3DNC2%3DCC%28%3DCC%3DC2%29OC%28%3DO%29C'
DEBUG:pubchempy:Created Compound(2749931)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=2749931'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28%3DO%29C1%3DCC%3DC%28C%3DC1%29NC%28%3DO%29N2CCN%28C%28C2%29C%28%3DO%29NC3CCCNC3%3DO%29C%28%3DO%29CCCC

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C1C%28C%28C%28C%28O1%29CO%29O%29O%29O'
DEBUG:pubchempy:Created Compound(219984)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=219984'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=C%28CSCC%28C%28%3DO%29O%29N%29N'
DEBUG:pubchempy:Created Compound(20049)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=20049'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1%3DCC%3DC%28C%3DC1%29S%28%3DO%29%28%3DO%29NCC%28%3DO%29OC'
DEBUG:pubchempy:Created Compound(562880)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/p

DEBUG:pubchempy:Created Compound(440265)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=440265'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC%28C%29NC1COC%28C1O%29CNC%28%3DO%29NC%28C%29C'
DEBUG:pubchempy:Created Compound(163075794)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=163075794'
INFO:pubchempy:'PUGREST.NotFound'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCCCCCC%3DCCCCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(33604)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=33604'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/p

DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCCCCC1C%28CCC1%3DO%29CCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(23219149)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=23219149'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CCC%3DCCC%3DCCC%3DCCC%3DCCCCCCCC%28%3DO%29O'
DEBUG:pubchempy:Created Compound(3080584)
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/synonyms/JSON
DEBUG:pubchempy:Request data: b'cid=3080584'
DEBUG:pubchempy:Request URL: https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/JSON
DEBUG:pubchempy:Request data: b'smiles=CC1CCC2%28C%3DCCCC2C1%28C%29CCC%28C%29CC%28%3DO%29O%29CO'
DEBUG:pubchempy:Created Compound(162884798)
DEBUG:pubchempy:Request URL: https://pubchem.nc

Unnamed: 0.1,Unnamed: 0,Unnamed: 0.1_x,Unnamed: 0_x,X,id_X_x,premz,rtmed,rtmean,int,col_eng,...,synonyms,AnnotationSources,AnnotationCount,MSILevel,MCSS,candidate_list,superclass,class,subclass,ClassificationSource
0,0,0,1,1,endo_negM83R36ID1,82.537003,35.632626,35.632626,7.722125e+04,30,...,,,,,,,,,,
1,1,1,2,2,endo_negM101R37ID2,100.956718,37.186119,37.186119,2.867520e+05,30,...,,,,,,,,,,
2,2,2,3,3,endo_negM112R39ID3,112.012627,39.483221,39.483221,3.044286e+07,30,...,,,,,,,,,,
3,3,3,4,4,endo_negM411R37ID4,410.698517,37.170494,37.170494,7.507285e+05,30,...,,,,,,,,,,
4,4,4,5,5,endo_negM104R42ID5,104.106888,41.786335,41.786335,7.744113e+05,30,...,"['choline', 'Choline ion', 'Bilineurine', '62-...",SIRIUS,1.0,3.0,,exo_pos_sirius_Candidate_Selection/104.1068878...,Organic nitrogen compounds,Organonitrogen compounds,Quaternary ammonium salts,CANOPUS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036,1036,1036,1037,1037,endo_negM369R470ID1037,369.123383,469.585845,469.604187,7.053756e+05,30,...,"['porphyra-334', 'SCHEMBL13532042', '[[3-[(1-C...",SIRIUS,1.0,3.0,,exo_pos_sirius_Candidate_Selection/369.1233825...,,,,CANOPUS
1037,1037,1037,1038,1038,endo_negM181R476ID1038,181.121964,476.028294,475.067651,2.449993e+05,30,...,[],SIRIUS,1.0,3.0,,exo_pos_sirius_Candidate_Selection/181.1219635...,,,,CANOPUS
1038,1038,1038,1039,1039,endo_negM605R570ID1039,605.419373,569.768352,569.816243,3.825896e+04,30,...,[],SIRIUS,1.0,3.0,,exo_pos_sirius_Candidate_Selection/605.4193725...,Lipids and lipid-like molecules,Prenol lipids,Triterpenoids,CANOPUS
1039,1039,1039,1040,1040,endo_negM791R582ID1040,791.469666,581.898384,581.948596,2.620681e+04,30,...,[],GNPS,1.0,2.0,,exo_pos_sirius_Candidate_Selection/791.4696655...,,,,


In [28]:
frame = pd.read_csv(file_id + "_mergedResults-with-one-Candidates.csv")
inchis = []
for i, row in frame.iterrows():
    if not isNaN(frame["SMILES"][i]):
        if "SIRIUS" not in frame["AnnotationSources"][i]:
            try:
                InChI = Chem.MolToInchi(Chem.MolFromSmiles(frame["SMILES"][i]))
                InChIKey = Chem.inchi.InchiToInchiKey(InChI)
                inchis.append(
                    {
                        "index": i,
                        "smiles": frame["SMILES"][i],
                        "inchi": InChI,
                        "inchikey": InChIKey,
                    }
                )
            except Exception:
                pass
        elif "SIRIUS" in frame["AnnotationSources"][i]:
            if isNaN(frame["superclass"][i]):
                try:
                    InChI = Chem.MolToInchi(Chem.MolFromSmiles(frame["SMILES"][i]))
                    InChIKey = Chem.inchi.InchiToInchiKey(InChI)
                    inchis.append(
                        {
                            "index": i,
                            "smiles": frame["SMILES"][i],
                            "inchi": InChI,
                            "inchikey": InChIKey,
                        }
                    )
                except Exception:
                    pass
inchis = pd.DataFrame(inchis)

 undefined stereo
ARNING: Omitted undefined stereo

NING: Omitted undefined stereo
ned stereo
ereo
ed undefined stereo
Omitted undefined stereo
d stereo


[13:14:

In [29]:
inchis

Unnamed: 0,index,smiles,inchi,inchikey
0,14,C1=CC=C(C=C1)C(=O)CCNCC(=O)O,InChI=1S/C11H13NO3/c13-10(6-7-12-8-11(14)15)9-...,XHSURMJJKAFELI-UHFFFAOYSA-N
1,23,CC(C)(C1CCC(=CC1)C(=O)OCC2C(C(C(C(O2)OC3(C(C(C...,"InChI=1S/C22H36O13/c1-21(2,31)11-5-3-10(4-6-11...",NRXDBALRMKOHET-UHFFFAOYSA-N
2,24,C1=CC=C2C(=C1)C(=O)N(C=N2)CCCC(=O)NC3=CC=CC=C3...,InChI=1S/C19H18N4O3/c20-18(25)13-6-1-4-9-16(13...,GYKLMRHRNCAEEZ-UHFFFAOYSA-N
3,28,CC1CC(C(C2=CC(=O)C3(C(C12C)O3)C(=C)COC(=O)C(C(...,InChI=1S/C19H27NO8/c1-8-4-11(21)14(24)10-5-13(...,HUOHZWBSCYBQDH-UHFFFAOYSA-N
4,29,COC1=CC=C(C=C1)CC(=O)N2CCC3CC(=O)N(CCC3C2)CCN4...,InChI=1S/C24H35N3O4/c1-30-22-4-2-19(3-5-22)16-...,XRFNRSSSMPGXBI-UHFFFAOYSA-N
...,...,...,...,...
418,1035,CC1CCC2(C=CCCC2C1(C)CCC(C)CC(=O)O)CO,InChI=1S/C19H32O3/c1-14(12-17(21)22)7-10-18(3)...,YQFZWTTZGCKOKU-UHFFFAOYSA-N
419,1036,CC(C(C(=O)O)NC1=C(C(=NCC(=O)O)CC(C1)(CO)O)OC)O,InChI=1S/C14H22N2O8/c1-7(18)11(13(21)22)16-9-4...,AWCCBAPDJMUZOK-UHFFFAOYSA-N
420,1037,CC1C(C1(C)C(=O)C)C=CC(=O)C,"InChI=1S/C11H16O2/c1-7(12)5-6-10-8(2)11(10,4)9...",XCRDXWBECXOVPH-UHFFFAOYSA-N
421,1039,OC1COC(OC2CCC34CC54CCC6(C)C(C(O)CC6(C)C5CC(OC7...,InChI=1S/C41H68O13/c1-19-26(44)28(46)30(48)34(...,UPADPCUOTDTWHH-UHFFFAOYSA-N


XRFNRSSSMPGXBI-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/XRFNRSSSMPGXBI-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/XRFNRSSSMPGXBI-UHFFFAOYSA-N.json
KMPWYEUPVWOPIM-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/KMPWYEUPVWOPIM-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/KMPWYEUPVWOPIM-UHFFFAOYSA-N.json
PXQXFZBYTZILJD-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/PXQXFZBYTZILJD-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/PXQXFZBYTZILJD-UHFFFAOYSA-N.json
GCUMPGCBTUSGIM-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/GCUMPGCBTUSGIM-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/GCUMPGCBTUSGIM-UHFFFAOYSA-N.json
NYTNVERZLHWZGZ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/NYTNVERZLHWZGZ-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/NYTNVERZLHWZGZ-UHFFFAOYSA-N.json
GSTVTHMQXVKNQF-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/GSTVTHMQXVKNQF-UHFFFAOYSA-N.json
https:/

NRXDBALRMKOHET-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/NRXDBALRMKOHET-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/NRXDBALRMKOHET-UHFFFAOYSA-N.json
GDWDBGSWGNEMGJ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/GDWDBGSWGNEMGJ-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/GDWDBGSWGNEMGJ-UHFFFAOYSA-N.json
BUIOBTSUIYLOKG-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/BUIOBTSUIYLOKG-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/BUIOBTSUIYLOKG-UHFFFAOYSA-N.json
CAVJWTNXPCMJIR-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/CAVJWTNXPCMJIR-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/CAVJWTNXPCMJIR-UHFFFAOYSA-N.json
MXTLAHSTUOXGQF-UHFFFAOYSA-O
https://gnps-classyfire.ucsd.edu/entities/MXTLAHSTUOXGQF-UHFFFAOYSA-O.json
https://gnps-classyfire.ucsd.edu/entities/MXTLAHSTUOXGQF-UHFFFAOYSA-N.json
CHPATGWRKBIUFF-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/CHPATGWRKBIUFF-UHFFFAOYSA-N.json
https:/

In [30]:
inchis = inchis.loc[-isNaN(inchis["inchikey"])]

In [31]:
inchis

Unnamed: 0,index,smiles,inchi,inchikey
0,14,C1=CC=C(C=C1)C(=O)CCNCC(=O)O,InChI=1S/C11H13NO3/c13-10(6-7-12-8-11(14)15)9-...,XHSURMJJKAFELI-UHFFFAOYSA-N
1,23,CC(C)(C1CCC(=CC1)C(=O)OCC2C(C(C(C(O2)OC3(C(C(C...,"InChI=1S/C22H36O13/c1-21(2,31)11-5-3-10(4-6-11...",NRXDBALRMKOHET-UHFFFAOYSA-N
2,24,C1=CC=C2C(=C1)C(=O)N(C=N2)CCCC(=O)NC3=CC=CC=C3...,InChI=1S/C19H18N4O3/c20-18(25)13-6-1-4-9-16(13...,GYKLMRHRNCAEEZ-UHFFFAOYSA-N
3,28,CC1CC(C(C2=CC(=O)C3(C(C12C)O3)C(=C)COC(=O)C(C(...,InChI=1S/C19H27NO8/c1-8-4-11(21)14(24)10-5-13(...,HUOHZWBSCYBQDH-UHFFFAOYSA-N
4,29,COC1=CC=C(C=C1)CC(=O)N2CCC3CC(=O)N(CCC3C2)CCN4...,InChI=1S/C24H35N3O4/c1-30-22-4-2-19(3-5-22)16-...,XRFNRSSSMPGXBI-UHFFFAOYSA-N
...,...,...,...,...
418,1035,CC1CCC2(C=CCCC2C1(C)CCC(C)CC(=O)O)CO,InChI=1S/C19H32O3/c1-14(12-17(21)22)7-10-18(3)...,YQFZWTTZGCKOKU-UHFFFAOYSA-N
419,1036,CC(C(C(=O)O)NC1=C(C(=NCC(=O)O)CC(C1)(CO)O)OC)O,InChI=1S/C14H22N2O8/c1-7(18)11(13(21)22)16-9-4...,AWCCBAPDJMUZOK-UHFFFAOYSA-N
420,1037,CC1C(C1(C)C(=O)C)C=CC(=O)C,"InChI=1S/C11H16O2/c1-7(12)5-6-10-8(2)11(10,4)9...",XCRDXWBECXOVPH-UHFFFAOYSA-N
421,1039,OC1COC(OC2CCC34CC54CCC6(C)C(C(O)CC6(C)C5CC(OC7...,InChI=1S/C41H68O13/c1-19-26(44)28(46)30(48)34(...,UPADPCUOTDTWHH-UHFFFAOYSA-N


In [32]:
"""
gnps_proxy = True
url = "http://classyfire.wishartlab.com"
proxy_url = "https://gnps-classyfire.ucsd.edu"
chunk_size = 1000
sleep_interval = 12
"""

all_inchi_keys = list(inchis["inchikey"].drop_duplicates())

resolved_ik_number_list = [0, 0]

In [33]:
 while True:

    # start_time = time.time()

    # print('%s inchikey to resolve' % total_inchikey_number )
    get_classifications_cf_mod(all_inchi_keys, par_level=6)

    cleanse("all_json.json", "all_json.json")

    with open("all_json.json") as tweetfile:
        jsondic = json.loads(tweetfile.read())

    df = json_normalize(jsondic)
    df = df.drop_duplicates("inchikey")
    resolved_ik_number = len(df.drop_duplicates("inchikey").inchikey)
    resolved_ik_number_list.append(resolved_ik_number)
    # print('%s resolved inchikeys' % resolved_ik_number )
    # print("done in --- %s seconds ---" % (time.time() - start_time))

    if (
        resolved_ik_number_list[-1] < resolved_ik_number_list[-2]
        or resolved_ik_number_list[-1] == resolved_ik_number_list[-3]
    ):
        break
    cleanse("all_json.json", "all_json_cleaned.json")

    with open("all_json_cleaned.json") as tweetfile:
        jsondic = json.loads(tweetfile.read())

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): gnps-classyfire.ucsd.edu:443
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/NRXDBALRMKOHET-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/XHSURMJJKAFELI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/GYKLMRHRNCAEEZ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-clas

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ZHMGFSLYVIQXCR-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/GGJRAQULURVTAJ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/TXWSMSPRFREESP-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/GSTVTHMQXVKNQF-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QHELXIATGZYOIB-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CHPATGWRKBIUFF-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ZHMGFSLYVIQXCR-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/IOHFKXXYPVEHCV-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HAGKOYTUAINBNR-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CFNQSQWVQAALGO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/UXMMIMGEKFYPFK-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/YWPIANMBCWTPEE-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/RNWHJFUXZQBBLK-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CFNQSQWVQAALGO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/YBWRTYGQIKFYGT-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/AXPZWZNFVKVRRH-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ATWLFQWAUVXIEG-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/VAQVCNUMZITNLC-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/XZDCIORWACLZKX-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/FDUZTAGLNCBNSN-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ATWLFQWAUVXIEG-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HHZXUCYNPCWYJI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/SGANUWYCIFTIGM-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/LVXMZSNPIXUAFS-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/YNZHDHOLDNJBTI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CFLVYJJIZHNITM-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/DOKCNDVEPDZOJQ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/SGANUWYCIFTIGM-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/YAACYYNCHMHECD-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/XGESKIWNDBIILZ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OYXZMSRRJOYLLO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/KNUYHEQHOCPQAW-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HBFARTJEFUKQDS-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HEZNVIYQEUHLNI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HEZNVIYQEUHLNI-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CHPDRVUVDOLQOB-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HPSZQGQRLMRJLO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HGKBUFQLERZSEX-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/DQRDSEOBJZHHET-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/GFMYIOGFYYHKLA-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/NIOKVQPUJKYTOZ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/NYHBQMYGNKIUIF-UUOKFMHZSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ZJXSJVWONMTTER-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QBZJGHGGRYDFGP-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/NRTOUGPMXLJETQ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/XNEJGMIDRAJPEO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WYFRRYWIUVEOAM-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/BJEPALDWQRJLJD-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/STUMFFBVBKIJSG-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QRJVLGBTAPYMIM-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/DAEOHFMQYYWHJP-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/FOPALECPEUVCTL-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/QUPFAVJVCWBXBS-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/GHBZVNHEVHMKOB-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/DMXHGHMHPMJTAD-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/FOPALECPEUVCTL-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/IBUAPFMCBQEHGC-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OFIDNKMQBYGNIW-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ZEWGSHDZCDJZJF-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/JEAWDPPGGHIQAA-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HDLNSTQYXPTXMC-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/IBUAPFMCBQEHGC-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/ZEWGSHDZCDJZJF-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WZXQWLWGLWIQGK-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WSXLBHUODQJZTO-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OFKQSXLEVOMFHN-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/VXGVKPGULBZBOZ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OKGIACCGHKKYEE-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WZXQWLWGLWIQGK-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OFKQSXLEVOMFHN-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/CUNSWQQUFGMINE-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/OHFPODKNNYZZIR-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/HRYSXCQQEVMPFB-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/JJRYPZMXNLLZFH-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/WRIDQFICGBMAFQ-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/NYCIXAIPHCQNDD-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /entities/YJCJVMMDTBEITC-UHFFFAOYSA-N.json HTTP/1.1" 502 150
DEBUG:urllib3.connectionpool:https://gnps-classyfire.ucsd.edu:443 "GET /enti

AttributeError: 'DataFrame' object has no attribute 'inchikey'

In [36]:
all_inchi_keys

['XHSURMJJKAFELI-UHFFFAOYSA-N',
 'NRXDBALRMKOHET-UHFFFAOYSA-N',
 'GYKLMRHRNCAEEZ-UHFFFAOYSA-N',
 'HUOHZWBSCYBQDH-UHFFFAOYSA-N',
 'XRFNRSSSMPGXBI-UHFFFAOYSA-N',
 'DWSGZUNYLCQKEF-UHFFFAOYSA-N',
 'KMPWYEUPVWOPIM-UHFFFAOYSA-N',
 'AWYMRTKEVGBNRW-UHFFFAOYSA-N',
 'IKFFQIXHTZRUGX-UHFFFAOYSA-N',
 'SZVNKXCDJUBPQO-UHFFFAOYSA-N',
 'GDWDBGSWGNEMGJ-UHFFFAOYSA-N',
 'JTVPZMFULRWINT-UHFFFAOYSA-N',
 'QFZISQBFEIXWDM-UHFFFAOYSA-N',
 'URJSQVJWZNGFOL-UHFFFAOYSA-N',
 'AJYXUNWZABAKQI-UHFFFAOYSA-N',
 'PXQXFZBYTZILJD-UHFFFAOYSA-N',
 'VNQXSTWCDUXYEZ-UHFFFAOYSA-N',
 'BUIOBTSUIYLOKG-UHFFFAOYSA-N',
 'XVYSFJOWUXMIEN-UHFFFAOYSA-N',
 'QDEZXRSOIDPJTE-UHFFFAOYSA-N',
 'ASZVZWUNIUXBJT-UHFFFAOYSA-N',
 'CAVJWTNXPCMJIR-UHFFFAOYSA-N',
 'GCUMPGCBTUSGIM-UHFFFAOYSA-N',
 'BINIYSYTGGGFLC-UHFFFAOYSA-N',
 'NYTNVERZLHWZGZ-UHFFFAOYSA-N',
 'IJBTXFFPFVNDAG-UHFFFAOYSA-N',
 'MXTLAHSTUOXGQF-UHFFFAOYSA-O',
 'ZHADMZSKNJUERU-UHFFFAOYSA-N',
 'FRNWBAJBSISRNI-UHFFFAOYSA-N',
 'BQIBBYLXJDSLIR-UHFFFAOYSA-N',
 'GSTVTHMQXVKNQF-UHFFFAOYSA-N',
 'ZHMGFS

NRXDBALRMKOHET-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/NRXDBALRMKOHET-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/NRXDBALRMKOHET-UHFFFAOYSA-N.json
KMPWYEUPVWOPIM-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/KMPWYEUPVWOPIM-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/KMPWYEUPVWOPIM-UHFFFAOYSA-N.json
QFZISQBFEIXWDM-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/QFZISQBFEIXWDM-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/QFZISQBFEIXWDM-UHFFFAOYSA-N.json
ASZVZWUNIUXBJT-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ASZVZWUNIUXBJT-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/ASZVZWUNIUXBJT-UHFFFAOYSA-N.json
NYTNVERZLHWZGZ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/NYTNVERZLHWZGZ-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/NYTNVERZLHWZGZ-UHFFFAOYSA-N.json
ZHMGFSLYVIQXCR-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/ZHMGFSLYVIQXCR-UHFFFAOYSA-N.json
https:/

XHSURMJJKAFELI-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/XHSURMJJKAFELI-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/XHSURMJJKAFELI-UHFFFAOYSA-N.json
AWYMRTKEVGBNRW-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/AWYMRTKEVGBNRW-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/AWYMRTKEVGBNRW-UHFFFAOYSA-N.json
AJYXUNWZABAKQI-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/AJYXUNWZABAKQI-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/AJYXUNWZABAKQI-UHFFFAOYSA-N.json
QDEZXRSOIDPJTE-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/QDEZXRSOIDPJTE-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/QDEZXRSOIDPJTE-UHFFFAOYSA-N.json
BQIBBYLXJDSLIR-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/BQIBBYLXJDSLIR-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/BQIBBYLXJDSLIR-UHFFFAOYSA-N.json
GSTVTHMQXVKNQF-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/GSTVTHMQXVKNQF-UHFFFAOYSA-N.json
https:/

XRFNRSSSMPGXBI-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/XRFNRSSSMPGXBI-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/XRFNRSSSMPGXBI-UHFFFAOYSA-N.json
GDWDBGSWGNEMGJ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/GDWDBGSWGNEMGJ-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/GDWDBGSWGNEMGJ-UHFFFAOYSA-N.json
VNQXSTWCDUXYEZ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/VNQXSTWCDUXYEZ-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/VNQXSTWCDUXYEZ-UHFFFAOYSA-N.json
BINIYSYTGGGFLC-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/BINIYSYTGGGFLC-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/BINIYSYTGGGFLC-UHFFFAOYSA-N.json
FRNWBAJBSISRNI-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/FRNWBAJBSISRNI-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/FRNWBAJBSISRNI-UHFFFAOYSA-N.json
TXWSMSPRFREESP-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/TXWSMSPRFREESP-UHFFFAOYSA-N.json
https:/

HUOHZWBSCYBQDH-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/HUOHZWBSCYBQDH-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/HUOHZWBSCYBQDH-UHFFFAOYSA-N.json
SZVNKXCDJUBPQO-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/SZVNKXCDJUBPQO-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/SZVNKXCDJUBPQO-UHFFFAOYSA-N.json
PXQXFZBYTZILJD-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/PXQXFZBYTZILJD-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/PXQXFZBYTZILJD-UHFFFAOYSA-N.json
XVYSFJOWUXMIEN-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/XVYSFJOWUXMIEN-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/XVYSFJOWUXMIEN-UHFFFAOYSA-N.json
IJBTXFFPFVNDAG-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/IJBTXFFPFVNDAG-UHFFFAOYSA-N.json
https://gnps-classyfire.ucsd.edu/entities/IJBTXFFPFVNDAG-UHFFFAOYSA-N.json
GGJRAQULURVTAJ-UHFFFAOYSA-N
https://gnps-classyfire.ucsd.edu/entities/GGJRAQULURVTAJ-UHFFFAOYSA-N.json
https:/

In [None]:


flattened_classified_json = json_normalize(jsondic)
flattened_df = flattened_classified_json.drop_duplicates("inchikey")
flattened_df["inchikey"] = flattened_df["inchikey"].str.replace(
    r"InChIKey=", ""
)
df_merged = pd.merge(
    inchis, flattened_df, left_on="inchikey", right_on="inchikey", how="left"
)
#df_merged.to_csv("check.csv")
for p, rowp in df_merged.iterrows():
    for q, rowq in frame.iterrows():
        if df_merged["smiles_x"][p] is frame["SMILES"][q]:
            if "subclass.name" in df_merged.columns:
                frame.loc[q, "subclass"] = df_merged["subclass.name"][p]
            frame.loc[q, "class"] = df_merged["class.name"][p]
            frame.loc[q, "superclass"] = df_merged["superclass.name"][p]
            frame.loc[q, "ClassificationSource"] = "ClassyFire"

frame.to_csv(resultcsv)