# ChemSpider Search Kernel

In [None]:
import os

from chemspipy import ChemSpider

# search setup
# SECURITY: an API key is embedded in this notebook. Supply the key through the
# CHEMSPIDER_API_KEY environment variable instead; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
chemspider = ChemSpider(
    os.environ.get('CHEMSPIDER_API_KEY') or 'I4J8BJXpTNfTz1TOGxK4fmIBICXMCuyj'
)
# External data sources whose identifiers are collected for each compound.
databases = ['Human Metabolome Database', 'KEGG', 'LipidMAPS']
# Relative mass tolerance; find_compound_info multiplies it by the target mass.
equal_compound_treshold = 0.001

# Chemspider search functions
def find_compound_info(target_compound_name, target_compound_mass):
    """Search ChemSpider by name and collect external database identifiers.

    Each search hit is kept only if its monoisotopic mass differs from
    ``target_compound_mass`` by at most
    ``target_compound_mass * equal_compound_treshold``. For every kept hit the
    external references restricted to the module-level ``databases`` list are
    fetched and their identifiers accumulated.

    Args:
        target_compound_name: Text passed to ``chemspider.search``.
        target_compound_mass: Expected monoisotopic mass. A value <= 0
            disables the mass filter, so every hit is accepted.

    Returns:
        dict with one key per entry of ``databases`` plus ``'Warnings'``.
        Every value is a string of ``;``-terminated items: external IDs under
        the database keys, diagnostic messages under ``'Warnings'``.
    """
    tolerated_error = target_compound_mass * equal_compound_treshold

    # One (initially empty) ID string per database, plus the warnings string.
    compound_info = {db: "" for db in databases}
    compound_info['Warnings'] = ""

    print("###############################")
    print("Compound: ", target_compound_name)

    # Search by name and block until the search has finished.
    found_compounds = chemspider.search(target_compound_name)
    found_compounds.wait()

    if not found_compounds.success():
        compound_info['Warnings'] += "Compound search failed verify api key;"
        return compound_info

    if found_compounds.count == 0:
        compound_info['Warnings'] += "Compound Not found in chemspider search;"

    for result in found_compounds:
        try:
            print("Result: ")
            print("ID: ", result.record_id)
            print("Monoisotopic Mass: ", result.monoisotopic_mass)

            # The mass filter only applies when a positive target mass is given.
            if (target_compound_mass > 0
                    and abs(result.monoisotopic_mass - target_compound_mass) > tolerated_error):
                # Fix: the two fragments used to run together without a
                # separator ("...tolerated ErrorMass Found:...").
                compound_info['Warnings'] += (
                    "Skipped csID(" + str(result.record_id)
                    + ") Because mass is outside tolerated Error"
                    + " Mass Found:" + str(result.monoisotopic_mass) + ";"
                )
                continue

            # Fetch external references restricted to the known databases.
            external_refs = chemspider.get_external_references(result.record_id, databases)

            if not external_refs:
                compound_info['Warnings'] += "No external references found;"

            for ref in external_refs:
                db_name = ref['source']
                db_comp_id = ref['externalId']
                compound_info[db_name] += db_comp_id + ";"
        except KeyError:
            # Raised when a looked-up key is absent, e.g. a reference lacking
            # 'source'/'externalId' or naming a source that is not a key of
            # compound_info. The remaining references of this hit are skipped.
            compound_info['Warnings'] += "Compound csID(" + str(result.record_id) + ") missing needed info;"

    return compound_info

# Input Params

In [1]:
import xlrd as excel
import ipywidgets as widgets
from IPython.display import display, Markdown, Latex

# Text box holding the path of the input Excel workbook.
filename = widgets.Text(
    description='Excel File:',
    placeholder='Copy filename here',
    value='thermomix_tissue_teste.xlsx',
    disabled=False,
)
display(filename)

Text(value='thermomix_tissue_teste.xlsx', description='Excel File:', placeholder='Copy filename here')

In [None]:
# Open the workbook named in the filename widget and use its first sheet.
# NOTE(review): xlrd 2.0+ reads only .xls files, while the default filename is
# .xlsx -- confirm the installed xlrd version supports the input format.
wb = excel.open_workbook(filename.value)
sheet = wb.sheet_by_index(0)

# Reads the top-left cell; the returned value is discarded.
sheet.cell_value(0, 0)

# (header text, column index) pairs taken from the first row of the sheet.
column_titles = [
    (sheet.cell_value(0, col_idx), col_idx) for col_idx in range(sheet.ncols)
]

# Both dropdowns offer the same columns; their .value is the column index.
compound_name_selection = widgets.Dropdown(
    description='Name',
    options=column_titles,
    disabled=False,
)
compound_mass_selection = widgets.Dropdown(
    description='Mass',
    options=column_titles,
    disabled=False,
)

section_title = Markdown('**Select Input Columns**')
display(section_title)
display(compound_name_selection, compound_mass_selection)


# Output File

In [None]:
# Text box holding the path the result workbook is saved to.
output_file = widgets.Text(
    description='Excel File:',
    placeholder='Copy filename here',
    value='output.xls',
    disabled=False,
)
display(output_file)

In [None]:
### output columns
# Column layout of the result sheet: Compound, Mass, one column per database,
# and Warnings as the last column.
name_col_dict = {"Compound": 0, "Mass": 1}
curr_col = 2

for db_name in databases:
    name_col_dict[db_name] = curr_col
    curr_col += 1

name_col_dict["Warnings"] = curr_col

#### build excel output
import xlwt
from xlwt import Workbook

# Note: this rebinds `wb`, which previously named the input workbook; the input
# is only accessed through `sheet` below.
wb = Workbook()

# Header row of the result sheet.
result_sheet = wb.add_sheet('Results')
for col_name, col_idx in name_col_dict.items():
    result_sheet.write(0, col_idx, col_name)

compound_col = compound_name_selection.value
compound_mass_col = compound_mass_selection.value

# Row 0 holds the column titles, so data rows are 1 .. nrows - 1 inclusive.
# Fix: the previous upper bound (sheet.nrows - 1) skipped the last data row.
for row in range(1, sheet.nrows):
    compound_name = sheet.cell_value(row, compound_col)

    # Nothing to search for on a blank row; leave the output row empty so the
    # input and output row numbers stay aligned.
    if isinstance(compound_name, str) and not compound_name.strip():
        continue

    # A blank or non-numeric mass cell must not abort the whole run. A mass of
    # 0 makes find_compound_info skip its mass filter, and the fallback is
    # recorded in the Warnings column.
    mass_warning = ""
    try:
        compound_mass = float(sheet.cell_value(row, compound_mass_col))
    except (TypeError, ValueError):
        compound_mass = 0.0
        mass_warning = "Invalid mass value, mass check skipped;"

    print("Parsing Compound: ", compound_name)
    compound_info = find_compound_info(compound_name, compound_mass)
    if mass_warning:
        compound_info['Warnings'] = mass_warning + compound_info['Warnings']

    result_sheet.write(row, name_col_dict["Compound"], compound_name)
    result_sheet.write(row, name_col_dict["Mass"], compound_mass)
    for info_name, info_value in compound_info.items():
        result_sheet.write(row, name_col_dict[info_name], info_value)

wb.save(output_file.value)