In [50]:
import os
import xlrd
import pprint
import sys
import copy
pp = pprint.PrettyPrinter()

In [51]:
# Root folder that the git checkouts and data folders below are joined onto.
# Defaults to the original hard-coded location; set the LIPID_HOME_FOLDER
# environment variable to run the notebook from a different machine/account.
home_folder = os.environ.get('LIPID_HOME_FOLDER','/home/simon')

In [52]:
# Put the local project checkouts on the import path (same order as before:
# lipid_prototype, mass-spec-utils, pymzm).
sys.path.extend(
    os.path.join(home_folder,repo_path)
    for repo_path in ('git/lipid_prototype',
                      'git/mass-spec-utils',
                      'git/pymzm')
)

In [53]:
# Folder the input Excel workbook is read from (joined with the workbook name
# when the workbook is opened).
excel_folder = os.path.join(home_folder,'git/lipid_prototype/excel_input_files')
# Folder the .mzML files listed in the workbook are loaded from.
data_path = os.path.join(home_folder,'data/lipid_files/lipid_files/Condition_1/Neg')

In [54]:
# Lower-cased column headings that read_heads accepts in the first row of the
# lipids sheet; any other heading triggers a warning and is ignored.
PERMITTED_HEADS = {
    'name',
    'formula',
    'ion',
    'ion_mz (optional)',
    'mz_tolerance_ppm (optional)',
    'ion rt (seconds)',
    'ion rt tol (seconds)',
    'files to exclude',
    'max_iso_n',
}

# Run-wide parameter defaults; load_data overrides them from the optional
# 'parameters' sheet of the workbook.
DEFAULT_PARAMS = dict(
    mz_tolerance = 0.01,
    mz_tolerance_units = 'abs', # or ppm
    scan_delta = 2,
    max_iso_n = 5,
)

In [55]:
def read_heads(sheet_object,permitted_vals = PERMITTED_HEADS):
    """Map each recognised heading in the first row of a sheet to its column.

    Headings are matched case-insensitively and with surrounding whitespace
    removed. Headings that are not in ``permitted_vals`` are reported with a
    printed warning and left out of the result. Blank header cells are
    skipped without a warning.

    Parameters
    ----------
    sheet_object
        Sheet-like object exposing ``ncols`` and ``cell_value(row, col)``.
    permitted_vals
        Collection of accepted (lower-case) heading names.

    Returns
    -------
    dict
        ``{lower-cased heading: column index}`` for every permitted heading.
    """
    header_row = 0
    head_dict = {}
    for col_pos in range(sheet_object.ncols):
        raw_val = sheet_object.cell_value(header_row,col_pos)
        # A header cell may hold a non-text value (e.g. a number), which has
        # no .lower(); coerce to text so it is warned about instead of raising.
        heads_val = str(raw_val).strip().lower()
        if not heads_val:
            continue # blank header cell: nothing to map or warn about
        if heads_val in permitted_vals:
            head_dict[heads_val] = col_pos
        else:
            print("Warning: head {} not permitted".format(heads_val))
    return head_dict

def load_lipids(sheet_object,permitted_vals = PERMITTED_HEADS):
    """Read the lipids sheet into a dict of per-lipid settings.

    The first row is treated as headings (see ``read_heads``); every later row
    describes one lipid. Empty cells are omitted from that lipid's dict. When a
    row supplies ``'mz_tolerance_ppm (optional)'`` the value is also exposed as
    ``'mz_tolerance'`` with ``'mz_tolerance_units'`` set to ``'ppm'``. The
    ``'max_iso_n'`` and ``'scan_delta'`` entries, when present, are converted
    to ``int``.

    Rows whose name cell is blank are skipped. If a name occurs more than once
    a warning is printed and the later row replaces the earlier one.

    Parameters
    ----------
    sheet_object
        Sheet-like object exposing ``nrows``, ``ncols`` and
        ``cell_value(row, col)``.
    permitted_vals
        Collection of accepted heading names, forwarded to ``read_heads``.

    Returns
    -------
    dict
        ``{lipid name: {heading: value, ...}}``.

    Raises
    ------
    KeyError
        If the sheet has data rows but no ``name`` column.
    """
    head_dict = read_heads(sheet_object,permitted_vals = permitted_vals)
    lipids = {}
    if sheet_object.nrows <= 1:
        return lipids # headings only (or nothing at all): no lipids to read
    if 'name' not in head_dict:
        raise KeyError("lipids sheet has no 'name' column")
    name_col = head_dict['name']
    force_int = ('max_iso_n','scan_delta')

    for row_pos in range(1,sheet_object.nrows): # row 0 holds the headings
        lipid_name = sheet_object.cell_value(row_pos,name_col)
        if isinstance(lipid_name,str) and not lipid_name.strip():
            # A row without a name cannot be looked up or processed later on.
            continue
        if lipid_name in lipids:
            print("Warning: duplicate lipid name {}, row {} replaces the earlier entry".format(lipid_name,row_pos + 1))

        entry = {}
        for key,col in head_dict.items():
            data_val = sheet_object.cell_value(row_pos,col)
            if isinstance(data_val,str) and len(data_val) == 0:
                continue # blank value
            entry[key] = data_val

        if 'mz_tolerance_ppm (optional)' in entry:
            # fix the metadata to be more general
            entry['mz_tolerance'] = entry['mz_tolerance_ppm (optional)']
            entry['mz_tolerance_units'] = 'ppm'
        for fo in force_int:
            if fo in entry:
                # ensure this is an int
                entry[fo] = int(entry[fo])
        lipids[lipid_name] = entry
    return lipids
        
    
def load_files(sheet_object):
    """Read the files sheet into a list of (file name, timepoint) pairs.

    The first row must be headed ``filename`` and ``timepoint`` (checked
    case-insensitively). Each later row contributes one pair, with the
    timepoint converted to ``float``.

    Parameters
    ----------
    sheet_object
        Sheet-like object exposing ``nrows`` and ``cell_value(row, col)``.

    Returns
    -------
    list of tuple
        ``(file_name, timepoint)`` pairs sorted by ascending timepoint.

    Raises
    ------
    AssertionError
        If either heading is wrong; the exception carries the explanation.
    ValueError
        If a timepoint cell cannot be converted to ``float``.
    """
    # The message must be a string: passing print(...) here made the
    # AssertionError carry None, because print returns None.
    assert sheet_object.cell_value(0,0).lower() == 'filename', "First column in files sheet must be headed filename"
    assert sheet_object.cell_value(0,1).lower() == 'timepoint', "Second column in files sheet must be headed timepoint"
    time_points = []
    for row_pos in range(1,sheet_object.nrows):
        file_name = sheet_object.cell_value(row_pos,0)
        timepoint = sheet_object.cell_value(row_pos,1)
        time_points.append((file_name,float(timepoint)))
    time_points.sort(key = lambda x: x[1])
    return time_points

def load_data(workbook_object,permitted_vals = PERMITTED_HEADS):
    """Load lipids, file timepoints and run parameters from a workbook.

    The workbook must contain sheets named ``lipids`` and ``files``. A sheet
    named ``parameters`` is optional: when present, each row after the first
    gives a parameter name (column 0) and value (column 1) that overrides the
    matching entry of ``DEFAULT_PARAMS``. Unknown parameter names are reported
    and ignored. ``max_iso_n`` and ``scan_delta`` are converted to ``int``.

    Parameters
    ----------
    workbook_object
        Workbook-like object exposing ``sheet_by_name(name)``.
    permitted_vals
        Collection of accepted lipid-sheet headings, forwarded to
        ``load_lipids``.

    Returns
    -------
    tuple
        ``(lipids, time_points, parameters)`` as produced by ``load_lipids``,
        ``load_files`` and the parameter handling described above.
    """
    lipid_sheet = workbook_object.sheet_by_name('lipids')
    lipids = load_lipids(lipid_sheet,permitted_vals = permitted_vals)
    files_sheet = workbook_object.sheet_by_name('files')
    time_points = load_files(files_sheet)

    # Work on a copy so the module-level defaults are never modified.
    parameters = copy.deepcopy(DEFAULT_PARAMS)
    force_int = ('max_iso_n','scan_delta')

    # Only the sheet lookup is guarded: a missing sheet means "use defaults",
    # whereas an error while reading an existing sheet should not be hidden.
    try:
        parameter_sheet = workbook_object.sheet_by_name('parameters')
    except xlrd.biffh.XLRDError:
        # no sheet called parameters, so use defaults
        return lipids,time_points,parameters

    for row_pos in range(1,parameter_sheet.nrows): # row 0 holds the headings
        parameter_name = parameter_sheet.cell_value(row_pos,0)
        parameter_value = parameter_sheet.cell_value(row_pos,1)
        if parameter_name not in parameters:
            print("PARAMETER NOT FOUND: {}".format(parameter_name))
            print("\tAllowed names are: {}".format(",".join([str(k) for k in parameters.keys()])))
            continue
        if parameter_name in force_int:
            parameter_value = int(parameter_value)
        parameters[parameter_name] = parameter_value
        print("Set {} to {}".format(parameter_name,parameters[parameter_name]))

    return lipids,time_points,parameters

In [56]:
# Workbook for this run; load_data reads its 'lipids' and 'files' sheets and,
# if present, its 'parameters' sheet.
excel_file = os.path.join(excel_folder,'Test_input_Condition1Neg_Subset.xlsx')
# NOTE(review): xlrd 2.0 and later read only .xls files, so opening this .xlsx
# presumably requires xlrd < 2.0 — confirm the installed/pinned version.
wb = xlrd.open_workbook(excel_file) 

lipids,time_points,parameters = load_data(wb)
# pp.pprint(lipids)
# pp.pprint(time_points)
# Show the effective parameters (defaults merged with any overrides).
print(parameters)

Set max_iso_n to 3
Set mz_tolerance to 0.01
Set mz_tolerance_units to abs
Set scan_delta to 0
{'mz_tolerance': 0.01, 'mz_tolerance_units': 'abs', 'scan_delta': 0, 'max_iso_n': 3}


In [57]:
# Spot-check the parsed settings of a single lipid entry.
print(lipids['Pe 37.1'])

{'name': 'Pe 37.1', 'formula': 'C42H82NO8P', 'ion': '[M-H]-', 'ion_mz (optional)': 758.572, 'ion rt (seconds)': 654.0, 'ion rt tol (seconds)': 20.0, 'max_iso_n': 3}


In [58]:
# One MZMLFile object per row of the files sheet, keyed by the file name
# exactly as it is written in the sheet.
mzml_file_objs = {}

from ms2_matching import MZMLFile
for filename,time in time_points:
    # Append the extension only when the sheet entry does not already contain it.
    load_filename = filename if '.mzML' in filename else filename + '.mzML'
    full_file = os.path.join(data_path,load_filename)
    mzml_file_objs[filename] = MZMLFile(full_file)

  if self.element:
  if b_data_array:
  if self.element:


Loaded 1965 scans
Loaded 1965 scans
Loaded 1965 scans
Loaded 1965 scans
Loaded 1965 scans
Loaded 1965 scans
Loaded 1965 scans


In [59]:
%load_ext autoreload
%autoreload 2
from lipid_kinetics import compute_lipid_kinetics,create_plot

# Results of compute_lipid_kinetics, keyed by lipid name.
output_dict = {}
for lipid in lipids:
    lipid_info = lipids[lipid]
    print(lipid,lipid_info['ion'])
    rt_mean = lipid_info['ion rt (seconds)']
    rt_tol = lipid_info['ion rt tol (seconds)']
    # Add the derived fields in place before handing the entry over:
    # the retention-time window and the adduct label taken from 'ion'.
    lipid_info.update({
        'rt_range': [rt_mean - rt_tol,rt_mean+rt_tol],
        'adduct_type': lipid_info['ion'],
    })
    output_dict[lipid] = compute_lipid_kinetics(lipid,lipid_info,time_points,mzml_file_objs,parameters)
    create_plot(lipid,output_dict[lipid])




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Pe 37.1 [M-H]-
pc 34:2 [M-H+FA]-
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv files. Assuming normal dialect.
[M-H+CH2O2]- not a valid adduct, trying to parse
Failed to load .csv fi

In [60]:
from lipid_kinetics import create_xlsx_output
# File name handed to create_xlsx_output as output_filename (relative, so
# presumably resolved against the kernel's working directory — confirm).
xlsx_output_name = 'condition_1_neg_subset.xlsx'
# Pass the per-lipid results collected in output_dict to the exporter.
create_xlsx_output(output_dict,output_filename = xlsx_output_name)

  if self.element:


Writing:  temp_0.png
Writing:  temp_1.png
Writing:  temp_2.png
Writing:  temp_3.png
Writing:  temp_4.png
Writing:  temp_5.png
