In [4]:
import petl as etl
from collections import OrderedDict, defaultdict
import traceback

In [5]:
# Open question: should the source be configurable, or do we assume a fixed
# file for now?  At present it is hard-wired to a local Excel workbook.
# Rows whose 'Extr' column equals 'y' (extracts) are filtered out for now.
t1 = etl.select(
    etl.io.xlsx.fromxlsx(r'c:\data\synth_migration_data.xlsx'),
    "{Extr} != 'y'",
)

In [6]:
t1

Site,Sample Name,Notes,Comments,Depth,Depth Unit,Method,Extr,TOC,wt,PeakProperty,Pr/Ph,Custom1,Custom2
Well1,S1,blah,…,2400,m,l,,8.6,20.0,Area,0.65,errrm,
Well1,S2,,,2410,m,l,,8.9,21.0,,,,232.0
Well2,S4,but,I'll say it,2215,m,py,,5.5,13.0,Conc Area,0.58,,114.0
Well2,S5-ex,,anyway,2215,m,py,y,5.6,12.1,,,nah,
Well3,S6,-,,4890,f,x,,2.1,,,,,


In [4]:
from collections import namedtuple
from dataclasses import dataclass

@dataclass
class ConversionFor:
    """Describes the source and target of a value conversion.

    All four fields are plain strings.  NOTE(review): "uom" presumably means
    unit of measure and "ratio" a ratio form, per the "uoms / ratios" TODO in
    the row-mapper template -- confirm before relying on that reading.
    """

    # Source side of the conversion.
    from_uom: str
    from_ratio: str

    # Target side of the conversion.
    to_uom: str
    to_ratio: str

# Immutable record holding the endpoints of a conversion (from/to uom and
# ratio) together with four coefficients a, b, c, d.
# NOTE(review): how a-d are combined is not defined anywhere in this cell
# (convert() is still a TODO) -- confirm the formula before use.
ConversionParams = namedtuple(
    'ConversionParams',
    ['from_uom', 'from_ratio', 'to_uom', 'to_ratio', 'a', 'b', 'c', 'd'],
)

# template code
def row_mapper(row):
    """Map one source row onto the collapsed/expanded target column layout.

    Args:
        row: a mapping-like record that supports lookup by source column
            name (e.g. ``row['Site']``).

    Returns:
        A list of 15 output values, positionally matching the target headers:
        site, sample name, combined notes/comments, depth, three TOC
        expansions, three sample-weight expansions, three Pr/Ph expansions,
        Custom1 and Custom2.

    Raises:
        Exception: any error raised while mapping is printed via
            ``traceback.print_exc()`` and then re-raised, so the caller can
            decide whether to skip the row or fail.
    """
    try:
        # (src_col, ref_col) -> set of ref-col values that have an explicit
        # expand() mapping.  Built per row; expand_default() consults it, so
        # every expand() for a pair must appear before its expand_default().
        cache = defaultdict(set)

        def expand(rc_val, src_col, ref_col):
            """Return row[src_col] if row[ref_col] equals rc_val, else None."""
            cache[(src_col, ref_col)].add(rc_val)
            return row[src_col] if row[ref_col] == rc_val else None

        def expand_default(src_col, ref_col):
            """Return row[src_col] if no expand() call claimed row[ref_col]."""
            # return src col val (for this row) if ref col val for this row has not been mapped
            return row[src_col] if row[ref_col] not in cache[(src_col, ref_col)] else None

        # incl all collapse / value mod funcs incase needed or add on demand?
        def append(cols, descs, sep=':', delim=';'):
            """Join the non-empty values of cols as '<label><sep><value>'.

            The label is the matching entry of descs, or the column name when
            that entry is falsy.  Columns whose value is falsy are skipped.
            """
            return delim.join([f"{d if d else c}{sep}{row[c]}"
                               for d, c in zip(descs, cols) if row[c]])

        # TODO (uoms / ratios)
        def convert():
            raise NotImplementedError()

        return [
            # <!*** injected code form here ***> 
            # direct mappings e.g.(mapped col names come later)
            row['Site'],
            row['Sample Name'],

            # do collapse rules
            append(cols=['Notes', 'Comments'], descs=[None, None]),

            row['Depth'],

            # expansion rule 1 
            expand(rc_val='l', src_col='TOC', ref_col='Method'),  # mapping for TOC.leco
            expand(rc_val='py', src_col='TOC', ref_col='Method'),  # mapping for TOC.pyrol
            expand_default(src_col='TOC', ref_col='Method'),  # mapping for TOC.un

            # expansion rule 2 
            expand(rc_val='l', src_col='wt', ref_col='Method'),  # mapping for sam wt.leco
            expand(rc_val='py', src_col='wt', ref_col='Method'),  # mapping for sam wt.pyrol
            expand_default(src_col='wt', ref_col='Method'),  # mapping for sam wt.un

            # exp rule n...
            expand(rc_val='Area', src_col='Pr/Ph', ref_col='PeakProperty'),  # mapping for pr/ph[a]
            expand(rc_val='Conc Area', src_col='Pr/Ph', ref_col='PeakProperty'),  # mapping for pr/ph[ca]
            expand_default(src_col='Pr/Ph', ref_col='PeakProperty'),  # mapping for pr/ph[un]

            row['Custom1'],
            row['Custom2'],

            ## TODO -
            #  * Value Mods
            #  * Remaining collapse rules (pref, add)
            #  * Expand on multiple elements e.g. anal & ind;
            #  * comb collapse / exp / mod  (work out degree needed if very difficult) !?!?!
            #  * ? support nested case - caution -> need to support on server as well if we do?
        ]
    except Exception:
        # Log here because the row-mapping caller may swallow errors silently.
        # Re-raise so the real exception (not a secondary error from an
        # implicit None return) reaches the caller's own error handling.
        traceback.print_exc()
        raise

# <!*** inject headers ***>
# Output header row, one entry per value returned by row_mapper, in the same
# order.  Names follow the expanded column names of the IGI property model
# (to be generated dynamically later).
headers = [
    # direct mappings
    'Well Name.Well',
    'Name.Sam',
    # collapsed notes/comments
    'Comments.Sam',
    'Base MD.Sam',
    # TOC expanded by method
    'TOC.leco',
    'TOC.pyrol',
    'TOC.un',
    # sample weight expanded by method
    'sam wt.leco',
    'sam wt.pyrol',
    'sam wt.un',
    # Pr/Ph expanded by peak property
    'Pr/Ph[a].Sat-GC',
    'Pr/Ph[ca].Sat-GC',
    'Pr/Ph[un].Sat-GC',
    # user properties from p:IGI3 may stay unmapped for user to map in p:IGI+
    'Custom1',
    'Custom2',
]
t2 = etl.rowmap(t1, row_mapper, header=headers)
t2

Well Name.Well,Name.Sam,Comments.Sam,Base MD.Sam,TOC.leco,TOC.pyrol,TOC.un,sam wt.leco,sam wt.pyrol,sam wt.un
Well1,S1,Notes:blah;Comments:…,2400,8.6,,20.0,20.0,,
Well1,S2,,2410,8.9,,21.0,21.0,,
Well1,S3,Notes:not much;Comments:to say,2420,9.5,,28.0,28.0,,
Well2,S4,Notes:but;Comments:I'll say it,2215,,5.5,13.0,,13.0,
Well2,S5-ex,Comments:anyway,2215,,5.6,12.1,,12.1,
