In [1]:
# default_exp operators

from nbdev.showdoc import show_doc


In [2]:
#hide
#export
import pandas as pd
import numpy as np
import numexpr as ne

# Operators

In order to be able to capture the transformations required to normalize neuro data with different styles
we'll need a collection of operations to manage them.
In the abstract, these operations should manage grabbing the relevant data and then serving the result back in a standardized form.

In [3]:
#hide
#export
class AbstractOperation(object):
    """Base class for every operation that derives a value from a row of data.

    Subclasses declare which row keys they read (`fields`), which names they
    produce (`result_fields`), and implement `process_single`, `explain` and
    `to_config`.
    """

    # Row keys read by the operation.
    fields = []
    # Names of the values the operation produces.
    result_fields = []

    @staticmethod
    def from_config(config):
        """Build the first concrete operation that recognises `config`.

        Each known operation class is offered the config in a fixed order; a
        class signals "not mine" by returning None from its own `from_config`.

        Parameters
        ----------
        config : dict

        Returns
        -------
        AbstractOperation

        Raises
        ------
        NotImplementedError
            If no operation class accepts the config.
        """
        candidates = (EquationOp,
                      AggregationOp,
                      ClipOp,
                      NormativeLookupOp,
                      CategoricalOp,
                      BinnedScalingOp,
                      EquationFilterOp)

        # Lazily try each class and stop at the first one that builds something.
        attempts = (candidate.from_config(config) for candidate in candidates)
        found = next((built for built in attempts if built is not None), None)
        if found is None:
            raise NotImplementedError(f'Did not understand type: {config["type"]}')
        return found

    def process_single(self, row):
        """Compute the result for one row; must be provided by subclasses."""
        raise NotImplementedError

    def explain(self, row):
        """Describe how the result for one row was obtained; subclass hook."""
        raise NotImplementedError

    def to_config(self):
        """Serialise the operation to a config dict; subclass hook."""
        raise NotImplementedError

    def to_series(self, row):
        """Pull the declared `fields` out of `row` into a Series.

        Keys missing from `row` come back as whatever `row.get` returns for
        them; keys not listed in `fields` are dropped.
        """
        picked = {field: row.get(field) for field in self.fields}
        return pd.Series(picked)

    def __call__(self, row):
        """Yield a single ``(result_name, value)`` pair for `row`."""
        res = self.process_single(row)
        yield self.result_fields[0], res

In [4]:
#hide

# Smoke test: the base class can be instantiated directly.
abs_op = AbstractOperation()
assert type(abs_op) == AbstractOperation

# to_series should keep only the declared fields and ignore other keys.
abs_op.fields = ['t1', 't2']
test_data = {'t1': 1, 't2': 2, 'other': 3}
ser = abs_op.to_series(test_data)
assert (ser['t1'] == 1) & (ser['t2'] == 2) & ('other' not in ser)


Again, we'll use the BVMT test as the example.
But we're going to back up a step. Since there are a bunch of different intermediate values, I want to calculate those using operations.

Measured Values:
 - `trial1` - Trial 1 successes
 - `trial2` - Trial 2 successes
 - `trial3` - Trial 3 successes
 - `delay` - Delayed Successes
 - `hits` - Successful recognitions with distractors
 - `false_pos` - False-positive recognitions

Our goal is to define all of the operations required to calculate intermediate values (i.e. `immediate`)
as well as scaled values.

There are three derived values to calculate:
  - `immediate`: the sum of the three trials
  - `recognition`: the number of hits - false-positive recognitions
  - `retention`: ratio of delayed successes and largest of the trial 2 & trial 3 successes

The first two can be solved with basic equations.
The third will require an additional strategy.

## Basic Equations

In [5]:
#export
class EquationOp(AbstractOperation):
    """Manipulate values with `numexpr`-style equations evaluated by `pd.eval`."""

    def __init__(self, out_field, equation, fields):
        """Remember the equation and the row keys it is allowed to see.

        Parameters
        ----------
        out_field : str
            Name under which the result is reported.
        equation : str
            Expression string, e.g. ``"hits-false_pos"``.
        fields : list[str]
            Row keys exposed to the equation as local variables.
        """
        self.result_fields = [out_field]
        self.equation = equation
        self.fields = fields

    @staticmethod
    def from_config(config):
        """Build an `EquationOp` from a config dict, or decline with None.

        Expecting yaml of the format:
          type: equation
          equation: "hits-false_pos"
          fields: ['hits', 'false_pos']
          out_field: 'recognition'

        Parameters
        ----------
        config : dict

        Returns
        -------
        EquationOp or None
            None when ``config['type']`` is not ``'equation'``.
        """
        if config['type'] != 'equation':
            return None
        return EquationOp(out_field=config['out_field'],
                          equation=config['equation'],
                          fields=config['fields'])

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        config = {'type': 'equation'}
        config['out_field'] = self.result_fields[0]
        config['equation'] = self.equation
        config['fields'] = self.fields
        return config

    def explain(self, row):
        """Describe the equation and the value it produced for `row`.

        Parameters
        ----------
        row : dict,pd.Series

        Returns
        -------
        str
        """
        res = self.process_single(row)
        return f'Used Equation: {self.equation} = {res} = {self.result_fields[0]}'

    def process_single(self, row):
        """Evaluate the equation against the fields of `row`.

        Parameters
        ----------
        row : mapping

        Returns
        -------
        float
            The evaluated result, or NaN when any required field is null.
        """
        values = self.to_series(row)
        # Any missing input makes the whole result undefined.
        if values.isnull().any():
            return np.nan
        return pd.eval(self.equation, local_dict=values.to_dict())

Let's imagine an individual to test.

Measured Values:
 - `trial1` - 5
 - `trial2` - 6
 - `trial3` - 7
 - `delay` - 8
 - `hits` - 6
 - `false_pos` - 2
 - `copy` - 12

Using the `EquationOp` let's calculate `immediate` and `recognition`.

In [18]:
# Example individual; every measured value is stored under a `bvmt_`-prefixed key.
DATA = {'bvmt_trial1': 5, 'bvmt_trial2': 6, 'bvmt_trial3': 7,
        'bvmt_delay': 8, 'bvmt_hits': 6, 'bvmt_false_pos': 2,
        'bvmt_copy': 12}

# immediate = trial1 + trial2 + trial3
total_op = EquationOp('bvmt_immediate',
                      'bvmt_trial1+bvmt_trial2+bvmt_trial3',
                      ['bvmt_trial1', 'bvmt_trial2', 'bvmt_trial3'])
immed = total_op.process_single(DATA)
assert immed == 18
# Store the derived value back into DATA so later cells can read it.
DATA['bvmt_immediate'] = immed

We can also `explain` the result using the method.

In [19]:
# Show the human-readable explanation of the `bvmt_immediate` calculation.
print(total_op.explain(DATA))

Used Equation: bvmt_trial1+bvmt_trial2+bvmt_trial3 = 18 = bvmt_immediate


While one might construct these operations in Python code, I actually expect most things to be saved as yaml.
So, we need a way to represent this info in that format.
This is also useful when constructing larger sets.

Here's the yaml example for the recognition calculation.
```
type: equation
equation: "hits-false_pos"
fields: ['hits', 'false_pos']
out_field: 'recognition'
```

In [21]:
import yaml

# YAML definition of the recognition equation (hits minus false positives).
# NOTE(review): the name `st` is rebound further down the notebook by
# `import scipy.stats as st`; it is only used within this cell.
st = """
type: equation
equation: "bvmt_hits-bvmt_false_pos"
fields: ['bvmt_hits', 'bvmt_false_pos']
out_field: 'bvmt_recognition'
"""

# Build the operation from the parsed config rather than from Python arguments.
ret_op = EquationOp.from_config(yaml.full_load(st))
recog = ret_op.process_single(DATA)

assert recog == 4
print(ret_op.explain(DATA))
# Store the derived value back into DATA so later cells can read it.
DATA['bvmt_recognition'] = recog


Used Equation: bvmt_hits-bvmt_false_pos = 4 = bvmt_recognition


## Aggregation Operations

Due to limitations in numexpr, it cannot choose the largest of two numbers, as needed for retention.
So, we use an `AggregationOp`.


In [9]:
#export

class AggregationOp(AbstractOperation):
    """Reduce several fields of a row to one value via `Series.agg`."""

    def __init__(self, out_field, aggregation, fields):
        """Remember which fields to reduce and how.

        Parameters
        ----------
        out_field : str
            Name under which the result is reported.
        aggregation : str
            Aggregation handed to `Series.agg`, e.g. ``'max'``.
        fields : list[str]
            Row keys to aggregate over.
        """
        self.result_fields = [out_field]
        self.aggregation = aggregation
        self.fields = fields

    @staticmethod
    def from_config(config):
        """Build an `AggregationOp` from a config dict, or decline with None.

        Load from config. Expects:
            type: agg
            method: 'max'
            fields: ['trial2', 'trial3']
            out_field: retention_denom

        Parameters
        ----------
        config : dict

        Returns
        -------
        AggregationOp or None
            None when ``config['type']`` is not ``'agg'``.
        """
        if config['type'] != 'agg':
            return None
        return AggregationOp(out_field=config['out_field'],
                             aggregation=config['method'],
                             fields=config['fields'])

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        config = {'type': 'agg'}
        config['out_field'] = self.result_fields[0]
        config['method'] = self.aggregation
        config['fields'] = self.fields
        return config

    def explain(self, row):
        """Describe the aggregation, its input fields, and the result for `row`."""
        res = self.process_single(row)
        return f'Aggregation: {self.aggregation} [{", ".join(self.fields)}]  = {res}'

    def process_single(self, row):
        """Aggregate the selected fields of `row` and return the result."""
        return self.to_series(row).agg(self.aggregation)

In [22]:
# Retention denominator: the larger of the trial 2 and trial 3 scores.
ret_denom_op = AggregationOp('bvmt_retention_denom', 'max', ['bvmt_trial2', 'bvmt_trial3'])
re_denom = ret_denom_op.process_single(DATA)

assert re_denom == 7
print(ret_denom_op.explain(DATA))
# Store the derived value back into DATA so later cells can read it.
DATA['bvmt_retention_denom'] = re_denom

Aggregation: max [bvmt_trial2, bvmt_trial3]  = 7


Now that we have the denominator we can use another equation to calculate retention.

In [23]:
# retention = delayed successes / retention denominator (unclipped at this point).
retent_op = EquationOp('bvmt_retention', 'bvmt_delay/bvmt_retention_denom', ['bvmt_delay', 'bvmt_retention_denom'])
retent = retent_op.process_single(DATA)

assert retent == 8/7
print(retent_op.explain(DATA))
# Store the derived value back into DATA so later cells can read it.
DATA['bvmt_retention'] = retent

Used Equation: bvmt_delay/bvmt_retention_denom = 1.1428571428571428 = bvmt_retention


Dang, the value was above 100%. And sometimes it may be negative.
By convention we clip these to a [0,1] scale, so we'll need an operation for that.


In [24]:
#export
class ClipOp(AbstractOperation):
    """Clip one field to the range [lower, upper].

    The result is reported under the same name as the input field.
    """

    def __init__(self, field, lower = 0, upper=1):
        """Remember the field to clip and its bounds.

        Parameters
        ----------
        field : str
            Row key to clip; also used as the result name.
        lower : float
            Smallest value allowed in the result.
        upper : float
            Largest value allowed in the result.
        """

        self.fields = [field]
        self.lower = lower
        self.upper = upper
        self.result_fields = [field]

    @staticmethod
    def from_config(config):
        """Build a `ClipOp` from a config dict, or decline with None.

        Load from config. Expects:
            type: clip
            field: retention
            lower: 0
            upper: 1

        `lower` and `upper` are optional and fall back to the constructor
        defaults (0 and 1).

        Parameters
        ----------
        config : dict

        Returns
        -------
        ClipOp or None
            None when ``config['type']`` is not ``'clip'``.
        """
        if config['type'] == 'clip':
            return ClipOp(config['field'],
                          lower = config.get('lower', 0),
                          upper = config.get('upper', 1))
        return None

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        # The upper bound lives in `self.upper`; this class has no `aggregation`.
        return {'type': 'clip',
                'lower': self.lower,
                'upper': self.upper,
                'field': self.fields[0]}

    def explain(self, row):
        """Describe which field is clipped and to which bounds."""
        return f'Clipped {self.fields[0]} to [{self.lower}, {self.upper}]'

    def process_single(self, row):
        """Return the field's value from `row`, limited to [lower, upper]."""

        data = self.to_series(row)
        clipped = data.clip(lower=self.lower, upper=self.upper)
        return clipped[self.result_fields[0]]

In [25]:
# Clip the retention ratio (8/7 from the previous step) into [0, 1].
ret_clip_op = ClipOp('bvmt_retention', lower=0, upper=1)
ret_clip = ret_clip_op.process_single(DATA)

assert ret_clip == 1
print(ret_clip_op.explain(DATA))
# Overwrite the unclipped value in DATA with the clipped one.
DATA['bvmt_retention'] = ret_clip

Clipped bvmt_retention to [0, 1]


Now we've calculated all of the intermediate values.
We'll need to use these values, along with demographic data, to lookup a "healthy" normal distribution.

## Normative Lookups

Normative lookups are composed of tables that index a mean and std given a demographic filter.
For example:

A 32 year old should have an `immediate` memory of 26.9 with a std of 4.6.
Our example has an `immediate` of 18, a deficit of 9, roughly 2 stds.

We'll need to make an object to contain the logic of matching and filtering each "element" of the table. This will be useful to deal with many different types of normative lookups.

In [59]:
# export
# hide


class AbstractNormative(object):
    """Base class for one entry of a normative lookup table.

    An entry pairs a filter expression (`flt`) with a way to scale a measured
    value; subclasses implement `scale`, `explain` and `to_config`.
    """

    # Filter expression string deciding whether this entry applies to a row.
    flt = None

    def to_config(self):
        """Serialise the entry to a config dict; subclass hook."""
        raise NotImplementedError

    @staticmethod
    def from_config(config):
        """Build the first normative type that recognises `config`.

        Raises
        ------
        ValueError
            If neither known normative class accepts the config.
        """
        for candidate in (MeanStdNormative, LookupNormative):
            built = candidate.from_config(config)
            if built is None:
                continue
            return built
        raise ValueError(f'Could not understand config: {config}')

    def explain(self, value):
        """Describe how `value` was scaled; subclass hook."""
        raise NotImplementedError

    def scale(self, data):
        """Scale a measured value; subclass hook."""
        raise NotImplementedError

    def is_valid(self, data):
        """Evaluate the filter expression with numexpr, using `data` for names."""
        return ne.evaluate(self.flt, local_dict=data)



This `MeanStdNormative` represents a table where a mean/std is provided for healthy individuals for a given filter criteria. Each element of the table is one `MeanStdNormative` object and the `NormativeLookupOp` manages a list of these objects.

In [66]:
# export

class MeanStdNormative(AbstractNormative):
    """Deal with mean/std scaled -> percentile lookup tables.

    One instance is one table entry: a filter expression plus the mean and
    standard deviation expected for rows matching that filter.
    """

    def __init__(self, flt, mean, std):
        """
        Parameters
        ----------
        flt : str
            Filter expression selecting the rows this entry applies to.
        mean : float
            Expected value for matching rows.
        std : float
            Standard deviation for matching rows.
        """
        self.flt = flt
        self.mean, self.std = mean, std

    @staticmethod
    def from_config(config):
        """Build from a config dict that has `mean` and `std` keys, else None.

        Expects the keys ``filter``, ``mean`` and ``std``.
        """
        if ('mean' in config) and ('std' in config):
            return MeanStdNormative(config['filter'],
                                    config['mean'],
                                    config['std'])
        return None

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        # The filter expression is stored as `self.flt` (there is no `self.filter`).
        return {'type': 'mean_std',
                'filter': self.flt,
                'mean': self.mean, 'std': self.std}

    def scale(self, value):
        """Return the z-score of `value`: ``(value - mean) / std``."""
        return (value-self.mean)/self.std

    def explain(self, value):
        """Describe the matched filter, the expected distribution and the z-score."""

        scaled = self.scale(value)
        matched = f'Matched: {self.flt}'
        calculated = f'Expected {self.mean}+/-{self.std} but observed {value}'
        result = f'Scaled to: z={scaled}'
        return '\n'.join([matched, calculated, result])

In [63]:
#export

class NormativeLookupOp(AbstractOperation):
    """Lookup table with normalized scores.

    Holds a list of normative entries; the first entry whose filter matches a
    row is used to scale that row's measured value.
    """

    def __init__(self, lookup_table, filter_cols, measure_col, out_name):
        """
        Parameters
        ----------
        lookup_table : list
            Normative entries offering `is_valid`, `scale` and `explain`.
        filter_cols : list[str]
            Row keys the filter expressions refer to.
        measure_col : str
            Row key holding the value to scale.
        out_name : str
            Name under which the scaled result is reported.
        """
        self.measure_col = measure_col
        self.filter_cols = filter_cols
        self.lookup_table = lookup_table
        self.result_fields = [out_name]
        # Everything the operation needs from a row: filter inputs + the measure.
        self.fields = filter_cols + [measure_col]

    @staticmethod
    def from_config(config):
        """Build from a ``normative_lookup`` config dict, or decline with None."""
        if config['type'] != 'normative_lookup':
            return None

        entries = [AbstractNormative.from_config(row) for row in config['table']]
        return NormativeLookupOp(entries,
                                 config['filter_cols'],
                                 config['measure_col'],
                                 config['out_name'])

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        table = [norm.to_config() for norm in self.lookup_table]
        return {'type': 'normative_lookup',
                'table': table,
                'filter_cols': self.filter_cols,
                'measure_col': self.measure_col,
                'out_name': self.result_fields[0]}

    def lookup_norm(self, row):
        """Return the first table entry whose filter accepts `row`, else None."""
        data = self.to_series(row)
        matching = (norm for norm in self.lookup_table if norm.is_valid(data))
        return next(matching, None)

    def explain(self, row):
        """Explain the scaling for `row`, or report that no filter matched."""
        norm = self.lookup_norm(row)
        data = self.to_series(row)

        if norm is not None:
            return norm.explain(data[self.measure_col])
        return f'{self.result_fields[0]}: Could not find matching filter for {data[self.filter_cols]}'

    def process_single(self, row):
        """Return the scaled measure for `row`, or NaN when no entry matches."""
        data = self.to_series(row)
        norm = self.lookup_norm(data)
        if norm is None:
            return np.nan
        return norm.scale(data[self.measure_col])

While it is possible to create these in Python, it's much easier to build using yaml definitions.

```
type: normative_lookup
measure_col: immediate
filter_cols: ['age']
out_name: 'heaton_immediate'
table:
  - filter: (18 <= age) & (age <= 21)
    mean: 28.74
    std: 4.32
  - filter: (20 <= age) & (age <= 23)
    mean: 28.44
    std: 4.38
  ...

```

The filters are evaluated with `numexpr.evaluate` (see `AbstractNormative.is_valid`), so any expression numexpr accepts will work.
The Heaton norms for the BVMT are currently in `data/norms/from_kate/heaton_bvmt.yaml`

In [65]:
DATA['age'] = 32
# Read the norms inside a `with` block so the file handle is closed promptly.
with open('data/norms/from_kate/heaton_bvmt.yaml') as handle:
    bvmt_config = yaml.full_load(handle)

# The first operation in the file is the normative lookup used here.
lookup_op = NormativeLookupOp.from_config(bvmt_config['operations'][0])
lookup_score = lookup_op.process_single(DATA)

assert lookup_score == -1.9224137931034488
print(lookup_op.explain(DATA))


Matched: (30 <= age) & (age <= 33)
Expected 26.92+/-4.64 but observed 18
Scaled to: z=-1.9224137931034488


The collection of these operators can be combined into a `TestCalculator` which manages applying these operations sequentially.
That is discussed elsewhere.

These features are sufficient for any analysis that requires looking up normalizations based on demographic information.
However, for regression based norms like the `Norman` set we need a further collection of operators.

## Regression Based Norms

When doing regression based normalization the first step is to `scale` the raw values based on a set of bins.
This is done to help _normalize_ the raw values before entering the regression equation.

For example, when scaling the `delay` column, the `norman` scheme uses:

| Raw | Scaled |
|-----|--------|
| 12  | 14
| 11  | 11
| 10  | 9
| 9   | 8
| 8   | 7
| 7   | 6
| 5   | 5
| 4   | 4
| 3   | 3
| 0   | 2

The `BinnedScalingOp` can be used to deal with these conditions.

In [29]:
# export

class BinnedScalingOp(AbstractOperation):
    """Map a raw value to a scaled value through lower-edge bins.

    Each bin is a dict with a `min` (inclusive lower edge) and a `scaled`
    value. A raw value gets the `scaled` value of the highest bin whose `min`
    it reaches.
    """

    def __init__(self, bins, measure_col, out_field = None):
        """
        Parameters
        ----------
        bins : list[dict]
            Dicts with ``'min'`` and ``'scaled'`` keys, in any order.
        measure_col : str
            Row key holding the raw value.
        out_field : str, optional
            Result name; defaults to ``measure_col + '_scaled'``.
        """

        self.measure_col = measure_col
        self.fields = [measure_col]
        if out_field is None:
            self.result_fields = [measure_col+'_scaled']
        else:
            self.result_fields = [out_field]
        # Highest edge first, so the first bin a value reaches is the right one.
        self.bins = sorted(bins, key = lambda edge: edge['min'],
                           reverse=True)

    @staticmethod
    def from_config(config):
        """
        Build from config, Expecting yaml like:
          type: binned_scaling
          measure_col: delay
          bins:
            - scaled: 14
              min: 12
            - scaled: 11
              min: 11
            - scaled: 9
              min: 10

        An optional ``out_field`` key overrides the default result name.

        Parameters
        ----------
        config : dict

        Returns
        -------
        BinnedScalingOp or None
            None when ``config['type']`` is not ``'binned_scaling'``.

        """

        if config['type'] == 'binned_scaling':
            return BinnedScalingOp(config['bins'],
                                   config['measure_col'],
                                   out_field = config.get('out_field'))
        return None

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        # `out_field` is included so a custom result name survives a round-trip.
        return {'type': 'binned_scaling',
                'bins': self.bins,
                'measure_col': self.fields[0],
                'out_field': self.result_fields[0]}

    def lookup_bin(self, row):
        """Return ``(min_edge, scaled)`` for the bin matching `row`.

        Returns ``(nan, nan)`` when the value is missing (None/NaN) or lies
        below every bin edge.
        """

        data = self.to_series(row)
        val = data[self.fields[0]]
        # Missing values cannot be compared against bin edges.
        if pd.isnull(val):
            return np.nan, np.nan
        for candidate in self.bins:
            if val >= candidate['min']:
                return candidate['min'], candidate['scaled']
        return np.nan, np.nan

    def explain(self, row):
        """Describe which bin `row` matched, or that none matched."""

        edge, scaled = self.lookup_bin(row)

        if pd.isnull(edge):
            data = self.to_series(row)
            return f'Could not find matching bin for {data[self.fields[0]]}'
        else:
            return f'{self.fields[0]} matched {edge}, scaled to {scaled}'

    def process_single(self, row):
        """Return the scaled value for `row` (NaN when no bin matches)."""
        _, res = self.lookup_bin(row)
        return res

In [30]:
# Bins for the `delay` column: `min` is the raw lower edge, `scaled` the mapped score.
bins = [{'min': 12, 'scaled': 14},
        {'min': 11, 'scaled': 11},
        {'min': 10, 'scaled': 9},
        {'min': 9, 'scaled':  8},
        {'min': 8, 'scaled': 7},
        {'min': 7, 'scaled': 6},
        {'min': 5, 'scaled': 5},
        {'min': 4, 'scaled': 4},
        {'min': 3, 'scaled': 3},
        {'min': 0, 'scaled': 2}]

# No out_field is given, so the result name defaults to 'bvmt_delay_scaled'.
scale_op = BinnedScalingOp(bins, 'bvmt_delay')
delay_scaled = scale_op.process_single(DATA)

assert delay_scaled == 7
print(scale_op.explain(DATA))

# Store the scaled value back into DATA so later cells can read it.
DATA['bvmt_delay_scaled'] = delay_scaled

bvmt_delay matched 8, scaled to 7


Now that we have the scaled value we need to handle the demographic variables.
For the `norman` set Male gender is set to 1 with females as 0.
The race also needs to be converted with white = 0 and AA = 1.


In [31]:
#export

class CategoricalOp(AbstractOperation):
    """Translate a categorical value into a code through a mapping dict."""

    def __init__(self, measure_col, mapping, out_col):
        """
        Parameters
        ----------
        measure_col : str
            Row key holding the categorical value.
        mapping : dict
            Maps each category to its output value.
        out_col : str
            Name under which the mapped value is reported.
        """
        self.fields = [measure_col]
        self.mapping = mapping
        self.result_fields = [out_col]

    @staticmethod
    def from_config(config):
        """Build a `CategoricalOp` from a config dict, or decline with None.

        Build from config. Expects yaml like:
          type: categorical
          in_field: gender
          out_field: norman_gender
          mapping:
            male: 0
            female: 1

        Parameters
        ----------
        config : dict

        Returns
        -------
        CategoricalOp or None
            None when ``config['type']`` is not ``'categorical'``.
        """
        if config['type'] != 'categorical':
            return None
        return CategoricalOp(measure_col=config['in_field'],
                             mapping=config['mapping'],
                             out_col=config['out_field'])

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        config = {'type': 'categorical'}
        config['in_field'] = self.fields[0]
        config['mapping'] = self.mapping
        config['out_field'] = self.result_fields[0]
        return config

    def lookup(self, row):
        """Return the mapped value for `row`, or None when it is not in the mapping."""
        category = self.to_series(row)[self.fields[0]]
        return self.mapping.get(category)

    def process_single(self, row):
        """Return the mapped value for `row` (None when unmatched)."""
        return self.lookup(row)

    def explain(self, row):
        """Describe the mapping applied to `row`, or that nothing matched."""
        res = self.lookup(row)
        if res is None:
            return f'Could not match {self.fields[0]}:{row[self.fields[0]]}'
        return f'{self.fields[0]}:{row[self.fields[0]]} -> {self.result_fields[0]}:{res}'

In [32]:
# Map the textual gender onto the numeric code given in the mapping dict.
cat_op = CategoricalOp('gender', {'male': 0, 'female': 1}, 'norman_gender')
DATA['gender'] = 'male'

norman_gender = cat_op.process_single(DATA)

assert norman_gender == 0
print(cat_op.explain(DATA))
# Store the coded value back into DATA so later cells can read it.
DATA['norman_gender'] = norman_gender

gender:male -> norman_gender:0


Now for the big finale, regression based norms.
After scaling the relevant data and handling categorical variables we need to apply an equation.
However, the equation changes depending on the individual's demographic variables.
One for African Americans, one for Caucasians, and a different one for Spanish speakers.
The `EquationFilterOp` takes care of these intricacies.

In [33]:
#export
class EquationFilterOp(AbstractOperation):
    """Apply the first regression equation whose filter matches a row.

    Each regression is a dict with a `filter` expression and a `norm`
    expression, both evaluated with `pd.eval` against the row's fields.
    """

    def __init__(self, fields, regressions, out_field, result_type = 'zscale'):
        """
        Parameters
        ----------
        fields : list[str]
            Row keys exposed to the filter and norm expressions.
        regressions : list[dict]
            Dicts with ``'filter'`` and ``'norm'`` expression strings.
        out_field : str
            Name under which the result is reported.
        result_type : str
            Scale of the `norm` output. ``'standard_score'`` and ``'tscore'``
            are converted with ``(val - 50) / 10``. ``'zscore'``, ``'zscale'``
            and ``'other'`` are passed through unchanged.
        """

        self.regressions = regressions
        self.fields = fields
        self.result_fields = [out_field]
        self.result_type = result_type

    @staticmethod
    def from_config(config):
        """Build from an ``equation_filter`` config dict, or decline with None.

        ``result_type`` is optional and falls back to the constructor default
        (``'zscale'``).
        """
        if config['type'] == 'equation_filter':
            return EquationFilterOp(config['fields'],
                                    config['equations'],
                                    config['out_field'],
                                    result_type = config.get('result_type', 'zscale'))

        return None

    def to_config(self):
        """Return the dict form understood by `from_config`."""

        return {'type': 'equation_filter',
                'fields': self.fields,
                'equations': self.regressions,
                'result_type': self.result_type,
                'out_field': self.result_fields[0]}

    def search_filters(self, row):
        """Return every regression whose filter evaluates truthy for `row`."""

        # The variables are the same for every regression, so build them once.
        local_vars = self.to_series(row).to_dict()
        return [reg for reg in self.regressions
                if pd.eval(reg['filter'], local_dict=local_vars)]

    def scale_data(self, row):
        """Evaluate the first matching regression for `row`.

        Returns
        -------
        tuple
            ``(regression, value)``, or ``(None, None)`` when nothing matches.

        Raises
        ------
        ValueError
            If a regression matched but `result_type` is not recognised.
        """

        hits = self.search_filters(row)
        if not hits:
            return None, None

        reg = hits[0]  # Currently only implementing "first"
        val = pd.eval(reg['norm'], local_dict=self.to_series(row).to_dict())

        if self.result_type in ('standard_score', 'tscore'):
            # Standard scores (mean 50, sd 10) -> z (mean 0, sd 1).
            val = (val - 50)/10
        elif self.result_type in ('zscore', 'zscale', 'other'):
            pass
        else:
            raise ValueError(f'Did not understand result_type: {self.result_type}')

        return reg, val


    def explain(self, row):
        """Describe the matched filter, the applied equation and its result."""

        reg, val = self.scale_data(row)

        if reg is None:
            return 'Could not find a match for regression normalization.'
        else:
            return f'Matched {reg["filter"]}, applied {reg["norm"]} = {float(val)}'


    def process_single(self, row):
        """Return the scaled value for `row`, or None when no filter matches."""

        _, val = self.scale_data(row)
        return val

These are best explained through their yaml imports.
Examine the `data/norms/norman/norman_bvmt_regnorm.yaml` for a complete example.


# Multidimensional Lookups

Some sets of Heaton norms (Grooved Peg, Trails, etc.) require a multi-step lookup. The original tables look like this:

| 5 | 4 | 3 | 2 | 1 | Education | Gender | Age
|---|---|---|---|---|-----------|--------|----
| 100 | 80 | 70 | 50 | 10 | 0 | 0 | 0
| 100 | 85 | 72 | 48 | 11 | 0 | 1 | 0
| 100 | 82 | 75 | 42 | 15 | 0 | 0 | 1
| 100 | 82 | 75 | 42 | 15 | 0 | 1 | 1

Where the first columns refer to Scaled scores and the last columns refer to categorical features. In this case, the intent would be to get the appropriate scaled score for the test, look to the row that corresponds with the patient's Education, Gender, and Age.

These are actually just more cases of `NormativeLookupOp` and `MeanStdNormative`. So, the approach is the same.

A general `LookupNormative` class takes a mapping dictionary that maps raw values to scaled values. These can then be post-processed using `post`. This can convert _standard scores_ and _percentiles_ to _z-scales_.

In [95]:
# export
import scipy.stats as st

class LookupNormative(AbstractNormative):
    """Deal with scaled -> percentile lookup tables.

    One instance is one table entry: a filter expression plus a dict mapping
    raw values to scaled values, with an optional post-processing step.
    """

    def __init__(self, flt, mapping, post = None):
        """
        Parameters
        ----------
        flt : str
            Filter expression selecting the rows this entry applies to.
        mapping : dict
            Maps each raw value to its scaled value.
        post : str, optional
            Post-processing of the mapped value: ``'ss2z'`` (standard score to
            z), ``'percentile2z'`` (percentile to z) or ``'leave'``. Any other
            value, including None, returns the mapped value unchanged.
        """
        self.flt = flt
        self.mapping = mapping # A dict {'raw': 'scaled', 'raw': 'scaled'}
        self.post = post

    @staticmethod
    def from_config(config):
        """Build from a config dict that has a `mapping` key, else None.

        Expects the keys ``filter`` and ``mapping``; ``post`` is optional.
        """
        if ('mapping' in config):
            return LookupNormative(config['filter'],
                                   config['mapping'],
                                   post = config.get('post', None))
        return None

    def to_config(self):
        """Return the dict form understood by `from_config`."""
        # The filter expression is stored as `self.flt` (there is no `self.filter`).
        return {'type': 'lookup',
                'filter': self.flt,
                'mapping': self.mapping,
                'post': self.post}

    def scale(self, value):
        """Map `value` through the table and apply the post-processing step.

        Returns NaN when `value` is not a key of the mapping.
        """
        try:
            scaled = self.mapping[value]
        except KeyError:
            return np.nan

        if self.post == 'leave':
            pass
        elif self.post == 'ss2z':
            # standard (50/10) to Z (0/1)
            scaled = (scaled - 50)/10
        elif self.post == 'percentile2z':
            # Percentile (0-100) to Z via the inverse normal CDF.
            scaled = st.norm.ppf(scaled/100)

        return scaled

    def explain(self, value):
        """Describe the matched filter, the mapped value and the scaled result."""

        try:
            mapped = self.mapping[value]
        except KeyError:
            mapped = 'missing'
        scaled = self.scale(value)

        matched = f'Matched: {self.flt}'
        calculated = f'Mapped to: {mapped}'
        # Without post-processing there is no separate scaled value to report.
        if (self.post is None) or (self.post == 'leave'):
            result = ''
        else: result = f'Scaled to: z={scaled}'

        return '\n'.join([matched, calculated, result])

This `MultiLookupOp` is just a light wrapper around the `NormativeLookupOp` that implements some helper features to create these items from "Kate Excel Sheet" format.

In [97]:
# export

class MultiLookupOp(NormativeLookupOp):
    """`NormativeLookupOp` with helpers for building tables from CSV sheets."""

    def __init__(self, lookup_table, filter_cols, measure_col, out_name):
        """Same arguments as `NormativeLookupOp`; the parent does the setup."""
        super().__init__(lookup_table, filter_cols, measure_col, out_name)

    def incoperate(self, other):
        """Append `other`'s table entries to this operation's table, in place."""

        self.lookup_table += other.lookup_table

    # Correctly spelled alias; the original name is kept for existing callers.
    incorporate = incoperate

    @staticmethod
    def from_sheet_format(path, filter_mappings, filter_cols,
                          measure_col, out_name, post = 'ss2z', extra_filter = None):
        """Build a `MultiLookupOp` from a CSV sheet, one table entry per row.

        Parameters
        ----------
        path : str
            CSV file read with `pd.read_csv`.
        filter_mappings : dict
            ``{column: {code: filter expression}}``. Columns named here are
            treated as categorical filter columns. Every other column must
            have an integer-convertible name and is treated as a score column.
        filter_cols : list[str]
            Row keys the generated filter expressions refer to.
        measure_col : str
            Row key holding the value to look up.
        out_name : str
            Name under which the result is reported.
        post : str
            Post-processing passed to each `LookupNormative`.
        extra_filter : str, optional
            Expression AND-ed in front of every generated filter.

        Returns
        -------
        MultiLookupOp
        """

        data = pd.read_csv(path)
        scale_cols = [col for col in data.columns if col not in filter_mappings]

        lookup_table = []

        for _, row in data.iterrows():
            # Translate this row's category codes into filter expressions.
            clauses = [mapping[row[col]] for col, mapping in filter_mappings.items()]
            flt = ' & '.join(f'({fl})' for fl in clauses)

            if extra_filter is not None:
                flt = f'({extra_filter}) & ({flt})'

            # Score-column headers become the integer keys of the mapping.
            lookup_table.append(LookupNormative(flt,
                                                dict((int(sc), row[sc]) for sc in scale_cols),
                                                post = post))

        return MultiLookupOp(lookup_table, filter_cols,
                             measure_col, out_name)
                            
        

In [110]:
# export
# hide

# Per sheet column, maps each integer category code to a filter expression.
# Passed as `filter_mappings` to `MultiLookupOp.from_sheet_format`, which looks
# up each row's code here and joins the resulting expressions with `&`.
# The expressions refer to the row keys `age`, `education` and `heaton_gender`.
HEATON_MAPPINGS = {'Age':{1: '(0 < age) & (age < 35)',
                          2: '(35 <= age) & (age < 40)',
                          3: '(40 <= age) & (age < 45)',
                          4: '(45 <= age) & (age < 50)',
                          5: 'age >= 50'},
                   'Education':{1: '(0 < education) & (education <= 9)',
                                2: '(9 < education) & (education <= 12)',
                                3: '(12 < education) & (education <= 13)',
                                4: '(13 < education) & (education <= 16)',
                                5: '(16 < education) & (education <= 18)',
                                6: '(education > 18)',
                               },
                   'Gender': {1: 'heaton_gender == 1',
                              2: 'heaton_gender == 2'}}

In [111]:
# Build the lookup from the GPD sheet; `post` is left at its default ('ss2z').
multi_op = MultiLookupOp.from_sheet_format('data/norms/from_kate/sheets/GPD.csv',
                                           HEATON_MAPPINGS,
                                           ['age', 'heaton_gender', 'education'],
                                           'grooved_peg_dom_scaled',
                                           'grooved_peg_dom_heaton')

In [112]:
# Explain the lookup for one example individual.
print(multi_op.explain({'age': 45, 'heaton_gender': 1, 'education': 13,
                        'grooved_peg_dom_scaled': 11}))

Matched: ((45 <= age) & (age < 50)) & ((12 < education) & (education <= 13)) & (heaton_gender == 1)
Mapped to: 57
Scaled to: z=0.7
