In [1]:
# default_exp field_mapping

# Field Mapping

A big problem with the different neuro measures is that every data source seems to use its own field names for the same measurement. This module is a small collection of utilities for converting data between the internal field names and the field names used by external sources.

In [2]:
# export

import pandas as pd

In [12]:
# export

class FieldMapper(object):
    """Translate DataFrame column names between field-naming conventions.

    The mapper wraps a mapping table in which every column holds the field
    names of one naming convention and every row describes one field. A
    missing value in a cell means that convention has no name for that row.
    """

    def __init__(self, mapping_df):
        """Store the mapping table.

        Parameters
        ----------
        mapping_df : pandas.DataFrame
            Mapping table with one column per naming convention.
        """
        self.mapping_df = mapping_df

    @classmethod
    def from_file(cls, path):
        """Build a mapper from a mapping table stored on disk.

        Parameters
        ----------
        path : str or path-like
            Location of the table. Names ending in ``csv`` are read as
            comma-separated, ``tsv`` as tab-separated and ``xlsx`` as an
            Excel workbook. The check is case-insensitive.

        Returns
        -------
        An instance of the class the method was called on.

        Raises
        ------
        ValueError
            If the file name has none of the recognised endings.
        """
        # str() lets pathlib.Path objects through; lower() makes the
        # extension check case-insensitive ('MAPPING.CSV' is still a csv).
        lowered = str(path).lower()

        if lowered.endswith('csv'):
            mapping_df = pd.read_csv(path)
        elif lowered.endswith('tsv'):
            mapping_df = pd.read_csv(path, sep='\t')
        elif lowered.endswith('xlsx'):
            mapping_df = pd.read_excel(path)
        else:
            raise ValueError(f'Could not understand {path}')

        # cls (not FieldMapper) so subclasses get instances of themselves.
        return cls(mapping_df)

    def convert(self, data, source_column, target_column):
        """Rename the columns of ``data`` from one convention to another.

        Parameters
        ----------
        data : pandas.DataFrame
            Frame whose columns are named according to ``source_column``.
        source_column : str
            Column of the mapping table holding the names used in ``data``.
        target_column : str
            Column of the mapping table holding the desired names.

        Returns
        -------
        pandas.DataFrame
            A new frame whose columns are the sorted, unique, non-missing
            names found in ``target_column``. Target fields that have no
            source counterpart in the mapping table come back as columns
            of missing values.

        Raises
        ------
        AssertionError
            If either column name is absent from the mapping table.
        KeyError
            If ``data`` lacks a source field that maps to a target field.
        """
        assert source_column in self.mapping_df, \
            f'{source_column!r} is not a column of the mapping table'
        assert target_column in self.mapping_df, \
            f'{target_column!r} is not a column of the mapping table'

        target_fields = sorted(set(self.mapping_df[target_column].dropna()))

        # Only rows that name a field in *both* conventions can be renamed.
        id_df = self.mapping_df[[source_column, target_column]].dropna()
        id_dict = dict(zip(id_df[source_column], id_df[target_column]))

        # Select just the source fields that actually have a target name, so
        # `data` is not required to carry columns that would be dropped anyway.
        source_fields = sorted(id_dict)

        mapped_data = data[source_fields].rename(columns=id_dict)

        # reindex fixes the column order and adds any target-only fields.
        return mapped_data.reindex(target_fields, axis=1)
        
        

In [13]:
# Mapping table: one row per field, one column per naming convention.
# None marks a convention that has no name for that field.
map_df = pd.DataFrame([
    dict(internal_field='fieldA', redcap_id='field_a', cnns_id='fielda'),
    dict(internal_field='fieldB', redcap_id='field_b', cnns_id='fieldb'),
    dict(internal_field='fieldC', redcap_id=None, cnns_id='fieldc'),
    dict(internal_field='fieldD', redcap_id='field_d', cnns_id=None),
])

# Example data keyed by the internal field names, with a few missing values.
field_data = pd.DataFrame([
    dict(fieldA=1, fieldB=2, fieldC=3, fieldD=4),
    dict(fieldA=1, fieldB=2, fieldC=3, fieldD=4),
    dict(fieldA=1, fieldB=2, fieldC=None, fieldD=None),
    dict(fieldA=1, fieldB=None, fieldC=3, fieldD=4),
])
field_data

Unnamed: 0,fieldA,fieldB,fieldC,fieldD
0,1,2.0,3.0,4.0
1,1,2.0,3.0,4.0
2,1,2.0,,
3,1,,3.0,4.0


In [14]:
# Wrap the example mapping table and translate internal names to REDCap ids.
mapper = FieldMapper(mapping_df=map_df)

out = mapper.convert(
    data=field_data,
    source_column='internal_field',
    target_column='redcap_id',
)

# fieldC has no redcap_id, so only three columns should come back.
assert (out.columns == ['field_a', 'field_b', 'field_d']).all()
# Values must travel with their column through the rename.
assert out['field_a'].eq(1).all()
out

redcap_id,field_a,field_b,field_d
0,1,2.0,4.0
1,1,2.0,4.0
2,1,2.0,
3,1,,4.0


In [17]:
# Same data translated to the cnns_id names; fieldD has no cnns_id and is dropped.
mapper.convert(
    data=field_data,
    source_column='internal_field',
    target_column='cnns_id',
)

cnns_id,fielda,fieldb,fieldc
0,1,2.0,3.0
1,1,2.0,3.0
2,1,2.0,
3,1,,3.0
