In [1]:
# Import raw LIMS dataset
%run -i "lims_export_v2.py"

In [2]:
# Order-dependent step 1: export the raw LIMS data into df_lims.
df_lims = export_df_from_LIMS()



In [3]:
# Order-dependent step 2: keep only the relevant fields.
# (The helper's name keeps its original spelling; it is defined outside this notebook section.)
df_lims = isolate_relavent_data(df_lims)

In [None]:
# Display the ntc_amplify and pretreatment columns of df_lims.
df_lims[['ntc_amplify','pretreatment']]

#### Order-independent transformations

In [4]:
# Order-independent operations: apply convert_numeric, then freetext_transform.
df_lims = df_lims.pipe(convert_numeric).pipe(freetext_transform)

#### pipeline update progress

In [None]:
### Critical: convert long to wide ###
# Every step receives the dataframe returned by the previous step.
df_lims = (
    df_lims
    .pipe(drop_null_sample_ID)   # COMPLETE
    .pipe(drop_all_but_N1_N2)    # remove full rows where PCR Target is anything except N1 or N2
    .pipe(below_lod_to_yes_no)
    .pipe(long_to_wide)          # core step: everything not under PCRTarget N1 or N2 will not be imported
)

#### Transform LIMS dataframe ####
df_lims = (
    df_lims
    .pipe(rename_lims_columns)   # COMPLETE
    .pipe(convert_numeric)       # COMPLETE
    .pipe(freetext_transform)    # COMPLETE
    .pipe(validate_yes_no_clms)
    .pipe(validate_choice_fields)
)

# Development Cell

In [5]:
# The following fields must be all lowercase for a consistent import to REDCap.
# Mostly yes/no values. LIMS sometimes has entries such as YES/Yes/yes, or NO/No/no.
lowercase_fields = [
    'quality_flag',
    'inhibition_adjust',
    'ntc_amplify',
    'pretreatment',
    'inhibition_detect',
    'sars_cov2_below_lod',
]

# Standard yes/no conversion from LIMS to REDCap format.
yes_no_map = {'yes': 'Yes', 'no': 'No', 'not_tested': 'Not Tested'}

# Column name -> {lowercased LIMS value: REDCap value}.
choice_fields = {
    'quality_flag': yes_no_map,
    'inhibition_adjust': yes_no_map,
    'ntc_amplify': yes_no_map,
    'inhibition_detect': yes_no_map,
    'sars_cov2_below_lod': yes_no_map,
    'pretreatment': {"yes": 1, "no": 0},
}


def convert_to_lowercase(df_lims):
    """
    Lowercase the columns in ``lowercase_fields`` and map the columns in
    ``choice_fields`` to their REDCap values.

    Only string values are lowercased; other values (NaN, numbers) are left
    untouched by that step, so a column without any strings no longer raises.
    Values that have no entry in a column's mapping become NaN, except values
    that already equal one of the mapping's targets: those are kept, so
    running the function again on its own output does not erase data.

    args:
        df_lims: DataFrame containing every column named in
            ``lowercase_fields`` and ``choice_fields``
    return:
        a transformed copy; the input DataFrame is not modified
    raises:
        KeyError: if any of the expected columns is missing
    """
    df_lims = df_lims.copy()

    # Check up front so the error names every missing column at once.
    expected = list(dict.fromkeys(list(lowercase_fields) + list(choice_fields)))
    missing = [name for name in expected if name not in df_lims.columns]
    if missing:
        raise KeyError(f"LIMS dataframe is missing expected columns: {missing}")

    # Set the specified fields to lowercase (strings only).
    for field in lowercase_fields:
        df_lims[field] = df_lims[field].map(
            lambda value: value.lower() if isinstance(value, str) else value
        )

    # Convert choice fields to their mapped value.
    for field, mapping in choice_fields.items():
        current = df_lims[field]
        # Values that are already REDCap values (e.g. 1/0 from an earlier run) are kept.
        already_mapped = current.isin(list(mapping.values()))
        df_lims[field] = current.map(mapping).where(~already_mapped, current)

    return df_lims

In [6]:
# Apply convert_to_lowercase and rebind df_lims to the returned copy.
df_lims = convert_to_lowercase(df_lims)

In [8]:
# List the dtype of every column of df_lims.
df_lims.dtypes

sample_collect_date         object
sample_collect_time         object
collection_water_temp      float64
collection_storage_time    float64
collection_storage_temp    float64
pretreatment               float64
pretreatment_specify        object
equiv_sewage_amt           float64
test_result_date            object
flow_rate                  float64
sars_cov2_units             object
sars_cov2_avg_conc         float64
sars_cov2_std_error        float64
sars_cov2_cl_95_lo         float64
sars_cov2_cl_95_up         float64
sars_cov2_below_lod         object
lod_sewage                  object
ntc_amplify                 object
rec_eff_percent            float64
inhibition_detect           object
inhibition_adjust           object
inhibition_method           object
concentration_method        object
extraction_method           object
pre_conc_storage_time      float64
pre_conc_storage_temp       object
pre_ext_storage_time       float64
pre_ext_storage_temp       float64
tot_conc_vol        

In [None]:
def validate_yes_no_clms(df_lims):
    """
    Force values to either "yes", "no" or NaN for the columns in ``yes_no_clms``.

    Matching ignores letter case and surrounding whitespace, so "YES", "Yes"
    and " yes " all become "yes". Anything else (other text, numbers, missing
    values) becomes NaN.

    Relies on the module-level list ``yes_no_clms``.

    args:
        df_lims: DataFrame containing every column named in ``yes_no_clms``
    return:
        a transformed copy; the input DataFrame is not modified
    """
    df_lims = df_lims.copy()

    accepted = {"yes": "yes", "no": "no"}

    def _normalise(value):
        # Non-strings pass through unchanged; the dict lookup below turns them into NaN.
        return value.strip().lower() if isinstance(value, str) else value

    for clm in yes_no_clms:
        df_lims[clm] = df_lims[clm].map(_normalise).map(accepted)

    return df_lims

def validate_choice_fields(df_lims):
    """
    Map values in choice columns to their equivalent REDCap values.

    Each column listed below is mapped exactly once; values without an entry
    in the column's mapping become NaN. The integer-coded columns are then
    cast to the nullable "Int64" dtype, and finally ``sars_cov2_units`` is
    set to 1 for every row.

    args:
        df_lims: DataFrame containing the columns 'pretreatment',
            'extraction_method', 'sars_cov2_units' and 'concentration_method'
    return:
        a transformed copy; the input DataFrame is not modified
    """
    df_lims = df_lims.copy()

    choice_maps = {
        'pretreatment': {
            "yes": 1,
            "no": 0,
        },
        # Any extraction method other than this kit becomes NaN.
        'extraction_method': {'MagMAX Viral/Pathogen Nucleic Acid Isolation Kit': "magmax"},
        'sars_cov2_units': {
            'Copies/L': 1,  # REDCAP: copies/L wastewater
            'Copies/g': 3,  # REDCAP: copies/g wet sludge
        },
        'concentration_method': {
            "Skim Milk Flocculation": "skimmilk",
            "Ceres Nanotrap": "ceresnano",
        },
    }
    integer_coded = ['sars_cov2_units', 'pretreatment']

    # Map once only: mapping an already-mapped column again would turn the
    # integer codes into NaN, because the codes are not keys of the mapping.
    for column, mapping in choice_maps.items():
        df_lims[column] = df_lims[column].map(mapping)

    # Nullable integers keep the codes as 0/1/3 even when some rows are missing.
    for column in integer_coded:
        df_lims[column] = df_lims[column].astype("Int64")

    # Force all units to be "Copies/L wastewater".
    # NOTE(review): rows reported as 'Copies/g' are relabelled here without
    # any change to their values - confirm that this is intended.
    df_lims['sars_cov2_units'] = 1

    return df_lims




In [None]:
# Reference notes: REDCap values for the yes/no style fields.
# They are kept as comments so that this cell is valid Python and can be run.
#
#   'quality_flag': {'yes': 'Yes', 'no': 'No'}
#   'inhibition_adjust': {'yes': 'Yes', 'no': 'No'},
#   'ntc_amplify': {'yes': 'Yes', 'no': 'No'},  # dropdown with two options yes/no in REDCap
#   'pretreatment': {"yes": 1, "no": 0}  # yesno field in REDCap, must be 0 or 1
#
#   'inhibition_detect': {'yes': 'Yes', 'no': 'No', 'not_tested': 'Not Tested'},
#
#   'n1_sars_cov2_below_lod': {'yes': 'Yes', 'no': 'No'},
#   'n2_sars_cov2_below_lod': {'yes': 'Yes', 'no': 'No'},
#
# HISTORIC:

yes_no_clms = [
                 'quality_flag',
                 'inhibition_adjust',
                 'ntc_amplify',
                 'pretreatment',
                 'inhibition_detect',
                 'sars_cov2_below_lod',
                 'n1_sars_cov2_below_lod',
                 'n2_sars_cov2_below_lod',
               ]


In [None]:

import os
from getpass import getpass

# SECURITY: never store the REDCap API token in the notebook. It is read from
# the REDCAP_API_TOKEN environment variable, with an interactive prompt as a
# fallback. Any token that was ever saved in this file should be regenerated
# in REDCap.
token = os.environ.get("REDCAP_API_TOKEN") or getpass("REDCap API token: ")
url = 'https://redcap.doh.wa.gov/api/'
meta = redcap_metadata_export(url, token)

In [None]:
# Pass the exported REDCap metadata to accepted_redcap_fields; the result is filtered in the next cell.
accpted = accepted_redcap_fields(meta)

In [None]:
# Keep only the accepted REDCap fields that a LIMS column is mapped to.
# The lookup set is built once instead of once per item.
mapped_redcap_fields = set(dict_lims_column_map.values())
{field: spec for field, spec in accpted.items() if field in mapped_redcap_fields}

In [None]:
# Look up the "ntc_amplify" entry of the metadata returned by redcap_metadata_export.
meta.loc["ntc_amplify"]

In [None]:
# Look up the "pretreatment" entry of the metadata returned by redcap_metadata_export.
meta.loc["pretreatment"]

In [None]:
import os
from getpass import getpass

import redcap

# SECURITY: never store the REDCap API token in the notebook. It is read from
# the REDCAP_API_TOKEN environment variable, with an interactive prompt as a
# fallback. Any token that was ever saved in this file should be regenerated
# in REDCap.
token = os.environ.get("REDCAP_API_TOKEN") or getpass("REDCap API token: ")
url = 'https://redcap.doh.wa.gov/api/'

pycap_prjc = redcap.Project(url, token)

In [None]:
# Export the project's metadata through PyCap, requesting the "df" format.
# NOTE(review): describe_fields() further down calls export_metadata(format_type = "df").
# The keyword differs between PyCap versions (`format` vs `format_type`) - confirm
# which one the installed PyCap accepts and make both calls agree.
pycap_meta = pycap_prjc.export_metadata(format = "df")

In [None]:
# Look up the "ntc_amplify" entry of the PyCap metadata export.
pycap_meta.loc["ntc_amplify"]

In [None]:
# Look up the "pretreatment" entry of the PyCap metadata export.
pycap_meta.loc["pretreatment"]

In [None]:
# Metadata rows whose field_type is "yesno".
pycap_meta[pycap_meta["field_type"] == "yesno"]

In [None]:
# Count how many metadata rows there are for each field_type.
pycap_meta["field_type"].value_counts()

In [None]:
# Metadata rows whose field_type is "sql".
pycap_meta[pycap_meta["field_type"] == "sql"]

In [None]:
# Metadata rows whose field_type is "radio".
pycap_meta[pycap_meta["field_type"] == "radio"]

# Scratch Sheet

In [None]:
import os
from getpass import getpass

import redcap

# Credentials for PID171.
# SECURITY: never store the REDCap API token in the notebook. It is read from
# the REDCAP_API_TOKEN environment variable, with an interactive prompt as a
# fallback. Any token that was ever saved in this file should be regenerated
# in REDCap.
token = os.environ.get("REDCAP_API_TOKEN") or getpass("REDCap API token: ")
url = 'https://redcap.doh.wa.gov/api/'

# Create the Project object
project = redcap.Project(url, token)


In [None]:
def _parse_redcap_choices(raw_choices):
    """Turn a choice string such as "1, Yes | 0, No" into {"1": "Yes", "0": "No"}."""
    parsed = {}
    for entry in raw_choices.split("|"):
        # Split on the first comma only: labels may contain commas themselves.
        code, _, label = entry.partition(",")
        code = code.strip()
        if code:  # skip empty fragments, e.g. after a trailing "|"
            parsed[code] = label.strip()
    return parsed


def describe_fields(project):
    """
    Create a summary dataframe that describes every standard field from the
    default API export.

    The summary is indexed by export field name and holds the columns of the
    field-name export plus "form_name", "field_type" and "Choice Values".
    "Choice Values" is the string form of a {code: label} dict for dropdown,
    radio, checkbox and yesno fields.

    args:
        project: pycap Project object (must provide export_field_names and
            export_metadata, both called with format_type="df")
    return:
        DataFrame
    """
    df_fields = project.export_field_names(format_type="df")
    df_meta = project.export_metadata(format_type="df")

    # Set aside the raw choice strings of the multiple-choice fields.
    is_choice_field = df_meta["field_type"].isin(['dropdown', 'radio', 'checkbox'])
    raw_choices = df_meta.loc[is_choice_field, "select_choices_or_calculations"].dropna()

    # Drop the first row: it is the index of the dataframe export
    # (unique identifier: record_id, sample_id).
    df_meta = df_meta.drop(df_meta.index[0])
    df_fields = df_fields.drop(df_fields.index[0])

    # Only two fields are needed from the metadata export.
    df_meta = df_meta[["form_name", "field_type"]].copy()

    # Combine the fields from the metadata and from export_field_names.
    fields_with_meta = df_fields.join(df_meta)

    # Add the field names that have type "file" (from the metadata).
    files_df = df_meta[df_meta["field_type"] == "file"].copy()
    files_df['export_field_name'] = files_df.index
    all_fields = pd.concat([fields_with_meta, files_df]).set_index("export_field_name")

    # New column: str(dict) of the possible choices of each multiple-choice field.
    # Series.items() is used because Series.iteritems() no longer exists in pandas 2.
    choice_text = {
        field: str(_parse_redcap_choices(raw))
        for field, raw in raw_choices.items()
    }
    choice_series = pd.Series(choice_text)

    complete = pd.concat([all_fields, choice_series.to_frame("Choice Values")], axis=1)

    # Special case: "yesno" fields always have the same two choices.
    yesno_dict = {"1": "Yes", "0": "No"}
    yesno_index = df_meta[df_meta["field_type"] == "yesno"].index
    yesno_frame = pd.Series(str(yesno_dict), yesno_index).to_frame("Choice Values")

    df_joined = complete.join(yesno_frame, lsuffix='_l', rsuffix='_r')

    # Prefer the parsed choices; fall back to the yes/no choices.
    complete["Choice Values"] = df_joined["Choice Values_l"].fillna(df_joined["Choice Values_r"])

    return complete


In [None]:
# Build the field summary for the PyCap project.
df = describe_fields(project)

In [None]:
# Columns that validate_yes_no_clms normalises.
yes_no_clms = [
    # sample-level flags
    'quality_flag',
    'inhibition_adjust',
    'ntc_amplify',
    'pretreatment',
    'inhibition_detect',
    # below-LOD flags (plain, N1 and N2 variants)
    'sars_cov2_below_lod',
    'n1_sars_cov2_below_lod',
    'n2_sars_cov2_below_lod',
]


In [None]:
# Rows of the field summary whose index value appears in yes_no_clms.
df[df.index.isin(yes_no_clms)]

In [None]:
# Rows of the field summary whose field_type is "dropdown".
df[df["field_type"] == "dropdown"]

In [None]:

# Names of the REDCap fields treated as dropdowns in the cells below.
redcap_dropdown = [
    'sars_cov2_units',
    'n1_sars_cov2_below_lod',
    'n2_sars_cov2_below_lod',
    'ntc_amplify',
    'inhibition_detect',
    'inhibition_adjust',
    'concentration_method',
    'extraction_method',
    'hum_frac_mic_unit',
    'other_norm_unit',
    'quality_flag',
]

In [None]:
# Print each name in redcap_dropdown on its own line.
for i in redcap_dropdown:
    print(i)

In [None]:
# Dropdown fields that are not listed in yes_no_clms.
set(redcap_dropdown).difference(yes_no_clms)