In [50]:
import pandas as pd
import numpy as np
import math
from functools import partial, reduce
import os
import re

In [51]:
analyte_name_dict = {'HRP2_pg_ml': ('HRP2', 'pg/ml'), 'LDH_Pan_pg_ml': ('LDH_Pan', 'pg/ml'),
                     'LDH_Pv_pg_ml': ('LDH_Pv', 'pg/ml'), 'LDH_Pf_pg_ml': ('LDH_Pf', 'pg/ml'),
                     'CRP_ng_ml': ('CRP', 'ng/ml')}

In [52]:
def set_llq_ulq(df, data_col):
    if isinstance(df[data_col], str):
        if '<' in df[data_col]:
            return 0
        elif '>' in df[data_col]:
            return 2
    return 1

In [53]:
def clean_strings(val):
    if isinstance(val, str):
        clean = val.replace('<', '')
        clean = clean.replace('>', '')
        try:
            return float(clean)
        except ValueError:
            return clean
    return val

In [54]:
order = ['study_id', 'sample_id', 'participant_id', 'sample_type', 'country', 'lat', 'long', 'age_yrs', 'fever',
         'timepoint_days', 'initial_specimen_category', 'confirmation_via', 'quansys_HRP2_pg_ml', 'quansys_HRP2_result',
         'quansys_HRP2_bin', 'quansys_LDH_Pan_pg_ml', 'quansys_LDH_Pan_result', 'quansys_LDH_Pan_bin',
         'quansys_LDH_Pv_pg_ml', 'quansys_LDH_Pv_result', 'quansys_LDH_Pv_bin', 'quansys_LDH_Pf_pg_ml',
         'quansys_LDH_Pf_result', 'quansys_LDH_Pf_bin', 'quansys_CRP_ng_ml', 'quansys_CRP_result', 'quansys_CRP_bin']

In [55]:
input_path = 'C:/Users/lzoeckler/Desktop/qfu'
files = [file for file in os.listdir(input_path) if 'csv' in file]
for fname in files:
    print(fname)
    df = pd.read_csv('{}/{}'.format(input_path, fname))
    modified_df = df.copy(deep=True)
    for analyte in analyte_name_dict.keys():
        data_col = 'quansys_{}'.format(analyte)
        name, unit = analyte_name_dict[analyte]
        partial_qs = partial(set_llq_ulq, data_col=data_col)
        modified_df['quansys_{}_bin'.format(name)] = modified_df.apply(partial_qs, axis=1)
        modified_df[data_col] = modified_df[data_col].apply(clean_strings)
    modified_df = modified_df[order]
    modified_df.to_csv('{}/binned/{}'.format(input_path, fname), index=False)
modified_df.head()

formatted_4plex_NIH_clinical.csv
formatted_4plex_WB_NIH_Pf_SPZ.csv
formatted_4plex_WB_NIH_Pf_TBV.csv


KeyError: "['confirmation_via'] not in index"

In [47]:
df[['quansys_HRP2_pg_ml']].head()

Unnamed: 0,quansys_HRP2_pg_ml
0,17215.98
1,> 492800.00
2,3.64
3,5.17
4,3.37


In [48]:
modified_df[['quansys_HRP2_pg_ml', 'quansys_HRP2_bins']].head()

Unnamed: 0,quansys_HRP2_pg_ml,quansys_HRP2_bins
0,17215.98,1
1,492800.0,2
2,3.64,1
3,5.17,1
4,3.37,1


In [48]:
input_path = 'C:/Users/lzoeckler/Desktop/quansys_formatting'
df = pd.read_csv('{}/formatted_5plex_WB_FIND_Pf_Pv.csv'.format(input_path))
df.head()

Unnamed: 0,study_id,sample_id,participant_id,sample_type,sample_variable,symptomatic_fever,timepoint,initial_specimen_catagory,location,status,...,quansys_LDH_Pan_pg_ml,quansys_LDH_Pv_pg_ml,quansys_LDH_Pf_pg_ml,quansys_CRP_ng_ml,quansys_HRP2_result,quansys_LDH_Pan_result,quansys_LDH_Pv_result,quansys_LDH_Pf_result,quansys_CRP_result,quansys_type
0,11,MA0070100280001001,,WB,,,,,Senegal,febrile,...,64128.05,39.23,59158.61,745.89,positive,positive,negative,positive,,5plex
1,11,MA0070100340001001,,WB,,,,,Senegal,febrile,...,85514.0,58.87,33415.6,345.06,positive,positive,negative,positive,,5plex
2,11,MA0070100360001001,,WB,,,,,Senegal,febrile,...,2714957.57,128.72,2833674.39,3326.26,positive,positive,negative,positive,,5plex
3,11,MA0070100370001001,,WB,,,,,Senegal,febrile,...,144621.28,17.58,148565.32,5517.8,positive,positive,negative,positive,,5plex
4,11,MA0070100390001001,,WB,,,,,Senegal,febrile,...,6012.82,9.07,2304.52,2077.9,positive,positive,negative,positive,,5plex


In [49]:
df['sample_variable'] = df['location'].apply(lambda x: 'location: {}'.format(x))
df['symptomatic_fever'] = df['status']
df.head()

Unnamed: 0,study_id,sample_id,participant_id,sample_type,sample_variable,symptomatic_fever,timepoint,initial_specimen_catagory,location,status,...,quansys_LDH_Pan_pg_ml,quansys_LDH_Pv_pg_ml,quansys_LDH_Pf_pg_ml,quansys_CRP_ng_ml,quansys_HRP2_result,quansys_LDH_Pan_result,quansys_LDH_Pv_result,quansys_LDH_Pf_result,quansys_CRP_result,quansys_type
0,11,MA0070100280001001,,WB,location: Senegal,febrile,,,Senegal,febrile,...,64128.05,39.23,59158.61,745.89,positive,positive,negative,positive,,5plex
1,11,MA0070100340001001,,WB,location: Senegal,febrile,,,Senegal,febrile,...,85514.0,58.87,33415.6,345.06,positive,positive,negative,positive,,5plex
2,11,MA0070100360001001,,WB,location: Senegal,febrile,,,Senegal,febrile,...,2714957.57,128.72,2833674.39,3326.26,positive,positive,negative,positive,,5plex
3,11,MA0070100370001001,,WB,location: Senegal,febrile,,,Senegal,febrile,...,144621.28,17.58,148565.32,5517.8,positive,positive,negative,positive,,5plex
4,11,MA0070100390001001,,WB,location: Senegal,febrile,,,Senegal,febrile,...,6012.82,9.07,2304.52,2077.9,positive,positive,negative,positive,,5plex


In [50]:
df.drop(['location', 'status'], axis=1, inplace=True)
df.to_csv('{}/formatted_5plex_WB_FIND_Pf_Pv.csv'.format(input_path), index=False)