In [15]:
import pandas as pd
import numpy as np
import math
from functools import partial, reduce
import os
import re

In [16]:
dfs = []
input_path = 'C:/Users/lzoeckler/Desktop/4plex/input_data/20190610'
for fname in os.listdir(input_path):
    plex_data = pd.read_csv('{}/{}'.format(input_path, fname), index_col=False,
                            skiprows=8, names=['patient_id', 'type', 'well', 'error',
                                               'HRP2_pg_ml', 'LDH_Pan_pg_ml',
                                               'LDH_Pv_pg_ml', 'CRP_ng_ml',
                                               'fail1', 'fail2'])
    plex_data.drop(['fail1', 'fail2'], axis=1, inplace=True)
    plex_data = plex_data.applymap(lambda x: x.lower() if isinstance(x, str) else x)
    plex_data['patient_id'] = plex_data['patient_id'].fillna(method='ffill')
    plex_data = plex_data[~plex_data['patient_id'].isnull()]
    test = plex_data.loc[plex_data['patient_id'].str.contains('pa-124-70')]
    if len(test) > 1: 
        print(fname)
    dfs.append(plex_data)
combined = pd.concat(dfs)
combined = combined.loc[~combined['type'].isnull()]
combined = combined.loc[~combined['type'].str.contains('pixel')]
combined.head()

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
0,calibrator (neat),reduced concentration (replicate 1),a1,,600.41,11064.05,571.43,9423.69
1,calibrator (neat),reduced concentration (replicate 2),b1,,483.68,9974.46,424.78,11129.36
2,calibrator (1:3),reduced concentration (replicate 1),a2,,219.66,3501.87,181.6,3171.26
3,calibrator (1:3),reduced concentration (replicate 2),b2,,196.66,3507.22,149.18,3156.21
4,calibrator (1:9),reduced concentration (replicate 1),a3,,65.56,1208.19,57.82,990.08


In [17]:
check = pd.read_csv('{}/20190528_Data(Conc)_4Plex(Namibia P56).csv'.format(input_path), skiprows=8,
                    index_col=False)
check.head()

Unnamed: 0,calibrator (Neat),Reduced Concentration (Replicate 1),A1,NA,590.09,12497.25,Incalculable High,10198.85
0,,Reduced Concentration (Replicate 2),B1,,Masked,8499.35,496.46,11085.43
1,calibrator (1:3),Reduced Concentration (Replicate 1),A2,MO1,Masked,3908.95,161.35,2838.81
2,,Reduced Concentration (Replicate 2),B2,,196.47,3132.72,165.71,2610.89
3,calibrator (1:9),Reduced Concentration (Replicate 1),A3,,70.45,1255.37,56.92,920.2
4,,Reduced Concentration (Replicate 2),B3,,61.17,977.37,41.02,1141.47


In [18]:
def fix_concentrations(df):
    con = df['concentration'].partition(':')[2]
    con = con.partition(')')[0]
    if len(con) != 0:
        return con
    else:
        return '1'

In [19]:
samples_data = combined.loc[combined['patient_id'].str.contains('pa-')]
samples_data = samples_data.drop('type', axis=1)
samples_data['concentration'] = samples_data['patient_id'].apply(lambda x: x.partition(' ')[-1])
samples_data['patient_id'] = samples_data['patient_id'].apply(lambda x: x.partition(' ')[0])
samples_data = samples_data.loc[(samples_data['concentration'].str.contains('neat|50'))]
samples_data = samples_data.loc[~samples_data['concentration'].str.contains('low volume')]
samples_data = samples_data.loc[~samples_data['well'].isnull()]
samples_data['concentration'] = samples_data.apply(fix_concentrations, axis=1)
samples_data = samples_data.sort_values(['patient_id', 'concentration'])
samples_data.head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
33,pa-001-14,c8,,299.48,< 14.41,2.76,7064.05,1
67,pa-001-14,d8,,4138.79,< 720.50,< 116.50,3178.09,50
29,pa-001-21,c12,,> 330.00,38.94,12.21,2413.65,1
63,pa-001-21,d12,,1377.18,1593.58,227.59,< 1337.00,50
27,pa-001-28,g2,,> 330.00,29.86,13.90,92.87,1


In [20]:
samples_data.loc[samples_data['patient_id'].str.contains('pa-014-42')]

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
28,pa-014-42,e4,,1.70,16.58,8.17,> 9574.00,1
31,pa-014-42,h4,,< 133750.00,3913228.94,646642.13,< 3342500.00,125000
30,pa-014-42,g4,,< 2675.00,48096.26,< 5825.00,< 66850.00,2500
29,pa-014-42,f4,,< 53.50,< 720.50,183.82,< 1337.00,50


In [21]:
samples_data['concentration'].unique()

array(['1', '50', '125000', '2500', '312500000', '6250000', '15625000000',
       '781250000000'], dtype=object)

In [22]:
sample_ids = samples_data['patient_id'].unique().tolist()
sample_set = set(sample_ids)

In [23]:
# threshhold values for various analytes
threshholds = {'HRP2_pg_ml': 330, 'LDH_Pan_pg_ml': 10514,
               'LDH_Pv_pg_ml': 497, 'CRP_ng_ml': 9574}

In [24]:
# constant to apply to the threshhold for different dilutions
dil_constants = {'50': 1, '2500': 50, '125000': 2500,
                 '6250000': 125000, '312500000': 6250000,
                 '15625000000': 312500000, '781250000000': 15625000000}

In [25]:
# positivity threshholds for various analytes
pos_threshholds = {'HRP2_pg_ml': 2.3, 'LDH_Pan_pg_ml': 47.8,
                   'LDH_Pv_pg_ml': 75.1, 'CRP_ng_ml': np.nan}

In [26]:
# dilution sets for various dilutions
dilution_sets = {'50': ('1', '50', 'fail'), '2500': ('50', '2500', 'fail'),
                 '125000': ('2500', '125000', 'fail'),
                 '6250000': ('125000', '6250000', 'fail'),
                 '312500000': ('6250000', '312500000', 'fail'),
                 '15625000000': ('312500000', '15625000000', 'fail'),
                 '781250000000': ('15625000000', '781250000000', 'fail')} 

In [27]:
duplicates = samples_data.loc[samples_data.duplicated(subset=['patient_id', 'concentration'], keep=False)]
deduped_dfs = []
for analyte in threshholds.keys():
    dup_analyte = duplicates[['patient_id', 'well', 'error', 'concentration', analyte]]
    pid_dfs = []
    for pid in duplicates['patient_id'].unique():
        dup_data = dup_analyte.loc[dup_analyte['patient_id'] == pid]
        con_dfs = []
        for concentration in dup_data['concentration'].unique():
            fill_df = pd.DataFrame(columns=['patient_id', 'well', 'error',
                                            'concentration', analyte])
            dup_con = dup_data.loc[dup_data['concentration'] == concentration]
            values = dup_con[analyte]
            wells = dup_con['well'].tolist()
            wells = ''.join(c for c in str(wells) if c not in ["[", "]", "'"])
            errors = dup_con['error'].tolist()
            non_nan_error = [e for e in errors if e is not np.nan]
            if not non_nan_error:
                errors = np.nan
            else:
                errors = non_nan_error
            try:
                values = [float(val) for val in values.tolist()]
                val = sum(values) / len(values)
            except ValueError:
                values = values.tolist()
                num_vals = [val for val in values if ('<' not in val) & ('>' not in val)]
                if len(num_vals) == 1:
                    val = num_vals[0]
                else:
                    val = values[0]
            fill_df = fill_df.append({'patient_id': pid, 'well': wells, 'error': errors,
                           'concentration': concentration, analyte: val}, ignore_index=True)
            con_dfs.append(fill_df)
        con_df = pd.concat(con_dfs)
        pid_dfs.append(con_df)
    pid_df = pd.concat(pid_dfs)
    deduped_dfs.append(pid_df)
deduped = reduce(lambda left, right: pd.merge(left, right, on=['patient_id', 'well', 'error', 'concentration']), deduped_dfs)
deduped.head()

Unnamed: 0,patient_id,well,error,concentration,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
0,pa-013-28,"c5, e1",,1,117.59,56.865,6.515,2030.62
1,pa-013-28,"d5, f1",,50,8823.61,1023.80,219.98,2194.03
2,pa-049-14,"d10, a8",,125000,3187110.0,3.37477e+06,< 291250.00,< 3342500.00
3,pa-064-63,"a9, e4",,1,205.16,22.72,11.85,27.64
4,pa-064-63,"b9, f4",,50,1424.38,< 720.50,< 116.50,< 1337.00


In [28]:
deduped_dfs[0].head()

Unnamed: 0,patient_id,well,error,concentration,HRP2_pg_ml
0,pa-013-28,"c5, e1",,1,117.59
0,pa-013-28,"d5, f1",,50,8823.61
0,pa-049-14,"d10, a8",,125000,3187110.0
0,pa-064-63,"a9, e4",,1,205.16
0,pa-064-63,"b9, f4",,50,1424.38


In [29]:
no_duplicates = samples_data.drop_duplicates(subset=['patient_id', 'concentration'], keep=False)
no_duplicates = pd.concat([no_duplicates, deduped])
no_duplicates.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,CRP_ng_ml,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,concentration,error,patient_id,well
33,7064.05,299.48,< 14.41,2.76,1,,pa-001-14,c8
67,3178.09,4138.79,< 720.50,< 116.50,50,,pa-001-14,d8
29,2413.65,> 330.00,38.94,12.21,1,,pa-001-21,c12
63,< 1337.00,1377.18,1593.58,227.59,50,,pa-001-21,d12
27,92.87,> 330.00,29.86,13.90,1,,pa-001-28,g2


In [30]:
for err in no_duplicates['error'].unique():
    print(err)
    print(type(err))

nan
<class 'float'>
software unable to process full dilution, the sample was run as neat and the data for this sample was multiplied by the dilution factor (1:15625000000) in this data file only. 
<class 'str'>
software unable to process full dilution, the sample was run as neat and the data for this sample was multiplied by the dilution factor (1:781250000000) in this data file only. 
<class 'str'>


In [31]:
def return_decisions(low, high, fail='fail'):
    # Columns = [neat_above, neat_below, neat_LLQ, neat_ULQ, NA]
    # Rows = [dil_above, dil_below, dil_LLQ, dil_ULQ, NA]
    HRP2_matrix = np.array([[high, high, high, high, high],
                            [high, low, low, high, fail],
                            [high, low, low, fail, fail],
                            [high, high, fail, high, high],
                            [fail, high, high, fail, fail]])

    other_matrix = np.array([[high, low, low, high, high],
                               [high, low, low, high, fail],
                               [high, low, low, fail, fail],
                               [high, low, fail, high, high],
                               [fail, low, low, fail, fail]])

    # decisions for various analytes
    decisions = {'HRP2_pg_ml': HRP2_matrix, 'LDH_Pan_pg_ml': other_matrix,
                 'LDH_Pv_pg_ml': other_matrix, 'CRP_ng_ml': other_matrix}
    return(decisions)

In [32]:
def run_compare(df, analyte_val, dil_val):
    above, below, LLQ, ULQ, NA = False, False, False, False, False
    val = df[analyte_val]
    thresh_val = dil_constants[dil_val] * threshholds[analyte_val]
    try:
        float_val = float(val)
        if math.isnan(float_val):
            NA = True
        elif float_val > thresh_val:
            above = True
        elif float_val < thresh_val:
            below = True
    except ValueError:
        if '<' in val:
            LLQ = True
        elif '>' in val:
            ULQ = True
    finally:
        return(np.array([above, below, LLQ, ULQ, NA]))

In [33]:
analyte_dfs = []
error_pids = {}
for analyte in threshholds.keys():
# for analyte in ['HRP2_pg_ml']:
    print(analyte)
    patient_dfs = []
    for pid in no_duplicates['patient_id'].unique():
#     for pid in ['pa-001-3', 'pa-014-63', 'pa-129-14']:
        patient_data = no_duplicates.loc[no_duplicates['patient_id'] == pid]
        dilution_values = sorted([val for val in patient_data['concentration'].unique() if val != '1'], key=len)
        best_decision = '1'
        for max_dilution in dilution_values:
#         for max_dilution in ['15625000000']:
            dil_data = patient_data.loc[patient_data['concentration'].isin([best_decision, max_dilution])]          
            partial_compare = partial(run_compare, analyte_val=analyte, dil_val=max_dilution)
            dil_data['decision_vector'] = dil_data.apply(partial_compare, axis=1)
            decisions = return_decisions(best_decision, max_dilution)
            decision_matrix = decisions[analyte]
            best_df = pd.DataFrame(columns=['patient_id', 'errors', analyte,
                                            '{}_dilution'.format(analyte),
                                            '{}_well'.format(analyte),
                                            '{}_max_dilution'.format(analyte)])
            vector_low = dil_data.loc[dil_data['concentration'] == best_decision,
                                      'decision_vector'].item()
            vector_high = dil_data.loc[dil_data['concentration'] == max_dilution,
                                       'decision_vector'].item()
            decision = decision_matrix[vector_high, vector_low].item()
            if decision in [best_decision, max_dilution]:
                val = dil_data.loc[dil_data['concentration'] == decision,
                                   analyte].item()
                well = dil_data.loc[dil_data['concentration'] == decision,
                                    'well'].item()
                error = dil_data.loc[dil_data['concentration'] == decision,
                                    'error'].item()
            elif decision == 'fail':
                val = 'fail'
                well = 'fail'
                error = np.nan
                error_pids[pid] = '{} failure'.format(analyte)
            else:
                raise ValueError("Unexpected decision value: {}".format(decision))
            other_dilutions = [val for val in patient_data['concentration'].unique()]
            other_dilutions = [float(val) for val in other_dilutions if val != 'fail']
            max_dilution = int(max(other_dilutions))
            df_decision = decision if decision != 'fail' else np.nan
            best_decision = decision
            best_df = best_df.append({'patient_id': pid, 'errors': error, analyte: val,
                                      '{}_dilution'.format(analyte): df_decision,
                                      '{}_well'.format(analyte): well,
                                      '{}_max_dilution'.format(analyte): max_dilution}, ignore_index=True)
            if decision == 'fail':
                break
        patient_dfs.append(best_df)
    patient_df = pd.concat(patient_dfs)
    patient_df['errors'] = patient_df['errors'].astype('object')
    analyte_dfs.append(patient_df)
final_df = reduce(lambda left, right: pd.merge(left, right, on=['patient_id', 'errors']), analyte_dfs)

HRP2_pg_ml


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


LDH_Pan_pg_ml
LDH_Pv_pg_ml
CRP_ng_ml


In [34]:
super_final = final_df.copy(deep=True)
for pid in error_pids.keys():
    error = error_pids[pid]
    pid_df = final_df.loc[final_df['patient_id'] == pid]
    pid_df['errors'] = pid_df['errors'].apply(lambda x: error if np.isnan(x) else x + ' ' + error)
    if len(pid_df) > 0:
        super_final = super_final.loc[super_final['patient_id'] != pid]
        super_final = super_final.append(pid_df)
super_final.tail()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,patient_id,errors,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,HRP2_pg_ml_max_dilution,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,LDH_Pan_pg_ml_max_dilution,LDH_Pv_pg_ml,LDH_Pv_pg_ml_dilution,LDH_Pv_pg_ml_well,LDH_Pv_pg_ml_max_dilution,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well,CRP_ng_ml_max_dilution
133,pa-014-35,CRP_ng_ml failure,57.26,1,a12,125000,42.8,1,a12,125000,13.02,1,a12,125000,fail,,fail,125000
134,pa-014-42,CRP_ng_ml failure,1.70,1,e4,125000,16.58,1,e4,125000,8.17,1,e4,125000,fail,,fail,125000
137,pa-014-63,CRP_ng_ml failure,< 1.07,1,c10,50,19.01,1,c10,50,7.59,1,c10,50,fail,,fail,50
795,pa-088-35,CRP_ng_ml failure,465.05,50,b11,125000,20.74,1,a11,125000,7.77,1,a11,125000,fail,,fail,125000
854,pa-095-7,CRP_ng_ml failure,661.88,50,f7,125000,72.03,1,e7,125000,6.58,1,e7,125000,fail,,fail,125000


In [35]:
super_final['errors'].unique()

array([nan, 'CRP_ng_ml failure'], dtype=object)

In [36]:
final_ids = super_final['patient_id'].unique().tolist()
final_set = set(final_ids)
assert final_set == sample_set, print(sample_set - final_set)

In [122]:
def split_time(df):
    sub = df['patient_id'].split('-')
    try:
        time = int(sub[2])
        return time
    except IndexError:
        return 0

In [123]:
def remove_time(df):
    patient = df['patient_id'].split('-')
    return '{}-{}'.format(patient[0], patient[1])

In [135]:
def remove_day(x):
    if isinstance(x, str):
        x = x.replace('day ', '')
    return x

In [144]:
add_info = pd.read_stata('C:/Users/lzoeckler/Desktop/4plex/input_data/additional_info.dta')
add_info.to_csv('C:/Users/lzoeckler/Desktop/additional_UCSF_namibia_info.csv')
add_info.head()

Unnamed: 0,priority_level,sample_id,pa_id,boxnumber,position,day0_enrollee,days_since_tx,initial_sample,got_PQ,age,sex,fever48_r,enrolled_from,returned_with_fever,when_returned_with_fever,retreated,when_retreated,comments
0,1.0,PA-001,PA-001,1,7,1.0,0.0,1.0,1.0,2,Female,1.0,Health facility,,,,,
1,2.0,PA-001-3,PA-001,1,44,1.0,3.0,,1.0,2,Female,1.0,Health facility,,,,,
2,2.0,PA-001-7,PA-001,2,48,1.0,9.0,,1.0,2,Female,1.0,Health facility,,,,,
3,2.0,PA-001-14,PA-001,4,60,1.0,17.0,,1.0,2,Female,1.0,Health facility,,,,,
4,2.0,PA-001-21,PA-001,6,16,1.0,23.0,,1.0,2,Female,1.0,Health facility,,,,,


In [139]:
add_info = add_info.applymap(lambda x: x.lower() if isinstance(x, str) else x)
add_info.rename(columns={'sample_id': 'patient_id'}, inplace=True)
add_info.drop(['pa_id', 'priority_level', 'boxnumber', 'position', 'comments'], axis=1, inplace=True)
add_info['time_point_days'] = add_info.apply(split_time, axis=1)
add_info['patient_id'] = add_info.apply(remove_time, axis=1)
add_info.drop_duplicates(subset=['patient_id', 'time_point_days'], inplace=True, keep='last')
add_info['when_returned_with_fever'] = add_info['when_returned_with_fever'].apply(remove_day)
add_info['when_retreated'] = add_info['when_retreated'].apply(remove_day)
add_info.head()

Unnamed: 0,patient_id,day0_enrollee,days_since_tx,initial_sample,got_PQ,age,sex,fever48_r,enrolled_from,returned_with_fever,when_returned_with_fever,retreated,when_retreated,time_point_days
0,pa-001,1.0,0.0,1.0,1.0,2,female,1.0,health facility,,,,,0
1,pa-001,1.0,3.0,,1.0,2,female,1.0,health facility,,,,,3
2,pa-001,1.0,9.0,,1.0,2,female,1.0,health facility,,,,,7
3,pa-001,1.0,17.0,,1.0,2,female,1.0,health facility,,,,,14
4,pa-001,1.0,23.0,,1.0,2,female,1.0,health facility,,,,,21


In [140]:
time_df = super_final.copy(deep=True)
time_df['time_point_days'] = time_df.apply(split_time, axis=1)
time_df['patient_id'] = time_df.apply(remove_time, axis=1)
time_df = time_df.merge(add_info, how='inner', on=['patient_id', 'time_point_days'])
time_df.sort_values(['patient_id', 'time_point_days'], inplace=True)
time_df.set_index(['patient_id', 'time_point_days'], inplace=True)
time_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,errors,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,HRP2_pg_ml_max_dilution,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,LDH_Pan_pg_ml_max_dilution,LDH_Pv_pg_ml,...,initial_sample,got_PQ,age,sex,fever48_r,enrolled_from,returned_with_fever,when_returned_with_fever,retreated,when_retreated
patient_id,time_point_days,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
pa-001,3,,318702.59,2500,e3,312500000,142.30,1,g6,312500000,15.35,...,,1.0,2,female,1.0,health facility,,,,
pa-001,7,,73737.79,2500,c5,125000,179.34,1,e12,125000,10.31,...,,1.0,2,female,1.0,health facility,,,,
pa-001,14,,4138.79,50,d8,50,< 14.41,1,c8,50,2.76,...,,1.0,2,female,1.0,health facility,,,,
pa-001,21,,1377.18,50,d12,50,38.94,1,c12,50,12.21,...,,1.0,2,female,1.0,health facility,,,,
pa-001,28,,2024.41,50,h2,50,29.86,1,g2,50,13.9,...,,1.0,2,female,1.0,health facility,,,,


In [141]:
time_df.to_csv('C:/Users/lzoeckler/Desktop/4plex/output_data/final_dilutions.csv')

In [142]:
partial_format = samples_data.copy(deep=True)
partial_format['time_point_days'] = partial_format.apply(split_time, axis=1)
partial_format['patient_id'] = partial_format.apply(remove_time, axis=1)
partial_format = partial_format.merge(add_info, how='left', on=['patient_id', 'time_point_days'])
partial_format.sort_values(['patient_id', 'time_point_days'], inplace=True)
partial_format.set_index(['patient_id', 'time_point_days'], inplace=True)
partial_format.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration,day0_enrollee,days_since_tx,initial_sample,got_PQ,age,sex,fever48_r,enrolled_from,returned_with_fever,when_returned_with_fever,retreated,when_retreated
patient_id,time_point_days,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
pa-001,3,g6,,145.95,142.30,15.35,5678.58,1,1.0,3.0,,1.0,2.0,female,1.0,health facility,,,,
pa-001,3,f3,,397468.45,< 1801250.00,< 291250.00,< 3342500.00,125000,1.0,3.0,,1.0,2.0,female,1.0,health facility,,,,
pa-001,3,e3,,318702.59,74360.14,10385.14,< 66850.00,2500,1.0,3.0,,1.0,2.0,female,1.0,health facility,,,,
pa-001,3,h3,,1537195777.01,119019730410.15,16185275422.09,286153421264.94,312500000,1.0,3.0,,1.0,2.0,female,1.0,health facility,,,,
pa-001,3,h6,,10273.61,2535.11,733.44,49491.76,50,1.0,3.0,,1.0,2.0,female,1.0,health facility,,,,


In [143]:
partial_format.to_csv('C:/Users/lzoeckler/Desktop/4plex/output_data/partially_formatted.csv')