In [1]:
import pandas as pd
import numpy as np
import math
from functools import partial, reduce

In [2]:
# Location of the raw 4plex export.
# NOTE(review): absolute, machine-specific path -- consider making it
# configurable relative to a data directory.
PLEX_CSV_PATH = 'C:/Users/lzoeckler/Desktop/4plex/clean_sample_dataset_neat.csv'

# Column names assigned to the export (the file's own header is not used).
PLEX_COLUMNS = ['patient_id', 'type', 'well', 'error',
                'HRP2_pg_ml', 'LDH_Pan_pg_ml',
                'LDH_Pv_pg_ml', 'CRP_ng_ml']

# The first 8 lines of the file are skipped and the names above are applied.
plex_data = pd.read_csv(PLEX_CSV_PATH, skiprows=8, names=PLEX_COLUMNS)

# Lower-case every string cell and leave non-string cells untouched.
# A per-column Series.map is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases; the element-wise result is the same.
plex_data = plex_data.apply(
    lambda column: column.map(
        lambda cell: cell.lower() if isinstance(cell, str) else cell))

# Rows with a blank patient_id inherit the closest preceding value.
# .ffill() replaces the deprecated fillna(method='ffill') spelling.
plex_data['patient_id'] = plex_data['patient_id'].ffill()
plex_data.head()

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
0,calibrator (neat),reduced concentration (replicate 1),a1,,600.41,11062.32,571.43,9423.69
1,calibrator (neat),reduced concentration (replicate 2),b1,,483.68,9974.68,424.78,11129.36
2,calibrator (1:3),reduced concentration (replicate 1),a2,,219.66,3502.05,181.6,3171.26
3,calibrator (1:3),reduced concentration (replicate 2),b2,,196.66,3507.41,149.18,3156.21
4,calibrator (1:9),reduced concentration (replicate 1),a3,,65.56,1207.02,57.82,990.08


In [3]:
# Keep only the rows whose patient_id contains 'pa-', and drop the 'type'
# column, which is not used for the patient samples.
is_patient_row = plex_data['patient_id'].str.contains('pa-')
samples_data = plex_data.loc[is_patient_row].drop('type', axis=1)

# patient_id arrives as "<id> <concentration label>", e.g. "pa-002 (neat)".
# Split once on the first space: piece 0 is the id, piece 2 is the label.
id_pieces = samples_data['patient_id'].str.partition(' ')
samples_data['concentration'] = id_pieces[2]
samples_data['patient_id'] = id_pieces[0]

# Retain the neat reading and the 50x dilution, excluding any row whose
# label mentions 'low volume'.
label = samples_data['concentration']
is_neat_or_50x = (label == '(neat)') | label.str.contains('50x')
is_low_volume = label.str.contains('low volume')
samples_data = samples_data.loc[is_neat_or_50x & ~is_low_volume]

samples_data = samples_data.sort_values(['patient_id', 'well'])
samples_data.head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
24,pa-002,c7,,123.69,> 10514.00,> 497.04,59.29,(neat)
51,pa-002,d7,,6254.92,> 525700.00,11330.65,9543.64,50x (1:50)
25,pa-003,e8,,124.19,249.00,9.22,4666.60,(neat)
52,pa-003,f8,,11703.59,1257.65,316.83,4139.99,50x (1:50)
26,pa-004,e9,,121.96,> 10514.00,8.92,> 9574.00,(neat)


In [4]:
# Threshold value per analyte column; run_compare() labels a numeric reading
# as "above" or "below" relative to these.
# NOTE(review): the numbers line up with the '>' limits reported for neat
# wells in the data (e.g. '> 10514.00', '> 9574.00'), so they look like the
# neat upper limits of quantification -- confirm.
threshholds = {
    'HRP2_pg_ml': 330,
    'LDH_Pan_pg_ml': 10514,
    'LDH_Pv_pg_ml': 497,
    'CRP_ng_ml': 9574,
}

In [5]:
# Positivity threshold per analyte column.
# Only referenced from commented-out code in the decision loop at present.
# NOTE(review): CRP is NaN, presumably because no positivity cut-off is
# defined for it -- confirm.
pos_threshholds = {
    'HRP2_pg_ml': 2.3,
    'LDH_Pan_pg_ml': 47.8,
    'LDH_Pv_pg_ml': 75.1,
    'CRP_ng_ml': np.nan,
}

In [6]:
# Decision tables: one 5x5 array of labels per analyte.
#   Row index    = state of the 1:50 dilution: above, below, LLQ, ULQ, NA
#   Column index = state of the neat sample:   above, below, LLQ, ULQ, NA
# Each cell holds one of the labels '1:50', 'neat' or 'fail'.
HRP2_matrix = np.array([
    # neat: above  below   LLQ     ULQ     NA
    ['1:50', '1:50', '1:50', '1:50', '1:50'],   # dilution above
    ['1:50', 'neat', 'neat', '1:50', 'fail'],   # dilution below
    ['1:50', 'neat', 'neat', 'fail', 'fail'],   # dilution LLQ
    ['1:50', '1:50', 'fail', '1:50', '1:50'],   # dilution ULQ
    ['fail', '1:50', '1:50', 'fail', 'fail'],   # dilution NA
])

# LDH_Pan, LDH_Pv and CRP currently use the same table. Each analyte still
# gets its own array, so one can be edited without affecting the others.
_shared_rows = [
    # neat: above  below   LLQ     ULQ     NA
    ['1:50', 'neat', 'neat', '1:50', '1:50'],   # dilution above
    ['1:50', 'neat', 'neat', '1:50', 'fail'],   # dilution below
    ['1:50', 'neat', 'neat', 'fail', 'fail'],   # dilution LLQ
    ['1:50', 'neat', 'fail', '1:50', '1:50'],   # dilution ULQ
    ['fail', 'neat', 'neat', 'fail', 'fail'],   # dilution NA
]
LDH_Pan_matrix = np.array(_shared_rows)
LDH_Pv_matrix = np.array(_shared_rows)
CRP_matrix = np.array(_shared_rows)

# Decision table lookup keyed by analyte column name.
decisions = {
    'HRP2_pg_ml': HRP2_matrix,
    'LDH_Pan_pg_ml': LDH_Pan_matrix,
    'LDH_Pv_pg_ml': LDH_Pv_matrix,
    'CRP_ng_ml': CRP_matrix,
}

In [7]:
def run_compare(df, analyte_val, thresholds=None):
    """Classify one analyte reading into a one-hot state vector.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row; ``df[analyte_val]`` is the reading to classify.
    analyte_val : str
        Name of the analyte column, also used as the key into the
        threshold mapping.
    thresholds : dict, optional
        Mapping of analyte name to threshold. Defaults to the
        notebook-level ``threshholds`` dict.

    Returns
    -------
    numpy.ndarray
        Boolean array ``[above, below, LLQ, ULQ, NA]`` with exactly one
        True entry, so it can be used as a mask that selects a single row
        or column of a 5x5 decision matrix.

        * numeric and greater than the threshold -> above
        * numeric and not greater than the threshold -> below
        * text containing '<' -> LLQ
        * text containing '>' -> ULQ
        * NaN, or anything that is neither numeric nor a '<' / '>' text -> NA

    Raises
    ------
    KeyError
        If ``analyte_val`` is missing from the row or from the thresholds.
    """
    if thresholds is None:
        # Fall back to the notebook-level mapping (note its spelling).
        thresholds = threshholds
    val = df[analyte_val]
    above, below, LLQ, ULQ, NA = False, False, False, False, False

    # Only the numeric conversion is guarded. Other errors, such as an
    # unknown analyte, must surface instead of yielding an empty vector.
    try:
        float_val = float(val)
    except (TypeError, ValueError):
        float_val = None

    if float_val is None:
        if isinstance(val, str) and '<' in val:
            LLQ = True
        elif isinstance(val, str) and '>' in val:
            ULQ = True
        else:
            # NOTE(review): unparseable readings are treated as missing so
            # the vector stays one-hot -- confirm this is the desired policy.
            NA = True
    elif math.isnan(float_val):
        NA = True
    elif float_val > thresholds[analyte_val]:
        above = True
    else:
        # A reading exactly at the threshold does not exceed it.
        # NOTE(review): confirm that equality belongs in "below".
        below = True
    return np.array([above, below, LLQ, ULQ, NA])

In [8]:
# For every analyte, decide per patient whether to report the neat reading,
# the 1:50 dilution reading, or a failure, then merge the per-analyte results
# into one wide table keyed on patient_id.
#
# TODO: the positive/negative call based on pos_threshholds is not
# implemented yet.

# one small frame per analyte, merged at the end
final_dfs = []
for analyte in threshholds.keys():
    # create partial function for generating decision vectors
    partial_compare = partial(run_compare, analyte_val=analyte)
    # generate decision vectors (one boolean state vector per row)
    samples_data['decision_vector'] = samples_data.apply(partial_compare, axis=1)
    # pull decision matrix for given analyte
    decision_matrix = decisions[analyte]
    dilution_col = '{}_dilution'.format(analyte)
    well_col = '{}_well'.format(analyte)
    # Collect plain dict records and build the frame once per analyte.
    # DataFrame.append was removed in pandas 2.0 and row-by-row growth is
    # quadratic.
    records = []
    # iterate over patient_ids
    for i in samples_data['patient_id'].unique().tolist():
        sub_data = samples_data.loc[samples_data['patient_id'] == i]
        neat_rows = sub_data.loc[sub_data['concentration'] == '(neat)']
        dil_rows = sub_data.loc[sub_data['concentration'].str.contains('50x')]
        # .item() requires exactly one neat row and one 50x row per patient
        # and raises ValueError otherwise.
        neat_vector = neat_rows['decision_vector'].item()
        dil_vector = dil_rows['decision_vector'].item()
        # The decision matrices are laid out with rows = dilution state and
        # columns = neat state, so the dilution mask must index first.
        decision = decision_matrix[dil_vector, neat_vector].item()
        if decision == '1:50':
            val = dil_rows[analyte].item()
            well = dil_rows['well'].item()
        elif decision == 'neat':
            val = neat_rows[analyte].item()
            well = neat_rows['well'].item()
        elif decision == 'fail':
            val = np.nan
            well = np.nan
        else:
            # Report the offending decision label, not the analyte value.
            raise ValueError("Unexpected decision value: {}".format(decision))
        records.append({'patient_id': i, analyte: val,
                        dilution_col: decision,
                        well_col: well})
    small_df = pd.DataFrame(records, columns=['patient_id', analyte,
                                              dilution_col, well_col])
    final_dfs.append(small_df)
output_df = reduce(lambda left, right: pd.merge(left, right, on='patient_id'), final_dfs)
output_df.head(10)

Unnamed: 0,patient_id,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,LDH_Pv_pg_ml,LDH_Pv_pg_ml_dilution,LDH_Pv_pg_ml_well,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well
0,pa-002,6254.92,1:50,d7,> 525700.00,1:50,d7,11330.65,1:50,d7,59.29,neat,c7
1,pa-003,11703.59,1:50,f8,249.00,neat,e8,9.22,neat,e8,4666.60,neat,e8
2,pa-004,> 16500.00,1:50,f9,120892.15,1:50,f9,8.92,neat,e9,32448.42,1:50,f9
3,pa-015,6454.22,1:50,d10,> 525700.00,1:50,d10,2043.7,1:50,d10,61946.70,1:50,d10
4,pa-016,7155.02,1:50,f1,178719.28,1:50,f1,1824.14,1:50,f1,26952.69,1:50,f1
5,pa-027,2725.30,1:50,d11,314.13,neat,c11,1190.91,1:50,d11,1546.73,neat,c11
6,pa-028,7745.05,1:50,d12,23062.54,1:50,d12,2377.62,1:50,d12,1148.75,neat,c12
7,pa-029,12462.18,1:50,f2,23.26,neat,e2,5.72,neat,e2,< 26.74,neat,e2
8,pa-030,6462.27,1:50,f3,77973.75,1:50,f3,2945.06,1:50,f3,559.11,neat,e3
9,pa-031,14212.10,1:50,f4,79.00,neat,e4,13.5,neat,e4,6272.12,neat,e4
