In [2]:
import pandas as pd
import numpy as np
import math
from functools import partial, reduce

In [3]:
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# that file exists. Consider a configurable data directory.
INPUT_CSV = 'C:/Users/lzoeckler/Desktop/4plex/testing_data/other_dilutions_testing_input.csv'

# Skip the first 8 lines of the file and supply the column names explicitly.
plex_data = pd.read_csv(INPUT_CSV, skiprows=8,
                        names=['patient_id', 'type', 'well', 'error',
                               'HRP2_pg_ml', 'LDH_Pan_pg_ml',
                               'LDH_Pv_pg_ml', 'CRP_ng_ml'])

# Lower-case every string cell (later cells match on lower-case text such as
# 'pa-'); non-string cells are passed through untouched. A column-wise
# Series.map is used instead of DataFrame.applymap, which is deprecated since
# pandas 2.1.
plex_data = plex_data.apply(
    lambda col: col.map(lambda x: x.lower() if isinstance(x, str) else x))

# Fill each missing patient_id with the closest non-missing value above it.
# .ffill() is the supported spelling of the deprecated fillna(method='ffill').
plex_data['patient_id'] = plex_data['patient_id'].ffill()
plex_data.head()

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
0,calibrator (neat),reduced concentration (replicate 1),g12,,590.00,9648.76,480.71,9535.24
1,calibrator (neat),reduced concentration (replicate 2),h12,,masked,11332.92,516.29,8460.31
2,calibrator (1:3),reduced concentration (replicate 1),g11,"mo2, mo3",187.97,masked,masked,5871.99
3,calibrator (1:3),reduced concentration (replicate 2),h11,,206.17,3547.27,166.06,2798.18
4,calibrator (1:9),reduced concentration (replicate 1),g10,,61.94,1088.41,50.93,1146.64


In [8]:
# Keep only the patient sample rows (patient_id contains 'pa-'); the 'type'
# column is not carried forward.
is_patient_row = plex_data['patient_id'].str.contains('pa-')
samples_data = plex_data.loc[is_patient_row].drop('type', axis=1)

# Each patient_id cell reads '<id> <dilution label>': split once at the first
# space, keeping the label as 'concentration' and the bare id as 'patient_id'.
id_parts = samples_data['patient_id'].str.partition(' ')
samples_data['concentration'] = id_parts[2]
samples_data['patient_id'] = id_parts[0]

# Restrict to the four dilutions of interest, then order by patient and well.
keep_dilutions = '2500x|125000x|6250000x|312500000x'
has_kept_dilution = samples_data['concentration'].str.contains(keep_dilutions)
samples_data = samples_data.loc[has_kept_dilution]
samples_data = samples_data.sort_values(['patient_id', 'well'])
samples_data.head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
75,pa-005,c1,,< 334375000.00,16966230308.94,2946977316.25,< 8356250000.00,312500000x (1:312500000)
41,pa-005,c2,,7872641.80,8227621.88,1487028.16,3534114.59,125000x (1:125000)
58,pa-005,d1,,19073657.78,342607174.16,77321679.93,< 167125000.00,6250000x (1:6250000)
24,pa-005,d2,,517929.76,1617228.81,28553.25,< 66850.00,2500x (1:2500)
76,pa-017,a1,,< 334375000.00,10531449090.96,2651427271.66,< 8356250000.00,312500000x (1:312500000)


In [4]:
# Per-analyte threshold values, keyed by the analyte's column name.
# Key order matters: the analysis loop iterates this dict to order its output.
threshholds = {
    'HRP2_pg_ml': 330,
    'LDH_Pan_pg_ml': 10514,
    'LDH_Pv_pg_ml': 497,
    'CRP_ng_ml': 9574,
}

In [None]:
# Multiplier applied to an analyte's threshold for each dilution label.
dil_constants = {
    '2500x': 50,
    '125000x': 2500,
    '6250000x': 125000,
    '312500000x': 6250000,
}

In [5]:
# Per-analyte positivity thresholds, keyed by column name.
# CRP has no positivity threshold here and is recorded as NaN.
pos_threshholds = {
    'HRP2_pg_ml': 2.3,
    'LDH_Pan_pg_ml': 47.8,
    'LDH_Pv_pg_ml': 75.1,
    'CRP_ng_ml': np.nan,
}

In [6]:
# Decision lookup tables: one 5x5 grid of labels per analyte. Each cell is
# '1:50', 'neat' or 'fail'.
# Columns = [neat_above, neat_below, neat_LLQ, neat_ULQ, NA]
# Rows = [dil_above, dil_below, dil_LLQ, dil_ULQ, NA]
_HRP2_ROWS = [
    ['1:50', '1:50', '1:50', '1:50', '1:50'],
    ['1:50', 'neat', 'neat', '1:50', 'fail'],
    ['1:50', 'neat', 'neat', 'fail', 'fail'],
    ['1:50', '1:50', 'fail', '1:50', '1:50'],
    ['fail', '1:50', '1:50', 'fail', 'fail'],
]

# LDH_Pan, LDH_Pv and CRP currently use the same table. Each still gets its
# own array so that one can be edited without touching the others.
_SHARED_ROWS = [
    ['1:50', 'neat', 'neat', '1:50', '1:50'],
    ['1:50', 'neat', 'neat', '1:50', 'fail'],
    ['1:50', 'neat', 'neat', 'fail', 'fail'],
    ['1:50', 'neat', 'fail', '1:50', '1:50'],
    ['fail', 'neat', 'neat', 'fail', 'fail'],
]

HRP2_matrix = np.array(_HRP2_ROWS)
LDH_Pan_matrix = np.array(_SHARED_ROWS)
LDH_Pv_matrix = np.array(_SHARED_ROWS)
CRP_matrix = np.array(_SHARED_ROWS)

# Decision table for each analyte, keyed by the analyte's column name.
decisions = {
    'HRP2_pg_ml': HRP2_matrix,
    'LDH_Pan_pg_ml': LDH_Pan_matrix,
    'LDH_Pv_pg_ml': LDH_Pv_matrix,
    'CRP_ng_ml': CRP_matrix,
}

In [7]:
def run_compare(df, analyte_val, dil_val=None):
    """Classify one row's reading for an analyte against its scaled threshold.

    The threshold is ``dil_constants[dil_val] * threshholds[analyte_val]``.

    Parameters
    ----------
    df : mapping-like row (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
        Must contain ``analyte_val``; must also contain ``'concentration'``
        when ``dil_val`` is not given.
    analyte_val : str
        Column holding the reading; also the key into ``threshholds``.
    dil_val : str, optional
        Key into ``dil_constants``. When omitted, the text before the first
        space of ``df['concentration']`` is used, so the function can be
        applied row-wise with only ``analyte_val`` bound.

    Returns
    -------
    numpy.ndarray
        Five booleans ``[above, below, LLQ, ULQ, NA]`` with at most one True:
        numeric and greater than the threshold / numeric and less than it /
        text containing '<' / text containing '>' / numeric NaN.
        All five are False when the reading equals the threshold exactly, or
        when it is non-numeric without '<' or '>' (e.g. 'masked').

    Raises
    ------
    KeyError
        If ``analyte_val`` or ``dil_val`` is not a known key.
    """
    if dil_val is None:
        dil_val = str(df['concentration']).partition(' ')[0]
    val = df[analyte_val]
    thresh_val = dil_constants[dil_val] * threshholds[analyte_val]

    above = below = LLQ = ULQ = NA = False
    try:
        float_val = float(val)
    except (TypeError, ValueError):
        # Not a number: out-of-range readings arrive as text such as
        # '< 66850.00' or '> 16500.00'. Only the conversion failure is
        # handled here; any other exception is allowed to propagate.
        if isinstance(val, str):
            if '<' in val:
                LLQ = True
            elif '>' in val:
                ULQ = True
    else:
        if math.isnan(float_val):
            NA = True
        elif float_val > thresh_val:
            above = True
        elif float_val < thresh_val:
            below = True
    return np.array([above, below, LLQ, ULQ, NA])

In [8]:
# Build one output row per patient for each analyte, then merge the
# per-analyte frames side by side on patient_id.
#
# NOTE(review): this cell mixes two designs and cannot run on a fresh kernel:
#   * `neat_vector` and `dil_vector` are not defined anywhere in the visible
#     notebook;
#   * the four per-dilution vectors computed below are never consulted;
#   * the '50x' and '(neat)' lookups appear to match none of the dilutions
#     kept in samples_data ('2500x', '125000x', '6250000x', '312500000x').
# The selection rule for four dilutions has to be decided before this runs.
# Positivity calls against pos_threshholds are not made here.

# one frame per analyte, merged after the loop
final_dfs = []
for analyte in threshholds.keys():
    # run_compare needs the row's dilution key (e.g. '2500x'), which is the
    # text before the first space of the 'concentration' label.
    samples_data['decision_vector'] = samples_data.apply(
        lambda row: run_compare(row, analyte_val=analyte,
                                dil_val=row['concentration'].partition(' ')[0]),
        axis=1)
    # decision table for this analyte
    decision_matrix = decisions[analyte]
    # one dict per patient; turned into a frame once, after the loop
    # (DataFrame.append was removed in pandas 2.0)
    records = []
    for i in samples_data['patient_id'].unique().tolist():
        sub_data = samples_data.loc[samples_data['patient_id'] == i]
        # .item() raises unless the patient has exactly one row per dilution
        vector_25 = sub_data.loc[sub_data['concentration'].str.contains('2500x'),
                                 'decision_vector'].item()
        vector_125 = sub_data.loc[sub_data['concentration'].str.contains('125000x'),
                                  'decision_vector'].item()
        vector_625 = sub_data.loc[sub_data['concentration'].str.contains('6250000x'),
                                  'decision_vector'].item()
        vector_3125 = sub_data.loc[sub_data['concentration'].str.contains('312500000x'),
                                   'decision_vector'].item()
        # NOTE(review): neat_vector / dil_vector are undefined (see header note)
        decision = decision_matrix[neat_vector, dil_vector].item()
        if decision == '1:50':
            val = sub_data.loc[sub_data['concentration'].str.contains('50x'),
                               analyte].item()
            well = sub_data.loc[sub_data['concentration'].str.contains('50x'),
                                'well'].item()
        elif decision == 'neat':
            val = sub_data.loc[sub_data['concentration'] == '(neat)',
                               analyte].item()
            well = sub_data.loc[sub_data['concentration'] == '(neat)',
                                'well'].item()
        elif decision == 'fail':
            val = np.nan
            well = np.nan
        else:
            # report the offending decision label (`val` may not even be
            # bound on this path)
            raise ValueError("Unexpected decision value: {}".format(decision))
        records.append({'patient_id': i, analyte: val,
                        '{}_dilution'.format(analyte): decision,
                        '{}_well'.format(analyte): well})
    small_df = pd.DataFrame(records,
                            columns=['patient_id', analyte,
                                     '{}_dilution'.format(analyte),
                                     '{}_well'.format(analyte)])
    final_dfs.append(small_df)
output_df = reduce(lambda left, right: pd.merge(left, right, on='patient_id'), final_dfs)
output_df.head(10)

Unnamed: 0,patient_id,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,LDH_Pv_pg_ml,LDH_Pv_pg_ml_dilution,LDH_Pv_pg_ml_well,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well
0,pa-002,6254.92,1:50,d7,> 525700.00,1:50,d7,11330.65,1:50,d7,59.29,neat,c7
1,pa-003,11703.59,1:50,f8,249.00,neat,e8,9.22,neat,e8,4666.60,neat,e8
2,pa-004,> 16500.00,1:50,f9,120892.15,1:50,f9,8.92,neat,e9,32448.42,1:50,f9
3,pa-015,6454.22,1:50,d10,> 525700.00,1:50,d10,2043.7,1:50,d10,61946.70,1:50,d10
4,pa-016,7155.02,1:50,f1,178719.28,1:50,f1,1824.14,1:50,f1,26952.69,1:50,f1
5,pa-027,2725.30,1:50,d11,314.13,neat,c11,1190.91,1:50,d11,1546.73,neat,c11
6,pa-028,7745.05,1:50,d12,23062.54,1:50,d12,2377.62,1:50,d12,1148.75,neat,c12
7,pa-029,12462.18,1:50,f2,23.26,neat,e2,5.72,neat,e2,< 26.74,neat,e2
8,pa-030,6462.27,1:50,f3,77973.75,1:50,f3,2945.06,1:50,f3,559.11,neat,e3
9,pa-031,14212.10,1:50,f4,79.00,neat,e4,13.5,neat,e4,6272.12,neat,e4
