In [111]:
import pandas as pd
import numpy as np
import math
from functools import partial, reduce

In [112]:
# Directory that holds the exported CSV files. Kept in one place so the
# notebook can be pointed at another machine's data by editing a single line.
DATA_DIR = 'C:/Users/lzoeckler/Desktop/4plex/test_data'

# Column names imposed on every input file (the files' own first 8 lines are skipped).
PLEX_COLUMNS = ['patient_id', 'type', 'well', 'error',
                'HRP2_pg_ml', 'LDH_Pan_pg_ml',
                'LDH_Pv_pg_ml', 'CRP_ng_ml']


def _lower_if_str(value):
    """Lower-case string cells; leave numbers and missing values untouched."""
    return value.lower() if isinstance(value, str) else value


dfs = []
for fname in ['other_dilutions_input', 'neat_dilution_input']:
    plex_data = pd.read_csv('{}/{}.csv'.format(DATA_DIR, fname),
                            skiprows=8, names=PLEX_COLUMNS)
    # Element-wise lower-casing, done column by column so it does not rely on
    # DataFrame.applymap (deprecated in recent pandas).
    plex_data = plex_data.apply(lambda column: column.map(_lower_if_str))
    # Rows without a patient_id inherit the id of the closest row above them.
    # .ffill() replaces the deprecated fillna(method='ffill') spelling.
    plex_data['patient_id'] = plex_data['patient_id'].ffill()
    dfs.append(plex_data)
# Stack both files; each file keeps its own row labels (no index reset).
combined = pd.concat(dfs)
combined.head()

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
0,calibrator (neat),reduced concentration (replicate 1),g12,,590.00,9648.76,480.71,9535.24
1,calibrator (neat),reduced concentration (replicate 2),h12,,masked,11332.92,516.29,8460.31
2,calibrator (1:3),reduced concentration (replicate 1),g11,"mo2, mo3",187.97,masked,masked,5871.99
3,calibrator (1:3),reduced concentration (replicate 2),h11,,206.17,3547.27,166.06,2798.18
4,calibrator (1:9),reduced concentration (replicate 1),g10,,61.94,1088.41,50.93,1146.64


In [113]:
# Keep only patient rows (ids containing 'pa-') and drop the 'type' column.
patient_rows = combined.loc[combined['patient_id'].str.contains('pa-')].drop(columns='type')

# The raw id looks like '<patient> <dilution text>': everything before the
# first space is the patient id, everything after it the concentration label.
id_pieces = patient_rows['patient_id'].str.partition(' ')
patient_rows = patient_rows.assign(concentration=id_pieces[2], patient_id=id_pieces[0])

# A row survives when its label names one of the known dilutions, is not a
# 'low volume' run, and has a well recorded.
labels = patient_rows['concentration']
is_known_dilution = labels.str.contains('neat|50x|2500x|125000x|6250000x|312500000x')
is_low_volume = labels.str.contains('low volume')
has_well = patient_rows['well'].notnull()

samples_data = (
    patient_rows
    .loc[is_known_dilution & ~is_low_volume & has_well]
    .sort_values(['patient_id', 'well'])
)
samples_data.head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
24,pa-002,c7,,123.69,> 10514.00,> 497.04,59.29,(neat)
51,pa-002,d7,,6254.92,> 525700.00,11330.65,9543.64,50x (1:50)
25,pa-003,e8,,124.19,249.00,9.22,4666.60,(neat)
52,pa-003,f8,,11703.59,1257.65,316.83,4139.99,50x (1:50)
26,pa-004,e9,,121.96,> 10514.00,8.92,> 9574.00,(neat)


In [114]:
# Peek at the last rows of the filtered, sorted sample table.
samples_data.tail()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
77,pa-061,h11,,13695.67,448248.71,704.35,8648.41,50x (1:50)
91,pa-062,c5,,< 334375000.00,24645339916.28,4103903859.09,14691179077.97,312500000x (1:312500000)
57,pa-062,c6,,3244396.67,14908760.48,1463276.99,7820214.47,125000x (1:125000)
74,pa-062,d5,,7227386.34,137248891.51,25411361.92,318313406.87,6250000x (1:6250000)
40,pa-062,d6,,427319.03,3912840.21,35926.84,110043.15,2500x (1:2500)


In [115]:
# Threshold value for each analyte column. run_compare multiplies these by a
# dilution constant before comparing a measurement against them.
# NOTE(review): the numbers resemble the '>' over-range markers seen in the
# neat data (e.g. '> 10514.00') - confirm they are the neat upper limits.
threshholds = {
    'HRP2_pg_ml': 330,
    'LDH_Pan_pg_ml': 10514,
    'LDH_Pv_pg_ml': 497,
    'CRP_ng_ml': 9574,
}

In [116]:
# Multiplier applied to an analyte's threshold for each dilution step.
# Every step is a further 50-fold dilution and its multiplier is the factor
# of the step before it:
#   50x -> 1, 2500x -> 50, 125000x -> 2500, 6250000x -> 125000, 312500000x -> 6250000
dil_constants = {
    '{}x'.format(50 ** step): 50 ** (step - 1)
    for step in range(1, 6)
}

In [117]:
# Positivity threshold for each analyte column.
# CRP is stored as NaN, i.e. no positivity threshold is given for it here.
pos_threshholds = {
    'HRP2_pg_ml': 2.3,
    'LDH_Pan_pg_ml': 47.8,
    'LDH_Pv_pg_ml': 75.1,
    'CRP_ng_ml': np.nan,
}

In [118]:
# For each dilution step: (lower dilution, higher dilution, failure label).
# Each step is keyed by its higher dilution and is compared against the
# dilution immediately before it.
dilution_sets = {
    higher: (lower, higher, 'fail')
    for lower, higher in [('neat', '50x'),
                          ('50x', '2500x'),
                          ('2500x', '125000x'),
                          ('125000x', '6250000x'),
                          ('6250000x', '312500000x')]
}

In [119]:
def return_decisions(low, high, fail):
    """Build the 5x5 decision tables used to choose between two dilutions.

    Parameters
    ----------
    low, high, fail
        Labels placed in the table cells: keep the lower dilution, keep the
        higher dilution, or report a failure.

    Returns
    -------
    dict
        Analyte column name -> 5x5 numpy array of labels. HRP2 has its own
        table; the two LDH analytes and CRP all share one array object.

    Table layout (both axes ordered above, below, LLQ, ULQ, NA):
        Columns = [neat_above, neat_below, neat_LLQ, neat_ULQ, NA]
        Rows    = [dil_above,  dil_below,  dil_LLQ,  dil_ULQ,  NA]
    NOTE(review): the selection loop in this notebook indexes the table as
    [lower-dilution vector, higher-dilution vector], i.e. rows by the lower
    dilution - confirm which orientation is the intended one.
    """
    # One-letter shorthand so each table can be read as a grid.
    label_for = {'L': low, 'H': high, 'F': fail}

    def expand(grid):
        # Turn rows such as 'H L L H F' into a 2-D array of the real labels.
        return np.array([[label_for[cell] for cell in line.split()]
                         for line in grid])

    HRP2_matrix = expand(['H H H H H',
                          'H L L H F',
                          'H L L F F',
                          'H H F H H',
                          'F H H F F'])

    other_matrix = expand(['H L L H H',
                           'H L L H F',
                           'H L L F F',
                           'H L F H H',
                           'F L L F F'])

    # Every analyte except HRP2 uses the same (shared) table.
    decisions = {'HRP2_pg_ml': HRP2_matrix}
    for analyte in ('LDH_Pan_pg_ml', 'LDH_Pv_pg_ml', 'CRP_ng_ml'):
        decisions[analyte] = other_matrix
    return decisions

In [120]:
def run_compare(df, analyte_val, dil_val):
    """Classify one measurement against its dilution-scaled threshold.

    Parameters
    ----------
    df
        A single row (anything indexable by column name, e.g. a pandas
        Series) holding the measurement under ``analyte_val``.
    analyte_val
        Analyte column name; must be a key of ``threshholds``.
    dil_val
        Dilution label; must be a key of ``dil_constants``.

    Returns
    -------
    numpy.ndarray
        Boolean vector ``[above, below, LLQ, ULQ, NA]`` with exactly one
        True entry:
        * above - numeric value strictly greater than the scaled threshold
        * below - numeric value less than or equal to the scaled threshold
        * LLQ   - text value containing '<'
        * ULQ   - text value containing '>'
        * NA    - NaN, None, or any other text that is not a number

    Raises
    ------
    KeyError
        If ``analyte_val`` or ``dil_val`` is not a known key.
    """
    val = df[analyte_val]
    thresh_val = dil_constants[dil_val] * threshholds[analyte_val]

    above = below = LLQ = ULQ = NA = False
    try:
        float_val = float(val)
    except (TypeError, ValueError):
        # Not a number: either a limit-of-quantification marker such as
        # '> 10514.00' / '< 1337.00', or something unusable.
        text = val if isinstance(val, str) else ''
        if '<' in text:
            LLQ = True
        elif '>' in text:
            ULQ = True
        else:
            # e.g. 'masked' or None - treated as missing so the caller always
            # receives a vector with exactly one flag set.
            NA = True
    else:
        if math.isnan(float_val):
            NA = True
        elif float_val > thresh_val:
            above = True
        else:
            # NOTE(review): a value exactly equal to the threshold is counted
            # as 'below'; over-range readings carry a '>' marker in the data,
            # so this looks right, but confirm with the assay owner.
            below = True
    # Returned after the try block, not from a `finally`, so unexpected
    # errors propagate instead of being silently discarded.
    return np.array([above, below, LLQ, ULQ, NA])

In [121]:
# For every analyte, every dilution step and every patient, decide which of
# two adjacent dilutions to report, then merge the per-analyte results into
# one wide table (`final_df`).


def _single_row(frame, dilution_label):
    """Return the one row of `frame` whose concentration mentions `dilution_label`.

    Raises ValueError when zero or several rows match, because a patient is
    expected to have exactly one measurement per dilution.
    """
    matches = frame.loc[frame['concentration'].str.contains(dilution_label)]
    if len(matches) != 1:
        raise ValueError("Expected exactly one '{}' row, found {}".format(
            dilution_label, len(matches)))
    return matches.iloc[0]


# Number of distinct concentration labels present in the data.
dilution_number = len(samples_data['concentration'].unique())

# Only patients whose row count equals that number are processed; everyone
# else is skipped. The subsets do not depend on analyte or dilution, so they
# are built once. sort=False keeps patients in order of first appearance.
complete_patients = [
    (patient_id, patient_rows)
    for patient_id, patient_rows in samples_data.groupby('patient_id', sort=False)
    if len(patient_rows) == dilution_number
]

# one DataFrame per analyte, merged at the end
final_dfs = []
for analyte in threshholds:
    dilution_col = '{}_dilution'.format(analyte)
    well_col = '{}_well'.format(analyte)
    # Rows are gathered as plain dicts and turned into a frame once;
    # DataFrame.append no longer exists in current pandas.
    records = []
    for max_dilution in dil_constants:
        low, high, fail = dilution_sets[max_dilution]
        decision_matrix = return_decisions(low, high, fail)[analyte]
        comparison = '{} vs {}'.format(low, high)
        for patient_id, patient_rows in complete_patients:
            rows = {low: _single_row(patient_rows, low),
                    high: _single_row(patient_rows, high)}
            # Classify both measurements against the threshold of this step.
            vector_low = run_compare(rows[low], analyte_val=analyte,
                                     dil_val=max_dilution)
            vector_high = run_compare(rows[high], analyte_val=analyte,
                                      dil_val=max_dilution)
            # The two one-hot vectors select a single cell of the table:
            # first axis <- lower dilution, second axis <- higher dilution.
            # NOTE(review): the layout comment in return_decisions describes
            # rows as the diluted sample and columns as the neat one, which is
            # the transpose of this lookup - confirm the intended orientation.
            picked = decision_matrix[vector_low, vector_high]
            if picked.size != 1:
                raise ValueError(
                    "Could not classify {} for patient {} ({})".format(
                        analyte, patient_id, comparison))
            decision = picked.item()
            if decision in [low, high]:
                val = rows[decision][analyte]
                well = rows[decision]['well']
            elif decision == fail:
                val = np.nan
                well = np.nan
            else:
                raise ValueError("Unexpected decision value: {}".format(decision))
            records.append({'patient_id': patient_id, 'comparison': comparison,
                            dilution_col: decision, well_col: well,
                            analyte: val})
    # Explicit columns keep the layout stable even when nothing qualified.
    dil_df = pd.DataFrame(records, columns=['patient_id', 'comparison',
                                            dilution_col, well_col, analyte])
    final_dfs.append(dil_df)

# Inner-join the per-analyte tables on patient and comparison.
final_df = reduce(lambda left, right: pd.merge(left, right, on=['comparison', 'patient_id']), final_dfs)

In [122]:
# Display the merged table: one row per patient and dilution comparison.
final_df

Unnamed: 0,patient_id,comparison,HRP2_pg_ml_dilution,HRP2_pg_ml_well,HRP2_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,LDH_Pan_pg_ml,LDH_Pv_pg_ml_dilution,LDH_Pv_pg_ml_well,LDH_Pv_pg_ml,CRP_ng_ml_dilution,CRP_ng_ml_well,CRP_ng_ml
0,pa-045,neat vs 50x,50x,h4,8984.03,neat,g4,373.57,50x,h4,498.19,neat,g4,356.10
1,pa-047,neat vs 50x,50x,h6,11436.86,neat,g6,31.47,neat,g6,10.56,neat,g6,165.10
2,pa-057,neat vs 50x,50x,h7,6813.35,50x,h7,> 525700.00,50x,h7,5661.44,50x,h7,16234.27
3,pa-058,neat vs 50x,50x,h8,7526.27,50x,h8,> 525700.00,50x,h8,899.46,50x,h8,26632.43
4,pa-059,neat vs 50x,50x,h9,6729.35,50x,h9,> 525700.00,50x,h9,10090.90,50x,h9,13794.46
5,pa-045,50x vs 2500x,2500x,f8,247953.38,50x,h4,2016.41,2500x,f8,26892.38,50x,h4,< 1337.00
6,pa-047,50x vs 2500x,2500x,f4,136984.47,50x,h6,1131.36,50x,h6,223.61,50x,h6,< 1337.00
7,pa-057,50x vs 2500x,2500x,f2,473308.35,2500x,f2,4089072.61,2500x,f2,32593.31,50x,h7,16234.27
8,pa-058,50x vs 2500x,2500x,d12,> 825000.00,2500x,d12,3275820.37,50x,h8,899.46,50x,h8,26632.43
9,pa-059,50x vs 2500x,2500x,d10,322389.59,2500x,d10,12289636.96,2500x,d10,44283.19,50x,h9,13794.46
