In [1]:
import pandas as pd
import numpy as np
import math
from functools import partial, reduce
import os
import re

In [3]:
dfs = []
input_path = 'C:/Users/lzoeckler/Desktop/4plex/input_data/20190610'
for fname in os.listdir(input_path):
    plex_data = pd.read_csv('{}/{}'.format(input_path, fname),
                            skiprows=8, names=['patient_id', 'type', 'well', 'error',
                                               'HRP2_pg_ml', 'LDH_Pan_pg_ml',
                                               'LDH_Pv_pg_ml', 'CRP_ng_ml'])
    plex_data = plex_data.applymap(lambda x: x.lower() if isinstance(x, str) else x)
    plex_data['patient_id'] = plex_data['patient_id'].fillna(method='ffill')
    plex_data = plex_data[~plex_data['patient_id'].isnull()]
    dfs.append(plex_data)
combined = pd.concat(dfs)
combined = combined.loc[~combined['type'].isnull()]
combined = combined.loc[~combined['type'].str.contains('pixel')]
combined.head()

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
0,calibrator (neat),reduced concentration (replicate 1),a1,,600.41,11064.05,571.43,9423.69
1,calibrator (neat),reduced concentration (replicate 2),b1,,483.68,9974.46,424.78,11129.36
2,calibrator (1:3),reduced concentration (replicate 1),a2,,219.66,3501.87,181.6,3171.26
3,calibrator (1:3),reduced concentration (replicate 2),b2,,196.66,3507.22,149.18,3156.21
4,calibrator (1:9),reduced concentration (replicate 1),a3,,65.56,1208.19,57.82,990.08


In [4]:
def fix_concentrations(df):
    con = df['concentration'].partition(':')[2]
    con = con.partition(')')[0]
    if len(con) != 0:
        return con
    else:
        return '1'

In [5]:
samples_data = combined.loc[combined['patient_id'].str.contains('pa-')]
samples_data = samples_data.drop('type', axis=1)
samples_data['concentration'] = samples_data['patient_id'].apply(lambda x: x.partition(' ')[-1])
samples_data['patient_id'] = samples_data['patient_id'].apply(lambda x: x.partition(' ')[0])
samples_data = samples_data.loc[(samples_data['concentration'].str.contains('neat|50|2500|125000|6250000|312500000'))]
samples_data = samples_data.loc[~samples_data['concentration'].str.contains('low volume')]
samples_data = samples_data.loc[~samples_data['well'].isnull()]
samples_data['concentration'] = samples_data.apply(fix_concentrations, axis=1)
samples_data = samples_data.sort_values(['patient_id', 'well'])
samples_data.head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
33,pa-001-14,c8,,299.48,< 14.41,2.76,7064.05,1
67,pa-001-14,d8,,4138.79,< 720.50,< 116.50,3178.09,50
29,pa-001-21,c12,,> 330.00,38.94,12.21,2413.65,1
63,pa-001-21,d12,,1377.18,1593.58,227.59,< 1337.00,50
27,pa-001-28,g2,,> 330.00,29.86,13.90,92.87,1


In [6]:
samples_data.tail()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml,concentration
89,pa-183-49,f8,,14637.17,< 720.50,365.66,< 1337.00,50
26,pa-183-7,e4,,454365.94,131428.70,15859.37,< 66850.00,2500
27,pa-183-7,f4,,1151591.32,6571435.11,1244894.95,5977663.09,125000
24,pa-183-7,g6,,139.85,148.18,15.97,7604.30,1
58,pa-183-7,h6,,7770.69,2360.87,646.51,7683.81,50


In [7]:
samples_data['concentration'].unique()

array(['1', '50', '2500', '125000', '6250000', '312500000'], dtype=object)

In [8]:
# threshhold values for various analytes
threshholds = {'HRP2_pg_ml': 330, 'LDH_Pan_pg_ml': 10514,
               'LDH_Pv_pg_ml': 497, 'CRP_ng_ml': 9574}

In [9]:
# constant to apply to the threshhold for different dilutions
dil_constants = {'50': 1, '2500': 50, '125000': 2500,
                 '6250000': 125000, '312500000': 6250000}

In [10]:
# positivity threshholds for various analytes
pos_threshholds = {'HRP2_pg_ml': 2.3, 'LDH_Pan_pg_ml': 47.8,
                   'LDH_Pv_pg_ml': 75.1, 'CRP_ng_ml': np.nan}

In [11]:
# dilution sets for various dilutions
dilution_sets = {'50': ('1', '50', 'fail'), '2500': ('50', '2500', 'fail'),
                 '125000': ('2500', '125000', 'fail'),
                 '6250000': ('125000', '6250000', 'fail'),
                 '312500000': ('6250000', '312500000', 'fail')} 

In [12]:
def return_decisions(low, high, fail='fail'):
    # Columns = [neat_above, neat_below, neat_LLQ, neat_ULQ, NA]
    # Rows = [dil_above, dil_below, dil_LLQ, dil_ULQ, NA]
    HRP2_matrix = np.array([[high, high, high, high, high],
                            [high, low, low, high, fail],
                            [high, low, low, fail, fail],
                            [high, high, fail, high, high],
                            [fail, high, high, fail, fail]])

    other_matrix = np.array([[high, low, low, high, high],
                               [high, low, low, high, fail],
                               [high, low, low, fail, fail],
                               [high, low, fail, high, high],
                               [fail, low, low, fail, fail]])

    # decisions for various analytes
    decisions = {'HRP2_pg_ml': HRP2_matrix, 'LDH_Pan_pg_ml': other_matrix,
                 'LDH_Pv_pg_ml': other_matrix, 'CRP_ng_ml': other_matrix}
    return(decisions)

In [13]:
def run_compare(df, analyte_val, dil_constant):
    above, below, LLQ, ULQ, NA = False, False, False, False, False
    val = df[analyte_val]
    thresh_val = dil_constant * threshholds[analyte_val]
    try:
        float_val = float(val)
        if math.isnan(float_val):
            NA = True
        elif float_val > thresh_val:
            above = True
        elif float_val < thresh_val:
            below = True
    except ValueError:
        if '<' in val:
            LLQ = True
        elif '>' in val:
            ULQ = True
    finally:
        return(np.array([above, below, LLQ, ULQ, NA]))

In [14]:
# generate an empty list to fill with small dfs, which will be combined
analyte_dfs = []
# run counts for decision on what to keep
for analyte in threshholds.keys():
# for analyte in ['HRP2_pg_ml']:
    patient_dfs = []
    # iterate over patient_ids
    for i in samples_data['patient_id'].unique():
#     for i in ['pa-004', 'pa-005']:
        patient_data = samples_data.loc[samples_data['patient_id'] == i]
        # generate an empty list to fill with dilution dfs, which will be combined
        dil_dfs = []
        # get number of dilutions
        dilution_values = [val for val in patient_data['concentration'].unique() if val != 1]
        # set initial best decision to neat (1)
        best_decision = '1'
        # iterate over dilution values
        for max_dilution in dilution_values:
#         for max_dilution in ['50']:
            # subset to dilutions
            dil_data = patient_data.loc[patient_data['concentration'].isin([best_decision, max_dilution])]
            # create partial function for generating decision vectors
            dil_constant = (int(max_dilution) / int(best_decision)) / 50
            partial_compare = partial(run_compare, analyte_val=analyte, dil_constant=dil_constant)
            # generate decision vectors
            dil_data['decision_vector'] = dil_data.apply(partial_compare, axis=1)
            # pull decision matrix for given analyte and concentrations
            decisions = return_decisions(best_decision, max_dilution)
            decision_matrix = decisions[analyte]
            # construct empty dataframe to hold best values
            best_df = pd.DataFrame(columns=['patient_id', analyte,
                                            '{}_dilution'.format(analyte),
                                            '{}_well'.format(analyte)])
            try:

                vector_low = dil_data.loc[dil_data['concentration'] == best_decision,
                                          'decision_vector'].item()
                vector_high = dil_data.loc[dil_data['concentration'] == max_dilution,
                                           'decision_vector'].item()
                decision = decision_matrix[vector_low, vector_high].item()
                if decision in [best_decision, max_dilution]:
                    val = dil_data.loc[dil_data['concentration'] == decision,
                                       analyte].item()
                    well = dil_data.loc[dil_data['concentration'] == decision,
                                        'well'].item()
                    best_decision = decision
                elif decision == 'fail':
                    val = np.nan
                    well = np.nan
                else:
                    raise ValueError("Unexpected decision value: {}".format(decision))
                best_df = best_df.append({'patient_id': i, analyte: val,
                                          '{}_dilution'.format(analyte): decision,
                                          '{}_well'.format(analyte): well}, ignore_index=True)
                best_decision = decision
                if decision == 'fail':
                    break
            except ValueError:
                print("ValueError:", analyte, max_dilution, i)
        patient_dfs.append(best_df)
    patient_df = pd.concat(patient_dfs)
    analyte_dfs.append(patient_df)
final_df = reduce(lambda left, right: pd.merge(left, right, on='patient_id'), analyte_dfs)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


ValueError: HRP2_pg_ml 1 pa-013-28
ValueError: HRP2_pg_ml 50 pa-013-28
ValueError: HRP2_pg_ml 2500 pa-013-28
ValueError: HRP2_pg_ml 125000 pa-013-28
ValueError: HRP2_pg_ml 1 pa-064-63
ValueError: HRP2_pg_ml 50 pa-064-63
ValueError: HRP2_pg_ml 2500 pa-064-63
ValueError: HRP2_pg_ml 125000 pa-064-63
ValueError: HRP2_pg_ml 1 pa-082-7
ValueError: HRP2_pg_ml 50 pa-082-7
ValueError: HRP2_pg_ml 2500 pa-082-7
ValueError: HRP2_pg_ml 125000 pa-082-7
ValueError: HRP2_pg_ml 2500 pa-093-35
ValueError: HRP2_pg_ml 1 pa-093-35
ValueError: HRP2_pg_ml 125000 pa-093-35
ValueError: HRP2_pg_ml 50 pa-093-35
ValueError: HRP2_pg_ml 1 pa-105-28
ValueError: HRP2_pg_ml 50 pa-105-28
ValueError: HRP2_pg_ml 2500 pa-105-28
ValueError: HRP2_pg_ml 125000 pa-105-28
ValueError: HRP2_pg_ml 2500 pa-106-56
ValueError: HRP2_pg_ml 125000 pa-106-56
ValueError: LDH_Pan_pg_ml 1 pa-013-28
ValueError: LDH_Pan_pg_ml 50 pa-013-28
ValueError: LDH_Pan_pg_ml 2500 pa-013-28
ValueError: LDH_Pan_pg_ml 125000 pa-013-28
ValueError: LDH_Pan_

In [15]:
final_df.head()

Unnamed: 0,patient_id,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,LDH_Pv_pg_ml,LDH_Pv_pg_ml_dilution,LDH_Pv_pg_ml_well,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well
0,pa-001-14,4138.79,50,d8,< 14.41,1,c8,2.76,1,c8,7064.05,1,c8
1,pa-001-21,1377.18,50,d12,38.94,1,c12,12.21,1,c12,2413.65,1,c12
2,pa-001-28,2024.41,50,h2,29.86,1,g2,13.9,1,g2,92.87,1,g2
3,pa-001-3,10273.61,50,h6,2535.11,50,h6,733.44,50,h6,49491.76,50,h6
4,pa-001-7,> 16500.00,50,f12,179.72,1,e12,10.35,1,e12,1021.09,1,e12


In [16]:
final_df.to_csv('C:/Users/lzoeckler/Desktop/4plex/output_data/final_dilutions.csv', index=False)

In [17]:
problem_patients = ['pa-013-28', 'pa-064-63', 'pa-082-7', 'pa-093-35', 'pa-105-28', 'pa-106-56']

In [19]:
combined.loc[combined['patient_id'].str.contains('pa-013-28')]

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
26,pa-013-28 (neat),reduced concentration,c5,,111.45,64.09,9.49,2402.83
60,pa-013-28 50x (1:50),reduced concentration,d5,,7701.74,1023.80,305.23,2194.03
28,pa-013-28 (neat),reduced concentration,e1,,123.73,49.64,3.54,1658.42
29,pa-013-28 (1:50),reduced concentration,f1,,9945.48,< 720.50,134.73,< 1337.00
30,pa-013-28 (1:2500),reduced concentration,g1,,298772.67,< 36025.00,< 5825.00,< 66850.00
31,pa-013-28 (1:125000),reduced concentration,h1,,401669.68,< 1801250.00,667708.74,< 3342500.00


In [20]:
combined.loc[combined['patient_id'].str.contains('pa-064-63')]

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
56,pa-064-63 (neat),reduced concentration,a9,,> 330.00,22.72,11.85,27.58
57,pa-064-63 (1:50),reduced concentration,b9,,1605.35,< 720.50,< 116.50,< 1337.00
32,pa-064-63 (neat),reduced concentration,e4,,205.16,< 14.41,< 2.33,27.70
33,pa-064-63 (1:50),reduced concentration,f4,,1243.41,< 720.50,< 116.50,< 1337.00
34,pa-064-63 (1:2500),reduced concentration,g4,,< 2675.00,< 36025.00,< 5825.00,< 66850.00
35,pa-064-63 (1:125000),reduced concentration,h4,,< 133750.00,< 1801250.00,293919.79,< 3342500.00


In [23]:
sevens = combined.loc[combined['patient_id'].str.contains('pa-082-7')]
sevens.loc[~sevens['patient_id'].str.contains('77|70')]

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
39,pa-082-7 (neat),reduced concentration,c9,,154.55,58.67,10.10,1351.83
73,pa-082-7 50x (1:50),reduced concentration,d9,,13611.88,925.23,229.35,< 1337.00
24,pa-082-7 (neat),reduced concentration,a12,,166.16,103.70,8.19,511.04
25,pa-082-7 (1:50),reduced concentration,b12,,> 16500.00,1934.25,185.24,< 1337.00
26,pa-082-7 (1:2500),reduced concentration,c12,,58199.96,< 36025.00,< 5825.00,< 66850.00
27,pa-082-7 (1:125000),reduced concentration,d12,,< 133750.00,< 1801250.00,< 291250.00,< 3342500.00


In [24]:
combined.loc[combined['patient_id'].str.contains('pa-093-35')]

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
28,pa-093-35 (neat),reduced concentration,g11,,266.66,34.00,15.72,83.41
62,pa-093-35 50x (1:50),reduced concentration,h11,,> 16500.00,904.89,626.50,< 1337.00
36,pa-093-35 (1:2500),reduced concentration,e1,,10043.28,< 36025.00,< 5825.00,< 66850.00
37,pa-093-35 (1:125000),reduced concentration,f1,,< 133750.00,< 1801250.00,< 291250.00,12512171.12
36,pa-093-35 (neat),reduced concentration,e3,,192.06,25.44,3.03,116.52
37,pa-093-35 (1:50),reduced concentration,f3,,5372.78,< 720.50,< 116.50,< 1337.00
38,pa-093-35 (1:2500),reduced concentration,g3,,7576.26,< 36025.00,< 5825.00,< 66850.00
39,pa-093-35 (1:125000),reduced concentration,h3,,< 133750.00,< 1801250.00,400377.74,< 3342500.00


In [25]:
combined.loc[combined['patient_id'].str.contains('pa-105-28')]

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
42,pa-105-28 (neat),reduced concentration,g2,,138.04,15.62,14.96,< 26.74
58,pa-105-28 50x (1:50),reduced concentration,h2,,> 16500.00,818.08,506.24,< 1337.00
40,pa-105-28 (neat),reduced concentration,e2,,135.10,26.92,4.69,< 26.74
41,pa-105-28 (1:50),reduced concentration,f2,,12694.87,< 720.50,< 116.50,< 1337.00
42,pa-105-28 (1:2500),reduced concentration,g2,,49795.09,< 36025.00,5878.40,< 66850.00
43,pa-105-28 (1:125000),reduced concentration,h2,,< 133750.00,< 1801250.00,400377.74,< 3342500.00


In [26]:
combined.loc[combined['patient_id'].str.contains('pa-106-56')]

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,CRP_ng_ml
66,pa-106-56 (neat),reduced concentration,a8,,24.98,< 14.41,8.08,1479.06
67,pa-106-56 (1:50),reduced concentration,b8,,< 53.50,< 720.50,< 116.50,< 1337.00
68,pa-106-56 (1:2500),reduced concentration (replicate 1),c8,,< 2675.00,< 36025.00,< 5825.00,84772.18
69,pa-106-56 (1:2500),reduced concentration (replicate 2),c9,,< 2675.00,< 36025.00,29632.73,70748.31
70,pa-106-56 (1:125000),reduced concentration (replicate 1),d8,,< 133750.00,< 1801250.00,< 291250.00,5143351.02
71,pa-106-56 (1:125000),reduced concentration (replicate 2),d9,,< 133750.00,2626978.27,1143585.91,4938431.66
