In [140]:
import pandas as pd
import numpy as np
import math
from functools import partial, reduce
import os
import re

In [141]:
# Read every raw plate export found in `input_path` and stack them into one
# frame (`combined`) that contains sample rows only.
dfs = []
# NOTE(review): absolute, machine-specific path; consider moving it to a
# configurable location so the notebook can run elsewhere.
input_path = 'C:/Users/lzoeckler/Desktop/5plex/input_data/menzies_raw'
# Sorted so that the row order of `combined` does not depend on the arbitrary
# order in which os.listdir returns the file names.
for fname in sorted(os.listdir(input_path)):
    # The first 13 lines of each file are skipped and column names are
    # supplied explicitly.
    plex_data = pd.read_csv(os.path.join(input_path, fname),
                            skiprows=13, names=['patient_id', 'type', 'well', 'error',
                                               'HRP2_pg_ml', 'LDH_Pan_pg_ml',
                                               'LDH_Pv_pg_ml', 'LDH_Pf_pg_ml',
                                               'CRP_ng_ml'])
    # Lower-case every string cell; non-string cells (numbers, NaN) pass
    # through unchanged. Column-wise Series.map is used instead of the
    # deprecated DataFrame.applymap.
    plex_data = plex_data.apply(
        lambda column: column.map(lambda x: x.lower() if isinstance(x, str) else x))
    # Rows with a blank patient_id inherit the closest non-blank one above.
    plex_data['patient_id'] = plex_data['patient_id'].ffill()
    # Anything still blank sat above the first labelled row; drop it.
    plex_data = plex_data[plex_data['patient_id'].notnull()]
    dfs.append(plex_data)
combined = pd.concat(dfs)
# Discard control rows, rows without a type, and replicate rows.
combined = combined.loc[~combined['patient_id'].str.contains('ctrl')]
combined = combined.loc[combined['type'].notnull()]
combined = combined.loc[~combined['type'].str.contains('replicate')]
combined.head()

Unnamed: 0,patient_id,type,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,LDH_Pf_pg_ml,CRP_ng_ml
15,qdm 1434 2019/03/07 edta wb (neat),reduced concentration,c5,,< 0.68,41.02,28.01,< 5.08,> 38000.00
16,qdm 1434 2019/03/07 edta wb (1:20),reduced concentration,d5,,< 13.60,< 327.20,< 93.80,< 101.60,87928.67
17,qdm 1479 2019/04/10 edta plas (neat),reduced concentration,c4,,5.14,962.33,526.7,15.76,> 38000.00
18,qdm 1479 2019/04/10 edta plas (1:20),reduced concentration,d4,,< 13.60,1056.63,350.9,< 101.60,> 760000.00
19,qem 31 2010/10/19 pfp (neat),reduced concentration,c2,,6.26,30554.78,19174.42,60.81,> 38000.00


In [142]:
def fix_concentrations(df):
    """Reduce a raw dilution tag to its dilution-factor string.

    Reads ``df['concentration']`` and returns the text found between the
    first ':' and the following ')'. If that text is empty (for example
    the tag contains no ':' at all), '1' is returned instead.

    Parameters
    ----------
    df : mapping-like row
        Must provide a string under the key 'concentration'.

    Returns
    -------
    str
        The extracted factor, e.g. '20' for '(1:20)', or '1' as fallback.
    """
    _, _, after_colon = df['concentration'].partition(':')
    factor, _, _ = after_colon.partition(')')
    # An empty string is falsy, so this falls back to '1'.
    return factor or '1'

In [143]:
# Split each raw sample label into an underscore-joined ID (every
# space-separated token but the last, with '/' replaced by '_') and a
# dilution tag (the last token).
raw_labels = combined['patient_id']
samples_data = combined.drop('type', axis=1).assign(
    concentration=raw_labels.apply(lambda label: label.split(' ')[-1]),
    patient_id=raw_labels.apply(
        lambda label: '_'.join(label.split(' ')[:-1]).replace('/', '_')),
)
# Keep only rows whose tag mentions 'neat' or '20' and that have a well.
samples_data = samples_data[samples_data['concentration'].str.contains('neat|20')]
samples_data = samples_data[samples_data['well'].notnull()]
# Normalise the tag to a dilution factor string, then order the rows.
samples_data = (
    samples_data
    .assign(concentration=samples_data.apply(fix_concentrations, axis=1))
    .sort_values(['patient_id', 'concentration'])
)
samples_data.head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,LDH_Pf_pg_ml,CRP_ng_ml,concentration
39,kk_103_2014_04_09_hep_plas,e11,,6.52,66763.83,17747.82,81.37,> 38000.00,1
40,kk_103_2014_04_09_hep_plas,f11,,23.05,82537.55,43492.8,227.52,> 760000.00,20
42,kk_103_2014_04_09_wblamp,g8,,3.02,> 67000.00,19185.53,71.94,> 38000.00,1
43,kk_103_2014_04_09_wblamp,h8,,< 13.60,1563.69,952.99,< 101.60,46420.38,20
41,kk_107_2014_04_15_hep_plas,e12,,2.82,900.77,479.96,22.38,> 38000.00,1


In [144]:
# Preview the rows whose CRP reading contains '<'.
samples_data.loc[samples_data['CRP_ng_ml'].str.contains('<')].head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,LDH_Pf_pg_ml,CRP_ng_ml,concentration
21,qeck_37_2014_10_16_hep_plas,b11,,< 13.60,< 327.20,109.94,156.6,< 185.60,20
34,qem_150_2011_03_22_pfp,d8,,< 13.60,< 327.20,< 93.80,< 101.60,< 185.60,20
90,qem_205_2011_05_06_pfp,h12,,> 56000.00,106891.68,< 93.80,145979.26,< 185.60,20


In [145]:
# Sanity check: list the distinct dilution factors left after clean-up.
samples_data['concentration'].unique()

array(['1', '20'], dtype=object)

In [146]:
# Distinct sample IDs in `samples_data`, as a list and as a set.
# NOTE(review): in the cells visible here `sample_set` is only referenced by
# a commented-out assertion.
sample_ids = samples_data['patient_id'].unique().tolist()
sample_set = set(sample_ids)

In [147]:
# Quantification limits per analyte, keyed first by limit type
# ('ulq' = upper, 'llq' = lower) and then by analyte column name.
# run_compare multiplies these by the integer dilution factor before use.
# NOTE(review): units are presumably those in each key's suffix
# (pg/ml, ng/ml) -- confirm.
thresholds = {'ulq': {'HRP2_pg_ml': 2800, 'LDH_Pan_pg_ml': 67000,
                      'LDH_Pv_pg_ml': 19200, 'LDH_Pf_pg_ml': 20800,
                      'CRP_ng_ml': 38000},
              'llq': {'HRP2_pg_ml': .68, 'LDH_Pan_pg_ml': 16.36,
                      'LDH_Pv_pg_ml': 4.96, 'LDH_Pf_pg_ml': 5.08,
                      'CRP_ng_ml': 9.28}}

In [148]:
# Positivity thresholds per analyte column; CRP is set to NaN.
# NOTE(review): there is no 'LDH_Pf_pg_ml' entry, and no other cell visible
# here reads this dict -- confirm whether it is still needed.
pos_thresholds = {'HRP2_pg_ml': 2.3, 'LDH_Pan_pg_ml': 47.8,
                   'LDH_Pv_pg_ml': 75.1, 'CRP_ng_ml': np.nan}

In [149]:
# keep=False removes EVERY row of a (patient_id, concentration) pair that
# occurs more than once, so each remaining pair is unique. A sample can lose
# one of its dilutions entirely this way.
no_duplicates = samples_data.drop_duplicates(subset=['patient_id', 'concentration'], keep=False)
no_duplicates.head()

Unnamed: 0,patient_id,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,LDH_Pf_pg_ml,CRP_ng_ml,concentration
39,kk_103_2014_04_09_hep_plas,e11,,6.52,66763.83,17747.82,81.37,> 38000.00,1
40,kk_103_2014_04_09_hep_plas,f11,,23.05,82537.55,43492.8,227.52,> 760000.00,20
42,kk_103_2014_04_09_wblamp,g8,,3.02,> 67000.00,19185.53,71.94,> 38000.00,1
43,kk_103_2014_04_09_wblamp,h8,,< 13.60,1563.69,952.99,< 101.60,46420.38,20
41,kk_107_2014_04_15_hep_plas,e12,,2.82,900.77,479.96,22.38,> 38000.00,1


In [150]:
# Show each distinct value of the 'error' column together with its Python
# type, to see which kinds of value later cells have to handle.
for err in no_duplicates['error'].unique():
    print(err)
    print(type(err))

nan
<class 'float'>


In [151]:
def return_decisions(low, high, fail='fail'):
    """Build the per-analyte lookup table of dilution decisions.

    Parameters
    ----------
    low : str
        Label placed in cells where the lower (first) dilution is chosen.
    high : str
        Label placed in cells where the higher (second) dilution is chosen.
    fail : str, optional
        Label placed in cells where neither reading is usable.

    Returns
    -------
    dict
        Maps each analyte column name to a 4x4 numpy array of labels.
        All five analytes share the same array object.
    """
    # Columns = neat: [LLQ, real #, ULQ or within 20% ULQ, NA]
    # Rows = dilution: [LLQ or within 20x LLQ, real #, ULQ or within 20% ULQ, NA]
    grid = [
        [low, low, fail, fail],
        [low, low, high, fail],
        [fail, fail, high, fail],
        [fail, fail, fail, fail],
    ]
    shared_matrix = np.array(grid)
    analytes = ('HRP2_pg_ml', 'LDH_Pan_pg_ml', 'LDH_Pv_pg_ml',
                'LDH_Pf_pg_ml', 'CRP_ng_ml')
    # dict.fromkeys stores the same matrix object under every key.
    return dict.fromkeys(analytes, shared_matrix)

In [152]:
def run_compare(df, analyte_val, dil_val):
    """Classify one reading as a one-hot [LLQ, real, ULQ, NA] vector.

    Parameters
    ----------
    df : mapping-like row
        Provides the reading under the key ``analyte_val`` (for example a
        DataFrame row handed over by ``apply(axis=1)``).
    analyte_val : str
        Analyte column name; also used as key into the module-level
        ``thresholds`` dict.
    dil_val : str
        Dilution factor as a digit string, e.g. '1' or '20'.

    Returns
    -------
    numpy.ndarray
        Four booleans ``[LLQ, real, ULQ, NA]`` with exactly one True.

    Raises
    ------
    ValueError
        If the reading is a string that is neither numeric nor flagged
        with '<' or '>'. (Previously a ``return`` inside ``finally``
        silently discarded this error and returned an all-False vector.)
    """
    # Columns = neat: [LLQ, real #, ULQ or within 20% ULQ, NA]
    # Rows = dilution: [LLQ or within 20x LLQ, real #, ULQ or within 20% ULQ, NA]
    LLQ, real, ULQ, NA = False, False, False, False
    val = df[analyte_val]
    # Limits are scaled by the dilution factor.
    ulq_val = int(dil_val) * thresholds['ulq'][analyte_val]
    llq_val = int(dil_val) * thresholds['llq'][analyte_val]
    if val is None:
        # float(None) would raise TypeError; treat it like a missing reading.
        NA = True
        return(np.array([LLQ, real, ULQ, NA]))
    try:
        float_val = float(val)
    except ValueError:
        # Non-numeric text: out-of-range readings are written as '< x' / '> x'.
        if '<' in val:
            LLQ = True
        elif '>' in val:
            ULQ = True
        else:
            raise ValueError("Unexpected value: {}".format(val))
    else:
        if math.isnan(float_val):
            NA = True
        # NOTE(review): llq_val is already scaled by the dilution, so this
        # bound equals 20 * dilution * LLQ -- confirm that this is the
        # intended "within 20x LLQ" window.
        elif (dil_val == '20') and (float_val < 20*llq_val):
            LLQ = True
        elif float_val > (.8*ulq_val):
            ULQ = True
        else:
            real = True
    return(np.array([LLQ, real, ULQ, NA]))

In [168]:
def _single_value(frame, concentration, column):
    """Return `column` from the one row of `frame` at `concentration`.

    Series.item raises ValueError unless exactly one row matches.
    """
    return frame.loc[frame['concentration'] == concentration, column].item()


def _decision_vector(frame, analyte, concentration, pid):
    """Classify the single reading of `analyte` at `concentration` via run_compare."""
    rows = frame.loc[frame['concentration'] == concentration]
    if len(rows) != 1:
        raise ValueError("Expected exactly one row for patient {} at dilution {}, "
                         "found {}".format(pid, concentration, len(rows)))
    # Classifying the row directly avoids writing a helper column onto a
    # slice of `no_duplicates` (the source of the SettingWithCopyWarning).
    return run_compare(rows.iloc[0], analyte_val=analyte, dil_val=concentration)


# For every analyte and every sample, decide which dilution's reading to
# report (or 'fail'), then merge the per-analyte results side by side.
analyte_dfs = []
# patient_id -> comma-separated "<analyte> failure" notes
error_pids = {}
for analyte in thresholds['ulq'].keys():
    print(analyte)
    result_columns = ['patient_id', 'errors', analyte,
                      '{}_dilution'.format(analyte),
                      '{}_well'.format(analyte),
                      '{}_neat_val'.format(analyte),
                      '{}_20x_val'.format(analyte)]
    patient_rows = []
    for pid in no_duplicates['patient_id'].unique():
        patient_data = no_duplicates.loc[no_duplicates['patient_id'] == pid]
        # Non-neat dilutions, shortest string first.
        dilution_values = sorted([val for val in patient_data['concentration'].unique() if val != '1'], key=len)
        best_decision = '1'
        # Reset per patient: a sample without any non-neat dilution must not
        # inherit the previous sample's result.
        patient_row = None
        for current_dilution in dilution_values:
            vector_best = _decision_vector(patient_data, analyte, best_decision, pid)
            vector_current = _decision_vector(patient_data, analyte, current_dilution, pid)
            decision_matrix = return_decisions(best_decision, current_dilution)[analyte]
            # Both vectors are one-hot, so this picks exactly one cell
            # (row = current dilution class, column = best-so-far class).
            decision = decision_matrix[vector_current, vector_best].item()
            if decision in [best_decision, current_dilution]:
                val = _single_value(patient_data, decision, analyte)
                well = _single_value(patient_data, decision, 'well')
                error = _single_value(patient_data, decision, 'error')
            elif decision == 'fail':
                val = 'fail'
                well = 'fail'
                error = np.nan
                note = '{} failure'.format(analyte)
                if pid in error_pids:
                    error_pids[pid] += ', {}'.format(note)
                else:
                    error_pids[pid] = note
            else:
                raise ValueError("Unexpected decision value: {}".format(decision))
            # Raw neat and 20x readings are always reported for reference.
            val_for = _single_value(patient_data, '1', analyte)
            val_20_for = _single_value(patient_data, '20', analyte)
            df_decision = decision if decision not in ('fail', '***') else np.nan
            best_decision = decision
            patient_row = {'patient_id': pid, 'errors': error, analyte: val,
                           '{}_dilution'.format(analyte): df_decision,
                           '{}_well'.format(analyte): well,
                           '{}_neat_val'.format(analyte): val_for,
                           '{}_20x_val'.format(analyte): val_20_for}
            if decision == 'fail':
                break
        if patient_row is not None:
            patient_rows.append(patient_row)
    # Build the frame once from the collected records instead of growing it
    # with DataFrame.append (removed in pandas 2).
    patient_df = pd.DataFrame(patient_rows, columns=result_columns, dtype=object)
    patient_df['errors'] = patient_df['errors'].astype('object')
    analyte_dfs.append(patient_df)
# NOTE(review): this is an inner merge on ['patient_id', 'errors']. 'errors'
# comes from the well chosen for each analyte, so a sample whose chosen wells
# carry different error values would be dropped here -- confirm intended.
final_df = reduce(lambda left, right: pd.merge(left, right, on=['patient_id', 'errors']), analyte_dfs)

HRP2_pg_ml


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


LDH_Pan_pg_ml
LDH_Pv_pg_ml
LDH_Pf_pg_ml
CRP_ng_ml


In [169]:
# For the HRP2 assay only: if the neat value is less than 100 and the 20x value
# is either more than 10x the neat value or above the ULOQ, set "alert".

In [170]:
# Flag HRP2 results where the neat reading is a plain number below 100 but
# the 20x reading is either above range ('>') or more than 10x the neat one.
neat_col = 'HRP2_pg_ml_neat_val'
dil_col = 'HRP2_pg_ml_20x_val'

# Strip out-of-range and failed neat readings first so that the remaining
# values can be cast to float.
numeric_neat = final_df[~final_df[neat_col].str.contains('>')]
numeric_neat = numeric_neat[~numeric_neat[neat_col].str.contains('<')]
numeric_neat = numeric_neat[numeric_neat[neat_col] != 'fail']
low_neat = numeric_neat[numeric_neat[neat_col].astype(float) < 100]

# Case 1: the 20x reading is above its upper limit.
dil_over_ulq = low_neat[dil_col].str.contains('>')
alert_ulq = low_neat[dil_over_ulq]

# Case 2: the 20x reading is numeric and exceeds ten times the neat reading.
alert_10x = low_neat[~dil_over_ulq]
alert_10x = alert_10x[~alert_10x[dil_col].str.contains('<')]
ratio_exceeded = (alert_10x[dil_col].astype(float) >
                  (alert_10x[neat_col].astype(float) * 10))
alert_10x = alert_10x[ratio_exceeded]

# Overwrite the HRP2 result columns of the flagged rows.
alert_df = pd.concat([alert_ulq, alert_10x]).assign(
    **{'HRP2_pg_ml': 'alert',
       'HRP2_pg_ml_dilution': np.nan,
       'HRP2_pg_ml_well': 'alert'})
alert_df.head()

Unnamed: 0,patient_id,errors,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,HRP2_pg_ml_neat_val,HRP2_pg_ml_20x_val,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,...,LDH_Pf_pg_ml,LDH_Pf_pg_ml_dilution,LDH_Pf_pg_ml_well,LDH_Pf_pg_ml_neat_val,LDH_Pf_pg_ml_20x_val,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well,CRP_ng_ml_neat_val,CRP_ng_ml_20x_val
303,qem_117_2011_02_15_pfp,,alert,,alert,3.55,> 56000.00,7343.24,1,e1,...,33.73,1,e1,33.73,2807.57,> 760000.00,20,f1,> 38000.00,> 760000.00
15,kk_43_2013_05_06_wblamp,,alert,,alert,3.38,61.47,> 1340000.00,20,f8,...,215.46,1,e8,215.46,1383.84,> 760000.00,20,f8,> 38000.00,> 760000.00
25,kk_55_2013_07_29_wblamp,,alert,,alert,3.46,34.76,> 1340000.00,20,h1,...,291.32,1,g1,291.32,1334.81,298054.3,20,h1,> 38000.00,298054.3
35,mk_47_2014_03_03_wblamp,,alert,,alert,2.4,41.18,> 1340000.00,20,f5,...,232.57,1,e5,232.57,2034.77,72739.55,20,f5,> 38000.00,72739.55
51,qdm_1446_2019_03_11_edta_wb,,alert,,alert,2.07,21.85,97997.16,20,h2,...,166.56,1,g2,166.56,269.6,31031.87,20,h2,> 38000.00,31031.87


In [171]:
# Swap the flagged samples' rows in `final_df` for their 'alert' versions;
# the alert rows end up first.
alert_patients = alert_df['patient_id'].tolist()
is_alerted = final_df['patient_id'].isin(alert_patients)
final_df = pd.concat([alert_df, final_df[~is_alerted]])
final_df.head()

Unnamed: 0,patient_id,errors,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,HRP2_pg_ml_neat_val,HRP2_pg_ml_20x_val,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,...,LDH_Pf_pg_ml,LDH_Pf_pg_ml_dilution,LDH_Pf_pg_ml_well,LDH_Pf_pg_ml_neat_val,LDH_Pf_pg_ml_20x_val,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well,CRP_ng_ml_neat_val,CRP_ng_ml_20x_val
303,qem_117_2011_02_15_pfp,,alert,,alert,3.55,> 56000.00,7343.24,1,e1,...,33.73,1,e1,33.73,2807.57,> 760000.00,20,f1,> 38000.00,> 760000.00
15,kk_43_2013_05_06_wblamp,,alert,,alert,3.38,61.47,> 1340000.00,20,f8,...,215.46,1,e8,215.46,1383.84,> 760000.00,20,f8,> 38000.00,> 760000.00
25,kk_55_2013_07_29_wblamp,,alert,,alert,3.46,34.76,> 1340000.00,20,h1,...,291.32,1,g1,291.32,1334.81,298054.3,20,h1,> 38000.00,298054.3
35,mk_47_2014_03_03_wblamp,,alert,,alert,2.4,41.18,> 1340000.00,20,f5,...,232.57,1,e5,232.57,2034.77,72739.55,20,f5,> 38000.00,72739.55
51,qdm_1446_2019_03_11_edta_wb,,alert,,alert,2.07,21.85,97997.16,20,h2,...,166.56,1,g2,166.56,269.6,31031.87,20,h2,> 38000.00,31031.87


In [172]:
#             elif decision == '***':
#                 val = '***'
#                 val_for = dil_data.loc[dil_data['concentration'] == '1',
#                                    analyte].item()
#                 val_per_ULQ = 100 * (float(val_for) / (thresholds[analyte]))
#                 val_20_for = dil_data.loc[dil_data['concentration'] == '20',
#                                    analyte].item()
#                 val_20_per_ULQ = 100 * (float(val_20_for) / (20 * thresholds[analyte]))
#                 well = dil_data.loc[dil_data['concentration'] == '1',
#                                     'well'].item()
#                 error = np.nan
#                 error_pids[pid] = 'ALERT'

#             if decision == '***':
#                 break

In [173]:
# Attach the per-sample failure notes collected in `error_pids` to the
# 'errors' column. Updated rows are moved to the end of `super_final`.
super_final = final_df.copy(deep=True)
for pid, error in error_pids.items():
    # Explicit copy so the column assignment below does not write to a slice.
    pid_df = final_df.loc[final_df['patient_id'] == pid].copy()
    if len(pid_df) == 0:
        continue
    # pd.isnull accepts both NaN and strings; np.isnan raises TypeError on a
    # string, which made appending to an existing error text impossible.
    pid_df['errors'] = pid_df['errors'].apply(
        lambda x: error if pd.isnull(x) else x + ' ' + error)
    super_final = super_final.loc[super_final['patient_id'] != pid]
    # pd.concat replaces DataFrame.append, which pandas 2 removed.
    super_final = pd.concat([super_final, pid_df])
print(len(super_final))
super_final.head()

504


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,patient_id,errors,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,HRP2_pg_ml_neat_val,HRP2_pg_ml_20x_val,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,...,LDH_Pf_pg_ml,LDH_Pf_pg_ml_dilution,LDH_Pf_pg_ml_well,LDH_Pf_pg_ml_neat_val,LDH_Pf_pg_ml_20x_val,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well,CRP_ng_ml_neat_val,CRP_ng_ml_20x_val
15,kk_43_2013_05_06_wblamp,,alert,,alert,3.38,61.47,> 1340000.00,20,f8,...,215.46,1,e8,215.46,1383.84,> 760000.00,20,f8,> 38000.00,> 760000.00
25,kk_55_2013_07_29_wblamp,,alert,,alert,3.46,34.76,> 1340000.00,20,h1,...,291.32,1,g1,291.32,1334.81,298054.3,20,h1,> 38000.00,298054.3
35,mk_47_2014_03_03_wblamp,,alert,,alert,2.4,41.18,> 1340000.00,20,f5,...,232.57,1,e5,232.57,2034.77,72739.55,20,f5,> 38000.00,72739.55
51,qdm_1446_2019_03_11_edta_wb,,alert,,alert,2.07,21.85,97997.16,20,h2,...,166.56,1,g2,166.56,269.6,31031.87,20,h2,> 38000.00,31031.87
52,qdm_1446_2019_03_12_edta_wb,,alert,,alert,0.75,19.21,294.91,1,g3,...,10.21,1,g3,10.21,< 101.60,126693.78,20,h3,> 38000.00,126693.78


In [174]:
# Sanity check: list the distinct error notes now present.
super_final['errors'].unique()

array([nan, 'HRP2_pg_ml failure',
       'HRP2_pg_ml failure, LDH_Pf_pg_ml failure',
       'HRP2_pg_ml failure, LDH_Pan_pg_ml failure, LDH_Pv_pg_ml failure',
       'HRP2_pg_ml failure, LDH_Pv_pg_ml failure',
       'LDH_Pan_pg_ml failure, LDH_Pv_pg_ml failure', 'CRP_ng_ml failure'],
      dtype=object)

In [175]:
# final_ids = super_final['patient_id'].unique().tolist()
# final_set = set(final_ids)
# assert final_set == sample_set, print(sample_set - final_set)

In [176]:
def split_time(df, run=True):
    """Return the part of ``df['patient_id']`` after its first two '_' tokens.

    Parameters
    ----------
    df : mapping-like row
        Must provide a string under the key 'patient_id'.
    run : bool, optional
        Guard flag; a falsy value raises KeyError. Defaults to True so the
        function can be passed to ``DataFrame.apply`` without extra
        arguments.

    Returns
    -------
    str or int
        Tokens from the third one onward, re-joined with '_'. Returns 0
        when the ID has no third token. (The original tried to do this via
        ``except IndexError``, but slicing never raises, so that fallback
        was unreachable and '' was returned instead.)

    Raises
    ------
    KeyError
        If ``run`` is falsy.
    """
    if not run:
        raise KeyError('BUSTOOOO')
    sub = df['patient_id'].split('_')
    if len(sub) <= 2:
        return 0
    return '_'.join(sub[2:])

In [177]:
def remove_time(df):
    """Return the first two '_'-separated tokens of ``df['patient_id']``.

    The tokens are re-joined with '_'. IDs with fewer than two tokens are
    returned unchanged.
    """
    leading_tokens = df['patient_id'].split('_')[:2]
    return '_'.join(leading_tokens)

In [178]:
# Final table: an independent copy of `super_final`, ordered and indexed by
# sample ID. Splitting the ID into patient and date is currently disabled:
# time_df['date'] = time_df.apply(split_time, axis=1, run=True)
# time_df['patient_id'] = time_df.apply(remove_time, axis=1)
time_df = (
    super_final
    .copy(deep=True)
    .sort_values(['patient_id'])
    .set_index(['patient_id'])
)
time_df.head()

Unnamed: 0_level_0,errors,HRP2_pg_ml,HRP2_pg_ml_dilution,HRP2_pg_ml_well,HRP2_pg_ml_neat_val,HRP2_pg_ml_20x_val,LDH_Pan_pg_ml,LDH_Pan_pg_ml_dilution,LDH_Pan_pg_ml_well,LDH_Pan_pg_ml_neat_val,...,LDH_Pf_pg_ml,LDH_Pf_pg_ml_dilution,LDH_Pf_pg_ml_well,LDH_Pf_pg_ml_neat_val,LDH_Pf_pg_ml_20x_val,CRP_ng_ml,CRP_ng_ml_dilution,CRP_ng_ml_well,CRP_ng_ml_neat_val,CRP_ng_ml_20x_val
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
kk_103_2014_04_09_hep_plas,,6.52,1,e11,6.52,23.05,82537.55,20.0,f11,66763.83,...,81.37,1,e11,81.37,227.52,> 760000.00,20,f11,> 38000.00,> 760000.00
kk_103_2014_04_09_wblamp,"LDH_Pan_pg_ml failure, LDH_Pv_pg_ml failure",3.02,1,g8,3.02,< 13.60,fail,,fail,> 67000.00,...,71.94,1,g8,71.94,< 101.60,46420.38,20,h8,> 38000.00,46420.38
kk_107_2014_04_15_hep_plas,,2.82,1,e12,2.82,13.99,900.77,1.0,e12,900.77,...,22.38,1,e12,22.38,156.56,67851.5,20,f12,> 38000.00,67851.5
kk_107_2014_04_15_wblamp,,2.12,1,g9,2.12,< 13.60,1563.88,1.0,g9,1563.88,...,10.7,1,g9,10.7,385.06,48678.79,20,h9,> 38000.00,48678.79
kk_113_2014_05_12_hep_plas,,0.93,1,c11,0.93,< 13.60,218.1,1.0,c11,218.1,...,8.51,1,c11,8.51,142.87,85202.89,20,d11,> 38000.00,85202.89


In [179]:
# Write the final per-sample decisions to CSV (the index, patient_id, is
# written as the first column).
# NOTE(review): absolute, machine-specific output path.
time_df.to_csv('C:/Users/lzoeckler/Desktop/newcomparefunc_dilutions.csv')

In [69]:
# Cleaned per-well readings (before any dilution decision), ordered and
# indexed by sample ID. The time-point split and metadata merge are disabled:
# partial_format['time_point_days'] = partial_format.apply(split_time, axis=1)
# partial_format['patient_id'] = partial_format.apply(remove_time, axis=1)
# partial_format = partial_format.merge(add_info, how='left', on=['patient_id', 'time_point_days'])
partial_format = (
    samples_data
    .copy(deep=True)
    .sort_values(['patient_id'])
    .set_index(['patient_id'])
)
partial_format.head()

Unnamed: 0_level_0,well,error,HRP2_pg_ml,LDH_Pan_pg_ml,LDH_Pv_pg_ml,LDH_Pf_pg_ml,CRP_ng_ml,concentration
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
kk_103_2014_04_09_hep_plas,e11,,6.52,66763.83,17747.82,81.37,> 38000.00,1
kk_103_2014_04_09_hep_plas,f11,,23.05,82537.55,43492.8,227.52,> 760000.00,20
kk_103_2014_04_09_wblamp,g8,,3.02,> 67000.00,19185.53,71.94,> 38000.00,1
kk_103_2014_04_09_wblamp,h8,,< 13.60,1563.69,952.99,< 101.60,46420.38,20
kk_107_2014_04_15_hep_plas,e12,,2.82,900.77,479.96,22.38,> 38000.00,1


In [71]:
# Write the cleaned per-well readings to CSV (the index, patient_id, is
# written as the first column).
# NOTE(review): absolute, machine-specific output path.
partial_format.to_csv('C:/Users/lzoeckler/Desktop/partially_formatted.csv')