In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pyodbc
import warnings
from matplotlib.lines import Line2D
import scipy.stats
import matplotlib.ticker as mtick
warnings.filterwarnings('ignore')

from IPython.display import HTML
# Connection settings for the lab results database.
# NOTE: 'port' is kept in the mapping, but the template below has no {port}
# placeholder, so str.format() silently ignores it.
config = {
    'server': 'ABR-RIPLEYA-D1',
    'port': 1433,
    'database': 'neumodxVVLabDB',
}

# Template for everything after the DRIVER clause; filled in from `config`.
conn_str = ';'.join([
    'SERVER={server}',
    'Database={database}',
    'TRUSTED_CONNECTION=yes',
])

# Shared pyodbc connection that the query helpers pass to pd.read_sql.
conn = pyodbc.connect(r'DRIVER={SQL Server};' + conn_str.format(**config))
# pd.set_option("display.max_columns", None)
# pd.set_option("display.max_rows", None)



In [2]:
# Plot palette: integer key (1..12) -> hex colour string.
colorDict = dict(enumerate((
    '#FF0000',  # 1  Red
    '#00B050',  # 2  Green
    '#0070C0',  # 3  Blue
    '#7030A0',  # 4  Purple
    '#808080',  # 5  Light Grey
    '#FF6600',  # 6  Orange
    '#FFCC00',  # 7  Yellow
    '#9999FF',  # 8  Light Purple
    '#333333',  # 9  Black
    '#808000',  # 10 Goldish
    '#FF99CC',  # 11 Hot Pink
    '#003300',  # 12 Dark Green
), start=1))

##Function to Get Channel Data (i.e. Green Raw, Red Normalized etc.)
def getSQLData_Channel(color, dtype, assay, start_time_filter, end_date_filter,system,assay_version,cartridge_lot_filter, cartridge_lot_filter_b, cartridge_lot_filter_c, cartridge_lot_filter_d):
    """Read rows from one per-channel master table into a DataFrame.

    The table queried is ``[dbo].[NeuMoDx_<color>_<dtype>_Master]``. Rows are
    kept when [Result Code], [N500 Serial Number] and [Assay Version] contain
    the given substrings, [Start Time] lies strictly between
    ``start_time_filter`` and ``end_date_filter``, and [Cartridge Barcode]
    contains at least one of the four cartridge-lot substrings.

    All arguments must be strings; they are spliced into the SQL text verbatim
    (no escaping), so an empty string yields ``like '%%'``. The final query is
    printed before it is executed on the module-level ``conn``.

    Returns the DataFrame produced by ``pd.read_sql``.
    """
    lot_filters = (cartridge_lot_filter, cartridge_lot_filter_b,
                   cartridge_lot_filter_c, cartridge_lot_filter_d)
    lot_clause = " OR ".join(
        ["[Cartridge Barcode] like '%" + lot + "%'" for lot in lot_filters]
    )

    # No space precedes "where": the closing bracket of the table name ends the identifier.
    fragments = [
        "Select * from [dbo].[NeuMoDx_", color, "_", dtype, "_Master]",
        "where ([Result Code] like '%", assay, "%')",
        " and [Start Time]>'", start_time_filter, "'",
        " and [Start Time] < '", end_date_filter, "'",
        " and [N500 Serial Number] like '%", system, "%'",
        " and [Assay Version] like '%", assay_version, "%'",
        " and (", lot_clause, ")",
    ]
    query = "".join(fragments)

    print("Query:", query)
    return pd.read_sql(query, conn)


##Function to Get System Master Table Data (per-test rows, indexed by PCR module serial and cartridge lane)
def getSQLData_COC(assay, start_time_filter, end_date_filter, system, assay_version, cartridge_lot_filter, cartridge_lot_filter_b, cartridge_lot_filter_c, cartridge_lot_filter_d):
    """Read rows from ``[dbo].[NeuMoDx_System_Master_Table]`` into a DataFrame.

    Rows are kept when [Result Code], [N500 Serial Number] and [Assay Version]
    contain the given substrings, [Start Date Time] lies strictly between
    ``start_time_filter`` and ``end_date_filter``, and [Pcr Cartridge Barcode]
    contains at least one of the four cartridge-lot substrings.

    All arguments must be strings; they are spliced into the SQL text verbatim
    (no escaping). The final query is printed before it is executed on the
    module-level ``conn``.

    Returns the result indexed by ('PCR Module Serial', 'Pcr Cartridge Lane').
    """
    lot_filters = (cartridge_lot_filter, cartridge_lot_filter_b,
                   cartridge_lot_filter_c, cartridge_lot_filter_d)
    lot_clause = " OR ".join(
        ["[Pcr Cartridge Barcode] like '%" + lot + "%'" for lot in lot_filters]
    )

    fragments = [
        "Select * from [dbo].[NeuMoDx_System_Master_Table]",
        " where ([Result Code] like '%", assay, "%')",
        " and [Start Date Time]>'", start_time_filter, "'",
        " and [Start Date Time] < '", end_date_filter, "'",
        " and [N500 Serial Number] like '%", system, "%'",
        " and [Assay Version] like '%", assay_version, "%'",
        " and (", lot_clause, ")",
    ]
    query = "".join(fragments)

    print("Query:", query)
    rows = pd.read_sql(query, conn)
    return rows.set_index(['PCR Module Serial', 'Pcr Cartridge Lane'])

def flatten(t):
    """Return one flat list holding the items of every sub-iterable in ``t`` (one level deep)."""
    flat = []
    for group in t:
        flat.extend(group)
    return flat
def xlfile():
    """List entries of the current working directory whose names start with
    'RawDataExport' and end with 'xlsx', in the order os.listdir() yields them."""
    return [
        name
        for name in os.listdir()
        if name.endswith("xlsx") and name.startswith("RawDataExport")
    ]
def mylist():
    """Collect the 'Test Guid' values from every workbook returned by xlfile().

    Each workbook's sheet at position 1 (the second sheet) is read with
    pd.read_excel; the per-file lists are merged into one flat list.
    """
    guid_lists = [
        pd.read_excel(workbook, 1)['Test Guid'].tolist()
        for workbook in xlfile()
    ]
    return flatten(guid_lists)

# NOTE: this rebinds the name `mylist` from the function defined above to its
# result (a flat list of 'Test Guid' values). After this line the function can
# no longer be called under that name unless its `def` is executed again.
mylist = mylist()

def generateSummaryTable(data, channel):
    """Summarise one channel's per-test results per module and per run.

    Parameters
    ----------
    data : pandas.DataFrame
        Per-test rows for a single channel. The code reads the columns
        'Start Time', 'Target Result', 'Ct', 'End Point Fluorescence',
        'Max Peak Height' and 'EPR', iterates over index level 0 as the module
        key, and needs 'XPCR Module Serial' to exist as a column after
        ``reset_index()`` (in this notebook it is index level 0).
    channel : str
        Label used for a temporary top column level that is added and then
        selected away again; it does not appear in the returned tables.

    Returns
    -------
    tuple (summary, detail)
        ``summary`` is indexed by (module, 'Run Number'), where 'Run Number'
        is a run number or the string 'Combined' (all runs of that module).
        Its columns are the first/last 'Start Time', '% AMP', the result
        counts, 'Sample Size', and %CV / mean / std of each statistic.
        ``detail`` is the per-test frame, sorted by 'Start Time', with the
        helper columns added here, indexed by ('XPCR Module Serial', 'Run Number').

    Notes
    -----
    * A new run starts whenever consecutive tests on a module are more than
      3600 s apart; the first test of a module always starts a run.
    * '%CV' is computed as std / mean (a ratio; it is not multiplied by 100).
    * 'Sample Size' does not include rows counted under 'Abort'.
    * The final column labels are assigned by position, so they must follow the
      order of the column selection just above them. The 'IND' / 'UNR' labels
      were previously swapped relative to that selection; they now match it.
    """
    SummaryDataBase = data.sort_values('Start Time')

    stats = ['Ct', 'End Point Fluorescence', 'Max Peak Height', 'EPR']
    results_Stats = ['Amp','NotAmp','NoResult','IND', 'UNR', 'Sample Size']
    #SummaryDataBase['Run Number'] = np.nan
    for mod in SummaryDataBase.index.unique(0):
        # Seconds elapsed since the previous test on this module (NaN for the first test).
        SummaryDataBase.loc[mod, 'Time Difference'] = SummaryDataBase.loc[mod, 'Start Time'].diff().values / np.timedelta64(1, 's')
        # Replace the leading NaN with a value above the threshold so the first test opens a run.
        SummaryDataBase.loc[mod, 'Time Difference'] = np.where(SummaryDataBase.loc[mod, 'Time Difference'].isnull(),50000,SummaryDataBase.loc[mod, 'Time Difference'])
        SummaryDataBase.loc[mod, 'New Run?'] = np.where(SummaryDataBase.loc[mod, 'Time Difference']>3600,True,False) 
        # Number the run-opening rows 1..k, then forward-fill onto the rest of each run.
        SummaryDataBase.loc[(mod, (SummaryDataBase['New Run?']==1)), 'Run Number'] = np.arange(1, len(SummaryDataBase.loc[mod, SummaryDataBase['New Run?']==1,:])+1)
        SummaryDataBase.loc[mod,['Run Number','Start Time','Time Difference','New Run?']] = SummaryDataBase.loc[mod,['Run Number','Start Time','Time Difference','New Run?']].fillna(method='ffill').values

    # One 0/1 indicator column per 'Target Result' value so that sums give counts.
    SummaryDataBase['NotAmp'] = np.where(SummaryDataBase['Target Result']=='TargetNotAmplified',1,0)
    SummaryDataBase['Amp'] = np.where(SummaryDataBase['Target Result']=='TargetAmplified',1,0)
    SummaryDataBase['NoResult'] = np.where(SummaryDataBase['Target Result']=='NoResult',1,0)
    SummaryDataBase['IND'] = np.where(SummaryDataBase['Target Result']=='TargetIndeterminate',1,0)
    SummaryDataBase['UNR'] = np.where(SummaryDataBase['Target Result']=='TargetUnresolved',1,0)
    SummaryDataBase['Abort'] = np.where(SummaryDataBase['Target Result']=='TargetAborted',1,0)
    SummaryDataBase['Sample Size'] = SummaryDataBase[['NotAmp','Amp','NoResult','IND','UNR']].sum(axis=1)

    SummaryDataBase.reset_index(inplace=True)
    SummaryDataBase.set_index(['XPCR Module Serial','Run Number'],inplace=True)
    SummaryDataBase.loc[:,stats] =  SummaryDataBase.loc[:,stats].astype(float)

    # Aggregations: run time span, result counts, and mean/std/count per statistic.
    agg_dict = {}
    agg_dict['Start Time'] = ['first','last']
    for stat in results_Stats:
        agg_dict[stat] = ['sum']
    for stat in stats:
        agg_dict[stat] = ['mean','std','count']

    SummaryDataBase_by_run_agg = SummaryDataBase[stats+results_Stats+['Start Time']].groupby(level=[0,1]).agg(agg_dict)

    SummaryDataBase_by_mod_agg = SummaryDataBase[stats+results_Stats+['Start Time']].groupby(level=[0]).agg(agg_dict)

    # Per-module totals join the per-run rows under the pseudo run number 'Combined'.
    SummaryDataBase_by_mod_agg['Run Number'] = 'Combined'
    SummaryDataBase_by_mod_agg.set_index('Run Number',append=True,inplace=True)

    SummaryDataBase_agg = pd.concat([SummaryDataBase_by_run_agg,SummaryDataBase_by_mod_agg],axis=0).sort_index()

    SummaryDataBase_agg[('Ct Calls','')] = SummaryDataBase_agg[('Ct','count')]
    for param in SummaryDataBase_agg.columns.unique(0):
        if param in stats:
            SummaryDataBase_agg[(param, '%CV')] = SummaryDataBase_agg[(param, 'std')] / SummaryDataBase_agg[(param, 'mean')]
            SummaryDataBase_agg.drop([(param,'count')],axis=1,inplace=True)
    # Sorting makes the sub-columns of each statistic come out as %CV, mean, std.
    SummaryDataBase_agg.sort_index(axis=1,inplace=True)
    SummaryDataBase_agg.set_index('Ct Calls',append=True,inplace=True)
    SummaryDataBase_agg.columns = pd.MultiIndex.from_tuples([(channel,x,y) for x,y in SummaryDataBase_agg.columns])
    SummaryDataBaseagg = SummaryDataBase_agg.loc[:,channel]
    SummaryDataBaseagg[('% Amp', '')] = SummaryDataBaseagg[('Amp', 'sum')] / (SummaryDataBaseagg[('Amp', 'sum')] + SummaryDataBaseagg[('NotAmp', 'sum')])
    # Column selection order; the positional relabel below must list labels in this same order.
    SummaryDataBaseagg = SummaryDataBaseagg.loc[:,['Start Time', '% Amp', 'Amp','NotAmp','NoResult', 'IND', 'UNR', 'Sample Size', 'Ct', 'End Point Fluorescence', 'Max Peak Height', 'EPR']].droplevel(2,axis=0)
    SummaryDataBaseagg.columns = pd.MultiIndex.from_tuples([('Start Time', 'first sample'),
                                                 ('Start Time', 'last sample'),
                                                 ('% AMP', ''),
                                                 ('Amp', '') ,
                                                 ('NotAmp', '') ,
                                                 ('NoResult', ''),
                                                 ('IND', '') ,
                                                 ('UNR', '') ,
                                                 ('Sample Size', ''),
                                                 ('Ct', '%CV') ,
                                                 ('Ct', 'mean') ,
                                                 ('Ct', 'std') ,
                                                 ('End Point Fluorescence', '%CV') ,
                                                 ('End Point Fluorescence', 'mean') ,
                                                 ('End Point Fluorescence', 'std'),
                                                 ('Max Peak Height', '%CV') ,
                                                 ('Max Peak Height', 'mean') ,
                                                 ('Max Peak Height', 'std') ,
                                                 ('EPR', '%CV') ,
                                                 ('EPR', 'mean') ,
                                                 ('EPR', 'std')])
    return SummaryDataBaseagg, SummaryDataBase

# Per-channel frames keyed by channel name. Only RawDataDict_PCR is filled below;
# the Normalized / 2nd loads are commented out.
NormDataDict_PCR = {}
RawDataDict_PCR = {}
SecondDataDict_PCR = {}
# Query window (exclusive on both ends in the SQL) and the [Result Code] substring to match.
stardate = '2021-05-13'
enddate = '2021-10-05'
assayname = 'FRS1'
##getSQLData_COC args (Result Code substring, Start Date, End Date, N500 Serial Number, Assay Version, cartridge lot 1, cartridge lot 2, cartridge lot 3, cartridge lot 4)
COC_PCR = getSQLData_COC(assayname, stardate, enddate, '', '', '', '', '', '')
##Excluding V1045
#COC_PCR = COC_PCR[(COC_PCR['Extraction Module Serial'] != 'V1045')]

##Filter by test-guid
# `mylist` is the flat list of 'Test Guid' values read from the RawDataExport workbooks.
filter1 = COC_PCR['Test Guid'].isin(mylist)

COC_PCR = COC_PCR[filter1]

##getSQLData_Channel args (Channel, Data Type, Result Code substring, Start Date, End Date, N500 Serial Number, Assay Version, cartridge lot 1, cartridge lot 2, cartridge lot 3, cartridge lot 4)
for channel in ['Green','Yellow', 'Orange', 'Far_Red', 'Red']:
    
#     NormDataDict_PCR[channel] = getSQLData_Channel(channel,'Normalized',assayname, stardate, enddate, '', '', '', '', '', '')
    RawDataDict_PCR[channel] = getSQLData_Channel(channel,'Raw', assayname, stardate, enddate, '', '', '', '', '', '')
#     SecondDataDict_PCR[channel]= getSQLData_Channel(channel,'2nd',assayname, stardate, enddate, '', '', '', '', '', '')
    ##Excluding V1045
    #NormDataDict_PCR[channel] = NormDataDict_PCR[channel][(NormDataDict_PCR[channel]['XPCR Module Serial'] != 'V1045')]
    #RawDataDict_PCR[channel] = RawDataDict_PCR[channel][(RawDataDict_PCR[channel]['XPCR Module Serial'] != 'V1045')]
    #SecondDataDict_PCR[channel] = SecondDataDict_PCR[channel][(SecondDataDict_PCR[channel]['XPCR Module Serial'] != 'V1045')]
    ##Excluding V1045    
#     NormDataDict_PCR[channel].set_index(['XPCR Module Serial','Cartridge Lane',  'Test Guid'],inplace=True)
    # Index by module / lane / test so generateSummaryTable can iterate index level 0 as the module.
    RawDataDict_PCR[channel].set_index(['XPCR Module Serial','Cartridge Lane', 'Test Guid'],inplace=True)
#     SecondDataDict_PCR[channel].set_index(['XPCR Module Serial','Cartridge Lane', 'Test Guid'],inplace=True)
#     NormDataDict_PCR[channel].sort_index(inplace=True)

Query: Select * from [dbo].[NeuMoDx_System_Master_Table] where ([Result Code] like '%FRS1%') and [Start Date Time]>'2021-05-13' and [Start Date Time] < '2021-10-05' and [N500 Serial Number] like '%%' and [Assay Version] like '%%' and ([Pcr Cartridge Barcode] like '%%' OR [Pcr Cartridge Barcode] like '%%' OR [Pcr Cartridge Barcode] like '%%' OR [Pcr Cartridge Barcode] like '%%')
Query: Select * from [dbo].[NeuMoDx_Green_Raw_Master]where ([Result Code] like '%FRS1%') and [Start Time]>'2021-05-13' and [Start Time] < '2021-10-05' and [N500 Serial Number] like '%%' and [Assay Version] like '%%' and ([Cartridge Barcode] like '%%' OR [Cartridge Barcode] like '%%' OR [Cartridge Barcode] like '%%' OR [Cartridge Barcode] like '%%')
Query: Select * from [dbo].[NeuMoDx_Yellow_Raw_Master]where ([Result Code] like '%FRS1%') and [Start Time]>'2021-05-13' and [Start Time] < '2021-10-05' and [N500 Serial Number] like '%%' and [Assay Version] like '%%' and ([Cartridge Barcode] like '%%' OR [Cartridge Ba

In [3]:
##Define Lots to Plot
# The lot is taken as characters 18-23 of the cartridge barcode string.
COC_PCR['Cartridge Lot'] = COC_PCR['Pcr Cartridge Barcode'].str[18:24]
lots = COC_PCR['Cartridge Lot'].unique().tolist()

#Define Channels to Plot
channels = ['Yellow', 'Green', 'Orange','Far_Red','Red']

#Define Targets to Include in Plots
#target_names = ['FluA', 'FluB', 'SARS-CoV-2', 'RSV', 'SPC2']

#Define Target / Channel Combinations
#targets = {'Green':'FluB',
          # 'Yellow':'FluA',
          # 'Orange':'SARS CoV-2',
          # 'Far_Red':'RSV',
          # 'Red':'SPC2'}

# Test Guids to leave out of the per-channel frames (none at present).
exclude_list = []

channelDataDict = {}
channelData = {}    
# NOTE(review): `mods` is not read anywhere in the cells visible here — confirm it is still needed.
mods = ['V949']
# Keep only rows whose module serial and lane (index levels 0 and 1) are not null.
COC_PCR = COC_PCR.loc[([x for x in COC_PCR.index.unique(0) if pd.isnull(x)==False],
                       [x for x in COC_PCR.index.unique(1) if pd.isnull(x)==False]),:]

#DF = generateLotTable(COC_PCR).join(generateSummaryTable(RawDataDict_PCR['Yellow'],'Yellow'))
channelData = {}    

# Each entry is the (summary, detail) tuple returned by generateSummaryTable.
channelData['Green'] = generateSummaryTable(RawDataDict_PCR['Green'], 'Green')
channelData['Yellow'] =  generateSummaryTable(RawDataDict_PCR['Yellow'], 'Yellow')
channelData['Orange'] =  generateSummaryTable(RawDataDict_PCR['Orange'], 'Orange')
channelData['Far_Red'] =  generateSummaryTable(RawDataDict_PCR['Far_Red'], 'Far_Red')
channelData['Red'] =  generateSummaryTable(RawDataDict_PCR['Red'], 'Red')

for channel in channels:

    # Element [1] is the per-test detail frame; add the lot parsed from its barcode.
    channelData[channel][1].loc[:,'Cartridge Lot'] = channelData[channel][1].loc[:,'Cartridge Barcode'].str[18:24]

    channelplotdata = channelData[channel][1]#.join(treatment_key)
    channelplotdata = channelplotdata.loc[~channelplotdata['Test Guid'].isin(exclude_list),:]
    channelplotdata.reset_index(inplace=True)
    channelplotdata.set_index(['XPCR Module Serial', 'Run Number', 'Cartridge Lane'],inplace=True)
    
    # Keep the plotted measurements and put the channel name on a top column level.
    channelDataDict[channel] = channelplotdata.set_index(['Cartridge Lot'],append=True).loc[:,['Ct', 'Blank Reading', 'Dark Reading', 'Readings 5', 'End Point Fluorescence', 'Max Peak Height', 'EPR', 'Target Result', 'Test Guid', 'Replicate Result']]
    channelDataDict[channel].columns = pd.MultiIndex.from_product([[channel],channelDataDict[channel].columns])
    
    ##Remove Entries that do not have a XPCR Module (i.e. No Result Samples)
    # NOTE(review): this drop runs after channelDataDict[channel] was built from a
    # separate frame, and channelplotdata is not read again in this loop, so it
    # looks like it has no effect on `data` below — confirm intent.
    if np.nan in channelplotdata.index.unique(0):
        channelplotdata.drop(np.nan,inplace=True,axis=0)
        
# Stack the per-channel frames row-wise (pd.concat default axis=0) and sort by index.
data = pd.concat([channelDataDict[df] for df in channelDataDict]).sort_index()


In [157]:
# Derive lot / serial columns by slicing fixed character positions out of the
# barcode strings: (new column, source barcode column, character span).
for _derived_col, _barcode_col, _span in (
    ('Cartridge Lot', 'Pcr Cartridge Barcode', slice(18, 24)),
    ('Cartridge Serial', 'Pcr Cartridge Barcode', slice(28, 32)),
    ('Extraction Plate Lot', 'Capture Plate Barcode', slice(18, 24)),
    ('Test Strip Lot', 'Test Strip NeuMoDx Barcode', slice(18, 24)),
    ('Buffer Lot', 'Buffer Barcode', slice(18, 24)),
    ('Wash Lot', 'Wash Reagent Barcode', slice(18, 24)),
    ('Release Lot', 'Release Reagent Barcode', slice(18, 24)),
):
    COC_PCR[_derived_col] = COC_PCR[_barcode_col].str[_span]


##Get Target Results from data
# Re-index `data` by the Yellow channel's 'Test Guid' column and keep every
# channel's 'Target Result' column.
TargetResults = data.reset_index().set_index(('Yellow', 'Test Guid')).loc[:,(slice(None),'Target Result')]
# Flat names are assigned by position, so they are only right if the selected
# columns arrive as Yellow, Green, Orange, Far_Red, Red — TODO confirm against data.columns.
TargetResults.columns = ['Yellow Target Result', 'Green Target Result', 'Orange Target Result', 'Far Red Target Result', 'Red Target Result']
# Shorten the result strings to the abbreviations used in the exported table.
TargetResults.replace({'TargetAmplified':'AMP',
                       'TargetNotAmplified':'NotAmp',
                       'TargetIndeterminate':'IND',
                       'TargetUnresolved':'UNR',
                       'NoResult':'NR'},inplace=True)
#TargetResults.index.names = ['Test Guid']

# Same selection for the 'EPR' columns (same positional-naming caveat as above).
EPRs = data.reset_index().set_index(('Yellow', 'Test Guid')).loc[:,(slice(None),'EPR')]
EPRs.columns = ['Yellow Target EPR', 'Green Target EPR', 'Orange Target EPR', 'Far Red Target EPR', 'Red Target EPR']
EPRs.index.names = ['Test Guid']
# Same selection for the 'Max Peak Height' columns.
MPHs = data.reset_index().set_index(('Yellow', 'Test Guid')).loc[:,(slice(None),'Max Peak Height')]
MPHs.columns = ['Yellow Target Max Peak Height', 'Green Target Max Peak Height', 'Orange Target Max Peak Height', 'Far Red Target Max Peak Height', 'Red Target Max Peak Height']
MPHs.index.names = ['Test Guid']

# Build the line listing: COC_PCR re-indexed by 'Test Guid', joined on that index
# with the per-channel result / EPR / max-peak-height tables, reduced to the
# report columns and sorted by start time.
# NOTE(review): the '<channel> Target Ct', '<channel> Target EP' and
# '<channel> Target Flag' columns are not created in this cell, so they
# presumably come from COC_PCR itself — confirm.
LineData = COC_PCR.reset_index().set_index('Test Guid').join(TargetResults).join(EPRs).join(MPHs).loc[:,[ 'N500 Serial Number', 'Sample ID',
                       'Start Date Time', 'Cartridge Lot', 'Buffer Lot', 'Extraction Plate Lot', 'Test Strip Lot', 'Wash Lot', 'Release Lot',  'Overall Result',
                       'Yellow Target Result', 'Yellow Target Ct', 'Yellow Target EP', 'Yellow Target EPR', 'Yellow Target Max Peak Height', 'Yellow Target Flag',
                       'Green Target Result', 'Green Target Ct', 'Green Target EP', 'Green Target EPR', 'Green Target Max Peak Height', 'Green Target Flag',
                       'Orange Target Result', 'Orange Target Ct', 'Orange Target EP', 'Orange Target EPR', 'Orange Target Max Peak Height', 'Orange Target Flag',
                       'Far Red Target Result','Far Red Target Ct', 'Far Red Target EP', 'Far Red Target EPR', 'Far Red Target Max Peak Height', 'Far Red Target Flag',
                       'Red Target Result', 'Red Target Ct', 'Red Target EP','Red Target EPR', 'Red Target Max Peak Height', 'Red Target Flag']]\
                        .sort_values(['Start Date Time'])

# Remove rows that are exact duplicates across all columns.
LineData.drop_duplicates(inplace = True)

# Round every channel the same way: Ct and EPR to 2 decimals, EP and Max Peak
# Height to whole numbers. The spec is generated per channel because the
# hand-written version repeated the Green 'Max Peak Height' / 'EPR' keys where
# the Orange ones belonged, which left the Orange columns unrounded.
LineData = LineData.round({
    '{} Target {}'.format(channel_label, metric): decimals
    for channel_label in ('Yellow', 'Green', 'Orange', 'Far Red', 'Red')
    for metric, decimals in (('Ct', 2), ('EP', 0), ('Max Peak Height', 0), ('EPR', 2))
})

# Regex pattern for each verbose flag description -> its bare numeric code.
# Raw strings keep the `\(` / `\)` escapes intact without relying on Python
# passing unknown escape sequences through. Every value must equal the code
# that starts its pattern (the 5075 entry previously mapped to '5057').
flag_dict = {r'1000 \(Error, User Aborted Test\)':'1000',
            r'1002 \(Error, Insufficient Raw Readings\)':'1002',
            r'1004 \(Informational, Fixed Baseline Used\)':'1004',
            r'1005 \(Informational, End Point Fluorescence Met Failed\)':'1005',
            r'1006 \(Informational, Peak Location Failed\)':'1006',
            r'1012 \(Error, Outlier Removal Failed\)':'1012',
            r'1013 \(Informational, Fill Check Failed\)':'1013',
            r'1020 \(Informational, Peak Not Detected\)':'1020',
            r'1024 \(Error, Starting Fluorescence Exceeded\)':'1024',
            r'1025 \(Informational, Below EPR Threshold\)':'1025',
            r'1029 \(Informational, Repeat Testing Recommended\)':'1029',
            r'1031 \(Error, EPR Threshold Not Met\)':'1031',
            r'1034 \(Informational, Inhibition Detected\)':'1034',
            r'1035 \(Informational, Ct Below Fixed Baseline Start\)':'1035',
            r'1037 \(Informational, A very early amplification was potentially detected in the samples. It may be beneficial to dilute the starting sample 1:1000 and repeat.\)':'1037',
            r'1038 \(Informational, Single Point Normalization Applied\)':'1038',
            r'1039 \(Informational, Overall EPR Threshold Check Failed\)':'1039',
            r'1040 \(Informational, Derivative Data Filter Implemented\)':'1040',
            r'2010 \(Error, System Error PCR Only\)':'2010',
            r'2035 \(Error, User Shutdown\)':'2035',
            r'2300 \(Error, Module Error\)':'2300',
            r'2307 \(Error, Cartridge not detected in XPCR module\)':'2307',
            r'2316 \(Error, Lane Error\)':'2316',
            r'2610 \(Informational, Delays in sample processing\)':'2610',
            r'2618 \(Error, Maximum LhpA Reschedules Attempted\)':'2618',
            r'2619 \(Error, Maximum LhpB Reschedules Attempted\)':'2619',
            r'2620 \(Error, Maximum LhpC Reschedules Attempted\)':'2620',
            r'5075 \(Error, XPCR Module Failure\)':'5075',
            r'7007 \(Error, Instrument movement error.\)':'7007',
            r'7160 \(Warning, Sample aspiration failure\)':'7160',
            r'7161 \(Warning, No sample detected\)':'7161',
            r'7162 \(Warning, Quantity not sufficient\)':'7162',
            r'7163 \(Error, Clot detected\)':'7163',
            r'7340 \(Warning, Sample dispense failure\)':'7340',
            r'7620 \(Warning, Cartridge dispense failure\)':'7620'}
    
# Swap each verbose flag description for its numeric code, one pattern at a
# time, across the whole frame.
for key, short_code in flag_dict.items():
    LineData = LineData.replace(regex=[key], value=short_code)

# Write the line listing without the index column.
LineData.to_csv('Line_Com.csv', encoding='utf-8', index=False)


In [50]:
import re
##Function that returns a list of only the number codes for flags for a given channel            
def shortFlag(flag_dict, flag_col, missing="cool"):
    """Shorten verbose flag descriptions in ``flag_col`` to their numeric codes.

    Parameters
    ----------
    flag_dict : dict
        Maps a regular-expression pattern (a verbose flag description with
        escaped parentheses) to the replacement text (the numeric code).
    flag_col : mutable sequence
        Flag strings, indexed by position 0..len-1 (e.g. a list). It is
        updated in place. Pass ``series.tolist()`` for a pandas column.
    missing : str, optional
        Value written for entries that are ``None`` or NaN. Defaults to the
        placeholder this function has always used.

    Returns
    -------
    The same ``flag_col`` object, after the in-place update.

    The earlier implementation called ``str.replace(..., inplace=True)``, which
    raises ``TypeError`` for any string entry, and it returned nothing.
    """
    for i in range(len(flag_col)):
        entry = flag_col[i]
        # NaN is the only float that differs from itself; treat it like None.
        if entry is None or (isinstance(entry, float) and entry != entry):
            flag_col[i] = missing
            continue
        # Keys are regex patterns, so they need re.sub rather than a literal replace.
        for pattern, code in flag_dict.items():
            entry = re.sub(pattern, code, entry)
        flag_col[i] = entry
    return flag_col

In [154]:
# Work on a copy so LineData itself is left untouched by this cell.
dfm = LineData.copy()
flags = ['Green Target Flag','Yellow Target Flag', 'Orange Target Flag', 'Far Red Target Flag', 'Red Target Flag']

# Regex pattern for each verbose flag description -> its bare numeric code.
# Built once, outside the loop: it does not depend on the flag column. Raw
# strings keep the `\(` / `\)` escapes intact. Every value must equal the code
# that starts its pattern (the 5075 entry previously mapped to '5057').
flag_dict = {r'1000 \(Error, User Aborted Test\)':'1000',
            r'1002 \(Error, Insufficient Raw Readings\)':'1002',
            r'1004 \(Informational, Fixed Baseline Used\)':'1004',
            r'1005 \(Informational, End Point Fluorescence Met Failed\)':'1005',
            r'1006 \(Informational, Peak Location Failed\)':'1006',
            r'1012 \(Error, Outlier Removal Failed\)':'1012',
            r'1013 \(Informational, Fill Check Failed\)':'1013',
            r'1020 \(Informational, Peak Not Detected\)':'1020',
            r'1024 \(Error, Starting Fluorescence Exceeded\)':'1024',
            r'1025 \(Informational, Below EPR Threshold\)':'1025',
            r'1029 \(Informational, Repeat Testing Recommended\)':'1029',
            r'1031 \(Error, EPR Threshold Not Met\)':'1031',
            r'1034 \(Informational, Inhibition Detected\)':'1034',
            r'1035 \(Informational, Ct Below Fixed Baseline Start\)':'1035',
            r'1037 \(Informational, A very early amplification was potentially detected in the samples. It may be beneficial to dilute the starting sample 1:1000 and repeat.\)':'1037',
            r'1038 \(Informational, Single Point Normalization Applied\)':'1038',
            r'1039 \(Informational, Overall EPR Threshold Check Failed\)':'1039',
            r'1040 \(Informational, Derivative Data Filter Implemented\)':'1040',
            r'2010 \(Error, System Error PCR Only\)':'2010',
            r'2035 \(Error, User Shutdown\)':'2035',
            r'2300 \(Error, Module Error\)':'2300',
            r'2307 \(Error, Cartridge not detected in XPCR module\)':'2307',
            r'2316 \(Error, Lane Error\)':'2316',
            r'2610 \(Informational, Delays in sample processing\)':'2610',
            r'2618 \(Error, Maximum LhpA Reschedules Attempted\)':'2618',
            r'2619 \(Error, Maximum LhpB Reschedules Attempted\)':'2619',
            r'2620 \(Error, Maximum LhpC Reschedules Attempted\)':'2620',
            r'5075 \(Error, XPCR Module Failure\)':'5075',
            r'7007 \(Error, Instrument movement error.\)':'7007',
            r'7160 \(Warning, Sample aspiration failure\)':'7160',
            r'7161 \(Warning, No sample detected\)':'7161',
            r'7162 \(Warning, Quantity not sufficient\)':'7162',
            r'7163 \(Error, Clot detected\)':'7163',
            r'7340 \(Warning, Sample dispense failure\)':'7340',
            r'7620 \(Warning, Cartridge dispense failure\)':'7620'}

# The replacement covers the whole frame, so one pass handles every flag column;
# repeating it per flag column (as before) changed nothing after the first pass.
for key in flag_dict:
    dfm = dfm.replace(regex=[key], value=flag_dict[key])

# Preview the first ten shortened values of each flag column.
for flag in flags:
    print(dfm[flag].head(10))
    

9a4fb668-fdb3-eb11-8660-1866da4c156d    None
a44fb668-fdb3-eb11-8660-1866da4c156d    None
ae4fb668-fdb3-eb11-8660-1866da4c156d    None
b84fb668-fdb3-eb11-8660-1866da4c156d    None
c24fb668-fdb3-eb11-8660-1866da4c156d    None
cc4fb668-fdb3-eb11-8660-1866da4c156d    None
d64fb668-fdb3-eb11-8660-1866da4c156d    None
e04fb668-fdb3-eb11-8660-1866da4c156d    None
e8edbee1-fdb3-eb11-8660-1866da4c156d    None
f2edbee1-fdb3-eb11-8660-1866da4c156d    None
Name: Green Target Flag, dtype: object
9a4fb668-fdb3-eb11-8660-1866da4c156d    1020
a44fb668-fdb3-eb11-8660-1866da4c156d    1020
ae4fb668-fdb3-eb11-8660-1866da4c156d    1020
b84fb668-fdb3-eb11-8660-1866da4c156d    1020
c24fb668-fdb3-eb11-8660-1866da4c156d    1020
cc4fb668-fdb3-eb11-8660-1866da4c156d    1020
d64fb668-fdb3-eb11-8660-1866da4c156d    1020
e04fb668-fdb3-eb11-8660-1866da4c156d    1020
e8edbee1-fdb3-eb11-8660-1866da4c156d    1020
f2edbee1-fdb3-eb11-8660-1866da4c156d    1020
Name: Yellow Target Flag, dtype: object
9a4fb668-fdb3-eb11-86

In [126]:
# Regex pattern for each verbose flag description -> its bare numeric code.
# Every value must equal the code that starts its pattern (the 5075 entry
# previously mapped to '5057').
flag_dict = {r'1000 \(Error, User Aborted Test\)':'1000',
                r'1002 \(Error, Insufficient Raw Readings\)':'1002',
                r'1004 \(Informational, Fixed Baseline Used\)':'1004',
                r'1005 \(Informational, End Point Fluorescence Met Failed\)':'1005',
                r'1006 \(Informational, Peak Location Failed\)':'1006',
                r'1012 \(Error, Outlier Removal Failed\)':'1012',
                r'1013 \(Informational, Fill Check Failed\)':'1013',
                r'1020 \(Informational, Peak Not Detected\)':'1020',
                r'1024 \(Error, Starting Fluorescence Exceeded\)':'1024',
                r'1025 \(Informational, Below EPR Threshold\)':'1025',
                r'1029 \(Informational, Repeat Testing Recommended\)':'1029',
                r'1031 \(Error, EPR Threshold Not Met\)':'1031',
                r'1034 \(Informational, Inhibition Detected\)':'1034',
                r'1035 \(Informational, Ct Below Fixed Baseline Start\)':'1035',
                r'1037 \(Informational, A very early amplification was potentially detected in the samples. It may be beneficial to dilute the starting sample 1:1000 and repeat.\)':'1037',
                r'1038 \(Informational, Single Point Normalization Applied\)':'1038',
                r'1039 \(Informational, Overall EPR Threshold Check Failed\)':'1039',
                r'1040 \(Informational, Derivative Data Filter Implemented\)':'1040',
                r'2010 \(Error, System Error PCR Only\)':'2010',
                r'2035 \(Error, User Shutdown\)':'2035',
                r'2300 \(Error, Module Error\)':'2300',
                r'2307 \(Error, Cartridge not detected in XPCR module\)':'2307',
                r'2316 \(Error, Lane Error\)':'2316',
                r'2610 \(Informational, Delays in sample processing\)':'2610',
                r'2618 \(Error, Maximum LhpA Reschedules Attempted\)':'2618',
                r'2619 \(Error, Maximum LhpB Reschedules Attempted\)':'2619',
                r'2620 \(Error, Maximum LhpC Reschedules Attempted\)':'2620',
                r'5075 \(Error, XPCR Module Failure\)':'5075',
                r'7007 \(Error, Instrument movement error.\)':'7007',
                r'7160 \(Warning, Sample aspiration failure\)':'7160',
                r'7161 \(Warning, No sample detected\)':'7161',
                r'7162 \(Warning, Quantity not sufficient\)':'7162',
                r'7163 \(Error, Clot detected\)':'7163',
                r'7340 \(Warning, Sample dispense failure\)':'7340',
                r'7620 \(Warning, Cartridge dispense failure\)':'7620'}

In [155]:
# Show the flag-shortened copy of LineData; the notebook's rich display
# truncates the middle rows and columns of a frame this size.
dfm

Unnamed: 0,N500 Serial Number,Sample ID,Start Date Time,Cartridge Lot,Buffer Lot,Extraction Plate Lot,Test Strip Lot,Wash Lot,Release Lot,Overall Result,...,Far Red Target EP,Far Red Target EPR,Far Red Target Max Peak Height,Far Red Target Flag,Red Target Result,Red Target Ct,Red Target EP,Red Target EPR,Red Target Max Peak Height,Red Target Flag
9a4fb668-fdb3-eb11-8660-1866da4c156d,N000012,G30001,2021-05-13 11:10:59.927,106631,110371,109630,11240X,112142,110027,Positive,...,,,,1020,NotAmp,26.63,6295.0,1.15,,
a44fb668-fdb3-eb11-8660-1866da4c156d,N000012,G30004,2021-05-13 11:11:00.257,106631,110371,109630,11240X,112142,110027,Positive,...,,,,"1020, 1039",NotAmp,26.89,5609.0,1.12,,
ae4fb668-fdb3-eb11-8660-1866da4c156d,N000012,G30007,2021-05-13 11:11:00.307,106631,110371,109630,11240X,112142,110027,Positive,...,,,,"1020, 1039",NotAmp,27.28,3589.0,1.10,,1040
b84fb668-fdb3-eb11-8660-1866da4c156d,N000012,G30010,2021-05-13 11:11:00.350,106631,110371,109630,11240X,112142,110027,Positive,...,,,,1020,NotAmp,27.02,4055.0,1.10,,
c24fb668-fdb3-eb11-8660-1866da4c156d,N000012,H30001,2021-05-13 11:11:00.393,106631,109905,109630,11240X,112142,110027,Positive,...,,,,1020,NotAmp,26.26,4876.0,1.14,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2478f7a6-6025-ec11-85e0-5cf3709f05c3,12000067,H50013,2021-10-04 18:16:03.193,114289,112653,113380,11501Z,114869,114953,Negative,...,,,,1020,NotAmp,26.81,3893.0,1.10,,
241cb177-6125-ec11-85e0-5cf3709f05c3,12000067,H50004,2021-10-04 18:21:52.700,114289,112653,113380,11501Z,114869,114953,Positive,...,,,,"1020, 1039",NotAmp,27.07,3220.0,1.06,,
2e1cb177-6125-ec11-85e0-5cf3709f05c3,12000067,H50009,2021-10-04 18:21:52.743,114289,112653,113380,11501Z,114869,114953,Positive,...,,,,1020,NotAmp,25.77,3181.0,1.08,,
381cb177-6125-ec11-85e0-5cf3709f05c3,12000067,H50013,2021-10-04 18:21:52.777,114289,112653,113380,11501Z,114869,114953,Positive,...,,,,"1020, 1039",NotAmp,26.32,3269.0,1.08,,
