In [1]:
######################################
#                                    #
# antimicrobial_imputation_TOH.ipynb #
# Created 2024-05-19                 #
# Akhil Garg, akhil@akhilgarg.ca     #
#                                    #
######################################

import pandas as pd
#pd.options.mode.chained_assignment = None  # default='warn'

# Data input and processing

In [2]:
# The workbook loaded below was edited by hand beforehand. Manual changes:
#   - added a "FINAL" suffix to columns (some had trailing spaces to remove)
#   - converted the _PRIOR suffix to _PGNO
#   - changed GN_PRIOR_POSITIVE to PriorGNOrg
#   - renamed some antibiotics:
#       AMOXICILLIN-CLAVULANIC ACID, CEFTOLOZANE / TAZOBACTAM,
#       PIPERACILLIN / TAZOBACTAM, TRIMETHOPRIM/SULFA
#   - added blank MOXIFLOXACIN_FINAL and MOXIFLOXACIN_PGNO columns
#   - renamed CULTURE_RESULT to FINAL

# Read the edited workbook into the working frame and display it
data = pd.read_excel(io='2024-05-20 AMR_data_TOH.xlsx')
data

Unnamed: 0,AgeCat,Sex,CULTURE_DESCRIPTION,FINAL,ADMIT_SVC,Acquisition,RecentHospitalization,ClinicalESBL,PriorAMG,PriorCarbapenem,...,CIPROFLOXACIN_PGNO,ERTAPENEM_PGNO,GENTAMICIN_PGNO,LEVOFLOXACIN_PGNO,MOXIFLOXACIN_PGNO,MEROPENEM_PGNO,NITROFURANTOIN_PGNO,PIPERACILLIN / TAZOBACTAM_PGNO,TRIMETHOPRIM/SULFA_PGNO,TOBRAMYCIN_PGNO
0,85-90,Female,URINE QUANTITATIVE CULTURE,Citrobacter freundii complex,Surgical,Community,0,0,0,0,...,,,,,,,,,,
1,85-90,Female,ROUTINE CULTURE (AEROBIC SWAB),Pseudomonas aeruginosa,Medical,Community,1,0,0,0,...,,,,,,,,,,
2,<40,Female,URINE QUANTITATIVE CULTURE,Escherichia coli,Surgical,Community,0,0,0,0,...,,,,,,,,,,
3,>90,Male,URINE QUANTITATIVE CULTURE,Klebsiella pneumoniae,Medical,Community,1,0,0,0,...,,,,,,,,,,
4,65-70,Male,BLOOD CULTURE,Klebsiella (Enterobacter) aerogenes,Surgical,Community,0,0,0,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1563,70-75,Male,BLOOD CULTURE,Klebsiella pneumoniae,Medical,Community,0,0,0,0,...,,,,,,,,,,
1564,70-75,Male,BLOOD CULTURE,Klebsiella pneumoniae,Medical,Community,0,0,0,0,...,,,,,,,,,,
1565,55-60,Male,AEROBIC/ANAEROBIC CULTURE,Enterobacter cloacae complex,Surgical,Hospital,1,0,0,0,...,Susceptible,,Susceptible,,,,,Susceptible,Susceptible,Susceptible
1566,70-75,Male,URINE QUANTITATIVE CULTURE,Proteus mirabilis,Medical,Community,0,0,0,0,...,,,,,,,,,,


In [3]:
# Column headers: remove leading/trailing whitespace
data = data.rename(columns=str.strip)

# Recode Female -> F and Male -> M (the replacement is applied to the whole frame)
sex_codes = {'Female': 'F', 'Male': 'M'}
data = data.replace(sex_codes)

# Upper-case the Susceptible/Intermediate/Resistant results in every column from
# AMIKACIN_FINAL through TOBRAMYCIN_PGNO; non-string cells pass through unchanged
result_columns = slice('AMIKACIN_FINAL', 'TOBRAMYCIN_PGNO')
data.loc[:, result_columns] = data.loc[:, result_columns].map(
    lambda cell: cell.upper() if type(cell) is str else cell)

# Blank PriorGNOrg entries become zeros
data['PriorGNOrg'] = data['PriorGNOrg'].fillna(value=0)

# Imputation

In [4]:
# Treat INTERMEDIATE as RESISTANT: every cell equal to 'INTERMEDIATE' anywhere in
# the frame is recoded to 'RESISTANT'
data = data.replace(to_replace='INTERMEDIATE', value='RESISTANT')

In [5]:
# Placeholder columns that the imputation cells fill in.
# One '<Drug>_FINAL_imp' column is created per drug below, initialised to 'no_data'.
# No '<Drug>_PGNO_imp' placeholders are created in this cell.
imputed_drug_stems = (
    'Penicillin', 'Amoxiclav', 'Ampicillin', 'Cloxacillin',
    'Piptaz',
    'Cefazolin', 'Ceftriaxone', 'Ceftazidime',
    'Ertapenem', 'Meropenem',
    'Ciprofloxacin', 'Levofloxacin', 'Moxifloxacin',
    'Daptomycin', 'Vancomycin', 'Linezolid',
    'Amikacin', 'Tobramycin', 'Gentamicin',
    'Clindamycin', 'Doxycycline',
    'Erythromycin', 'TMPSMX',
)
final_imp_columns = [stem + '_FINAL_imp' for stem in imputed_drug_stems]

data.loc[:, final_imp_columns] = 'no_data'
data

Unnamed: 0,AgeCat,Sex,CULTURE_DESCRIPTION,FINAL,ADMIT_SVC,Acquisition,RecentHospitalization,ClinicalESBL,PriorAMG,PriorCarbapenem,...,Daptomycin_FINAL_imp,Vancomycin_FINAL_imp,Linezolid_FINAL_imp,Amikacin_FINAL_imp,Tobramycin_FINAL_imp,Gentamicin_FINAL_imp,Clindamycin_FINAL_imp,Doxycycline_FINAL_imp,Erythromycin_FINAL_imp,TMPSMX_FINAL_imp
0,85-90,F,URINE QUANTITATIVE CULTURE,Citrobacter freundii complex,Surgical,Community,0,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
1,85-90,F,ROUTINE CULTURE (AEROBIC SWAB),Pseudomonas aeruginosa,Medical,Community,1,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
2,<40,F,URINE QUANTITATIVE CULTURE,Escherichia coli,Surgical,Community,0,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
3,>90,M,URINE QUANTITATIVE CULTURE,Klebsiella pneumoniae,Medical,Community,1,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
4,65-70,M,BLOOD CULTURE,Klebsiella (Enterobacter) aerogenes,Surgical,Community,0,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1563,70-75,M,BLOOD CULTURE,Klebsiella pneumoniae,Medical,Community,0,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
1564,70-75,M,BLOOD CULTURE,Klebsiella pneumoniae,Medical,Community,0,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
1565,55-60,M,AEROBIC/ANAEROBIC CULTURE,Enterobacter cloacae complex,Surgical,Hospital,1,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data
1566,70-75,M,URINE QUANTITATIVE CULTURE,Proteus mirabilis,Medical,Community,0,0,0,0,...,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data,no_data


In [6]:
antibiotic_list = ['Cefazolin','Ceftriaxone','Ceftazidime','Piptaz','Meropenem','Ciprofloxacin','Tobramycin','TMPSMX']


# For TOH data, there is no imputation of prior gram negative organisms, so the
# '<Drug>_PGNO_imp' columns are straight copies of the original '_PGNO' columns,
# with blanks replaced by 'no_data'.
pgno_source_columns = {
    'Meropenem':     'MEROPENEM_PGNO',
    'Piptaz':        'PIPERACILLIN / TAZOBACTAM_PGNO',
    'Ceftriaxone':   'CEFTRIAXONE_PGNO',
    'Ceftazidime':   'CEFTAZIDIME_PGNO',
    'Cefazolin':     'CEFAZOLIN_PGNO',
    'Ciprofloxacin': 'CIPROFLOXACIN_PGNO',
    'Tobramycin':    'TOBRAMYCIN_PGNO',
    'TMPSMX':        'TRIMETHOPRIM/SULFA_PGNO',
}
for short_name, source_column in pgno_source_columns.items():
    data[short_name + '_PGNO_imp'] = data[source_column].fillna('no_data')

In [7]:
# Sanity check: tally the values copied into the prior-organism meropenem column
data.loc[:, 'Meropenem_PGNO_imp'].value_counts()

Meropenem_PGNO_imp
no_data        1484
SUSCEPTIBLE      79
RESISTANT         5
Name: count, dtype: int64

In [8]:
# Imputation rules for penicillin
#
# Fills the 'Penicillin_FINAL_imp' column from the organism name in 'FINAL' and from
# the results of related antibiotics. Rules run top to bottom and a later rule
# overwrites an earlier one for the same row; the lab's own penicillin result is
# applied last and therefore takes precedence.

drug = 'Penicillin'
for suffix in ['_PGNO', '_FINAL']:

    # For the TOH dataset, there is no imputation for the previous gram negative organism
    # (original author's note: gram positives are ignored entirely), so the '_PGNO'
    # pass is skipped.
    if suffix == '_PGNO':
        continue
    organism = 'FINAL'

    target = drug + suffix + '_imp'   # column that receives the imputed result
    species = data[organism]          # organism name of each isolate

    # Organisms imputed as resistant regardless of other results
    for exact_name in ('Escherichia coli', 'Stenotrophomonas maltophilia', 'Bacteroides fragilis'):
        data.loc[species == exact_name, target] = 'RESISTANT'
    for genus in ('Klebsiella', 'Pseudomonas', 'Proteus', 'Enterobacter', 'Morganella',
                  'Citrobacter', 'Serratia', 'Acinetobacter', 'Candida'):
        data.loc[species.str.startswith(genus), target] = 'RESISTANT'

    data.loc[species == 'Enterococcus faecium', target] = 'RESISTANT'

    # Staph: resistant if any of the marker antibiotics is resistant.
    # The rule is skipped when any marker column is absent (antibiotic not tested).
    staph_markers = ['CEFAZOLIN'+suffix, 'CLOXACILLIN'+suffix, 'AMPICILLIN'+suffix]
    if set(staph_markers).issubset(data.columns):
        data.loc[species.str.startswith('Staphylococcus') &
                 (data[staph_markers] == 'RESISTANT').any(axis=1),
                 target] = 'RESISTANT'

    # Strep
    ceftriaxone_susceptible = data['CEFTRIAXONE'+suffix] == 'SUSCEPTIBLE'
    data.loc[species.str.startswith('Streptococcus'), target] = 'SUSCEPTIBLE'
    data.loc[(species == 'Streptococcus pneumoniae') & ceftriaxone_susceptible,
             target] = 'SUSCEPTIBLE'
    # Literal, case-insensitive match on 'viridans', as in the other imputation cells
    data.loc[species.str.contains('viridans', case=False, regex=False) & ceftriaxone_susceptible,
             target] = 'SUSCEPTIBLE'

    # Actinomyces
    data.loc[species.str.startswith('Actinomyces') &
             (data['AMPICILLIN'+suffix] == 'SUSCEPTIBLE'),
             target] = 'SUSCEPTIBLE'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # The three result values are mutually exclusive per row, so their order is irrelevant.
    lab_result = data['PENICILLIN'+suffix]
    for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[lab_result == result, target] = result

In [9]:
# Imputation rules for cloxacillin
#
# Fills the 'Cloxacillin_FINAL_imp' column from the organism name in 'FINAL' and from
# the results of related antibiotics. Rules run top to bottom and a later rule
# overwrites an earlier one for the same row; the lab's own cloxacillin result
# (when that column exists) is applied last.

drug = 'Cloxacillin'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix == '_PGNO':
        continue
    organism = 'FINAL'

    target = drug + suffix + '_imp'   # column that receives the imputed result
    species = data[organism]          # organism name of each isolate

    # Organisms imputed as resistant regardless of other results
    for exact_name in ('Escherichia coli', 'Listeria monocytogenes', 'Stenotrophomonas maltophilia'):
        data.loc[species == exact_name, target] = 'RESISTANT'
    # Citrobacter is matched by genus prefix: the dataset names the organism
    # 'Citrobacter freundii complex', which an exact match on 'Citrobacter freundii' misses.
    for genus in ('Citrobacter', 'Klebsiella', 'Pseudomonas', 'Proteus', 'Enterobacter',
                  'Morganella', 'Acinetobacter', 'Bacteroides', 'Enterococcus', 'Candida'):
        data.loc[species.str.startswith(genus), target] = 'RESISTANT'

    # Staph: follows the cefazolin result
    is_staph = species.str.startswith('Staphylococcus')
    cefazolin = data['CEFAZOLIN'+suffix]
    data.loc[is_staph & (cefazolin == 'RESISTANT'), target] = 'RESISTANT'
    data.loc[is_staph & (cefazolin == 'SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'

    # Strep
    is_viridans = species.str.contains('viridans', case=False, regex=False)
    penicillin_susceptible = data['PENICILLIN'+suffix] == 'SUSCEPTIBLE'
    data.loc[species.str.startswith('Streptococcus') & penicillin_susceptible,
             target] = 'SUSCEPTIBLE'
    data.loc[is_viridans & penicillin_susceptible, target] = 'SUSCEPTIBLE'
    data.loc[is_viridans & (data['CEFTRIAXONE'+suffix] == 'RESISTANT'),
             target] = 'RESISTANT'
    data.loc[species == 'Streptococcus agalactiae', target] = 'SUSCEPTIBLE'
    data.loc[species == 'Streptococcus pyogenes', target] = 'SUSCEPTIBLE'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # Skipped when the dataset has no cloxacillin column at all.
    if 'CLOXACILLIN'+suffix in data.columns:
        lab_result = data['CLOXACILLIN'+suffix]
        for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
            data.loc[lab_result == result, target] = result

In [10]:
# Imputation rules for ampicillin
#
# Fills the 'Ampicillin_FINAL_imp' column from the organism name in 'FINAL' and from
# the results of related antibiotics. Rules run top to bottom and a later rule
# overwrites an earlier one for the same row; the lab's own ampicillin result is
# applied last and therefore takes precedence.

drug = 'Ampicillin'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix == '_PGNO':
        continue
    organism = 'FINAL'

    target = drug + suffix + '_imp'   # column that receives the imputed result
    species = data[organism]          # organism name of each isolate

    # Resistance to any of these implies ampicillin resistance for E. coli and Proteus.
    # Both rules are skipped when any of the columns is absent (antibiotic not tested).
    broader_agents = ['AMOXICILLIN/CLAVULANIC ACID'+suffix, 'PIPERACILLIN / TAZOBACTAM'+suffix,
                      'CEFAZOLIN'+suffix, 'CEFTRIAXONE'+suffix, 'CEFTAZIDIME'+suffix,
                      'MEROPENEM'+suffix, 'ERTAPENEM'+suffix]
    have_broader_agents = set(broader_agents).issubset(data.columns)

    # E. coli
    if have_broader_agents:
        data.loc[(species == 'Escherichia coli') &
                 (data[broader_agents] == 'RESISTANT').any(axis=1),
                 target] = 'RESISTANT'

    # Klebsiella
    data.loc[species.str.startswith('Klebsiella'), target] = 'RESISTANT'

    # Proteus
    if have_broader_agents:
        data.loc[species.str.startswith('Proteus') &
                 (data[broader_agents] == 'RESISTANT').any(axis=1),
                 target] = 'RESISTANT'

    data.loc[(species == 'Proteus mirabilis') &
             (data[['CEFAZOLIN'+suffix, 'CEFTRIAXONE'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             target] = 'SUSCEPTIBLE'
    data.loc[species == 'Proteus vulgaris', target] = 'RESISTANT'

    # Intrinsically resistant gram negatives
    # ampC organisms
    for genus in ('Citrobacter', 'Enterobacter', 'Morganella', 'Serratia', 'Providencia'):
        data.loc[species.str.startswith(genus), target] = 'RESISTANT'

    # others
    for exact_name in ('Pseudomonas aeruginosa', 'Bacteroides fragilis', 'Stenotrophomonas maltophilia'):
        data.loc[species == exact_name, target] = 'RESISTANT'
    data.loc[species.str.startswith('Acinetobacter'), target] = 'RESISTANT'

    # Gram positives
    data.loc[species == 'Listeria monocytogenes', target] = 'SUSCEPTIBLE'

    # Staph: skipped when any marker column is absent (antibiotic not tested)
    staph_markers = ['PENICILLIN'+suffix, 'CEFAZOLIN'+suffix, 'CLOXACILLIN'+suffix]
    if set(staph_markers).issubset(data.columns):
        is_staph = species.str.startswith('Staphylococcus')
        data.loc[is_staph & (data[staph_markers] == 'RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
        data.loc[is_staph & (data[staph_markers] == 'SUSCEPTIBLE').any(axis=1),
                 target] = 'SUSCEPTIBLE'

    # Strep
    penicillin = data['PENICILLIN'+suffix]
    is_pneumococcus = species == 'Streptococcus pneumoniae'
    is_viridans = species.str.contains('viridans', case=False, regex=False)
    data.loc[species.str.startswith('Streptococcus'), target] = 'SUSCEPTIBLE'
    data.loc[is_pneumococcus & (penicillin == 'SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'
    data.loc[is_pneumococcus & (penicillin == 'RESISTANT'), target] = 'RESISTANT'
    data.loc[is_viridans & (penicillin == 'SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'
    data.loc[is_viridans & (penicillin == 'RESISTANT'), target] = 'RESISTANT'
    data.loc[species == 'Streptococcus agalactiae', target] = 'SUSCEPTIBLE'
    data.loc[species == 'Streptococcus pyogenes', target] = 'SUSCEPTIBLE'

    # Actinomyces: follows the penicillin result.
    # A penicillin-resistant isolate is imputed RESISTANT, matching the Actinomyces
    # rules in the amoxicillin-clavulanic acid and piperacillin-tazobactam cells.
    is_actinomyces = species.str.startswith('Actinomyces')
    data.loc[is_actinomyces & (penicillin == 'SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'
    data.loc[is_actinomyces & (penicillin == 'RESISTANT'), target] = 'RESISTANT'

    # Candida
    data.loc[species.str.startswith('Candida'), target] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # The three result values are mutually exclusive per row, so their order is irrelevant.
    lab_result = data['AMPICILLIN'+suffix]
    for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[lab_result == result, target] = result

In [11]:
# Amoxicillin-clavulanic acid: imputation rules
#
# Writes to the 'Amoxiclav' + suffix + '_imp' column. Statements are applied in
# order, so a later rule overwrites an earlier one for the same row. Rules wrapped
# in try/except are skipped when a referenced column does not exist.

drug = 'Amoxiclav'
for suffix in ('_PGNO', '_FINAL'):

    # Only the '_FINAL' pass is imputed; '_PGNO' is skipped
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'

    target = drug + suffix + '_imp'
    species = data[organism]
    ampicillin_susceptible = data['AMPICILLIN'+suffix].eq('SUSCEPTIBLE')
    escalation_agents = ['CEFTRIAXONE'+suffix, 'CEFTAZIDIME'+suffix,
                         'MEROPENEM'+suffix, 'ERTAPENEM'+suffix]
    is_viridans = species.str.contains('viridans', case=False, regex=False)
    is_actinomyces = species.str.startswith('Actinomyces')

    # --- E. coli ---
    data.loc[species.eq('Escherichia coli') & ampicillin_susceptible, target] = 'SUSCEPTIBLE'
    try:
        data.loc[species.eq('Escherichia coli')
                 & data[escalation_agents].eq('RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
    except KeyError:
        pass

    # --- K. pneumoniae ---
    try:
        data.loc[species.eq('Klebsiella pneumoniae')
                 & data[escalation_agents].eq('RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
    except KeyError:
        pass

    # --- Proteus ---
    data.loc[species.str.startswith('Proteus') & ampicillin_susceptible, target] = 'SUSCEPTIBLE'
    try:
        data.loc[species.eq('Proteus mirabilis')
                 & data[escalation_agents].eq('RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
    except KeyError:
        pass
    data.loc[species.eq('Proteus vulgaris'), target] = 'RESISTANT'

    # --- ampC organisms ---
    for genus in ('Citrobacter', 'Enterobacter', 'Morganella', 'Serratia', 'Providencia'):
        data.loc[species.str.startswith(genus), target] = 'RESISTANT'

    # --- Pseudomonas ---
    data.loc[species.eq('Pseudomonas aeruginosa'), target] = 'RESISTANT'

    # --- Yersinia ---
    data.loc[species.str.startswith('Yersinia'), target] = 'RESISTANT'

    # --- Enterococcus ---
    data.loc[species.str.startswith('Enterococcus') & ampicillin_susceptible,
             target] = 'SUSCEPTIBLE'

    # --- Staph: resistant rule first, then susceptible rule ---
    try:
        staph_markers = ['PENICILLIN'+suffix, 'CEFAZOLIN'+suffix, 'CLOXACILLIN'+suffix]
        for outcome in ('RESISTANT', 'SUSCEPTIBLE'):
            data.loc[species.str.startswith('Staphylococcus')
                     & data[staph_markers].eq(outcome).any(axis=1),
                     target] = outcome
    except KeyError:
        pass

    # --- Strep ---
    data.loc[species.str.startswith('Streptococcus'), target] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus pneumoniae')
             & data[['PENICILLIN'+suffix, 'AMPICILLIN'+suffix]].eq('SUSCEPTIBLE').any(axis=1),
             target] = 'SUSCEPTIBLE'
    data.loc[is_viridans & data['PENICILLIN'+suffix].eq('SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'
    data.loc[is_viridans & data['CEFTRIAXONE'+suffix].eq('RESISTANT'), target] = 'RESISTANT'
    for strep_species in ('Streptococcus agalactiae', 'Streptococcus pyogenes'):
        data.loc[species.eq(strep_species), target] = 'SUSCEPTIBLE'

    # --- Candida ---
    data.loc[species.str.startswith('Candida'), target] = 'RESISTANT'

    # --- Stenotrophomonas ---
    data.loc[species.eq('Stenotrophomonas maltophilia'), target] = 'RESISTANT'

    # --- Actinomyces: susceptible rule first, then resistant rule ---
    for outcome in ('SUSCEPTIBLE', 'RESISTANT'):
        data.loc[is_actinomyces
                 & data[['PENICILLIN'+suffix, 'AMPICILLIN'+suffix]].eq(outcome).any(axis=1),
                 target] = outcome

    # --- Lab result, when present, overrides every imputed value ---
    try:
        for outcome in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
            data.loc[data['AMOXICILLIN/CLAVULANIC ACID'+suffix].eq(outcome), target] = outcome
    except KeyError:
        pass

In [12]:
# Piperacillin-tazobactam: imputation rules
#
# Writes to the 'Piptaz' + suffix + '_imp' column. Statements are applied in order,
# so a later rule overwrites an earlier one for the same row. Rules wrapped in
# try/except are skipped when a referenced column does not exist.

drug = 'Piptaz'
for suffix in ('_PGNO', '_FINAL'):

    # Only the '_FINAL' pass is imputed; '_PGNO' is skipped
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'

    target = drug + suffix + '_imp'
    species = data[organism]
    carbapenems = ['MEROPENEM'+suffix, 'ERTAPENEM'+suffix]
    is_viridans = species.str.contains('viridans', case=False, regex=False)
    is_actinomyces = species.str.startswith('Actinomyces')
    is_enterococcus = species.str.startswith('Enterococcus')

    # --- E. coli ---
    try:
        data.loc[species.eq('Escherichia coli')
                 & data[carbapenems].eq('RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
    except KeyError:
        pass
    try:
        data.loc[species.eq('Escherichia coli')
                 & data[['CEFAZOLIN'+suffix, 'AMOXICILLIN/CLAVULANIC ACID'+suffix]]
                   .eq('SUSCEPTIBLE').any(axis=1),
                 target] = 'SUSCEPTIBLE'
    except KeyError:
        pass

    # --- Klebsiella ---
    try:
        data.loc[species.eq('Klebsiella pneumoniae')
                 & data[carbapenems].eq('RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
    except KeyError:
        pass
    for klebsiella_species in ('Klebsiella pneumoniae', 'Klebsiella oxytoca'):
        data.loc[species.eq(klebsiella_species)
                 & data['CEFAZOLIN'+suffix].eq('SUSCEPTIBLE'),
                 target] = 'SUSCEPTIBLE'

    # --- Proteus ---
    try:
        data.loc[species.str.startswith('Proteus')
                 & data[['AMPICILLIN'+suffix, 'AMOXICILLIN/CLAVULANIC ACID'+suffix]]
                   .eq('SUSCEPTIBLE').any(axis=1),
                 target] = 'SUSCEPTIBLE'
        data.loc[species.eq('Proteus mirabilis')
                 & data[['CEFAZOLIN'+suffix, 'AMOXICILLIN/CLAVULANIC ACID'+suffix]]
                   .eq('SUSCEPTIBLE').any(axis=1),
                 target] = 'SUSCEPTIBLE'
    except KeyError:
        pass

    # --- Enterococcus: follows the ampicillin result ---
    for outcome in ('SUSCEPTIBLE', 'RESISTANT'):
        data.loc[is_enterococcus & data['AMPICILLIN'+suffix].eq(outcome), target] = outcome

    # --- Staph: resistant rule first, then susceptible rule ---
    try:
        staph_markers = ['CEFAZOLIN'+suffix, 'CLOXACILLIN'+suffix]
        for outcome in ('RESISTANT', 'SUSCEPTIBLE'):
            data.loc[species.str.startswith('Staphylococcus')
                     & data[staph_markers].eq(outcome).any(axis=1),
                     target] = outcome
    except KeyError:
        pass

    # --- Strep ---
    data.loc[species.str.startswith('Streptococcus'), target] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus pneumoniae')
             & data['PENICILLIN'+suffix].eq('SUSCEPTIBLE'),
             target] = 'SUSCEPTIBLE'
    data.loc[is_viridans & data['PENICILLIN'+suffix].eq('SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'
    for strep_species in ('Streptococcus agalactiae', 'Streptococcus pyogenes'):
        data.loc[species.eq(strep_species), target] = 'SUSCEPTIBLE'

    # --- Candida ---
    data.loc[species.str.startswith('Candida'), target] = 'RESISTANT'

    # --- Stenotrophomonas ---
    data.loc[species.eq('Stenotrophomonas maltophilia'), target] = 'RESISTANT'

    # --- Actinomyces: follows the penicillin result ---
    for outcome in ('SUSCEPTIBLE', 'RESISTANT'):
        data.loc[is_actinomyces & data['PENICILLIN'+suffix].eq(outcome), target] = outcome

    # --- Lab result overrides every imputed value ---
    for outcome in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[data['PIPERACILLIN / TAZOBACTAM'+suffix].eq(outcome), target] = outcome

In [13]:
# Cefazolin: imputation rules
#
# Writes to the 'Cefazolin' + suffix + '_imp' column. Statements are applied in
# order, so a later rule overwrites an earlier one for the same row. Rules wrapped
# in try/except are skipped when a referenced column does not exist.

drug = 'Cefazolin'
for suffix in ('_PGNO', '_FINAL'):

    # Only the '_FINAL' pass is imputed; '_PGNO' is skipped
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'

    target = drug + suffix + '_imp'
    species = data[organism]
    is_proteus = species.str.startswith('Proteus')
    is_pneumococcus = species.eq('Streptococcus pneumoniae')
    is_viridans = species.str.contains('viridans', case=False, regex=False)
    ceftriaxone_or_piptaz = ['CEFTRIAXONE'+suffix, 'PIPERACILLIN / TAZOBACTAM'+suffix]

    # --- E. coli ---
    try:
        data.loc[species.eq('Escherichia coli')
                 & data[['CEFTRIAXONE'+suffix, 'CEFTAZIDIME'+suffix,
                         'MEROPENEM'+suffix, 'ERTAPENEM'+suffix]].eq('RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
    except KeyError:
        pass

    data.loc[species.eq('Escherichia coli') & data['AMPICILLIN'+suffix].eq('SUSCEPTIBLE'),
             target] = 'SUSCEPTIBLE'

    # --- Klebsiella ---
    data.loc[species.str.startswith('Klebsiella')
             & data[ceftriaxone_or_piptaz].eq('RESISTANT').any(axis=1),
             target] = 'RESISTANT'
    try:
        data.loc[species.eq('Klebsiella pneumoniae')
                 & data[ceftriaxone_or_piptaz + ['MEROPENEM'+suffix, 'ERTAPENEM'+suffix]]
                   .eq('RESISTANT').any(axis=1),
                 target] = 'RESISTANT'
    except KeyError:
        pass

    data.loc[species.eq('Klebsiella oxytoca')
             & data[ceftriaxone_or_piptaz].eq('RESISTANT').any(axis=1),
             target] = 'RESISTANT'
    data.loc[species.eq('Klebsiella oxytoca') & data['CEFTRIAXONE'+suffix].eq('SUSCEPTIBLE'),
             target] = 'SUSCEPTIBLE'

    # --- Proteus ---
    data.loc[is_proteus & data['AMPICILLIN'+suffix].eq('SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'
    data.loc[is_proteus & data['CEFTRIAXONE'+suffix].eq('RESISTANT'), target] = 'RESISTANT'
    data.loc[species.eq('Proteus mirabilis')
             & data[['CEFAZOLIN'+suffix, 'CEFTRIAXONE'+suffix]].eq('SUSCEPTIBLE').any(axis=1),
             target] = 'SUSCEPTIBLE'
    data.loc[species.eq('Proteus vulgaris'), target] = 'RESISTANT'

    # --- Intrinsically resistant gram negatives ---
    # ampC organisms
    for genus in ('Citrobacter', 'Enterobacter', 'Morganella', 'Serratia', 'Providencia'):
        data.loc[species.str.startswith(genus), target] = 'RESISTANT'

    # others
    for exact_name in ('Pseudomonas aeruginosa', 'Bacteroides fragilis',
                       'Stenotrophomonas maltophilia'):
        data.loc[species.eq(exact_name), target] = 'RESISTANT'
    data.loc[species.str.startswith('Acinetobacter'), target] = 'RESISTANT'

    # --- Gram positives ---
    data.loc[species.eq('Listeria monocytogenes'), target] = 'RESISTANT'
    data.loc[species.str.startswith('Enterococcus'), target] = 'RESISTANT'

    # --- Staph: resistant rule first, then susceptible rule ---
    try:
        staph_markers = ['CEFAZOLIN'+suffix, 'CLOXACILLIN'+suffix]
        for outcome in ('RESISTANT', 'SUSCEPTIBLE'):
            data.loc[species.str.startswith('Staphylococcus')
                     & data[staph_markers].eq(outcome).any(axis=1),
                     target] = outcome
    except KeyError:
        pass

    # --- Strep ---
    data.loc[species.str.startswith('Streptococcus'), target] = 'SUSCEPTIBLE'
    for outcome in ('SUSCEPTIBLE', 'RESISTANT'):
        data.loc[is_pneumococcus & data['PENICILLIN'+suffix].eq(outcome), target] = outcome
    data.loc[is_viridans & data['PENICILLIN'+suffix].eq('SUSCEPTIBLE'), target] = 'SUSCEPTIBLE'
    data.loc[is_viridans & data['CEFTRIAXONE'+suffix].eq('RESISTANT'), target] = 'RESISTANT'
    for strep_species in ('Streptococcus agalactiae', 'Streptococcus pyogenes'):
        data.loc[species.eq(strep_species), target] = 'SUSCEPTIBLE'

    # --- Candida ---
    data.loc[species.str.startswith('Candida'), target] = 'RESISTANT'

    # --- Lab result overrides every imputed value ---
    for outcome in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[data['CEFAZOLIN'+suffix].eq(outcome), target] = outcome

In [14]:
# Imputation rules for ceftriaxone
#
# Fills the imputed ceftriaxone column (drug + suffix + '_imp') from the
# organism name and from lab results of related antibiotics. Rules run top to
# bottom and a later rule overwrites an earlier one on the rows it matches, so
# statement order is significant. The measured CEFTRIAXONE result is applied
# last and therefore wins over every imputed value.
#
# Rule groups that depend on lab columns which may be absent are wrapped in
# `except KeyError`; a skipped group is now reported instead of being dropped
# silently, so a missing or misspelled column name is visible in the output.

drug = 'Ceftriaxone'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix != '_FINAL': continue
    organism = 'FINAL'

    imp_col  = drug + suffix + '_imp'          # column receiving the imputed result
    species  = data[organism]
    genus    = species.str.startswith          # genus('X'): organism name starts with 'X'
    viridans = species.str.contains('viridans', case=False, regex=False)
    ecoli    = species.eq('Escherichia coli')
    staph    = genus('Staphylococcus')
    strep    = genus('Streptococcus')

    # Lab column names for this pass (columns are only looked up when a rule runs)
    cefazolin   = 'CEFAZOLIN' + suffix
    carbapenems = ['MEROPENEM' + suffix, 'ERTAPENEM' + suffix]
    staph_labs  = ['CEFAZOLIN' + suffix, 'CLOXACILLIN' + suffix]

    # E. coli
    # NOTE(review): here the cefazolin-susceptible rule runs after (and so
    # overrides) the carbapenem-resistant rule, while for Klebsiella below the
    # order is the opposite -- confirm this asymmetry is intended.
    try:
        data.loc[ecoli & data[carbapenems].eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
    except KeyError as missing:
        print(f'{drug}{suffix}: E. coli carbapenem rule skipped, lab column not found: {missing}')

    cefazolin_s = data[cefazolin].eq('SUSCEPTIBLE')
    data.loc[ecoli & cefazolin_s, imp_col] = 'SUSCEPTIBLE'

    # Klebsiella
    data.loc[genus('Klebsiella') & cefazolin_s, imp_col] = 'SUSCEPTIBLE'
    try:
        data.loc[species.eq('Klebsiella pneumoniae')
                 & data[carbapenems].eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
    except KeyError as missing:
        print(f'{drug}{suffix}: K. pneumoniae carbapenem rule skipped, lab column not found: {missing}')

    # Other gram negatives
    data.loc[genus('Proteus')
             & data[['AMPICILLIN' + suffix, cefazolin]].eq('SUSCEPTIBLE').any(axis=1),
             imp_col] = 'SUSCEPTIBLE'

    # Organisms marked resistant regardless of lab results
    # (gram negatives first, then gram positives, in the original order)
    for rows in (genus('Bacteroides'),
                 species.eq('Pseudomonas aeruginosa'),
                 species.eq('Stenotrophomonas maltophilia'),
                 species.eq('Listeria monocytogenes'),
                 genus('Enterococcus')):
        data.loc[rows, imp_col] = 'RESISTANT'

    # Staph: susceptible is applied second, so it wins when both match
    try:
        staph_results = data[staph_labs]
        data.loc[staph & staph_results.eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
        data.loc[staph & staph_results.eq('SUSCEPTIBLE').any(axis=1), imp_col] = 'SUSCEPTIBLE'
    except KeyError as missing:
        print(f'{drug}{suffix}: Staphylococcus rules skipped, lab column not found: {missing}')

    # Strep: every Streptococcus is marked susceptible first; the remaining
    # strep rules only ever write 'SUSCEPTIBLE' as well
    data.loc[strep, imp_col] = 'SUSCEPTIBLE'
    penicillin_s = data['PENICILLIN' + suffix].eq('SUSCEPTIBLE')
    data.loc[strep & penicillin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[viridans & penicillin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus agalactiae'), imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus pyogenes'), imp_col] = 'SUSCEPTIBLE'

    # Actinomyces
    data.loc[genus('Actinomyces') & penicillin_s, imp_col] = 'SUSCEPTIBLE'

    # Candida
    data.loc[genus('Candida'), imp_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence
    measured = data['CEFTRIAXONE' + suffix]
    for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[measured.eq(result), imp_col] = result

In [15]:
# Imputation rules for ceftazidime
#
# Fills the imputed ceftazidime column (drug + suffix + '_imp') from the
# organism name and from lab results of related antibiotics. Rules run top to
# bottom and a later rule overwrites an earlier one on the rows it matches.
# The measured CEFTAZIDIME result is applied last and therefore wins.
#
# Rule groups wrapped in `except KeyError` are skipped when a lab column they
# need is absent; the skip is now reported instead of being silent.

drug = 'Ceftazidime'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix != '_FINAL': continue
    organism = 'FINAL'

    imp_col = drug + suffix + '_imp'           # column receiving the imputed result
    species = data[organism]
    genus   = species.str.startswith           # genus('X'): organism name starts with 'X'
    ecoli   = species.eq('Escherichia coli')

    # Lab column names for this pass (columns are only looked up when a rule runs)
    carbapenems    = ['MEROPENEM' + suffix, 'ERTAPENEM' + suffix]
    cephalosporins = ['CEFAZOLIN' + suffix, 'CEFTRIAXONE' + suffix]
    staph_labs     = ['CEFAZOLIN' + suffix, 'CLOXACILLIN' + suffix, 'CEFTRIAXONE' + suffix]

    # E. coli
    # NOTE(review): the cephalosporin-susceptible rule runs after (and so
    # overrides) the carbapenem-resistant rule here, while for Klebsiella the
    # order is the opposite -- confirm this asymmetry is intended.
    try:
        data.loc[ecoli & data[carbapenems].eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
    except KeyError as missing:
        print(f'{drug}{suffix}: E. coli carbapenem rule skipped, lab column not found: {missing}')

    cephalosporin_s = data[cephalosporins].eq('SUSCEPTIBLE').any(axis=1)
    data.loc[ecoli & cephalosporin_s, imp_col] = 'SUSCEPTIBLE'

    # Klebsiella
    data.loc[genus('Klebsiella') & cephalosporin_s, imp_col] = 'SUSCEPTIBLE'
    try:
        data.loc[species.eq('Klebsiella pneumoniae')
                 & data[carbapenems].eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
    except KeyError as missing:
        print(f'{drug}{suffix}: K. pneumoniae carbapenem rule skipped, lab column not found: {missing}')

    # Organisms marked resistant regardless of lab results
    # (other gram negatives, then gram positives, in the original order)
    for rows in (species.eq('Bacteroides fragilis'),
                 species.eq('Stenotrophomonas maltophilia'),
                 species.eq('Listeria monocytogenes'),
                 genus('Enterococcus')):
        data.loc[rows, imp_col] = 'RESISTANT'

    # Staph
    try:
        data.loc[genus('Staphylococcus')
                 & data[staph_labs].eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
    except KeyError as missing:
        print(f'{drug}{suffix}: Staphylococcus rule skipped, lab column not found: {missing}')

    # Candida
    data.loc[genus('Candida'), imp_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence
    measured = data['CEFTAZIDIME' + suffix]
    for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[measured.eq(result), imp_col] = result

In [16]:
# Imputation rules for ertapenem
#
# Fills the imputed ertapenem column (drug + suffix + '_imp') from the
# organism name and from lab results of related antibiotics. Rules run top to
# bottom and a later rule overwrites an earlier one on the rows it matches.
# A measured ERTAPENEM result, when that column exists, is applied last.
#
# Rule groups wrapped in `except KeyError` are skipped when a lab column they
# need is absent; the skip is now reported instead of being silent.

drug = 'Ertapenem'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix != '_FINAL': continue
    organism = 'FINAL'

    imp_col  = drug + suffix + '_imp'          # column receiving the imputed result
    species  = data[organism]
    genus    = species.str.startswith          # genus('X'): organism name starts with 'X'
    viridans = species.str.contains('viridans', case=False, regex=False)
    staph    = genus('Staphylococcus')

    # Lab column names for this pass (columns are only looked up when a rule runs)
    cefazolin   = 'CEFAZOLIN' + suffix
    ceftriaxone = 'CEFTRIAXONE' + suffix
    meropenem   = 'MEROPENEM' + suffix

    # Klebsiella
    cephalosporin_s = data[[cefazolin, ceftriaxone]].eq('SUSCEPTIBLE').any(axis=1)
    ceftriaxone_s   = data[ceftriaxone].eq('SUSCEPTIBLE')
    data.loc[genus('Klebsiella') & cephalosporin_s, imp_col] = 'SUSCEPTIBLE'
    # NOTE(review): resistance to cefazolin or ceftriaxone alone is enough to
    # mark K. pneumoniae ertapenem-resistant here -- confirm this is intended.
    data.loc[species.eq('Klebsiella pneumoniae')
             & data[[cefazolin, ceftriaxone, meropenem]].eq('RESISTANT').any(axis=1),
             imp_col] = 'RESISTANT'
    meropenem_r = data[meropenem].eq('RESISTANT')

    # Proteus
    data.loc[genus('Proteus') & ceftriaxone_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Proteus mirabilis') & cephalosporin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Proteus mirabilis') & meropenem_r, imp_col] = 'RESISTANT'
    data.loc[species.eq('Proteus vulgaris') & ceftriaxone_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Proteus vulgaris') & meropenem_r, imp_col] = 'RESISTANT'

    # Other gram negatives
    data.loc[genus('Citrobacter') & meropenem_r, imp_col] = 'RESISTANT'
    data.loc[genus('Enterobacter') & meropenem_r, imp_col] = 'RESISTANT'
    # Runs after the Enterobacter rule above, so cephalosporin susceptibility
    # overrides meropenem resistance for E. cloacae
    data.loc[species.eq('Enterobacter cloacae') & cephalosporin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[genus('Morganella') & meropenem_r, imp_col] = 'RESISTANT'
    data.loc[genus('Serratia') & meropenem_r, imp_col] = 'RESISTANT'
    for rows in (genus('Pseudomonas'),
                 genus('Acinetobacter'),
                 species.eq('Stenotrophomonas maltophilia')):
        data.loc[rows, imp_col] = 'RESISTANT'

    # Staph: susceptible is applied second, so it wins when both match
    try:
        staph_results = data[[cefazolin, 'CLOXACILLIN' + suffix]]
        data.loc[staph & staph_results.eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
        data.loc[staph & staph_results.eq('SUSCEPTIBLE').any(axis=1), imp_col] = 'SUSCEPTIBLE'
    except KeyError as missing:
        print(f'{drug}{suffix}: Staphylococcus rules skipped, lab column not found: {missing}')

    # Strep
    penicillin_s = data['PENICILLIN' + suffix].eq('SUSCEPTIBLE')
    data.loc[genus('Streptococcus') & penicillin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[viridans & penicillin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus agalactiae'), imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus pyogenes'), imp_col] = 'SUSCEPTIBLE'

    # Enterococcus
    data.loc[genus('Enterococcus'), imp_col] = 'RESISTANT'

    # Actinomyces
    data.loc[genus('Actinomyces') & data[meropenem].eq('SUSCEPTIBLE'), imp_col] = 'SUSCEPTIBLE'

    # Candida
    data.loc[genus('Candida'), imp_col] = 'RESISTANT'


    # Final imputation: if the lab tests for the sample, this should take precedence
    try:
        measured = data['ERTAPENEM' + suffix]
        for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
            data.loc[measured.eq(result), imp_col] = result
    except KeyError as missing:
        print(f'{drug}{suffix}: measured-result override skipped, lab column not found: {missing}')

In [17]:
# Imputation rules for meropenem
#
# Fills the imputed meropenem column (drug + suffix + '_imp') from the
# organism name and from lab results of related antibiotics. Rules run top to
# bottom and a later rule overwrites an earlier one on the rows it matches.
# The measured MEROPENEM result is applied last and therefore wins.
#
# Rule groups wrapped in `except KeyError` stop at the first lab column that
# cannot be found (rules earlier in the same group have already been applied);
# the skip is now reported instead of being silent.

drug = 'Meropenem'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix != '_FINAL': continue
    organism = 'FINAL'

    imp_col     = drug + suffix + '_imp'       # column receiving the imputed result
    species     = data[organism]
    genus       = species.str.startswith       # genus('X'): organism name starts with 'X'
    viridans    = species.str.contains('viridans', case=False, regex=False)
    staph       = genus('Staphylococcus')
    epidermidis = species.eq('Staphylococcus epidermidis')
    enterococci = genus('Enterococcus')

    # Lab column names for this pass (columns are only looked up when a rule runs)
    cefazolin   = 'CEFAZOLIN' + suffix
    ceftriaxone = 'CEFTRIAXONE' + suffix
    ertapenem   = 'ERTAPENEM' + suffix
    ampicillin  = 'AMPICILLIN' + suffix
    staph_labs  = [cefazolin, 'CLOXACILLIN' + suffix]
    # NOTE(review): the notebook's header comment lists this antibiotic as
    # 'AMOXICILLIN-CLAVULANIC ACID' (hyphen). If that is the real column name,
    # the slash spelling below raises KeyError and both S. epidermidis rules
    # are skipped -- confirm against data.columns.
    epidermidis_labs = staph_labs + ['PIPERACILLIN / TAZOBACTAM' + suffix,
                                     'AMOXICILLIN/CLAVULANIC ACID' + suffix,
                                     ertapenem]

    # E. coli and Klebsiella (both rules need the same three lab columns)
    try:
        beta_lactam_s = data[[cefazolin, ceftriaxone, ertapenem]].eq('SUSCEPTIBLE').any(axis=1)
        data.loc[species.eq('Escherichia coli') & beta_lactam_s, imp_col] = 'SUSCEPTIBLE'
        data.loc[genus('Klebsiella') & beta_lactam_s, imp_col] = 'SUSCEPTIBLE'
    except KeyError as missing:
        print(f'{drug}{suffix}: E. coli / Klebsiella rules skipped, lab column not found: {missing}')

    # Proteus
    data.loc[genus('Proteus') & data[ceftriaxone].eq('SUSCEPTIBLE'), imp_col] = 'SUSCEPTIBLE'
    try:
        erta_or_ctx_s = data[[ertapenem, ceftriaxone]].eq('SUSCEPTIBLE').any(axis=1)
        data.loc[species.eq('Proteus mirabilis') & erta_or_ctx_s, imp_col] = 'SUSCEPTIBLE'
        data.loc[species.eq('Proteus vulgaris') & erta_or_ctx_s, imp_col] = 'SUSCEPTIBLE'
    except KeyError as missing:
        print(f'{drug}{suffix}: Proteus species rules skipped, lab column not found: {missing}')

    # Other gram negatives
    try:
        ertapenem_s = data[ertapenem].eq('SUSCEPTIBLE')
        data.loc[genus('Citrobacter') & ertapenem_s, imp_col] = 'SUSCEPTIBLE'
        data.loc[genus('Enterobacter') & ertapenem_s, imp_col] = 'SUSCEPTIBLE'
        data.loc[species.eq('Enterobacter cloacae')
                 & data[[cefazolin, ceftriaxone, ertapenem]].eq('SUSCEPTIBLE').any(axis=1),
                 imp_col] = 'SUSCEPTIBLE'
        data.loc[genus('Morganella') & ertapenem_s, imp_col] = 'SUSCEPTIBLE'
        data.loc[genus('Serratia') & ertapenem_s, imp_col] = 'SUSCEPTIBLE'
    except KeyError as missing:
        print(f'{drug}{suffix}: other gram-negative rules cut short, lab column not found: {missing}')

    data.loc[species.eq('Stenotrophomonas maltophilia'), imp_col] = 'RESISTANT'

    # Enterococcus: follows the ampicillin result
    data.loc[enterococci & data[ampicillin].eq('SUSCEPTIBLE'), imp_col] = 'SUSCEPTIBLE'
    data.loc[enterococci & data[ampicillin].eq('RESISTANT'), imp_col] = 'RESISTANT'

    # Staph: within each pair the second rule wins when both match
    # (susceptible for Staphylococcus in general, resistant for S. epidermidis)
    try:
        data.loc[staph & data[staph_labs].eq('RESISTANT').any(axis=1), imp_col] = 'RESISTANT'
        data.loc[staph & data[staph_labs].eq('SUSCEPTIBLE').any(axis=1), imp_col] = 'SUSCEPTIBLE'
        data.loc[epidermidis & data[epidermidis_labs].eq('SUSCEPTIBLE').any(axis=1),
                 imp_col] = 'SUSCEPTIBLE'
        data.loc[epidermidis & data[epidermidis_labs].eq('RESISTANT').any(axis=1),
                 imp_col] = 'RESISTANT'
    except KeyError as missing:
        print(f'{drug}{suffix}: Staphylococcus rules cut short, lab column not found: {missing}')

    # Strep
    penicillin_s = data['PENICILLIN' + suffix].eq('SUSCEPTIBLE')
    data.loc[genus('Streptococcus') & penicillin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[viridans & penicillin_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus agalactiae'), imp_col] = 'SUSCEPTIBLE'
    data.loc[species.eq('Streptococcus pyogenes'), imp_col] = 'SUSCEPTIBLE'

    # Actinomyces
    measured = data['MEROPENEM' + suffix]
    data.loc[genus('Actinomyces') & measured.eq('SUSCEPTIBLE'), imp_col] = 'SUSCEPTIBLE'

    # Candida
    data.loc[genus('Candida'), imp_col] = 'RESISTANT'


    # Final imputation: if the lab tests for the sample, this should take precedence
    for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[measured.eq(result), imp_col] = result

In [18]:
# Imputation rules for ciprofloxacin
#
# Fills the imputed ciprofloxacin column (drug + suffix + '_imp'). For most
# organism groups the result follows the other two fluoroquinolone columns
# (LEVOFLOXACIN / MOXIFLOXACIN): susceptible if either is susceptible, then
# resistant if either is resistant -- the resistant rule runs second and so
# wins when both match. The measured CIPROFLOXACIN result is applied last.

drug = 'Ciprofloxacin'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix != '_FINAL': continue
    organism = 'FINAL'

    imp_col  = drug + suffix + '_imp'          # column receiving the imputed result
    species  = data[organism]
    genus    = species.str.startswith          # genus('X'): organism name starts with 'X'
    viridans = species.str.contains('viridans', case=False, regex=False)

    # Row masks built from the other fluoroquinolone lab columns
    other_quinolones = data[['LEVOFLOXACIN' + suffix, 'MOXIFLOXACIN' + suffix]]
    quinolone_s = other_quinolones.eq('SUSCEPTIBLE').any(axis=1)
    quinolone_r = other_quinolones.eq('RESISTANT').any(axis=1)

    # Gram negatives
    for rows in (genus('Klebsiella'),
                 species.eq('Proteus mirabilis'),
                 genus('Citrobacter'),
                 genus('Morganella'),
                 genus('Enterobacter')):
        data.loc[rows & quinolone_s, imp_col] = 'SUSCEPTIBLE'
        data.loc[rows & quinolone_r, imp_col] = 'RESISTANT'

    data.loc[genus('Bacteroides'), imp_col] = 'RESISTANT'

    # Gram positives
    enterococci = genus('Enterococcus')
    data.loc[enterococci & quinolone_s, imp_col] = 'SUSCEPTIBLE'
    data.loc[enterococci & quinolone_r, imp_col] = 'RESISTANT'

    data.loc[species.eq('Listeria monocytogenes'), imp_col] = 'RESISTANT'

    for rows in (genus('Staphylococcus'), genus('Streptococcus'), viridans):
        data.loc[rows & quinolone_s, imp_col] = 'SUSCEPTIBLE'
        data.loc[rows & quinolone_r, imp_col] = 'RESISTANT'

    # Candida
    data.loc[genus('Candida'), imp_col] = 'RESISTANT'


    # Final imputation: if the lab tests for the sample, this should take precedence
    measured = data['CIPROFLOXACIN' + suffix]
    for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[measured.eq(result), imp_col] = result

In [19]:
# Imputation rules for levofloxacin
#
# Fills the imputed levofloxacin column (drug + suffix + '_imp') from the
# organism name and the CIPROFLOXACIN / MOXIFLOXACIN lab columns. The rules
# are listed as (organism rows, lab rows, result) and applied strictly in
# order; a later rule overwrites an earlier one on the rows it matches. The
# measured LEVOFLOXACIN result is applied last.

drug = 'Levofloxacin'
for suffix in ['_PGNO', '_FINAL']:

    # Only the '_FINAL' pass is imputed; the '_PGNO' pass is skipped
    if suffix != '_FINAL': continue
    organism = 'FINAL'

    imp_col  = drug + suffix + '_imp'          # column receiving the imputed result
    species  = data[organism]
    genus    = species.str.startswith          # genus('X'): organism name starts with 'X'
    viridans = species.str.contains('viridans', case=False, regex=False)

    # Row masks built from the other fluoroquinolone lab columns
    cipro = data['CIPROFLOXACIN' + suffix]
    moxi  = data['MOXIFLOXACIN' + suffix]
    cipro_s  = cipro.eq('SUSCEPTIBLE')
    cipro_r  = cipro.eq('RESISTANT')
    moxi_r   = moxi.eq('RESISTANT')
    either_s = cipro_s | moxi.eq('SUSCEPTIBLE')   # ciprofloxacin or moxifloxacin susceptible
    either_r = cipro_r | moxi_r                   # ciprofloxacin or moxifloxacin resistant

    # Organism masks that are used by more than one rule
    ecoli        = species.eq('Escherichia coli')
    klebsiella   = genus('Klebsiella')
    mirabilis    = species.eq('Proteus mirabilis')
    citrobacter  = genus('Citrobacter')
    morganella   = genus('Morganella')
    enterobacter = genus('Enterobacter')
    # NOTE(review): the loaded table shows this organism spelled
    # 'Klebsiella (Enterobacter) aerogenes'; an exact match on
    # 'Enterobacter aerogenes' may never hit -- confirm.
    aerogenes    = species.eq('Enterobacter aerogenes')
    serratia     = genus('Serratia')
    enterococci  = genus('Enterococcus')
    staph        = genus('Staphylococcus')
    aureus       = species.eq('Staphylococcus aureus')

    levo_rules = [
        # Gram negatives
        (ecoli,        either_s, 'SUSCEPTIBLE'),
        (ecoli,        either_r, 'RESISTANT'),
        (klebsiella,   either_s, 'SUSCEPTIBLE'),
        (klebsiella,   either_r, 'RESISTANT'),
        (mirabilis,    cipro_s,  'SUSCEPTIBLE'),
        (mirabilis,    cipro_r,  'RESISTANT'),
        (species.eq('Proteus vulgaris'), cipro_r, 'RESISTANT'),
        (citrobacter,  either_s, 'SUSCEPTIBLE'),
        (citrobacter,  either_r, 'RESISTANT'),
        (morganella,   either_s, 'SUSCEPTIBLE'),
        (morganella,   either_r, 'RESISTANT'),
        (enterobacter, cipro_s,  'SUSCEPTIBLE'),
        (enterobacter, cipro_r,  'RESISTANT'),
        (aerogenes,    either_s, 'SUSCEPTIBLE'),
        (aerogenes,    either_r, 'RESISTANT'),
        (serratia,     either_s, 'SUSCEPTIBLE'),
        (serratia,     either_r, 'RESISTANT'),
        (species.eq('Pseudomonas aeruginosa'), cipro_r, 'RESISTANT'),

        # Gram positives
        (enterococci,  either_s, 'SUSCEPTIBLE'),
        (enterococci,  either_r, 'RESISTANT'),
        # For Staphylococcus the resistant rule comes first, so susceptible
        # wins when both match (the reverse of the groups above)
        (staph,        either_r, 'RESISTANT'),
        (staph,        either_s, 'SUSCEPTIBLE'),
        (aureus,       either_s, 'SUSCEPTIBLE'),
        (aureus,       moxi_r,   'RESISTANT'),
        (species.eq('Staphylococcus lugdunensis'), moxi_r, 'RESISTANT'),
        (species.eq('Staphylococcus epidermidis'), moxi_r, 'RESISTANT'),
        (genus('Streptococcus'), moxi_r, 'RESISTANT'),
        (viridans,     moxi_r,   'RESISTANT'),
    ]
    for organism_rows, lab_rows, result in levo_rules:
        data.loc[organism_rows & lab_rows, imp_col] = result

    data.loc[genus('Actinomyces'), imp_col] = 'RESISTANT'

    # Candida
    data.loc[genus('Candida'), imp_col] = 'RESISTANT'


    # Final imputation: if the lab tests for the sample, this should take precedence
    measured = data['LEVOFLOXACIN' + suffix]
    for result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[measured.eq(result), imp_col] = result

In [20]:
# Imputation rules for moxifloxacin

drug = 'Moxifloxacin'
for suffix in ['_PGNO', '_FINAL']:

    if   suffix == '_PGNO' : continue
    elif suffix == '_FINAL': organism = 'FINAL'

    # Imputation of various organisms
    
    # Gram negatives
    data.loc[(data[organism] == 'Escherichia coli') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism] == 'Escherichia coli') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'  
    
    data.loc[(data[organism].str.startswith('Klebsiella')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Klebsiella')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'    

    data.loc[(data[organism] == 'Proteus mirabilis') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism] == 'Proteus mirabilis') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'       
    data.loc[(data[organism] == 'Proteus vulgaris') & 
             (data[['CIPROFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'
    
    data.loc[(data[organism].str.startswith('Citrobacter')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Citrobacter')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'  
        
    data.loc[(data[organism].str.startswith('Morganella')) & 
             (data[['CIPROFLOXACIN'+suffix,'MOXIFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Morganella')) & 
             (data[['CIPROFLOXACIN'+suffix,'MOXIFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'     
    
    data.loc[(data[organism].str.startswith('Enterobacter')) & 
             (data[['CIPROFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Enterobacter')) & 
             (data[['CIPROFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'
    data.loc[(data[organism] == 'Enterobacter aerogenes') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism] == 'Enterobacter aerogenes') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'     
    
    data.loc[(data[organism].str.startswith('Morganella')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Morganella')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'  
    
    data.loc[(data[organism].str.startswith('Serratia')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Serratia')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'  
    
    data.loc[(data[organism].str.startswith('Yersinia')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Yersinia')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'      
    
    data.loc[data[organism] == 'Pseudomonas aeruginosa',drug+suffix+'_imp'] = 'RESISTANT'  
    
    data.loc[(data[organism] == 'Haemophilus influenzae') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    
    # Gram positives
    data.loc[(data[organism].str.startswith('Enterococcus')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.startswith('Enterococcus')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT' 
    
    data.loc[(data[organism].str.startswith('Staphylococcus')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'    
    data.loc[(data[organism].str.startswith('Staphylococcus')) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'     
    data.loc[(data[organism] == 'Staphylococcus aureus') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism] == 'Staphylococcus aureus') & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT' 
    data.loc[(data[organism] == 'Staphylococcus lugdunensis') & 
             (data[['LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'    
    
    data.loc[(data[organism].str.startswith('Streptococcus')) & 
             (data[['LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT' 
    data.loc[(data[organism].str.startswith('Streptococcus')) & 
             (data[['LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'     
    data.loc[(data[organism].str.contains('viridans',case=False,regex=False)) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'SUSCEPTIBLE').any(axis=1),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data[organism].str.contains('viridans',case=False,regex=False)) & 
             (data[['CIPROFLOXACIN'+suffix,'LEVOFLOXACIN'+suffix]] == 'RESISTANT').any(axis=1),
             drug+suffix+'_imp'] = 'RESISTANT'
    
    data.loc[data[organism].str.startswith('Actinomyces'), drug+suffix+'_imp'] = 'RESISTANT' 
    
    # Candida
    data.loc[(data[organism].str.startswith('Candida')),drug+suffix+'_imp'] = 'RESISTANT'
    

    # Final imputation: if the lab tests for the sample, this should take precedence
    data.loc[(data['MOXIFLOXACIN'+suffix] == 'SUSCEPTIBLE'),
             drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    data.loc[(data['MOXIFLOXACIN'+suffix] == 'INTERMEDIATE'),
             drug+suffix+'_imp'] = 'INTERMEDIATE'
    data.loc[(data['MOXIFLOXACIN'+suffix] == 'RESISTANT'),
             drug+suffix+'_imp'] = 'RESISTANT'

In [21]:
# Imputation rules for clindamycin
#
# Fills the column drug+suffix+'_imp' (here 'Clindamycin_FINAL_imp') from the
# organism named in the FINAL column. A lab result in 'CLINDAMYCIN_FINAL',
# when that column exists, overrides the organism-based imputation.

drug = 'Clindamycin'
for suffix in ['_PGNO', '_FINAL']:

    # The _PGNO pass is skipped, so only the FINAL organism is imputed in this cell
    if   suffix == '_PGNO' : continue
    elif suffix == '_FINAL': organism = 'FINAL'

    imputed_col = drug + suffix + '_imp'

    # Organisms matched on their exact name
    for species in ['Escherichia coli',
                    'Listeria monocytogenes',
                    'Stenotrophomonas maltophilia',
                    'Serratia marcescens']:
        data.loc[data[organism] == species, imputed_col] = 'RESISTANT'

    # Organisms matched on the start of their name.
    # 'Citrobacter freundii' is matched as a prefix (not with ==) so that
    # names such as 'Citrobacter freundii complex' are covered as well.
    for prefix in ['Citrobacter freundii',
                   'Klebsiella',
                   'Pseudomonas',
                   'Proteus',
                   'Enterobacter',
                   'Morganella',
                   'Acinetobacter',
                   'Enterococcus',
                   'Candida']:
        data.loc[data[organism].str.startswith(prefix), imputed_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # The lab column may be absent from the sheet; in that case nothing is overridden.
    lab_col = 'CLINDAMYCIN' + suffix
    if lab_col in data.columns:
        for lab_result in ['SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT']:
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [22]:
# Imputation rules for doxycycline
#
# Fills 'Doxycycline_FINAL_imp' from the organism in the FINAL column, then lets
# a lab result in 'DOXYCYCLINE_FINAL' (if that column exists) take precedence.

drug = 'Doxycycline'
for suffix in ['_PGNO', '_FINAL']:

    # Nothing is imputed for the _PGNO pass
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'
    imputed_col = drug + suffix + '_imp'

    # Genera imputed as resistant, matched on the start of the organism name
    for genus in ('Proteus', 'Pseudomonas', 'Candida'):
        is_genus = data[organism].str.startswith(genus)
        data.loc[is_genus, imputed_col] = 'RESISTANT'

    # Single species imputed as resistant, matched on the exact name
    data.loc[data[organism] == 'Bacteroides fragilis', imputed_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # A missing lab column simply means there is nothing to override.
    lab_col = 'DOXYCYCLINE' + suffix
    if lab_col in data.columns:
        for lab_result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [23]:
# Imputation rules for erythromycin
#
# Fills 'Erythromycin_FINAL_imp' from the organism in the FINAL column, then lets
# a lab result in 'ERYTHROMYCIN_FINAL' (if that column exists) take precedence.
drug = 'Erythromycin'
for suffix in ['_PGNO', '_FINAL']:

    # The _PGNO pass is skipped, so only the FINAL organism is imputed in this cell
    if   suffix == '_PGNO' : continue
    elif suffix == '_FINAL': organism = 'FINAL'

    imputed_col = drug + suffix + '_imp'

    # Organisms matched on the start of their name
    for prefix in ['Klebsiella',
                   'Pseudomonas',
                   'Enterobacter',
                   'Citrobacter',
                   'Acinetobacter',
                   'Candida']:
        data.loc[data[organism].str.startswith(prefix), imputed_col] = 'RESISTANT'

    # Organisms matched on their exact name.
    # 'Escherichia coli' was previously misspelled as 'Eschericia coli', which
    # can never equal the spelling used in the data and in the other cells.
    for species in ['Bacteroides fragilis',
                    'Escherichia coli',
                    'Proteus vulgaris',
                    'Serratia marcescens',
                    'Stenotrophomonas maltophilia']:
        data.loc[data[organism] == species, imputed_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # The lab column may be absent from the sheet; in that case nothing is overridden.
    lab_col = 'ERYTHROMYCIN' + suffix
    if lab_col in data.columns:
        for lab_result in ['SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT']:
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [24]:
# Imputation rules for amikacin
#
# Fills 'Amikacin_FINAL_imp' from the organism in the FINAL column, then lets
# a lab result in 'AMIKACIN_FINAL' (if that column exists) take precedence.

drug = 'Amikacin'
for suffix in ['_PGNO', '_FINAL']:

    # Nothing is imputed for the _PGNO pass
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'
    imputed_col = drug + suffix + '_imp'
    organism_names = data[organism]

    # Collect one boolean mask per rule; every matched row is imputed as resistant
    resistant_masks = [
        organism_names.str.startswith(genus)
        for genus in ('Salmonella', 'Bacteroides', 'Enterococcus', 'Staphylococcus',
                      'Clostridium', 'Actinomyces', 'Candida')
    ]
    # Streptococcus is matched anywhere in the name, ignoring case
    resistant_masks.append(
        organism_names.str.contains('Streptococcus', case=False, regex=False)
    )
    resistant_masks.extend(
        organism_names == species
        for species in ('Listeria monocytogenes', 'Stenotrophomonas maltophilia')
    )
    for mask in resistant_masks:
        data.loc[mask, imputed_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # A missing lab column simply means there is nothing to override.
    lab_col = 'AMIKACIN' + suffix
    if lab_col in data.columns:
        for lab_result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [25]:
# Imputation rules for gentamicin
#
# Fills 'Gentamicin_FINAL_imp' from the organism in the FINAL column, then lets
# a lab result in 'GENTAMICIN_FINAL' (if that column exists) take precedence.

drug = 'Gentamicin'
for suffix in ['_PGNO', '_FINAL']:

    # Nothing is imputed for the _PGNO pass
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'
    imputed_col = drug + suffix + '_imp'

    # Genera imputed as resistant, matched on the start of the organism name
    resistant_genera = ('Salmonella', 'Bacteroides', 'Enterococcus', 'Streptococcus',
                        'Staphylococcus', 'Clostridium', 'Actinomyces', 'Candida')
    for genus in resistant_genera:
        data.loc[data[organism].str.startswith(genus), imputed_col] = 'RESISTANT'

    # Species imputed as resistant, matched on the exact organism name
    resistant_species = ('Listeria monocytogenes', 'Stenotrophomonas maltophilia')
    for species in resistant_species:
        data.loc[data[organism] == species, imputed_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # A missing lab column simply means there is nothing to override.
    lab_col = 'GENTAMICIN' + suffix
    if lab_col in data.columns:
        for lab_result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [26]:
# Imputation rules for tobramycin
#
# Fills 'Tobramycin_FINAL_imp' from the organism in the FINAL column, then lets
# a lab result in 'TOBRAMYCIN_FINAL' (if that column exists) take precedence.

drug = 'Tobramycin'
for suffix in ['_PGNO', '_FINAL']:

    # Nothing is imputed for the _PGNO pass
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'
    imputed_col = drug + suffix + '_imp'

    # Each rule is (how to match, text to match); every match is imputed as resistant
    resistant_rules = [
        ('prefix', 'Salmonella'),
        ('prefix', 'Bacteroides'),
        ('prefix', 'Enterococcus'),
        ('prefix', 'Streptococcus'),
        ('prefix', 'Staphylococcus'),
        ('prefix', 'Clostridium'),
        ('prefix', 'Actinomyces'),
        ('prefix', 'Candida'),
        ('exact',  'Listeria monocytogenes'),
        ('exact',  'Stenotrophomonas maltophilia'),
    ]
    for match_kind, organism_text in resistant_rules:
        if match_kind == 'prefix':
            rule_mask = data[organism].str.startswith(organism_text)
        else:
            rule_mask = data[organism] == organism_text
        data.loc[rule_mask, imputed_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # A missing lab column simply means there is nothing to override.
    lab_col = 'TOBRAMYCIN' + suffix
    if lab_col in data.columns:
        for lab_result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [27]:
# Imputation rules for daptomycin
#
# Fills 'Daptomycin_FINAL_imp' from the organism in the FINAL column, then lets
# a lab result in 'DAPTOMYCIN_FINAL' (if that column exists) take precedence.

drug = 'Daptomycin'
for suffix in ['_PGNO', '_FINAL']:

    # The _PGNO pass is skipped, so only the FINAL organism is imputed in this cell
    if   suffix == '_PGNO' : continue
    elif suffix == '_FINAL': organism = 'FINAL'

    imputed_col = drug + suffix + '_imp'

    # Organisms imputed as resistant, matched on their exact name
    for species in ['Escherichia coli',
                    'Listeria monocytogenes',
                    'Stenotrophomonas maltophilia']:
        data.loc[data[organism] == species, imputed_col] = 'RESISTANT'

    # Organisms imputed as resistant, matched on the start of their name.
    # Enterococcus is intentionally not listed: it used to be set to RESISTANT
    # here and then immediately overwritten by the susceptible rule below.
    for prefix in ['Klebsiella',
                   'Pseudomonas',
                   'Proteus',
                   'Enterobacter',
                   'Morganella',
                   'Citrobacter',
                   'Serratia',
                   'Moraxella',
                   'Providencia',
                   'Acinetobacter',
                   'Bacteroides',
                   'Campylobacter',
                   'Shigella',
                   'Candida']:
        data.loc[data[organism].str.startswith(prefix), imputed_col] = 'RESISTANT'

    # Salmonella is matched anywhere in the name, ignoring case
    data.loc[data[organism].str.contains('Salmonella',case=False,regex=False),
             imputed_col] = 'RESISTANT'

    # Organisms imputed as susceptible. The label is 'SUSCEPTIBLE' (previously
    # 'SENSITIVE') so it agrees with the lab values and the other imputation cells.
    for prefix in ['Enterococcus',
                   'Staphylococcus',
                   'Streptococcus',
                   'Corynebacterium']:
        data.loc[data[organism].str.startswith(prefix), imputed_col] = 'SUSCEPTIBLE'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # The lab column may be absent from the sheet; in that case nothing is overridden.
    lab_col = 'DAPTOMYCIN' + suffix
    if lab_col in data.columns:
        for lab_result in ['SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT']:
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [28]:
# Imputation rules for linezolid
#
# Fills 'Linezolid_FINAL_imp' from the organism in the FINAL column, then lets
# a lab result in 'LINEZOLID_FINAL' (if that column exists) take precedence.

drug = 'Linezolid'
for suffix in ['_PGNO', '_FINAL']:

    # The _PGNO pass is skipped, so only the FINAL organism is imputed in this cell
    if   suffix == '_PGNO' : continue
    elif suffix == '_FINAL': organism = 'FINAL'

    imputed_col = drug + suffix + '_imp'

    # Organisms imputed as resistant, matched on their exact name.
    # 'Haemophilus influenzae' is the spelling used by the moxifloxacin rules;
    # the older 'Hemophilus influenzae' spelling is kept so no row loses its match.
    for species in ['Escherichia coli',
                    'Stenotrophomonas maltophilia',
                    'Haemophilus influenzae',
                    'Hemophilus influenzae']:
        data.loc[data[organism] == species, imputed_col] = 'RESISTANT'

    # Organisms imputed as resistant, matched on the start of their name
    # (the duplicated Pseudomonas rule has been dropped)
    for prefix in ['Klebsiella',
                   'Pseudomonas',
                   'Proteus',
                   'Enterobacter',
                   'Morganella',
                   'Moraxella',
                   'Citrobacter',
                   'Serratia',
                   'Acinetobacter',
                   'Providencia',
                   'Salmonella',
                   'Campylobacter',
                   'Shigella',
                   'Bacteroides',
                   'Candida']:
        data.loc[data[organism].str.startswith(prefix), imputed_col] = 'RESISTANT'

    # Organisms imputed as susceptible, matched on the start of their name
    for prefix in ['Enterococcus',
                   'Streptococcus',
                   'Staphylococcus',
                   'Corynebacterium']:
        data.loc[data[organism].str.startswith(prefix), imputed_col] = 'SUSCEPTIBLE'

    # Final imputation: if the lab tests for the sample, this should take precedence.
    # The lab column may be absent from the sheet; in that case nothing is overridden.
    lab_col = 'LINEZOLID' + suffix
    if lab_col in data.columns:
        for lab_result in ['SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT']:
            data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [29]:
# Imputation rules for vancomycin (currently disabled: every line of this cell is commented out)

# drug = 'Vancomycin'
# for suffix in ['_PGPO', '_PGNO', '_FINAL']:

#     if   suffix == '_PGNO' : continue # vancomycin does not target gram negatives
#     elif suffix == '_PGPO' : organism = 'PriorGPOrg'
#     elif suffix == '_FINAL': organism = 'FINAL'

#     #Imputation of various organisms
#     data.loc[(data[organism] == 'Escherichia coli'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[(data[organism] == 'Stenotrophomonas maltophilia'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[(data[organism] == 'Hemophilus influenzae'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Klebsiella'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Pseudomonas'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Proteus'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Enterobacter'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Neisseria'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Morganella'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Moraxella'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Citrobacter'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Serratia'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Acinetobacter'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Providencia'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Pseudomonas'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Salmonella'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Campylobacter'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Clostridium'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Shigella'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Bacteroides'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Lactobacillus'),drug+suffix+'_imp'] = 'RESISTANT'
#     data.loc[data[organism].str.startswith('Candida'),drug+suffix+'_imp'] = 'RESISTANT'
    
#     data.loc[data[organism].str.startswith('Enterococcus'),drug+suffix+'_imp'] = 'SUSCEPTIBLE'
#     data.loc[data[organism].str.startswith('Streptococcus'),drug+suffix+'_imp'] = 'SUSCEPTIBLE'
#     data.loc[data[organism].str.startswith('Staphylococcus'),drug+suffix+'_imp'] = 'SUSCEPTIBLE'
#     data.loc[data[organism].str.startswith('Corynebacterium'),drug+suffix+'_imp'] = 'SUSCEPTIBLE'
#     data.loc[data[organism].str.startswith('Granulicatella'),drug+suffix+'_imp'] = 'SUSCEPTIBLE'
    

#     # Final imputation: if the lab tests for the sample, this should take precedence
#     data.loc[(data['VANCOMYCIN'+suffix] == 'SUSCEPTIBLE'),
#              drug+suffix+'_imp'] = 'SUSCEPTIBLE'
#     data.loc[(data['VANCOMYCIN'+suffix] == 'INTERMEDIATE'),
#              drug+suffix+'_imp'] = 'INTERMEDIATE'
#     data.loc[(data['VANCOMYCIN'+suffix] == 'RESISTANT'),
#              drug+suffix+'_imp'] = 'RESISTANT'

In [30]:
# Imputation rules for trimethoprim-sulfamethoxazole
#
# Fills 'TMPSMX_FINAL_imp' from the organism in the FINAL column, then lets the
# lab result in 'TRIMETHOPRIM/SULFA_FINAL' take precedence. That lab column is
# read unconditionally, so it must be present in the sheet.

drug = 'TMPSMX'
for suffix in ['_PGNO', '_FINAL']:

    # Nothing is imputed for the _PGNO pass
    if suffix != '_FINAL':
        continue
    organism = 'FINAL'
    imputed_col = drug + suffix + '_imp'

    # Species imputed as resistant, matched on the exact organism name
    for species in ('Pseudomonas aeruginosa', 'Bacteroides fragilis'):
        data.loc[data[organism] == species, imputed_col] = 'RESISTANT'

    # Genera imputed as resistant, matched on the start of the organism name
    for genus in ('Enterococcus', 'Actinomyces', 'Clostridium', 'Candida'):
        data.loc[data[organism].str.startswith(genus), imputed_col] = 'RESISTANT'

    # Final imputation: if the lab tests for the sample, this should take precedence
    lab_col = 'TRIMETHOPRIM/SULFA' + suffix
    for lab_result in ('SUSCEPTIBLE', 'INTERMEDIATE', 'RESISTANT'):
        data.loc[data[lab_col] == lab_result, imputed_col] = lab_result

In [31]:
# If there is no prior gram-negative organism (PriorGNOrg reads '0.0' once cast to
# string), then the prior gram-negative (_PGNO) imputations are not applicable.
# PriorGNOrg itself and every listed '<drug>_PGNO_imp' column are set to
# 'not_applicable' on those rows.
data['PriorGNOrg'] = data['PriorGNOrg'].astype('string')

# Drug prefixes in the order their '_PGNO_imp' columns are addressed
pgno_drugs = ['Penicillin', 'Ampicillin', 'Cloxacillin',
              'Piptaz', 'Amoxiclav',
              'Cefazolin', 'Ceftriaxone', 'Ceftazidime',
              'Ertapenem', 'Meropenem',
              'Ciprofloxacin', 'Levofloxacin', 'Moxifloxacin',
              'Daptomycin', 'Vancomycin', 'Linezolid',
              'Amikacin', 'Tobramycin', 'Gentamicin',
              'Clindamycin', 'Doxycycline',
              'Erythromycin', 'TMPSMX']
not_applicable_cols = ['PriorGNOrg'] + [name + '_PGNO_imp' for name in pgno_drugs]

no_prior_gn = data['PriorGNOrg'] == '0.0'
data.loc[no_prior_gn, not_applicable_cols] = 'not_applicable'

# Processing and output

In [32]:
# Write the imputed table to an Excel workbook: no index column, top row frozen
output_path = '2024-05-20 AMR_imputation_TOH.xlsx'
data.to_excel(output_path, index=False, freeze_panes=(1,0))
