In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from datetime import datetime, date
from matplotlib.lines import Line2D

In [None]:
def load(file_name):
    """Read the CSV at ``file_name`` and return it as a DataFrame.

    Parameters
    ----------
    file_name : path or file-like object accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file, read with pandas' default options.
    """
    return pd.read_csv(file_name)

In [None]:
def remove(df):
    """Trim the frame to the analysed columns and drop unwanted isolates.

    Steps, in order:

    1. Keep ``sex``, ``dob``, ``smptype`` followed by every column from
       position 16 onwards (``organism`` must be among those).
    2. Drop rows whose ``organism`` contains "Candida" or
       "Aspergillus terreus"; rows with a missing organism are kept.
    3. Drop rows whose ``organism`` is exactly 'No Organism', 'No Pathogen'
       or '-'.
    4. Drop every column whose name matches the regex 'Unnamed'.

    Returns a new DataFrame; the original row index labels are preserved.
    """
    kept_columns = ['sex', 'dob', 'smptype'] + list(df.columns[16:])
    df = df[kept_columns]

    # na=False: a missing organism counts as "does not match", so it survives.
    is_excluded_fungus = df['organism'].str.contains(r"Candida|Aspergillus terreus", na=False)
    df = df.loc[(~is_excluded_fungus).to_numpy()]

    is_placeholder = df['organism'].isin(['No Organism','No Pathogen','-'])
    df = df[~is_placeholder]

    unnamed_columns = list(df.filter(regex='Unnamed'))
    return df[df.columns.drop(unnamed_columns)]

In [None]:
def clean(df):
    """Add the derived ``Age``, ``Month`` and ``smpno`` columns and fill gaps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``dob`` and ``admdat`` (strings in ``dd-mm-YYYY`` form),
        ``smpcoldat`` (date-like objects or ``dd-mm-YYYY`` strings) and
        ``smptype``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (the input frame is not modified) with

        * ``Age``   - completed years between ``dob`` and ``admdat``;
        * ``Month`` - ``smpcoldat`` formatted as ``"%B-%Y"``; an unparseable
          value inherits the previous row's month, or is left missing when
          there is no previous row;
        * ``smpno`` - ``LabelEncoder`` integer code of ``smptype``;

        and every remaining missing value replaced by ``'-'``.

    Raises
    ------
    ValueError, TypeError
        From ``datetime.strptime`` when ``dob`` or ``admdat`` is missing or
        not in ``dd-mm-YYYY`` form.
    """
    def age(dob,admdat):
        dob = datetime.strptime(dob, "%d-%m-%Y").date()
        adm = datetime.strptime(admdat, "%d-%m-%Y").date()
        # Subtract one year when the birthday has not yet occurred in the
        # admission year (tuple comparison yields True == 1).
        return adm.year - dob.year - ((adm.month, adm.day) < (dob.month, dob.day))

    def get_month(df):
        months = []
        for value in df['smpcoldat']:
            try:
                # Already a date/datetime-like object.
                months.append(value.strftime("%B-%Y"))
                continue
            except Exception:
                pass
            try:
                months.append(datetime.strptime(value, '%d-%m-%Y').strftime("%B-%Y"))
            except Exception:
                # Carry the previous month forward; with no previous row there
                # is nothing to carry, so leave it missing (filled with '-'
                # below) instead of failing on months[-1].
                months.append(months[-1] if months else np.nan)
        return months

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    df['Age'] = df.apply(lambda row: age(row['dob'], row['admdat']), axis=1)
    df['Month'] = get_month(df)
    en = LabelEncoder()
    df['smpno'] = en.fit_transform(df['smptype'])
    df = df.fillna('-')
    return df

In [None]:
def aggregate(df,month,monthwise = False):
    """Summarise R/I/S percentages per antibiotic column.

    Parameters
    ----------
    df : pandas.DataFrame
        Isolates of one organism group. Antibiotic columns are taken by
        position: everything between the first four and the last three
        columns (assumes the layout produced by ``clean``/``remove``, i.e.
        four leading metadata columns and ``Age``/``Month``/``smpno`` at the
        end - TODO confirm for other inputs).
    month : str
        Month label compared against ``df['Month']``; only used when
        ``monthwise`` is True.
    monthwise : bool, default False
        When True, only rows of ``month`` are summarised.

    Returns
    -------
    pandas.DataFrame
        Three rows (``Sensitivity`` = 'R', 'I', 'S'). ``organism`` repeats
        the first organism name in the data. Each remaining column is one
        antibiotic with at least one non-missing result, holding the
        percentage of R / I / S among results that are R, I or S (0 when the
        column has none of them).

    Raises
    ------
    IndexError
        When no rows are left to summarise.
    """
    d = df
    # Strict comparison on purpose: only a real boolean True enables the
    # filter, so a truthy non-boolean in this slot keeps the unfiltered path.
    if monthwise == True:  # noqa: E712
        d = d[d['Month'] == month]
    d = d.replace('-', np.nan)

    organism = d['organism'].to_list()[0]

    # Slice the antibiotic block first, then drop untested antibiotics, so an
    # all-missing metadata column cannot shift the positional slice.
    antibiotics = d.iloc[:, 4:-3].dropna(axis=1, how='all')

    summary = {'organism': [organism] * 3, 'Sensitivity': ['R', 'I', 'S']}
    for name in antibiotics.columns:
        results = antibiotics[name]
        counts = [int((results == label).sum()) for label in ('R', 'I', 'S')]
        total = sum(counts)
        summary[name] = [count / total * 100 if total else 0 for count in counts]

    return pd.DataFrame(summary)

In [None]:
def info(df):
    """Plot, save and show three overview figures for ``df``.

    Figures (each written as a 1200-dpi JPEG in the working directory):

    * ``Age Distribution.jpg`` - histogram with KDE of ``df['Age']``;
    * ``Gender Distribution.jpg`` - pie chart of ``df['sex']`` counts, with
      'M'/'F' shown as "Male"/"Female";
    * ``Organism Distribution_2 breaks.jpg`` - horizontal bar chart of
      ``df['organism']`` counts drawn on three side-by-side panels that share
      the y axis, giving an x axis with two breaks.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain numeric ``Age`` plus ``sex`` and ``organism`` columns.

    Returns
    -------
    None
    """
    # ---- Age histogram -------------------------------------------------
    ages = df['Age']
    oldest = ages.max()
    sns.histplot(ages, kde=True, bins=ages.nunique())
    plt.xlim(left=0, right=oldest)
    plt.xticks(np.arange(0, oldest + 10, 10))
    plt.tight_layout()
    plt.savefig('Age Distribution.jpg', format = 'jpg', dpi=1200, bbox_inches='tight')
    plt.show()

    # ---- Sex pie chart -------------------------------------------------
    sex_counts = df['sex'].value_counts()
    _, label_texts, _ = plt.pie(
        sex_counts,
        labels=sex_counts.rename(index={'M':"Male", 'F':"Female"}).index,
        autopct="%0.1f%%", labeldistance=0.7, textprops={'fontsize': 12})
    # Loop instead of indexing [0] and [1] so any number of categories works.
    for label_text in label_texts:
        label_text.set_fontsize(12)
    plt.tight_layout()
    plt.savefig('Gender Distribution.jpg', format = 'jpg', dpi=1200, bbox_inches='tight')
    plt.show()

    # ---- Organism counts, x axis with two breaks -----------------------
    organism_counts = df['organism'].value_counts()
    highest = organism_counts.max()

    # Panel limits. NOTE(review): the fixed -100/-200 offsets only give
    # sensible (positive, increasing) ranges when the most frequent organism
    # has a large count; smaller datasets need different break points.
    axis_break1 = int(highest/2)-200   # left edge of the right panel
    axis_break2 = int(highest/4)-100   # left edge of the middle panel
    axis_break3 = int(highest/3)       # right edge of the middle panel
    axis_break4 = int(highest/4)-200   # right edge of the left panel

    f, (ax2,ax1, ax) = plt.subplots(1, 3, sharey=True, figsize=(10,20))

    # x/y are passed by keyword: seaborn >= 0.12 accepts only ``data``
    # positionally. The same bars are drawn on every panel; the x limits
    # decide which part each panel shows.
    sns.barplot(x=organism_counts, y=organism_counts.index, ax=ax, orient='h').set(xlabel=None)
    sns.barplot(x=organism_counts, y=organism_counts.index, ax=ax2, orient='h').set(ylabel=None, xlabel=None)
    sns.barplot(x=organism_counts, y=organism_counts.index, ax=ax1, orient='h').set(ylabel=None, xlabel=None)
    ax.set_xlim(axis_break1, highest+5)
    ax1.set_xlim(axis_break2, axis_break3)
    ax2.set_xlim(0, axis_break4)

    # Hide the spines and ticks between panels so they read as one axis.
    ax.spines['left'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax1.spines['left'].set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax.tick_params(right=False)
    ax.tick_params(left=False)
    ax1.tick_params(left=False)
    ax2.yaxis.tick_left()
    ax2.set_yticklabels(ax2.get_yticklabels(), rotation=40, ha="right", fontsize = 8)
    ax2.set_ylabel('Organisms', size=12)

    # Short vertical marks at the bottom and top of each panel edge that
    # borders a break.
    x_min = -0.75
    x_max = len(organism_counts.index)
    l = 0.2  # "break" line length
    kwargs = dict(color="k", clip_on=False, linewidth=1)
    for axis, position in ((ax, axis_break1), (ax1, axis_break2),
                           (ax1, axis_break3), (ax2, axis_break4)):
        axis.plot((position, position), (x_min - l, x_min + l), **kwargs)
        axis.plot((position, position), (x_max - l, x_max + l), **kwargs)

    plt.ylim(bottom=-1, top=df['organism'].nunique()+1)

    plt.tight_layout()
    plt.savefig('Organism Distribution_2 breaks.jpg', format = 'jpg', dpi=1200, bbox_inches='tight')

    plt.show()

In [None]:
def sample(df, i):
    """Plot, save and show the sample-type pie chart for organism ``i``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``organism`` and ``smptype`` columns.
    i : str
        Organism name; rows with ``df['organism'] == i`` are plotted.

    The figure is saved as ``Sample Distribution for {i}.jpg``. Slices above
    3% carry their percentage on the wedge; the legend lists every sample
    type with its percentage.

    Returns
    -------
    None
    """
    subset = df[df['organism']==i]
    type_counts = subset['smptype'].value_counts()

    palette = (sns.color_palette('Set2')+sns.color_palette('Accent')+sns.color_palette('Set1')
               +sns.color_palette('icefire_r')+sns.color_palette('twilight_shifted_r'))
    plt.pie(type_counts,
        # An empty string (not None) suppresses the label on small slices.
        autopct=lambda p: "{:.1f}%".format(p) if p > 3 else "",
        textprops={'fontsize': 6}, colors=palette)

    # Legend entries must follow the wedge order, i.e. the order of
    # ``type_counts`` (most frequent first). Taking the names from ``unique()``
    # would give order of appearance and attach names to the wrong slices.
    shares = type_counts / type_counts.sum() * 100
    labels = [f'{name}, {share:0.1f}%' for name, share in zip(type_counts.index, shares)]
    plt.legend(bbox_to_anchor=(0.75, 1), loc='upper left', labels=labels, fontsize="6", bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.savefig(f'Sample Distribution for {i}.jpg', format = 'jpg', dpi=1200, bbox_inches='tight')
    plt.show()

In [None]:
def ResistanceHeatmap(df,month,monthwise=False):
    """Plot, save and show a three-row R/I/S percentage heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Isolates of one organism group, in the layout ``aggregate`` expects.
    month : str
        Month label; used for filtering and in the file name only when
        ``monthwise`` is True.
    monthwise : bool, default False
        When True the summary is restricted to ``month`` and the figure is
        saved as ``Resistance Heatmap of {organism} in month of {month}.jpg``;
        otherwise as ``Resistance Heatmap of {organism}.jpg``.

    Returns
    -------
    None
    """
    # aggregate's signature is (df, month, monthwise): pass them in that order.
    df = aggregate(df, month, monthwise)
    df = df.round(1)
    # Cell annotations: the R/I/S letter in the first column, rounded
    # percentages in the rest.
    text = np.array([df.iloc[0,1:],df.iloc[1,1:],df.iloc[2,1:]])

    # Numeric frame for the colour scale. Only the 'Sensitivity' column holds
    # letters, so map just that column instead of a frame-wide replace that
    # relies on pandas downcasting the result to a numeric dtype.
    values = df.iloc[:,1:].copy()
    values['Sensitivity'] = values['Sensitivity'].map({'R': 1, 'I': 2, 'S': 3})

    plt.figure(figsize=(20,1))
    sns.heatmap(values,annot=text,fmt='',cmap='Reds',yticklabels=False, annot_kws={"fontsize":8})
    n = df['organism'].to_list()[0]
    if monthwise==True:
        plt.savefig(f'Resistance Heatmap of {n} in month of {month}.jpg', format = 'jpg', dpi=1200, bbox_inches='tight')
    else:
        plt.savefig(f'Resistance Heatmap of {n}.jpg', format = 'jpg', dpi=1200, bbox_inches='tight')
    plt.show()

In [None]:
def ResistanceProfileBar(d, month):
    """Plot, save and show grouped R/I/S percentage bars for one month.

    Parameters
    ----------
    d : pandas.DataFrame
        Isolates of one organism group. Antibiotic columns are taken by
        position (``iloc[:, 4:-3]``); assumes four leading metadata columns
        and three trailing derived columns - TODO confirm for other inputs.
    month : str
        Only rows with ``d['Month'] == month`` are used.

    For every antibiotic with at least one result, the percentage of 'R',
    'I' and 'S' among its non-missing results is drawn as three bars. Error
    bars are the half-width of the normal-approximation (Wald) 95% interval
    of each proportion, ``1.96 * sqrt(p * (1 - p) / n)``, expressed in
    percentage points. The figure is saved as
    ``Resistance Profile Bar for {organism} of {month} new.jpg``.

    Side effect: the percentage table is also published as the module-level
    name ``new_df``.

    Raises
    ------
    IndexError
        When ``d`` has no rows for ``month``.
    """
    # Kept so code that reads the module-level ``new_df`` afterwards still works.
    global new_df
    d = d.replace('-', np.nan)
    d = d[d['Month']==month]
    org = d["organism"].to_list()[0]
    d = d.iloc[:,4:-3]
    d = d.dropna(how='all', axis=1)

    z_95 = 1.96  # two-sided 95% normal quantile
    index = list(d.columns)
    # Non-missing results per antibiotic; >= 1 because all-missing columns
    # were dropped above.
    tested = {j: int(d[j].count()) for j in index}

    percentages = {}
    half_widths = {}
    for label in ('R', 'I', 'S'):
        proportions = [int((d[j] == label).sum()) / tested[j] for j in index]
        percentages[label] = [p * 100 for p in proportions]
        half_widths[label] = [z_95 * (p * (1 - p) / tested[j]) ** 0.5 * 100
                              for p, j in zip(proportions, index)]

    new_df = pd.DataFrame(percentages, columns=['R','I','S'], index=index)

    f, ax = plt.subplots(figsize=(20,8))
    r = np.arange(new_df.shape[0])
    width = 0.25
    error_style = dict(ecolor='pink', lw=2, capsize=5, capthick=2)

    b1 = plt.bar(r, new_df['R'], color = 'b',
            width = width, label='R', yerr=half_widths['R'], error_kw=dict(error_style))
    b2 = plt.bar(r + width, new_df['I'], color = 'g',
            width = width, label='I', yerr=half_widths['I'], error_kw=dict(error_style))
    b3 = plt.bar(r + width + width, new_df['S'], color = 'orange',
            width = width, label='S', yerr=half_widths['S'], error_kw=dict(error_style))

    plt.ylim(0,100)
    plt.xlim(-0.5,)
    plt.xlabel("Antibiotics")
    plt.ylabel("Percentage (%)")

    plt.xticks(r + width/2,new_df.index)

    # Tick labels carry the number of results behind each antibiotic.
    labels = [f"{j} (n={tested[j]})" for j in index]
    ax.set_xticklabels(labels, rotation=40, ha="right", fontsize = 15)

    # Proxy handle so the error bars get a legend entry.
    line = Line2D([0], [0], color='pink', lw=2, label='95% CI')
    plt.legend(title = f'N = {d.shape[0]}', handles = [b1, b2, b3, line], loc = 'upper right')
    plt.tight_layout()
    plt.savefig(f'Resistance Profile Bar for {org} of {month} new.jpg', format = 'jpg', dpi=1200, bbox_inches='tight')
    plt.show()

In [None]:
# Pipeline: read the export, derive Age/Month/smpno, trim to the analysed
# columns and isolates, then draw the overview figures.
file_name = 'AMR 22.6.22.csv'
df = remove(clean(load(file_name)))
info(df)

In [None]:
# Organisms that get a sample-type pie chart.
organisms = [
    'Pseudomonas aeruginosa',
    'Acinetobacter baumannii',
    'Klebsiella pneumoniae ss.',
    'Enterococcus faecium',
    'Staphylococcus aureus',
    'Escherichia coli',
]

for i in organisms:
    sample(df, i)

In [None]:
# Turn every '-' placeholder (including those written by clean() through
# fillna('-')) back into NaN for the per-organism analysis in the cells below.
df = df.replace('-', np.nan)

In [None]:
# One frame per organism group; a group may pool several name variants.
df_pseu = df.loc[df['organism'].isin(['Pseudomonas aeruginosa', 'Pseudomonas spp.'])]
df_ab = df.loc[df['organism'].isin(['Acinetobacter baumannii'])]
df_kp = df.loc[df['organism'].isin(['Klebsiella pneumoniae', 'Klebsiella pneumoniae ss.'])]
df_ef = df.loc[df['organism'].isin(['Enterococcus faecalis', 'Enterococcus faecium'])]
df_sa = df.loc[df['organism'].isin(['Staphylococcus aureus', 'Staphylococcus aureus ss.'])]
df_ec = df.loc[df['organism'].isin(['Escherichia coli'])]
df_sal = df.loc[df['organism'].isin(['Salmonella typhi', 'Salmonella paratyphi A'])]

In [None]:
 # Organism groups to plot, keyed by the name of the frame that holds them.
 organism_frames = {
     'df_pseu': df_pseu,
     'df_ab': df_ab,
     'df_kp': df_kp,
     'df_ef': df_ef,
     'df_sa': df_sa,
     'df_ec': df_ec,
     'df_sal': df_sal,
 }

 for i in df['Month'].unique():
     print(i)
     for frame_name, organism_df in organism_frames.items():
         # Best effort per group: a failure (for example no isolates of this
         # group in month i) must not stop the remaining groups, but it is
         # reported instead of being silently discarded.
         try:
             ResistanceHeatmap(organism_df, i)
             ResistanceProfileBar(organism_df, i)
         except Exception as exc:
             print(f'  skipped {frame_name} for {i}: {type(exc).__name__}: {exc}')
             # Drop any half-built figure so it does not leak into the next plot.
             plt.close('all')
    
