In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from datetime import datetime, date

In [None]:
def load(file_name):
    """Read the CSV at ``file_name`` into a DataFrame.

    Parameters
    ----------
    file_name : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, with pandas' default parsing options.
    """
    return pd.read_csv(file_name)

In [None]:
def remove(df):
    """Standardise the leading column names and drop non-pathogen rows.

    The first six columns are renamed *positionally* to ``sex``, ``dob``,
    ``smptype``, ``Month``, ``HOSNAME`` and ``organism`` (the spellings the
    other cells of this notebook index by); any remaining columns keep their
    names. Rows whose ``organism`` is one of the excluded labels below are
    dropped, and every remaining missing value is replaced by ``'-'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with at least six columns in the order listed above.

    Returns
    -------
    pandas.DataFrame
        A new, filtered frame. The input frame is not modified.
    """
    key_columns = ('sex', 'dob', 'smptype', 'Month', 'HOSNAME', 'organism')
    # Labels are truncated exactly as they appear in the export.
    excluded_organisms = [
        'Normal Pharyngeal Flora S',
        'Coagulase-Negative Staphy',
        'Bacillus sp. Suggestive o',
        'Polymicrobial Growth Sugg',
        'No Organism',
        'No Pathogen',
    ]

    # Shallow copy: relabel without renaming the caller's columns.
    df = df.copy(deep=False)
    df.columns = key_columns + tuple(df.columns[6:])

    df = df[~df['organism'].isin(excluded_organisms)]
    return df.fillna('-')

In [None]:
def clean(df):
    """Add the derived ``Age`` and ``smpno`` columns to ``df``.

    ``Age`` is the number of completed years between each ``dob`` value
    (a ``"%d-%m-%Y"`` string) and today's date. Values that cannot be parsed
    in that format (for example a ``'-'`` placeholder or a non-string) yield
    NaN instead of aborting the run, so ``Age`` is a float column whenever at
    least one date of birth is unusable.

    ``smpno`` is the integer code that ``LabelEncoder`` assigns to each
    ``smptype`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``dob`` and ``smptype`` columns.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with the two columns added in place.
    """
    # Evaluate once so every row is measured against the same reference date.
    today = date.today()

    def age(dob):
        try:
            born = datetime.strptime(dob, "%d-%m-%Y").date()
        except (TypeError, ValueError):
            # Missing or malformed date of birth: age is unknown.
            return float('nan')
        # Subtract one year if this year's birthday has not happened yet.
        birthday_pending = (today.month, today.day) < (born.month, born.day)
        return today.year - born.year - birthday_pending

    df['Age'] = df['dob'].apply(age)
    df['smpno'] = LabelEncoder().fit_transform(df['smptype'])
    return df

In [None]:
def aggregate(df,org,month):
    """Summarise susceptibility results for one organism in one month.

    Rows of ``df`` are restricted to ``organism == org`` and
    ``Month == month``. For every column from positional index 7 onward, the
    occurrences of ``'R'``, ``'I'`` and ``'S'`` are counted and expressed as
    percentages of their combined total; any other value (such as the ``'-'``
    placeholder) is ignored. Columns with no ``R``/``I``/``S`` value in the
    selection are left out of the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``organism`` and ``Month`` columns.
    org : str
        Organism label to select.
    month : str
        Month label to select.

    Returns
    -------
    pandas.DataFrame
        Three rows in the order R, I, S. The columns are ``organism``,
        ``Sensitivity`` and one percentage column per reported input column.
        When nothing matches, only the first two columns are present.
    """
    levels = ['R', 'I', 'S']
    selection = df[(df['organism'] == org) & (df['Month'] == month)]

    table = {'organism': [org] * len(levels), 'Sensitivity': levels}
    # NOTE(review): the scan starts at index 7, so the column at index 6 is
    # never summarised - confirm that this matches the export layout.
    for column in selection.columns[7:]:
        counts = selection[column].value_counts()
        tested = {level: int(counts.get(level, 0)) for level in levels}
        total = sum(tested.values())
        if total == 0:
            # No susceptibility result recorded in this column.
            continue
        table[column] = [tested[level] / total * 100 for level in levels]

    # Build the frame once instead of inserting columns one at a time.
    return pd.DataFrame(table)

In [None]:
def info(df):
    """Show the age, gender and organism distributions of ``df``.

    Three figures are displayed in turn: a histogram (with KDE) of ``Age``,
    a bar chart of ``sex`` counts and a bar chart of ``organism`` counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Age``, ``sex`` and ``organism`` columns.
    """
    # One bin per distinct age; at least one bin so the call stays valid.
    sns.histplot(df['Age'], kde=True, bins=max(df['Age'].nunique(), 1))
    plt.xlim(left=0)
    plt.title('Age Distribution')
    plt.show()

    sex_counts = df['sex'].value_counts()
    # x/y must be keywords: seaborn 0.12+ no longer accepts them positionally.
    sns.barplot(x=sex_counts.index, y=sex_counts)
    plt.title('Gender Distribution')
    plt.show()

    plt.figure(figsize=(15,8))
    organism_counts = df['organism'].value_counts()
    ax = sns.barplot(x=organism_counts.index, y=organism_counts)
    plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
    plt.title('Organism Distribution')
    # Lay out after the title and rotated labels exist so neither is clipped.
    plt.tight_layout()
    plt.show()

In [None]:
def sample(df, i):
    """Show a pie chart of specimen types for organism ``i``.

    Wedge sizes and wedge labels are both taken from the same
    ``value_counts`` of ``smptype``, so each label is guaranteed to sit on
    its own wedge. Percentages of 1% or less are not printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``organism`` and ``smptype`` columns.
    i : str
        Organism label to select.
    """
    type_counts = df.loc[df['organism'] == i, 'smptype'].value_counts()
    plt.pie(
        type_counts,
        labels=type_counts.index,
        autopct=lambda p: "{:.0f}%".format(p) if p > 1 else "",
        textprops={'fontsize': 6},
    )
    plt.title('Sample Distribution for '+i)
    plt.show()

In [None]:
def ResistanceHeatmap(df,i,month):
    """Show a heatmap of R/I/S percentages for organism ``i`` in ``month``.

    The summary from ``aggregate`` is rounded to whole numbers and drawn as a
    three-row heatmap. The first heatmap column is the ``Sensitivity`` label:
    it is annotated with ``R``/``I``/``S`` and coloured with the small codes
    1/2/3. The remaining columns are the percentage columns.

    If ``aggregate`` yields no percentage column, a message is printed and no
    figure is drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by ``aggregate``.
    i : str
        Organism label.
    month : str
        Month label.
    """
    summary = aggregate(df, i, month)
    if summary is None or summary.shape[1] <= 2:
        print(f'No susceptibility data for {i} in {month}')
        return

    # Drop the leading 'organism' column; everything else is plotted.
    cells = summary.round(0).iloc[:, 1:]
    # Annotations keep the textual R/I/S labels next to the rounded numbers.
    annotations = cells.to_numpy()
    # The heatmap itself needs purely numeric data.
    values = cells.assign(
        Sensitivity=cells['Sensitivity'].map({'R': 1, 'I': 2, 'S': 3})
    ).astype(float)

    plt.figure(figsize=(15,1))
    sns.heatmap(values, annot=annotations, fmt='', cmap='Reds', yticklabels=False)
    plt.title(f'Resistance Heatmap of {i} in month of {month}')
    plt.show()

In [None]:
# Path of the CSV export to analyse; must be filled in before running.
file_name = ''
df = load(file_name)
df = remove(df)
df = clean(df)
info(df)

# Organism labels are spelled (and truncated) exactly as in the export.
organisms = ['Pseudomonas aeruginosa','Acinetobacter baumannii','Klebsiella pneumoniae ss.',
            'Enterococcus faecalis','Staphylococcus aureus','Escherichia coli']
for i in organisms:
    for month in ('MAY', 'JUNE'):
        ResistanceHeatmap(df, i, month)