In [3]:
# Load from pkl files
import pandas as pd
import pickle


with open('./Input/8-mers/mic_dframe.pkl', 'rb') as file:
    mic_dframe = pickle.load(file)
with open('./Input/8-mers/suscep_classes.pkl', 'rb') as file:
    suscep_classes = pd.read_pickle(file)
    
print(mic_dframe)

        Genome ID  ciprofloxacin  trimethoprim/sulfamethoxazole  ceftriaxone  \
0       590.17802        0.01560                          0.125         0.25   
1       590.13932            NaN                            NaN          NaN   
2       54388.286        0.03120                            NaN          NaN   
3       590.14003        0.01560                          0.125         0.25   
4       590.13436            NaN                            NaN          NaN   
...           ...            ...                            ...          ...   
18213  28901.8277            NaN                            NaN          NaN   
18214   590.14758            NaN                            NaN          NaN   
18215   590.12655            NaN                            NaN          NaN   
18216   590.15152            NaN                            NaN          NaN   
18217   590.14487        0.00781                          0.125         0.25   

       gentamicin  ceftiofur  ampicilli

In [13]:
# Function that creates a dataframe with the number of genomes per class

import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt

def genomes_per_class(df, antibiotics):
    amounts = pd.DataFrame(data={})
    for antibiotic in antibiotics:
        single_amr = df[['Genome ID',antibiotic]].loc[pd.notnull(df[antibiotic])].drop_duplicates()
        if single_amr.shape[0]==0:
            mics = [4]
            amount = [0]
        else:
            # Unique mic values
            mics = df[antibiotic].loc[pd.notnull(df[antibiotic])].sort_values().unique()
            amount = [len(single_amr.loc[single_amr[antibiotic]==mic]) for mic in mics]
        amounts.append(pd.DataFrame(amount, index=mics).T)
    amounts_df = amounts[0].append(amounts[1:len(antibiotics)], sort=False, ignore_index=True)
    return amounts_df

def susceptibility_heatmap(data, annot, antibiotics, title):
    sns.set_theme(font_scale=.8, style='white')
    myColors = ((0.0, 0.8, 0.0, 1.0), (0.0, 0.8, 0.8, 1.0), (0.8, 0.0, 0.0, 1.0))
    cmap = LinearSegmentedColormap.from_list('Custom', myColors, len(myColors))
    # Abbreviate the antibiotic names to 5 letters
    antibiotic_abbre = {i:antibiotics[i][:5].upper() for i in range(len(antibiotics))}
    # Number of genomes by Antibiotic and MIC.
    data = data.rename(index=antibiotic_abbre)
    # Set the width and height of the figure
    plt.figure(figsize=(10,6))
    # Add title
    plt.title(title, fontsize=12)
    # Heatmap showing the amount of genomes with the same MIC for each MIC, by antibiotic
    hp = sns.heatmap(data=data, annot=annot, cmap=cmap, fmt='.4g')
    # Manually specify colorbar labelling after it's been generated
    colorbar = hp.collections[0].colorbar
    colorbar.set_ticks([0.333, 1, 1.667])
    colorbar.set_ticklabels(['Susceptible', 'Intermediate', 'Resistant'])
    colorbar.ax.tick_params(labelsize=12)
    # Add label for horizontal axes
    plt.xlabel('MIC (micrograms per milliliter)', fontsize=12)
    plt.ylabel('Antibiotic', fontsize=12)

In [14]:
# Antibiotics
antibiotics = mic_dframe.columns[-12:]

# Comupute the distribution of MIC measurements
mic_distribution = genomes_per_class(mic_dframe, antibiotics)

# Combine the MIC distribution and the susceptibility map
title = 'Number of Genomes per Antibiotic and MIC'
susceptibility_heatmap(data=suscep_classes, annot=mic_distribution, antibiotics=antibiotics, title=title)

AttributeError: 'DataFrame' object has no attribute 'append'