In [3]:
import gradio as gr
import pandas as pd
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from utils import set_label_encoding
from datetime import datetime
from tabulate import tabulate

In [4]:
# Antibiotic names, one per model output column, in the order the models emit
# them. The same names are used as keys of the result shown in the Gradio
# label component. (An earlier, different antibiotic list has been retired.)
target_labels = [
    "Piperacilline/Tazobactam",
    "Ceftriaxone",
    "Amoxicilline",
    "Meropenem",
    "Amoxicilline/Acide Clavulanique",
    "Cotrimoxazole",
    "Vancomycine",
    "Ciprofloxacine",
    "Azithromycine",
    "Gentamicine",
    "Levofloxacine",
    "Clindamycine",
    "Amikacine",
    "Oxacilline / Cefazoline",
    "Ceftazidime",
    "Cefepime",
    "Imipenem",
    "Fosfomycine",
    "Ertapenem",
    "Metronidazole",
    "Ofloxacine",
]

In [5]:
# Load the antibiogram dataset and normalise it in a single chain.
# (Identifier columns Hopital / IDLabo / IDPatient / visittype are kept;
# a previous version of this cell dropped them.)
df = (
    pd.read_excel('dataset/antibiogram_dataset.xlsx')
    # Strip zero-width spaces (U+200B) wherever they appear in string cells.
    .replace(to_replace='\u200b', value='', regex=True)
    # Expose the cleaned age column under the plain name "age".
    .rename(columns={"new_age": "age"})
)

# All later cells address columns by their lowercase names.
df.columns = df.columns.str.lower()

# Translate the Khmer sex labels to English; any value that is not a key of
# this mapping becomes NaN.
khmer_to_english_sex = {'ប្រុស': 'Male', 'ស្រី': 'Female'}
df['sex'] = df['sex'].map(khmer_to_english_sex)

In [6]:
# Build the choice lists offered by the Gradio input components.

# NOTE: this is an alias of `df`, not a copy; both names refer to the same
# DataFrame object.
df_amr_field = df


def _sorted_choices(column):
    """Return the sorted distinct non-missing values of ``df_amr_field[column]``.

    Missing values are dropped first: a NaN mixed with strings makes
    ``sorted()`` raise ``TypeError``, and NaN is not a meaningful option to
    offer in a dropdown or radio group.
    """
    return sorted(df_amr_field[column].dropna().unique().tolist())


sex_list = _sorted_choices('sex')
address_list = _sorted_choices('address')
ward_list = _sorted_choices('ward_english')
diagnosis_list = _sorted_choices('new_diagnosis')
sample_list = _sorted_choices('sample')
direct_list = _sorted_choices('2_direct')
culture_list = _sorted_choices('3_culture')
genre_list = _sorted_choices('4_genre')
species_list = _sorted_choices('5_species')

In [7]:
# Build one label encoding per categorical column. Each result is later
# indexed with the raw category value chosen in the UI (e.g. sex_dict[sex]).
# NOTE(review): set_label_encoding comes from utils; whether it also modifies
# `df` in place is not visible here, so the calls run in the original order.
_encoded_columns = (
    'sex',
    'address',
    'ward_english',
    'sample',
    'espece_requete',
    '2_direct',
    '3_culture',
    '4_genre',
    '5_species',
    'new_diagnosis',  # diagnosis was added last
)
_encodings = {column: set_label_encoding(df, column) for column in _encoded_columns}

sex_dict = _encodings['sex']
address_dict = _encodings['address']
ward_dict = _encodings['ward_english']
sample_dict = _encodings['sample']
espece_requete_dict = _encodings['espece_requete']
direct_2_dict = _encodings['2_direct']
culture_3_dict = _encodings['3_culture']
genre_4_dict = _encodings['4_genre']
espece_5_training_dict = _encodings['5_species']

diagnosis_dict = _encodings['new_diagnosis']

In [8]:
# Stage models that have already been loaded, keyed by stage number, so each
# joblib file is read from disk at most once per kernel session.
_STAGE_MODELS = {}


def _load_stage_model(stage):
    """Return the model stored in ``random_forest_stage_<stage>.joblib`` (cached)."""
    if stage not in _STAGE_MODELS:
        # NOTE(review): joblib.load unpickles the file and can execute
        # arbitrary code; only load model files produced by this project.
        _STAGE_MODELS[stage] = joblib.load(f'random_forest_stage_{stage}.joblib')
    return _STAGE_MODELS[stage]


def _encode_choice(label, mapping, value):
    """Look up ``value`` in a label-encoding mapping, reporting unknown values in the UI."""
    try:
        return mapping[value]
    except KeyError:
        raise gr.Error(f"Unknown value for {label}: {value!r}") from None


def _probability_of_sensible(model, target_index, proba_row):
    """Return the predicted probability of class 1 ("Sensible") for one target.

    ``proba_row`` is the first sample's row of ``predict_proba`` for the target.
    Its columns follow ``model.classes_[target_index]``, so a target trained on
    a single class has only one column; the column is therefore located by
    class label instead of assuming that index 1 exists.
    """
    try:
        target_classes = list(model.classes_[target_index])
    except (AttributeError, IndexError, TypeError):
        target_classes = None

    if target_classes is not None:
        if 1 in target_classes:
            return float(proba_row[target_classes.index(1)])
        # Class 1 was never seen for this target during training.
        return 0.0

    # No usable class information: fall back to the second column when present.
    return float(proba_row[1]) if len(proba_row) > 1 else 0.0


def amr_project(age, sex, address, ward_en, date, diagnosis, sample, direct_2, culture_3, genre_4, species_5):
    """Predict antibiotic sensibility for one patient/sample with the matching stage model.

    The stage is 1 plus the number of consecutive lab-result fields filled in,
    in the order ``direct_2``, ``culture_3``, ``genre_4``, ``species_5``; a
    value that follows an empty field is ignored. The stage selects both the
    feature columns and the model file ``random_forest_stage_<stage>.joblib``.

    Args:
        age: Patient age as entered in the form (text or number).
        sex, address, ward_en, diagnosis, sample: Raw category values; each is
            converted with its label-encoding dictionary.
        date: POSIX timestamp of the sample; only its month is used.
        direct_2, culture_3, genre_4, species_5: Optional category values for
            stages 2 to 5.

    Returns:
        A tuple ``(df_info, result_probab_dict)``: a one-row DataFrame naming
        the model used, and a dict mapping each entry of ``target_labels`` to
        the predicted probability of class 1 ("Sensible").

    Raises:
        gr.Error: If a required field is empty, the age is not numeric, the
            date is not a valid timestamp, or a category value has no encoding.
    """
    # Every stage needs these fields; without the check an incomplete form fell
    # through all branches and failed on unbound result variables.
    required_fields = {
        "Age": age,
        "Sex": sex,
        "Address": address,
        "Ward": ward_en,
        "Date": date,
        "Diagnosis": diagnosis,
        "Sample": sample,
    }
    missing = [label for label, value in required_fields.items() if value is None or value == ""]
    if missing:
        raise gr.Error("Please fill in: " + ", ".join(missing))

    # The age arrives as free text from the form; hand the model a number.
    try:
        age_value = float(age)
    except (TypeError, ValueError):
        raise gr.Error(f"Age must be a number, got {age!r}") from None

    # Only the month of the sample date is used as a feature.
    try:
        month = datetime.fromtimestamp(date).month
    except (TypeError, ValueError, OverflowError, OSError):
        raise gr.Error(f"Invalid date: {date!r}") from None

    # Feature columns shared by every stage, in the column order of the original code.
    input_data = {
        "month": [month],
        "age": [age_value],
        'sex': [_encode_choice("Sex", sex_dict, sex)],
        'address': [_encode_choice("Address", address_dict, address)],
        'ward_english': [_encode_choice("Ward", ward_dict, ward_en)],
        'sample': [_encode_choice("Sample", sample_dict, sample)],
        'new_diagnosis': [_encode_choice("Diagnosis", diagnosis_dict, diagnosis)],
    }

    # Each consecutive lab result that is filled in adds one column and moves
    # to the next stage; stop at the first empty one.
    staged_inputs = (
        ('2_direct', "Stage 2 - Direct", direct_2_dict, direct_2),
        ('3_culture', "Stage 3 - Culture", culture_3_dict, culture_3),
        ('4_genre', "Stage 4 - Genre", genre_4_dict, genre_4),
        ('5_species', "Stage 5 - Species", espece_5_training_dict, species_5),
    )
    stage = 1
    for column, label, mapping, value in staged_inputs:
        if not value:
            break
        input_data[column] = [_encode_choice(label, mapping, value)]
        stage += 1

    print(f'Stage {stage}')
    model_input = pd.DataFrame(input_data)
    print(tabulate(model_input, headers="keys", tablefmt="fancy_grid"))

    model = _load_stage_model(stage)
    predicted_classes = model.predict(model_input)
    # Indexed below as [target][sample][class].
    predicted_probabilities = model.predict_proba(model_input)

    result_class_dict = {}
    result_probab_dict = {}
    for index, antibiotic in enumerate(target_labels):
        is_sensible = predicted_classes[0][index] == 1
        result_class_dict[antibiotic] = "Sensible" if is_sensible else "Resistance"
        result_probab_dict[antibiotic] = _probability_of_sensible(
            model, index, predicted_probabilities[index][0]
        )

    print()
    print(result_class_dict)
    print(result_probab_dict)

    # One-row table telling the user which model produced the prediction.
    df_info = pd.DataFrame({"Model": [f"Random Forest - Stage {stage}"]})
    print(tabulate(df_info, headers="keys", tablefmt="fancy_grid"))

    return df_info, result_probab_dict

# Input widgets, listed in the same order as amr_project's parameters.
amr_input_components = [
    gr.Textbox(label="Age"),
    gr.Radio(choices=sex_list, label="Sex"),
    gr.Dropdown(choices=address_list, label="Address"),
    gr.Dropdown(choices=ward_list, label="Ward"),
    gr.DateTime(label='Date'),
    gr.Dropdown(choices=diagnosis_list, label='Diagnosis'),
    gr.Dropdown(choices=sample_list, label='Sample'),
    gr.Dropdown(choices=direct_list, label='Stage 2 - Direct'),
    gr.Dropdown(choices=culture_list, label='Stage 3 - Culture'),
    gr.Dropdown(choices=genre_list, label='Stage 4 - Genre'),
    gr.Dropdown(choices=species_list, label='Stage 5 - Species'),
]

# Output widgets, matching amr_project's return tuple: the model-information
# table first, then the per-antibiotic probabilities.
amr_output_components = [
    gr.Dataframe(label='Information'),
    gr.Label(label='Antibiotics - Sensibility Prediction'),
]

app = gr.Interface(
    fn=amr_project,
    inputs=amr_input_components,
    outputs=amr_output_components,
    title='Cambodia AMR Predicting Application',
    description='A Machine Learning-based Clinical Decision Support System predicts antibiotic resistance and susceptibility using patient data and microbial information, helping combat Antimicrobial Resistance (CAMPRA).',
)

app.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




--------


Stage 2
╒════╤═════════╤═══════╤═══════╤═══════════╤════════════════╤══════════╤═════════════════╤════════════╕
│    │   month │   age │   sex │   address │   ward_english │   sample │   new_diagnosis │   2_direct │
╞════╪═════════╪═══════╪═══════╪═══════════╪════════════════╪══════════╪═════════════════╪════════════╡
│  0 │       4 │    30 │     0 │         0 │              1 │       12 │              10 │          0 │
╘════╧═════════╧═══════╧═══════╧═══════════╧════════════════╧══════════╧═════════════════╧════════════╛


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



{'Piperacilline/Tazobactam': 'Sensible', 'Ceftriaxone': 'Resistance', 'Amoxicilline': 'Resistance', 'Meropenem': 'Sensible', 'Amoxicilline/Acide Clavulanique': 'Resistance', 'Cotrimoxazole': 'Resistance', 'Vancomycine': 'Resistance', 'Ciprofloxacine': 'Sensible', 'Azithromycine': 'Resistance', 'Gentamicine': 'Sensible', 'Levofloxacine': 'Resistance', 'Clindamycine': 'Resistance', 'Amikacine': 'Sensible', 'Oxacilline / Cefazoline': 'Resistance', 'Ceftazidime': 'Sensible', 'Cefepime': 'Resistance', 'Imipenem': 'Sensible', 'Fosfomycine': 'Resistance', 'Ertapenem': 'Resistance', 'Metronidazole': 'Resistance', 'Ofloxacine': 'Resistance'}
{'Piperacilline/Tazobactam': 0.715, 'Ceftriaxone': 0.265, 'Amoxicilline': 0.12, 'Meropenem': 0.89, 'Amoxicilline/Acide Clavulanique': 0.25, 'Cotrimoxazole': 0.46, 'Vancomycine': 0.005, 'Ciprofloxacine': 0.56, 'Azithromycine': 0.0, 'Gentamicine': 0.59, 'Levofloxacine': 0.015, 'Clindamycine': 0.0, 'Amikacine': 0.735, 'Oxacilline / Cefazoline': 0.0, 'Ceftazid