In [36]:
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interactive
from IPython.display import display
from sklearn.datasets import make_blobs
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec
from sklearn import neighbors
from sklearn.metrics import roc_curve, auc

# Set up global settings

In [2]:
# Global settings: display names for the two cell populations and the two marker axes
label_healthy, label_diseased = "Non-leukemic", "Leukemic"
label_x, label_y = "LAIP1", "LAIP2"

# Default value for every slider, keyed by the simulation parameter it controls
default_values = dict(
    n_cells=10000,
    frac_leukemic_AML1=0.1,
    frac_leukemic_AML2=0.01,
    frac_leukemic_AML3=0.1,
    mean_healthy_x=-1,
    std_healthy_x=0.5,
    mean_healthy_y=-1,
    std_healthy_y=0.5,
    dist_healthy_LAIP1=3,
    std_LAIP1_x=0.5,
    std_LAIP1_y=0.5,
    dist_healthy_LAIP2=3,
    std_LAIP2_x=0.5,
    std_LAIP2_y=0.5,
)

# Define a function to restore default values
def restore_defaults(button):
    """Button callback: put every slider back on its ``default_values`` entry.

    Args:
        button: the widget that fired the click; not used.

    The slider objects are module-level names looked up when the callback
    runs, so they only need to exist by the time the button is clicked.
    """
    slider_by_key = {
        'n_cells': n_cells_slider,
        'frac_leukemic_AML1': frac_leukemic_AML1_slider,
        'frac_leukemic_AML2': frac_leukemic_AML2_slider,
        'frac_leukemic_AML3': frac_leukemic_AML3_slider,
        'mean_healthy_x': mean_healthy_x_slider,
        'std_healthy_x': std_healthy_x_slider,
        'mean_healthy_y': mean_healthy_y_slider,
        'std_healthy_y': std_healthy_y_slider,
        'dist_healthy_LAIP1': dist_healthy_LAIP1_slider,
        'std_LAIP1_x': std_LAIP1_x_slider,
        'std_LAIP1_y': std_LAIP1_y_slider,
        'dist_healthy_LAIP2': dist_healthy_LAIP2_slider,
        'std_LAIP2_x': std_LAIP2_x_slider,
        'std_LAIP2_y': std_LAIP2_y_slider,
    }
    # Dicts preserve insertion order, so sliders are reset in the listed order
    for key, slider in slider_by_key.items():
        slider.value = default_values[key]

# Set up sliders

In [59]:
slider_layout = widgets.Layout(width='400px')


def _make_slider(slider_cls, key, lo, hi, step, description):
    """Build one slider that starts at ``default_values[key]``.

    All sliders share ``slider_layout`` and use an 'initial' description
    width so the long labels are not truncated.
    """
    return slider_cls(value=default_values[key], min=lo, max=hi, step=step,
                      description=description,
                      style={'description_width': 'initial'},
                      layout=slider_layout)


# Sample size and leukemic fraction per AML sample
n_cells_slider = _make_slider(widgets.IntSlider, 'n_cells', 1, 100000, 1, "Total cells")
frac_leukemic_AML1_slider = _make_slider(widgets.FloatSlider, 'frac_leukemic_AML1', 0, 1, 0.01, "Fraction leukemic cells in AML1")
frac_leukemic_AML2_slider = _make_slider(widgets.FloatSlider, 'frac_leukemic_AML2', 0, 1, 0.01, "Fraction leukemic cells in AML2")
frac_leukemic_AML3_slider = _make_slider(widgets.FloatSlider, 'frac_leukemic_AML3', 0, 1, 0.01, "Fraction leukemic cells in AML3")

# Non-leukemic cluster: position and spread
mean_healthy_x_slider = _make_slider(widgets.FloatSlider, 'mean_healthy_x', -10, 10, 0.1, "Mean of non-leukemic cluster (X-axis)")
std_healthy_x_slider = _make_slider(widgets.FloatSlider, 'std_healthy_x', 0, 3, 0.1, "Standard deviation of non-leukemic cluster (X-axis)")
mean_healthy_y_slider = _make_slider(widgets.FloatSlider, 'mean_healthy_y', -10, 10, 0.1, "Mean of non-leukemic cluster (Y-axis)")
std_healthy_y_slider = _make_slider(widgets.FloatSlider, 'std_healthy_y', 0, 3, 0.1, "Standard deviation of non-leukemic cluster (Y-axis)")

# LAIP1+ cluster: distance from the non-leukemic cluster and spread
dist_healthy_LAIP1_slider = _make_slider(widgets.FloatSlider, 'dist_healthy_LAIP1', 0, 5, 0.1, "Distance between LAIP1+ and non-leukemic clusters")
std_LAIP1_x_slider = _make_slider(widgets.FloatSlider, 'std_LAIP1_x', 0, 3, 0.1, "Standard deviation of LAIP1+ cluster (X-axis)")
std_LAIP1_y_slider = _make_slider(widgets.FloatSlider, 'std_LAIP1_y', 0, 3, 0.1, "Standard deviation of LAIP1+ cluster (Y-axis)")

# LAIP2+ cluster: distance from the non-leukemic cluster and spread
dist_healthy_LAIP2_slider = _make_slider(widgets.FloatSlider, 'dist_healthy_LAIP2', 0, 5, 0.1, "Distance between LAIP2+ and non-leukemic clusters")
std_LAIP2_x_slider = _make_slider(widgets.FloatSlider, 'std_LAIP2_x', 0, 3, 0.1, "Standard deviation of LAIP2+ cluster (X-axis)")
std_LAIP2_y_slider = _make_slider(widgets.FloatSlider, 'std_LAIP2_y', 0, 3, 0.1, "Standard deviation of LAIP2+ cluster (Y-axis)")

In [60]:
def _split_cell_counts(n_cells, frac_leukemic):
    """Split ``n_cells`` into ``[n_healthy, n_leukemic]`` counts.

    Only the leukemic count is rounded; the healthy count is the remainder,
    so the two always add up to ``n_cells``. Rounding both halves
    independently can drift by one cell (15 cells at 0.1 gives 14 + 2).

    Raises:
        ValueError: if ``frac_leukemic`` is outside ``[0, 1]``.
    """
    if not 0.0 <= frac_leukemic <= 1.0:
        raise ValueError(f"frac_leukemic must be within [0, 1], got {frac_leukemic}")
    n_leukemic = round(n_cells * frac_leukemic)
    return [n_cells - n_leukemic, n_leukemic]


def _blobs_to_frame(n_samples, centers, cluster_std):
    """Draw Gaussian blobs and return them as a labelled DataFrame.

    Columns are ``label_x``, ``label_y`` and ``"Cluster"``. Points from the
    second center (blob index 1) are labelled ``label_diseased``; all others
    are labelled ``label_healthy``. The seed is fixed at 0, so the same
    arguments always give the same frame.
    """
    X, y = make_blobs(n_samples=n_samples, centers=centers,
                      cluster_std=cluster_std, random_state=0)
    frame = pd.DataFrame(X, columns=[label_x, label_y])
    frame["Cluster"] = np.where(y == 1, label_diseased, label_healthy)
    return frame


def simulate_samples(n_cells, frac_leukemic_AML1, frac_leukemic_AML2, frac_leukemic_AML3,
                     mean_healthy_x, std_healthy_x, mean_healthy_y, std_healthy_y,
                     dist_healthy_LAIP1, std_LAIP1_x, std_LAIP1_y,
                     dist_healthy_LAIP2, std_LAIP2_x, std_LAIP2_y):
    """Simulate one normal and three AML samples as 2-D Gaussian clusters.

    Args:
        n_cells: total number of cells in each sample.
        frac_leukemic_AML1, frac_leukemic_AML2, frac_leukemic_AML3: fraction
            of each AML sample drawn from its leukemic cluster, in ``[0, 1]``.
        mean_healthy_x, mean_healthy_y: center of the non-leukemic cluster.
        std_healthy_x, std_healthy_y: per-axis spread of that cluster.
        dist_healthy_LAIP1: x-offset of the LAIP1+ cluster from the
            non-leukemic center (used by AML1 and AML2).
        std_LAIP1_x, std_LAIP1_y: per-axis spread of the LAIP1+ cluster.
        dist_healthy_LAIP2: y-offset of the LAIP2+ cluster from the
            non-leukemic center (used by AML3).
        std_LAIP2_x, std_LAIP2_y: per-axis spread of the LAIP2+ cluster.

    Returns:
        ``(NBM, AML1, AML2, AML3)``: four DataFrames with columns
        ``label_x``, ``label_y`` and ``"Cluster"``. NBM contains only the
        non-leukemic cluster.

    Raises:
        ValueError: if any leukemic fraction is outside ``[0, 1]``.
    """
    # Accept anything float() understands (numbers or numeric strings)
    n_cells = int(float(n_cells))
    frac_leukemic_AML1 = float(frac_leukemic_AML1)
    frac_leukemic_AML2 = float(frac_leukemic_AML2)
    frac_leukemic_AML3 = float(frac_leukemic_AML3)
    mean_healthy_x = float(mean_healthy_x)
    std_healthy_x = float(std_healthy_x)
    mean_healthy_y = float(mean_healthy_y)
    std_healthy_y = float(std_healthy_y)
    dist_healthy_LAIP1 = float(dist_healthy_LAIP1)
    std_LAIP1_x = float(std_LAIP1_x)
    std_LAIP1_y = float(std_LAIP1_y)
    dist_healthy_LAIP2 = float(dist_healthy_LAIP2)
    std_LAIP2_x = float(std_LAIP2_x)
    std_LAIP2_y = float(std_LAIP2_y)

    # Cluster centers and per-axis [x, y] spreads
    healthy_means = [mean_healthy_x, mean_healthy_y]
    healthy_stds = [std_healthy_x, std_healthy_y]
    # LAIP1+ cells are shifted along x only, LAIP2+ cells along y only
    LAIP1_means = [mean_healthy_x + dist_healthy_LAIP1, mean_healthy_y]
    LAIP1_stds = [std_LAIP1_x, std_LAIP1_y]
    LAIP2_means = [mean_healthy_x, mean_healthy_y + dist_healthy_LAIP2]
    LAIP2_stds = [std_LAIP2_x, std_LAIP2_y]

    # NBM: a single non-leukemic cluster
    NBM = _blobs_to_frame(n_cells, [healthy_means], [healthy_stds])
    # AML1 and AML2 share the LAIP1+ geometry and differ only in leukemic fraction
    AML1 = _blobs_to_frame(_split_cell_counts(n_cells, frac_leukemic_AML1),
                           [healthy_means, LAIP1_means], [healthy_stds, LAIP1_stds])
    AML2 = _blobs_to_frame(_split_cell_counts(n_cells, frac_leukemic_AML2),
                           [healthy_means, LAIP1_means], [healthy_stds, LAIP1_stds])
    # AML3 carries the LAIP2+ cluster instead
    AML3 = _blobs_to_frame(_split_cell_counts(n_cells, frac_leukemic_AML3),
                           [healthy_means, LAIP2_means], [healthy_stds, LAIP2_stds])
    return NBM, AML1, AML2, AML3

In [61]:
def _plot_cells(ax, sample, title, xlims, ylims, hue_order=None, decision_surface=None):
    """Scatter one simulated sample on ``ax``.

    Args:
        ax: matplotlib axes to draw on.
        sample: DataFrame with ``label_x``, ``label_y`` and ``"Cluster"`` columns.
        title: axes title.
        xlims, ylims: ``(min, max)`` view limits.
        hue_order: optional fixed order of cluster labels for colouring.
        decision_surface: optional ``(xx, yy, Z)`` mesh drawn as a translucent
            filled contour underneath the points.
    """
    if decision_surface is not None:
        xx, yy, Z = decision_surface
        ax.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)
    sns.scatterplot(data=sample, x=label_x, y=label_y, hue="Cluster",
                    hue_order=hue_order, ax=ax)
    ax.set_xlim(xlims)
    ax.set_ylim(ylims)
    ax.set_title(title)


def _leukemic_labels(sample):
    """Return a 0/1 integer array, 1 where ``sample['Cluster']`` is ``label_diseased``."""
    return (sample["Cluster"] == label_diseased).to_numpy().astype(int)


def _leukemic_scores(clf, X):
    """Return the classifier's probability of class 1 for each row of ``X``.

    If class 1 was absent from the training labels, ``predict_proba`` has no
    column for it; the score is then 0 for every row.
    """
    positive_col = np.flatnonzero(clf.classes_ == 1)
    if positive_col.size == 0:
        return np.zeros(len(X))
    return clf.predict_proba(X)[:, positive_col[0]]


def plot_samples(n_cells, frac_leukemic_AML1, frac_leukemic_AML2, frac_leukemic_AML3,
                     mean_healthy_x, std_healthy_x, mean_healthy_y, std_healthy_y,
                     dist_healthy_LAIP1, std_LAIP1_x, std_LAIP1_y,
                     dist_healthy_LAIP2, std_LAIP2_x, std_LAIP2_y):
    """Simulate the four samples and draw a 2x4 overview figure.

    Top row: scatter plots of NBM, AML1, AML2 and AML3.
    Bottom row: AML1, AML2 and AML3 over the decision surface of a
    k-nearest-neighbour classifier trained on AML1, followed by ROC curves
    of that classifier on the three AML samples. The AML1 curve is
    therefore evaluated on the training data.

    All arguments are forwarded unchanged to ``simulate_samples``.
    Returns ``None``; the figure is left for the notebook backend to display.
    """
    NBM, AML1, AML2, AML3 = simulate_samples(n_cells, frac_leukemic_AML1, frac_leukemic_AML2, frac_leukemic_AML3,
                     mean_healthy_x, std_healthy_x, mean_healthy_y, std_healthy_y,
                     dist_healthy_LAIP1, std_LAIP1_x, std_LAIP1_y,
                     dist_healthy_LAIP2, std_LAIP2_x, std_LAIP2_y)
    # Fixed view window; clusters moved outside it by the sliders are not shown
    xlims = (-6, 6)
    ylims = (-6, 6)
    features = [label_x, label_y]
    hue_order = [label_healthy, label_diseased]
    aml_samples = (("AML1", AML1), ("AML2", AML2), ("AML3", AML3))

    fig = plt.figure(figsize=(20, 10))
    gs = gridspec.GridSpec(2, 4, figure=fig)

    # Top row: raw samples
    _plot_cells(fig.add_subplot(gs[0, 0]), NBM, 'NBM', xlims, ylims)
    for col, (name, sample) in enumerate(aml_samples, start=1):
        _plot_cells(fig.add_subplot(gs[0, col]), sample, name, xlims, ylims,
                    hue_order=hue_order)

    # Train the nearest-neighbour classifier on AML1. Plain arrays are used
    # for both fit and predict so scikit-learn sees consistent input
    # (no feature-name mismatch between a DataFrame and the mesh array).
    train_X = AML1[features].to_numpy()
    train_y = _leukemic_labels(AML1)
    # k cannot exceed the number of training cells
    n_neighbors = min(5, len(train_X))
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
    clf.fit(train_X, train_y)

    # Evaluate the classifier on a 0.1-spaced mesh covering the view window
    x_min, x_max = xlims
    y_min, y_max = ylims
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    surface = (xx, yy, Z)

    # Bottom row: samples over the decision surface
    bottom_titles = ('AML1 (training set)', 'AML2', 'AML3')
    for col, ((_, sample), title) in enumerate(zip(aml_samples, bottom_titles)):
        _plot_cells(fig.add_subplot(gs[1, col]), sample, title, xlims, ylims,
                    hue_order=hue_order, decision_surface=surface)

    # ROC curves of the AML1-trained classifier on each AML sample
    ax_roc = fig.add_subplot(gs[1, 3])
    for name, sample in aml_samples:
        y_true = _leukemic_labels(sample)
        if np.unique(y_true).size < 2:
            # ROC needs both classes present; keep a legend entry explaining the gap
            ax_roc.plot([], [], label=f'{name} (AUC undefined)')
            continue
        y_score = _leukemic_scores(clf, sample[features].to_numpy())
        fpr, tpr, _ = roc_curve(y_true, y_score)
        roc_auc = auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
    ax_roc.set_xlabel('False positive rate')
    ax_roc.set_ylabel('True positive rate')
    ax_roc.set_title('ROC curves')
    ax_roc.legend()

In [62]:
# Button that puts every slider back on its default value
restore_defaults_button = widgets.Button(button_style='primary', description="Restore Defaults")
restore_defaults_button.on_click(restore_defaults)

# Bind each plot_samples argument to the slider that drives it
plot_controls = {
    'n_cells': n_cells_slider,
    'frac_leukemic_AML1': frac_leukemic_AML1_slider,
    'frac_leukemic_AML2': frac_leukemic_AML2_slider,
    'frac_leukemic_AML3': frac_leukemic_AML3_slider,
    'mean_healthy_x': mean_healthy_x_slider,
    'std_healthy_x': std_healthy_x_slider,
    'mean_healthy_y': mean_healthy_y_slider,
    'std_healthy_y': std_healthy_y_slider,
    'dist_healthy_LAIP1': dist_healthy_LAIP1_slider,
    'std_LAIP1_x': std_LAIP1_x_slider,
    'std_LAIP1_y': std_LAIP1_y_slider,
    'dist_healthy_LAIP2': dist_healthy_LAIP2_slider,
    'std_LAIP2_x': std_LAIP2_x_slider,
    'std_LAIP2_y': std_LAIP2_y_slider,
}
interactive_plot = interactive(plot_samples, **plot_controls)

# Show the slider-driven plot followed by the reset button
display(interactive_plot, restore_defaults_button)

interactive(children=(IntSlider(value=10000, description='Total cells', layout=Layout(width='400px'), max=1000…

Button(button_style='primary', description='Restore Defaults', style=ButtonStyle())