In [9]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import pandas as pd
from imblearn.under_sampling import ClusterCentroids, NearMiss, TomekLinks, OneSidedSelection
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.ensemble import BalancedBaggingClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.datasets import make_classification

# Synthetic binary-classification data generated with class weights 0.9 / 0.1
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)
X = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(20)])
y = pd.Series(y, name='target')

# Label summarising the dataset dimensions
dataset_size_label = widgets.Label(
    f"Dataset Size: {X.shape[0]} samples, Number of Features: {X.shape[1]}"
)

# One top-level button per family of sampling techniques
under_sampling_button = widgets.Button(
    description="Under Sampling",
    tooltip="Select this if you want to reduce the majority class samples.",
)
over_sampling_button = widgets.Button(
    description="Over Sampling",
    tooltip="Select this if you want to increase the minority class samples.",
)
ensemble_sampling_button = widgets.Button(
    description="Ensemble Sampling",
    tooltip="Select this to use ensemble samplers like Bagging, Forest of Randomized Trees, or Boosting.",
)

# Slider whose value is handed to the samplers (0.1 to 1.0 in steps of 0.1)
sampling_percentage_slider = widgets.FloatSlider(
    description='Sampling %',
    min=0.1,
    max=1.0,
    step=0.1,
    value=1.0,
)

# Containers for the method buttons, the textual report and the save button
sub_buttons_box = widgets.VBox([])
output_box = widgets.Output()
save_button_box = widgets.HBox([])

# The save button starts disabled; the sampling functions enable it
save_button = widgets.Button(description="Save Sampled Data", disabled=True)

# Save function
def save_sampled_data(X_res, y_res):
    """Write resampled features and labels to ``sampled_data.csv``.

    Parameters
    ----------
    X_res : array-like or pandas.DataFrame
        Feature matrix to save.
    y_res : array-like or pandas.Series
        Labels to save; written as the last column, named ``target``.

    Both inputs are renumbered 0..n-1 before being joined. ``pd.concat`` with
    ``axis=1`` aligns rows on the index, so inputs carrying different indices
    would otherwise be padded with NaN instead of being paired row by row.
    A confirmation message is printed into ``output_box``.
    """
    features = pd.DataFrame(X_res).reset_index(drop=True)
    labels = pd.Series(y_res, name='target').reset_index(drop=True)
    df = pd.concat([features, labels], axis=1)
    df.to_csv("sampled_data.csv", index=False)
    with output_box:
        print("Sampled data saved as 'sampled_data.csv' in the current directory.")

# Function to clear sub buttons but keep output and slider intact
def clear_sub_buttons():
    """Reset the sub-button area so that it holds only the percentage slider."""
    slider_only = [sampling_percentage_slider]
    sub_buttons_box.children = slider_only

# Function to calculate and display class distribution with percentages and absolute numbers
def display_distribution(y_before, y_after, method_name, sampling_percentage):
    """Print class counts and percentages before and after sampling.

    Parameters
    ----------
    y_before : array-like
        Labels of the original dataset.
    y_after : array-like
        Labels produced by the selected method.
    method_name : str
        Name shown in the report header.
    sampling_percentage : float
        Slider value; printed as a percentage in the header.

    The report replaces whatever ``output_box`` currently shows. The feature
    count on the last line is read from the global ``X``.
    """
    def class_counts(labels):
        # tolist() converts NumPy scalars to native Python values, so the
        # printed dict reads {0: 90} instead of {np.int64(0): np.int64(90)}
        # on NumPy >= 2.
        values, counts = np.unique(labels, return_counts=True)
        return dict(zip(values.tolist(), counts.tolist()))

    def as_percentages(distribution):
        total = sum(distribution.values())
        return {cls: (count / total) * 100 for cls, count in distribution.items()}

    original_distribution = class_counts(y_before)
    resampled_distribution = class_counts(y_after)
    original_percentages = as_percentages(original_distribution)
    resampled_percentages = as_percentages(resampled_distribution)

    with output_box:
        clear_output(wait=True)
        print(f"Applied {method_name} with {sampling_percentage * 100:.0f}% sampling:")
        print(f"Original class distribution (absolute numbers): {original_distribution}")
        print("Original class distribution (in %):")
        for cls, pct in original_percentages.items():
            print(f"Class {cls}: {pct:.2f}%")

        print(f"\nResampled class distribution (absolute numbers): {resampled_distribution}")
        print("Resampled class distribution (in %):")
        for cls, pct in resampled_percentages.items():
            print(f"Class {cls}: {pct:.2f}%")

        # Final dataset size and number of features
        print(f"\nFinal Dataset Size: {len(y_after)} samples, Number of Features: {X.shape[1]}")

# Under-sampling methods
def under_sampling_method(method_name, sampling_percentage):
    """Under-sample the global ``X``/``y`` with the selected imbalanced-learn sampler.

    Parameters
    ----------
    method_name : str
        One of 'Cluster Centroids', 'Near Miss', 'Tomek Links' or
        'One-Sided Selection'.
    sampling_percentage : float
        Passed as ``sampling_strategy`` to Cluster Centroids and Near Miss.
        Tomek Links and One-Sided Selection are built without it, so for them
        the value only appears in the printed header.

    Raises
    ------
    ValueError
        If ``method_name` is not one of the supported names.

    On success the distribution report is shown and the save button is enabled
    for the new result. If the sampler rejects the request, the error text is
    shown in ``output_box`` and nothing else changes.
    """
    if method_name == 'Cluster Centroids':
        sampler = ClusterCentroids(sampling_strategy=sampling_percentage, random_state=42)
    elif method_name == 'Near Miss':
        sampler = NearMiss(sampling_strategy=sampling_percentage)
    elif method_name == 'Tomek Links':
        sampler = TomekLinks()
    elif method_name == 'One-Sided Selection':
        sampler = OneSidedSelection(random_state=42)  # No sampling strategy needed here
    else:
        # Previously an unknown name fell through and failed later on the unbound `sampler`.
        raise ValueError(f"Unknown under-sampling method: {method_name!r}")

    try:
        X_res, y_res = sampler.fit_resample(X, y)
    except ValueError as exc:
        # imbalanced-learn raises ValueError for a ratio it cannot reach by
        # removing samples; inside a widget callback that would go unnoticed.
        with output_box:
            clear_output(wait=True)
            print(f"Could not apply {method_name} with {sampling_percentage * 100:.0f}% sampling: {exc}")
        return

    display_distribution(y, y_res, method_name, sampling_percentage)

    def save_current_result(_):
        save_sampled_data(X_res, y_res)

    # Swap out the handler remembered from an earlier run instead of stacking
    # a new one on every call, which made one click save (and report) repeatedly.
    previous_handler = getattr(save_button, '_sampling_save_handler', None)
    if previous_handler is not None:
        save_button.on_click(previous_handler, remove=True)
    save_button.on_click(save_current_result)
    save_button._sampling_save_handler = save_current_result

    # Enable save button
    save_button_box.children = [save_button]
    save_button.disabled = False

# Over-sampling methods
def over_sampling_method(method_name, sampling_percentage):
    """Over-sample the global ``X``/``y`` with SMOTE or ADASYN.

    Parameters
    ----------
    method_name : str
        Either 'SMOTE' or 'ADASYN'.
    sampling_percentage : float
        Passed to the sampler as ``sampling_strategy``.

    Raises
    ------
    ValueError
        If ``method_name`` is not one of the supported names.

    On success the distribution report is shown and the save button is enabled
    for the new result. If the sampler rejects the request, the error text is
    shown in ``output_box`` and nothing else changes.
    """
    if method_name == 'SMOTE':
        sampler = SMOTE(sampling_strategy=sampling_percentage, random_state=42)
    elif method_name == 'ADASYN':
        sampler = ADASYN(sampling_strategy=sampling_percentage, random_state=42)
    else:
        # Previously an unknown name fell through and failed later on the unbound `sampler`.
        raise ValueError(f"Unknown over-sampling method: {method_name!r}")

    try:
        X_res, y_res = sampler.fit_resample(X, y)
    except (ValueError, RuntimeError) as exc:
        # The samplers raise when the requested ratio cannot be reached by
        # generating samples; inside a widget callback that would go unnoticed.
        with output_box:
            clear_output(wait=True)
            print(f"Could not apply {method_name} with {sampling_percentage * 100:.0f}% sampling: {exc}")
        return

    display_distribution(y, y_res, method_name, sampling_percentage)

    def save_current_result(_):
        save_sampled_data(X_res, y_res)

    # Swap out the handler remembered from an earlier run instead of stacking
    # a new one on every call, which made one click save (and report) repeatedly.
    previous_handler = getattr(save_button, '_sampling_save_handler', None)
    if previous_handler is not None:
        save_button.on_click(previous_handler, remove=True)
    save_button.on_click(save_current_result)
    save_button._sampling_save_handler = save_current_result

    # Enable save button
    save_button_box.children = [save_button]
    save_button.disabled = False

# Ensemble sampling methods
def ensemble_sampling_method(method_name, sampling_percentage):
    """Fit an ensemble classifier on the global ``X``/``y`` and report its predictions.

    NOTE(review): despite the name, no resampling takes place. The chosen
    classifier is fitted on the full dataset, and the labels it predicts for
    that same data are reported as the "resampled" distribution and offered
    for saving alongside the unchanged ``X``. ``sampling_percentage`` is only
    echoed in the printed messages.

    Parameters
    ----------
    method_name : str
        One of 'Bagging Sampler', 'Forest of Randomized Trees Sampler' or
        'Boosting Sampler'.
    sampling_percentage : float
        Slider value, used for display only.

    Raises
    ------
    ValueError
        If ``method_name`` is not one of the supported names.
    """
    if method_name == 'Bagging Sampler':
        model = BalancedBaggingClassifier(random_state=42)
    elif method_name == 'Forest of Randomized Trees Sampler':
        model = RandomForestClassifier(n_estimators=100, random_state=42)
    elif method_name == 'Boosting Sampler':
        model = AdaBoostClassifier(n_estimators=100, random_state=42)
    else:
        # Previously an unknown name fell through and failed later on the unbound variable.
        raise ValueError(f"Unknown ensemble method: {method_name!r}")

    model.fit(X, y)

    # Predicted labels for the training data itself; this is not a resampled dataset.
    y_pred = model.predict(X)

    display_distribution(y, y_pred, method_name, sampling_percentage)

    with output_box:
        print(f"Applied {method_name} with {sampling_percentage * 100:.0f}% sampling and trained a model successfully.")

    def save_current_result(_):
        save_sampled_data(X, y_pred)  # Saves the original features with the predicted labels

    # Swap out the handler remembered from an earlier run instead of stacking
    # a new one on every call, which made one click save (and report) repeatedly.
    previous_handler = getattr(save_button, '_sampling_save_handler', None)
    if previous_handler is not None:
        save_button.on_click(previous_handler, remove=True)
    save_button.on_click(save_current_result)
    save_button._sampling_save_handler = save_current_result

    # Enable save button
    save_button_box.children = [save_button]
    save_button.disabled = False

# Show sampling percentage slider
def show_sampling_percentage_slider(_):
    """Show only the percentage slider and start listening for its value changes.

    The argument is the clicked button forwarded by the caller; it is not used.
    """
    # Drop any method buttons from a previous selection
    clear_sub_buttons()
    sub_buttons_box.children = [sampling_percentage_slider]
    # Method buttons are shown by on_slider_change once the value changes
    sampling_percentage_slider.observe(on_slider_change, names='value')

# Show method buttons after selecting percentage
def on_slider_change(change):
    """Slider observer: show the method buttons of the currently selected family.

    ``change`` is the notification passed by the slider; its content is not read.
    Nothing happens while ``current_sampling`` is unset.
    """
    selected_family = current_sampling
    if selected_family == 'under':
        show_under_sampling_buttons()
        return
    if selected_family == 'over':
        show_over_sampling_buttons()
        return
    if selected_family == 'ensemble':
        show_ensemble_sampling_buttons()

# Show under-sampling method buttons
def show_under_sampling_buttons():
    """Fill the sub-button area with the slider and one button per under-sampling method.

    Clicking a button runs ``under_sampling_method`` with that button's
    description and the slider value at click time.
    """
    method_tooltips = (
        ("Cluster Centroids", "This method reduces the majority class by selecting the most representative samples (centroids)."),
        ("Near Miss", "This method reduces the majority class by selecting samples closest to the minority class."),
        ("Tomek Links", "This method removes overlapping majority and minority class pairs to clean the data."),
        ("One-Sided Selection", "This method removes majority class samples that are easily classified, keeping only boundary cases."),
    )

    def run_selected(clicked):
        under_sampling_method(clicked.description, sampling_percentage_slider.value)

    method_buttons = []
    for label, hint in method_tooltips:
        method_button = widgets.Button(description=label, tooltip=hint)
        method_button.on_click(run_selected)
        method_buttons.append(method_button)

    # Slider first, then the method buttons
    sub_buttons_box.children = (sampling_percentage_slider, *method_buttons)

# Show over-sampling method buttons
def show_over_sampling_buttons():
    """Fill the sub-button area with the slider and one button per over-sampling method.

    Clicking a button runs ``over_sampling_method`` with that button's
    description and the slider value at click time.
    """
    method_tooltips = (
        ("SMOTE", "This method creates new synthetic samples for the minority class using k-nearest neighbors."),
        ("ADASYN", "This method generates synthetic samples for the minority class, focusing more on difficult cases."),
    )

    def run_selected(clicked):
        over_sampling_method(clicked.description, sampling_percentage_slider.value)

    method_buttons = []
    for label, hint in method_tooltips:
        method_button = widgets.Button(description=label, tooltip=hint)
        method_button.on_click(run_selected)
        method_buttons.append(method_button)

    # Slider first, then the method buttons
    sub_buttons_box.children = (sampling_percentage_slider, *method_buttons)

# Show ensemble-sampling method buttons
def show_ensemble_sampling_buttons():
    """Fill the sub-button area with the slider and one button per ensemble method.

    Clicking a button runs ``ensemble_sampling_method`` with that button's
    description and the slider value at click time.
    """
    method_tooltips = (
        ("Bagging Sampler", "This method uses balanced bagging for resampling minority and majority classes."),
        ("Forest of Randomized Trees Sampler", "This method uses a forest of random decision trees for resampling."),
        ("Boosting Sampler", "This method uses boosting techniques for resampling with emphasis on difficult cases."),
    )

    def run_selected(clicked):
        ensemble_sampling_method(clicked.description, sampling_percentage_slider.value)

    method_buttons = []
    for label, hint in method_tooltips:
        method_button = widgets.Button(description=label, tooltip=hint)
        method_button.on_click(run_selected)
        method_buttons.append(method_button)

    # Slider first, then the method buttons
    sub_buttons_box.children = (sampling_percentage_slider, *method_buttons)

# Module-level state: the sampling family chosen via the main buttons
# ('under', 'over' or 'ensemble'). It stays None until one of them is clicked,
# and on_slider_change reads it to decide which method buttons to show.
current_sampling = None

# Main button click actions
def under_sampling_clicked(_):
    """Click handler for the "Under Sampling" button.

    Records 'under' as the active family in the global ``current_sampling``
    and then shows the percentage slider.
    """
    global current_sampling
    current_sampling = 'under'
    show_sampling_percentage_slider(_)

def over_sampling_clicked(_):
    """Click handler for the "Over Sampling" button.

    Records 'over' as the active family in the global ``current_sampling``
    and then shows the percentage slider.
    """
    global current_sampling
    current_sampling = 'over'
    show_sampling_percentage_slider(_)

def ensemble_sampling_clicked(_):
    """Click handler for the "Ensemble Sampling" button.

    Records 'ensemble' as the active family in the global ``current_sampling``
    and then shows the percentage slider.
    """
    global current_sampling
    current_sampling = 'ensemble'
    show_sampling_percentage_slider(_)

# Connect each main button to its click handler
under_sampling_button.on_click(under_sampling_clicked)
over_sampling_button.on_click(over_sampling_clicked)
ensemble_sampling_button.on_click(ensemble_sampling_clicked)

# Render the interface top to bottom: dataset summary, main buttons,
# slider / method buttons, report area, save button
display(
    dataset_size_label,
    widgets.HBox([under_sampling_button, over_sampling_button, ensemble_sampling_button]),
    sub_buttons_box,
    output_box,
    save_button_box,
)


Label(value='Dataset Size: 1000 samples, Number of Features: 20')

HBox(children=(Button(description='Under Sampling', style=ButtonStyle(), tooltip='Select this if you want to r…

VBox()

Output()

HBox()

In [5]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import pandas as pd
from imblearn.under_sampling import ClusterCentroids, NearMiss, TomekLinks, OneSidedSelection
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.datasets import make_classification

# Synthetic binary-classification data generated with class weights 0.9 / 0.1
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)
X = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(20)])
y = pd.Series(y, name='target')

# Label summarising the dataset dimensions
dataset_size_label = widgets.Label(
    f"Dataset Size: {X.shape[0]} samples, Number of Features: {X.shape[1]}"
)

# Top-level buttons for the two families of sampling techniques
under_sampling_button = widgets.Button(
    description="Under Sampling",
    tooltip="Select this if you want to reduce the majority class samples.",
)
over_sampling_button = widgets.Button(
    description="Over Sampling",
    tooltip="Select this if you want to increase the minority class samples.",
)

# Slider whose value is handed to the samplers (0.1 to 1.0 in steps of 0.1)
sampling_percentage_slider = widgets.FloatSlider(
    description='Sampling %',
    min=0.1,
    max=1.0,
    step=0.1,
    value=1.0,
)

# Containers for the method buttons, the textual report and the save button
sub_buttons_box = widgets.VBox([])
output_box = widgets.Output()
save_button_box = widgets.HBox([])

# The save button starts disabled; the sampling functions enable it
save_button = widgets.Button(description="Save Sampled Data", disabled=True)

# Save function
def save_sampled_data(X_res, y_res):
    """Write resampled features and labels to ``sampled_data.csv``.

    Parameters
    ----------
    X_res : array-like or pandas.DataFrame
        Feature matrix to save.
    y_res : array-like or pandas.Series
        Labels to save; written as the last column, named ``target``.

    Both inputs are renumbered 0..n-1 before being joined. ``pd.concat`` with
    ``axis=1`` aligns rows on the index, so inputs carrying different indices
    would otherwise be padded with NaN instead of being paired row by row.
    A confirmation message is printed into ``output_box``.
    """
    features = pd.DataFrame(X_res).reset_index(drop=True)
    labels = pd.Series(y_res, name='target').reset_index(drop=True)
    df = pd.concat([features, labels], axis=1)
    df.to_csv("sampled_data.csv", index=False)
    with output_box:
        print("Sampled data saved as 'sampled_data.csv' in the current directory.")

# Function to clear sub buttons but keep output and slider intact
def clear_sub_buttons():
    """Reset the sub-button area so that it holds only the percentage slider."""
    slider_only = [sampling_percentage_slider]
    sub_buttons_box.children = slider_only

# Function to calculate and display class distribution with percentages and absolute numbers
def display_distribution(y_before, y_after, method_name, sampling_percentage):
    """Print class counts and percentages before and after sampling.

    Parameters
    ----------
    y_before : array-like
        Labels of the original dataset.
    y_after : array-like
        Labels produced by the selected method.
    method_name : str
        Name shown in the report header.
    sampling_percentage : float
        Slider value; printed as a percentage in the header.

    The report replaces whatever ``output_box`` currently shows. The feature
    count on the last line is read from the global ``X``.
    """
    def class_counts(labels):
        # tolist() converts NumPy scalars to native Python values, so the
        # printed dict reads {0: 90} instead of {np.int64(0): np.int64(90)}
        # on NumPy >= 2.
        values, counts = np.unique(labels, return_counts=True)
        return dict(zip(values.tolist(), counts.tolist()))

    def as_percentages(distribution):
        total = sum(distribution.values())
        return {cls: (count / total) * 100 for cls, count in distribution.items()}

    original_distribution = class_counts(y_before)
    resampled_distribution = class_counts(y_after)
    original_percentages = as_percentages(original_distribution)
    resampled_percentages = as_percentages(resampled_distribution)

    with output_box:
        clear_output(wait=True)
        print(f"Applied {method_name} with {sampling_percentage * 100:.0f}% sampling:")
        print(f"Original class distribution (absolute numbers): {original_distribution}")
        print("Original class distribution (in %):")
        for cls, pct in original_percentages.items():
            print(f"Class {cls}: {pct:.2f}%")

        print(f"\nResampled class distribution (absolute numbers): {resampled_distribution}")
        print("Resampled class distribution (in %):")
        for cls, pct in resampled_percentages.items():
            print(f"Class {cls}: {pct:.2f}%")

        # Final dataset size and number of features
        print(f"\nFinal Dataset Size: {len(y_after)} samples, Number of Features: {X.shape[1]}")

# Under-sampling methods
def under_sampling_method(method_name, sampling_percentage):
    """Under-sample the global ``X``/``y`` with the selected imbalanced-learn sampler.

    Parameters
    ----------
    method_name : str
        One of 'Cluster Centroids', 'Near Miss', 'Tomek Links' or
        'One-Sided Selection'.
    sampling_percentage : float
        Passed as ``sampling_strategy`` to Cluster Centroids and Near Miss.
        Tomek Links and One-Sided Selection are built without it, so for them
        the value only appears in the printed header.

    Raises
    ------
    ValueError
        If ``method_name`` is not one of the supported names.

    On success the distribution report is shown and the save button is enabled
    for the new result. If the sampler rejects the request, the error text is
    shown in ``output_box`` and nothing else changes.
    """
    if method_name == 'Cluster Centroids':
        sampler = ClusterCentroids(sampling_strategy=sampling_percentage, random_state=42)
    elif method_name == 'Near Miss':
        sampler = NearMiss(sampling_strategy=sampling_percentage)
    elif method_name == 'Tomek Links':
        sampler = TomekLinks()
    elif method_name == 'One-Sided Selection':
        sampler = OneSidedSelection(random_state=42)  # No sampling strategy needed here
    else:
        # Previously an unknown name fell through and failed later on the unbound `sampler`.
        raise ValueError(f"Unknown under-sampling method: {method_name!r}")

    try:
        X_res, y_res = sampler.fit_resample(X, y)
    except ValueError as exc:
        # imbalanced-learn raises ValueError for a ratio it cannot reach by
        # removing samples; inside a widget callback that would go unnoticed.
        with output_box:
            clear_output(wait=True)
            print(f"Could not apply {method_name} with {sampling_percentage * 100:.0f}% sampling: {exc}")
        return

    display_distribution(y, y_res, method_name, sampling_percentage)

    def save_current_result(_):
        save_sampled_data(X_res, y_res)

    # Swap out the handler remembered from an earlier run instead of stacking
    # a new one on every call, which made one click save (and report) repeatedly.
    previous_handler = getattr(save_button, '_sampling_save_handler', None)
    if previous_handler is not None:
        save_button.on_click(previous_handler, remove=True)
    save_button.on_click(save_current_result)
    save_button._sampling_save_handler = save_current_result

    # Enable save button
    save_button_box.children = [save_button]
    save_button.disabled = False

# Over-sampling methods
def over_sampling_method(method_name, sampling_percentage):
    """Over-sample the global ``X``/``y`` with SMOTE or ADASYN.

    Parameters
    ----------
    method_name : str
        Either 'SMOTE' or 'ADASYN'.
    sampling_percentage : float
        Passed to the sampler as ``sampling_strategy``.

    Raises
    ------
    ValueError
        If ``method_name`` is not one of the supported names.

    On success the distribution report is shown and the save button is enabled
    for the new result. If the sampler rejects the request, the error text is
    shown in ``output_box`` and nothing else changes.
    """
    if method_name == 'SMOTE':
        sampler = SMOTE(sampling_strategy=sampling_percentage, random_state=42)
    elif method_name == 'ADASYN':
        sampler = ADASYN(sampling_strategy=sampling_percentage, random_state=42)
    else:
        # Previously an unknown name fell through and failed later on the unbound `sampler`.
        raise ValueError(f"Unknown over-sampling method: {method_name!r}")

    try:
        X_res, y_res = sampler.fit_resample(X, y)
    except (ValueError, RuntimeError) as exc:
        # The samplers raise when the requested ratio cannot be reached by
        # generating samples; inside a widget callback that would go unnoticed.
        with output_box:
            clear_output(wait=True)
            print(f"Could not apply {method_name} with {sampling_percentage * 100:.0f}% sampling: {exc}")
        return

    display_distribution(y, y_res, method_name, sampling_percentage)

    def save_current_result(_):
        save_sampled_data(X_res, y_res)

    # Swap out the handler remembered from an earlier run instead of stacking
    # a new one on every call, which made one click save (and report) repeatedly.
    previous_handler = getattr(save_button, '_sampling_save_handler', None)
    if previous_handler is not None:
        save_button.on_click(previous_handler, remove=True)
    save_button.on_click(save_current_result)
    save_button._sampling_save_handler = save_current_result

    # Enable save button
    save_button_box.children = [save_button]
    save_button.disabled = False

def show_sampling_percentage_slider(_):
    """Show only the percentage slider and start listening for its value changes.

    The argument is the clicked button forwarded by the caller; it is not used.
    """
    # Drop any method buttons from a previous selection
    clear_sub_buttons()
    sub_buttons_box.children = [sampling_percentage_slider]
    # Method buttons are shown by on_slider_change once the value changes
    sampling_percentage_slider.observe(on_slider_change, names='value')

# Show method buttons after selecting percentage
def on_slider_change(change):
    """Slider observer: show the method buttons of the currently selected family.

    ``change`` is the notification passed by the slider; its content is not read.
    Nothing happens while ``current_sampling`` is unset.
    """
    selected_family = current_sampling
    if selected_family == 'under':
        show_under_sampling_buttons()
        return
    if selected_family == 'over':
        show_over_sampling_buttons()

# Show under-sampling method buttons with tooltips
def show_under_sampling_buttons():
    """Fill the sub-button area with the slider and one button per under-sampling method.

    Clicking a button runs ``under_sampling_method`` with that button's
    description and the slider value at click time.
    """
    method_tooltips = (
        ("Cluster Centroids", "This method reduces the majority class by selecting the most representative samples (centroids)."),
        ("Near Miss", "This method reduces the majority class by selecting samples closest to the minority class."),
        ("Tomek Links", "This method removes overlapping majority and minority class pairs to clean the data."),
        ("One-Sided Selection", "This method removes majority class samples that are easily classified, keeping only boundary cases."),
    )

    def run_selected(clicked):
        under_sampling_method(clicked.description, sampling_percentage_slider.value)

    method_buttons = []
    for label, hint in method_tooltips:
        method_button = widgets.Button(description=label, tooltip=hint)
        method_button.on_click(run_selected)
        method_buttons.append(method_button)

    # Slider first, then the method buttons
    sub_buttons_box.children = (sampling_percentage_slider, *method_buttons)

# Show over-sampling method buttons with tooltips
def show_over_sampling_buttons():
    """Fill the sub-button area with the slider and one button per over-sampling method.

    Clicking a button runs ``over_sampling_method`` with that button's
    description and the slider value at click time.
    """
    method_tooltips = (
        ("SMOTE", "This method creates new synthetic samples for the minority class using k-nearest neighbors."),
        ("ADASYN", "This method generates synthetic samples for the minority class, focusing more on difficult cases."),
    )

    def run_selected(clicked):
        over_sampling_method(clicked.description, sampling_percentage_slider.value)

    method_buttons = []
    for label, hint in method_tooltips:
        method_button = widgets.Button(description=label, tooltip=hint)
        method_button.on_click(run_selected)
        method_buttons.append(method_button)

    # Slider first, then the method buttons
    sub_buttons_box.children = (sampling_percentage_slider, *method_buttons)

# Module-level state: the sampling family chosen via the main buttons
# ('under' or 'over'). It stays None until one of them is clicked, and
# on_slider_change reads it to decide which method buttons to show.
current_sampling = None

def under_sampling_clicked(_):
    """Click handler for the "Under Sampling" button.

    Records 'under' as the active family in the global ``current_sampling``
    and then shows the percentage slider.
    """
    global current_sampling
    current_sampling = 'under'
    show_sampling_percentage_slider(_)

def over_sampling_clicked(_):
    """Click handler for the "Over Sampling" button.

    Records 'over' as the active family in the global ``current_sampling``
    and then shows the percentage slider.
    """
    global current_sampling
    current_sampling = 'over'
    show_sampling_percentage_slider(_)

# Connect each main button to its click handler
under_sampling_button.on_click(under_sampling_clicked)
over_sampling_button.on_click(over_sampling_clicked)

# Render the interface top to bottom, one widget per display call
display(dataset_size_label)
display(widgets.HBox([under_sampling_button, over_sampling_button]))
display(sub_buttons_box)
display(output_box)
display(save_button_box)


Label(value='Dataset Size: 1000 samples, Number of Features: 20')

HBox(children=(Button(description='Under Sampling', style=ButtonStyle(), tooltip='Select this if you want to r…

VBox()

Output()

HBox()

In [30]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import pandas as pd
from imblearn.under_sampling import ClusterCentroids, NearMiss, TomekLinks, OneSidedSelection
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.datasets import make_classification

# Function to run sampling UI
def run_sampling_ui(X, y):
    """Build and display an interactive under-/over-sampling UI for ``X`` and ``y``.

    The UI shows two main buttons ("Under Sampling", "Over Sampling"), a
    percentage slider, the method buttons of the chosen family, a text report
    and a save button that writes the latest result to ``sampled_data.csv``.

    Parameters
    ----------
    X : feature matrix
        Must expose ``.shape`` and be accepted by the samplers'
        ``fit_resample`` (the notebook passes a pandas DataFrame).
    y : labels
        Class labels matching ``X`` (the notebook passes a pandas Series).

    Side effects
    ------------
    The most recent resampled dataset (features plus a ``target`` column) is
    stored in the module-level variable ``new_df``, which is reset to ``None``
    on every call. Because that variable is shared, only the UI created last
    should be used to inspect or save results.
    """
    global new_df  # Published at module level so later cells can inspect the result
    new_df = None

    # Active sampling family ('under' / 'over'). Kept in this closure rather than
    # in a module global so separate UI instances cannot overwrite each other's choice.
    current_sampling = None

    # Show total features and dataset size
    dataset_size_label = widgets.Label(f"Dataset Size: {X.shape[0]} samples, Number of Features: {X.shape[1]}")

    # Create main buttons
    under_sampling_button = widgets.Button(description="Under Sampling", tooltip="Select this if you want to reduce the majority class samples.")
    over_sampling_button = widgets.Button(description="Over Sampling", tooltip="Select this if you want to increase the minority class samples.")

    # Sampling percentage slider
    sampling_percentage_slider = widgets.FloatSlider(description='Sampling %', min=0.1, max=1.0, step=0.1, value=1.0)

    # Display area for sub buttons, output, and save button
    sub_buttons_box = widgets.VBox([])
    output_box = widgets.Output()
    save_button_box = widgets.HBox([])

    # Save button; enabled after the first successful sampling run
    save_button = widgets.Button(description="Save Sampled Data", disabled=True)

    def combine(features, labels):
        """Join features and labels row by row into one frame with a 'target' column."""
        # Renumber both parts first: concat(axis=1) aligns on the index and would
        # pad with NaN if the two inputs carried different indices.
        return pd.concat(
            [
                pd.DataFrame(features).reset_index(drop=True),
                pd.Series(labels, name='target').reset_index(drop=True),
            ],
            axis=1,
        )

    def save_sampled_data(X_res, y_res):
        """Write the given features and labels to 'sampled_data.csv' and confirm in the output area."""
        combine(X_res, y_res).to_csv("sampled_data.csv", index=False)
        with output_box:
            clear_output(wait=True)
            print("Sampled data saved as 'sampled_data.csv' in the current directory.")

    def clear_sub_buttons():
        """Reset the sub-button area so that it holds only the slider."""
        sub_buttons_box.children = [sampling_percentage_slider]

    def class_counts(labels):
        """Return {class: count} using native Python values."""
        # tolist() avoids np.int64(...) reprs in the printed dict on NumPy >= 2.
        values, counts = np.unique(labels, return_counts=True)
        return dict(zip(values.tolist(), counts.tolist()))

    def display_distribution(y_before, y_after, method_name, sampling_percentage):
        """Replace the output area with class counts and percentages before and after sampling."""
        original_distribution = class_counts(y_before)
        resampled_distribution = class_counts(y_after)

        total_before = sum(original_distribution.values())
        total_after = sum(resampled_distribution.values())

        original_percentages = {k: (v / total_before) * 100 for k, v in original_distribution.items()}
        resampled_percentages = {k: (v / total_after) * 100 for k, v in resampled_distribution.items()}

        with output_box:
            clear_output(wait=True)
            print(f"Applied {method_name} with {sampling_percentage * 100:.0f}% sampling:")
            print(f"Original class distribution (absolute numbers): {original_distribution}")
            print("Original class distribution (in %):")
            for cls, pct in original_percentages.items():
                print(f"Class {cls}: {pct:.2f}%")

            print(f"\nResampled class distribution (absolute numbers): {resampled_distribution}")
            print("Resampled class distribution (in %):")
            for cls, pct in resampled_percentages.items():
                print(f"Class {cls}: {pct:.2f}%")

            # Final dataset size and number of features
            print(f"\nFinal Dataset Size: {len(y_after)} samples, Number of Features: {X.shape[1]}")

    def apply_sampler(sampler, method_name, sampling_percentage):
        """Run the sampler on X/y, publish the result in ``new_df`` and update the UI."""
        global new_df
        try:
            X_res, y_res = sampler.fit_resample(X, y)
        except (ValueError, RuntimeError) as exc:
            # The samplers raise when the requested ratio cannot be reached;
            # inside a widget callback that error would otherwise go unnoticed.
            with output_box:
                clear_output(wait=True)
                print(f"Could not apply {method_name} with {sampling_percentage * 100:.0f}% sampling: {exc}")
            return

        # Store the final dataset in new_df
        new_df = combine(X_res, y_res)

        display_distribution(y, y_res, method_name, sampling_percentage)

        # Enable and display save button
        save_button.disabled = False
        save_button_box.children = [save_button]

    def under_sampling_method(method_name, sampling_percentage):
        """Build the named under-sampler and apply it; raises ValueError for an unknown name."""
        if method_name == 'Cluster Centroids':
            sampler = ClusterCentroids(sampling_strategy=sampling_percentage, random_state=42)
        elif method_name == 'Near Miss':
            sampler = NearMiss(sampling_strategy=sampling_percentage)
        elif method_name == 'Tomek Links':
            sampler = TomekLinks()
        elif method_name == 'One-Sided Selection':
            sampler = OneSidedSelection(random_state=42)  # No sampling strategy needed here
        else:
            raise ValueError(f"Unknown under-sampling method: {method_name!r}")
        apply_sampler(sampler, method_name, sampling_percentage)

    def over_sampling_method(method_name, sampling_percentage):
        """Build the named over-sampler and apply it; raises ValueError for an unknown name."""
        if method_name == 'SMOTE':
            sampler = SMOTE(sampling_strategy=sampling_percentage, random_state=42)
        elif method_name == 'ADASYN':
            sampler = ADASYN(sampling_strategy=sampling_percentage, random_state=42)
        else:
            raise ValueError(f"Unknown over-sampling method: {method_name!r}")
        apply_sampler(sampler, method_name, sampling_percentage)

    def show_sampling_percentage_slider(_):
        """Show only the slider and listen for value changes."""
        clear_sub_buttons()
        sampling_percentage_slider.observe(on_slider_change, names='value')

    def on_slider_change(change):
        """Show the method buttons of the active family whenever the slider value changes."""
        # The slider range starts at 0.1, so the former `change['new'] > 0`
        # guard was always true and has been dropped.
        if current_sampling == 'under':
            show_under_sampling_buttons()
        elif current_sampling == 'over':
            show_over_sampling_buttons()

    def show_under_sampling_buttons():
        """Show the slider followed by one button per under-sampling method."""
        buttons = [
            widgets.Button(description="Cluster Centroids", tooltip="This method reduces the majority class by selecting the most representative samples (centroids)."),
            widgets.Button(description="Near Miss", tooltip="This method reduces the majority class by selecting samples closest to the minority class."),
            widgets.Button(description="Tomek Links", tooltip="This method removes overlapping majority and minority class pairs to clean the data."),
            widgets.Button(description="One-Sided Selection", tooltip="This method removes majority class samples that are easily classified, keeping only boundary cases.")
        ]
        for btn in buttons:
            btn.on_click(lambda b: under_sampling_method(b.description, sampling_percentage_slider.value))
        sub_buttons_box.children = (sampling_percentage_slider,) + tuple(buttons)

    def show_over_sampling_buttons():
        """Show the slider followed by one button per over-sampling method."""
        buttons = [
            widgets.Button(description="SMOTE", tooltip="This method creates new synthetic samples for the minority class using k-nearest neighbors."),
            widgets.Button(description="ADASYN", tooltip="This method generates synthetic samples for the minority class, focusing more on difficult cases.")
        ]
        for btn in buttons:
            btn.on_click(lambda b: over_sampling_method(b.description, sampling_percentage_slider.value))
        sub_buttons_box.children = (sampling_percentage_slider,) + tuple(buttons)

    def under_sampling_clicked(_):
        """Select the under-sampling family and show the slider."""
        nonlocal current_sampling
        current_sampling = 'under'
        show_sampling_percentage_slider(_)

    def over_sampling_clicked(_):
        """Select the over-sampling family and show the slider."""
        nonlocal current_sampling
        current_sampling = 'over'
        show_sampling_percentage_slider(_)

    def on_save_clicked(_):
        """Save the latest result held in ``new_df``; does nothing if there is none yet."""
        if new_df is None:
            return
        save_sampled_data(new_df.iloc[:, :-1], new_df.iloc[:, -1])

    # Attach event handlers to main buttons
    under_sampling_button.on_click(under_sampling_clicked)
    over_sampling_button.on_click(over_sampling_clicked)

    # A single save handler, registered once, always saves the latest new_df
    save_button.on_click(on_save_clicked)

    # Display UI
    display(dataset_size_label, widgets.HBox([under_sampling_button, over_sampling_button]), sub_buttons_box, output_box, save_button_box)

# # Example usage with the dataset
# X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, weights=[0.9, 0.1], random_state=42)
# X = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(20)])
# y = pd.Series(y, name='target')

# # Call the function to run the sampling UI
# run_sampling_ui(X, y)


In [31]:
# Build the demo dataset: 1000 samples, 20 features, class weights 0.9 / 0.1
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)
X = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(20)])
y = pd.Series(y, name='target')

# Launch the interactive sampling UI on that dataset
run_sampling_ui(X, y)

Label(value='Dataset Size: 1000 samples, Number of Features: 20')

HBox(children=(Button(description='Under Sampling', style=ButtonStyle(), tooltip='Select this if you want to r…

VBox()

Output()

HBox()

In [34]:
# Shape of the latest resampled dataset. new_df is filled by the sampling UI only
# after a method button has been clicked; before that it is None and this line fails.
new_df.shape

(250, 21)

In [35]:
# Read back the CSV written by the "Save Sampled Data" button and show its shape,
# to compare with new_df.shape above.
pd.read_csv('sampled_data.csv').shape

(250, 21)