In [None]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output

# Helper function to replace outliers using different methods
def replace_outliers(data, method):
    """Clip ``data`` to lower/upper bounds derived from the chosen method.

    Args:
        data: A pandas Series (any object exposing ``quantile`` and ``clip``
            with the same semantics works).
        method: Name of the bounding rule:
            * ``'Whisker'`` - bounds are ``Q1 - 1.5 * IQR`` and
              ``Q3 + 1.5 * IQR``.
            * ``'95 Percentile'`` - bounds are the 0.025 and 0.975 quantiles.
            * ``'99 Percentile'`` - bounds are the 0.005 and 0.995 quantiles.

    Returns:
        The result of ``data.clip(lower_bound, upper_bound)``; values outside
        the bounds are replaced by the nearest bound.

    Raises:
        ValueError: If ``method`` is not one of the names listed above.
    """
    # Quantile pairs are spelled out literally (rather than computed as
    # ``1 - tail``) so the bounds are not perturbed by float rounding.
    percentile_bounds = {
        '95 Percentile': (0.025, 0.975),
        '99 Percentile': (0.005, 0.995),
    }

    if method == 'Whisker':
        q1 = data.quantile(0.25)
        q3 = data.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
    elif method in percentile_bounds:
        lower_q, upper_q = percentile_bounds[method]
        lower_bound = data.quantile(lower_q)
        upper_bound = data.quantile(upper_q)
    else:
        # Falling through would return None, which a caller assigning the
        # result to a DataFrame column would turn into an all-missing column.
        valid = ['Whisker', *percentile_bounds]
        raise ValueError(
            f"Unknown outlier replacement method {method!r}; "
            f"expected one of {valid}"
        )

    return data.clip(lower_bound, upper_bound)

# Main function for the tool
def outlier_replacement_tool(df):
    """Display an interactive widget UI for clipping outliers in ``df``.

    The UI offers one button per method ('Whisker', '95 Percentile',
    '99 Percentile'). Clicking a method button shows a multi-select of the
    numeric columns of ``df``; confirming the selection records those columns
    under that method, unless a column was already recorded. Clicking
    'Implement Methods' applies ``replace_outliers`` to every recorded column
    of an internal copy of ``df``, writes a log of the changed values to
    'outlier_replacement_log.csv' in the current working directory (only when
    at least one value changed) and displays that log.

    Args:
        df: Source pandas DataFrame. It is copied up front and never
            modified by this function.

    Returns:
        The working copy of ``df``. NOTE: it is returned immediately, before
        any button has been clicked; it is modified in place later, when the
        'Implement Methods' callback runs, so the caller must keep the
        reference and inspect it after interacting with the UI.
    """
    selected_features = {"Whisker": [], "95 Percentile": [], "99 Percentile": []}

    # Create a copy of the original dataset
    new_df = df.copy()

    # Buttons for methods
    whisker_button = widgets.Button(
        description='Whisker Method',
        tooltip='Replace outliers using IQR whisker values',
        button_style='primary'
    )
    percentile_95_button = widgets.Button(
        description='95 Percentile',
        tooltip='Replace outliers using 95th percentile bounds',
        button_style='primary'
    )
    percentile_99_button = widgets.Button(
        description='99 Percentile',
        tooltip='Replace outliers using 99th percentile bounds',
        button_style='primary'
    )

    # Final implement button
    implement_button = widgets.Button(
        description='Implement Methods',
        tooltip='Apply the selected methods to the dataset',
        button_style='success'
    )

    # Output widgets
    output = widgets.Output()
    summary_output = widgets.Output()

    # Feature selection and method handling
    def handle_method_click(method):
        """Return the click callback that opens the selector for ``method``."""
        def callback(b):
            with output:
                clear_output()
                # Offer every numeric column (not only float64/int64), but
                # skip booleans, for which outlier clipping is meaningless.
                available_features = [
                    col for col in df.columns
                    if pd.api.types.is_numeric_dtype(df[col])
                    and not pd.api.types.is_bool_dtype(df[col])
                ]
                multi_select = widgets.SelectMultiple(
                    options=available_features,
                    description='Features:',
                    rows=10,
                    layout=widgets.Layout(width='50%')
                )

                confirm_button = widgets.Button(
                    description='Confirm Selection',
                    button_style='success',
                    tooltip=f'Confirm the selected features for {method}'
                )

                def confirm_callback(cb):
                    """Record the chosen columns under ``method`` if none is taken."""
                    selected = list(multi_select.value)
                    if not selected:
                        print("No features selected.")
                        return

                    # A column may be recorded only once across all methods.
                    already_taken = {
                        feat
                        for feats in selected_features.values()
                        for feat in feats
                    }
                    conflicts = [feat for feat in selected if feat in already_taken]

                    # Column labels are not necessarily strings, hence map(str, ...).
                    if conflicts:
                        print(f"The following features have already been selected in another method: {', '.join(map(str, conflicts))}")
                    else:
                        selected_features[method].extend(selected)
                        print(f"Selected features for {method}: {', '.join(map(str, selected))}")

                confirm_button.on_click(confirm_callback)

                display(widgets.VBox([multi_select, confirm_button]))

        return callback

    # Assign callbacks to buttons
    whisker_button.on_click(handle_method_click("Whisker"))
    percentile_95_button.on_click(handle_method_click("95 Percentile"))
    percentile_99_button.on_click(handle_method_click("99 Percentile"))

    # Implement button callback
    def on_implement_clicked(b):
        """Apply every recorded method to ``new_df`` and report the changes."""
        with summary_output:
            clear_output()
            log = []
            for method, features in selected_features.items():
                for feature in features:
                    original_values = new_df[feature].copy()
                    treated_values = replace_outliers(original_values, method)
                    new_df[feature] = treated_values

                    # NaN != NaN is True, so missing values must be excluded
                    # explicitly or they would be logged as replaced outliers.
                    changed = original_values.ne(treated_values) & original_values.notna()

                    # Pair values positionally instead of looking them up by
                    # label, which would misbehave on duplicate index labels.
                    for old_value, new_value in zip(original_values[changed],
                                                    treated_values[changed]):
                        log.append({
                            'Feature': feature,
                            'Method': method,
                            'Outlier Value': old_value,
                            'Replaced With': new_value
                        })

            log_df = pd.DataFrame(log)
            if not log_df.empty:
                log_df.to_csv('outlier_replacement_log.csv', index=False)
                print("Outlier replacement log saved as 'outlier_replacement_log.csv'.")
                display(log_df)
            print("Outlier treatment completed.")

    implement_button.on_click(on_implement_clicked)

    # Display widgets
    display(widgets.VBox([
        widgets.HBox([whisker_button, percentile_95_button, percentile_99_button]),
        implement_button,
        output,
        summary_output
    ]))

    # Returned before any click happens; the callbacks above mutate it later.
    return new_df



In [None]:
import pandas as pd
import numpy as np

def generate_test_dataset():
    """Build a reproducible 100-row demo DataFrame with planted outliers.

    The frame has 19 continuous columns ``Cont_1`` .. ``Cont_19`` drawn from
    a normal distribution (mean 50, standard deviation 10) followed by 10
    categorical columns ``Cat_1`` .. ``Cat_10`` drawn from the labels
    "A", "B", "C" and "D". Four continuous columns get constant outlier
    values written at regular row intervals.

    Seeds NumPy's global random generator with 42 as a side effect.

    Returns:
        pandas.DataFrame with 100 rows and 29 columns.
    """
    n_rows = 100
    np.random.seed(42)  # fixed seed: every call produces the same data

    columns = {}

    # Continuous features, drawn one column at a time in name order.
    for idx in range(1, 20):
        draws = np.random.normal(loc=50, scale=10, size=n_rows)
        columns[f"Cont_{idx}"] = draws.tolist()

    # (column, row stride, planted value): the value overwrites rows
    # 0, stride, 2 * stride, ... of that column.
    planted_outliers = (
        ("Cont_3", 10, 150),
        ("Cont_1", 10, 180),
        ("Cont_6", 15, -70),
        ("Cont_7", 15, -50),
    )
    for name, stride, value in planted_outliers:
        values = columns[name]
        for row in range(0, n_rows, stride):
            values[row] = value

    # Categorical features, appended after all continuous ones.
    for idx in range(1, 11):
        labels = np.random.choice(["A", "B", "C", "D"], size=n_rows)
        columns[f"Cat_{idx}"] = labels.tolist()

    return pd.DataFrame(columns)

# Build the sample DataFrame once when this cell/module is executed and keep
# it in the module-level name `test_dataset`.
test_dataset = generate_test_dataset()
