In [1]:
import pandas as pd
import os
import ipywidgets as widgets
from IPython.display import display

def _classify_columns(data):
    """Split the column names of ``data`` into (numeric, categorical, unsupported) lists by dtype."""
    numeric, categorical, unsupported = [], [], []
    for col in data.columns:
        dtype = data[col].dtype
        if dtype in ['int64', 'float64']:
            numeric.append(col)
        elif dtype == 'O' or dtype == 'bool':
            categorical.append(col)
        else:
            # Any other dtype is reported and dropped by the caller.
            unsupported.append(col)
    return numeric, categorical, unsupported


def _impute_and_report(data, report_dir="Reports"):
    """Fill missing values in ``data`` in place and write three CSV summary reports.

    Numeric (int64/float64) columns are filled with their median, object/bool
    columns with their mode. Columns of any other dtype are printed and
    dropped from ``data``. Columns that contain no observed value at all are
    left untouched because there is nothing to derive a fill value from.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to impute. It is modified in place.
    report_dir : str, default "Reports"
        Directory (created if needed) receiving ``CategoricalMMM.csv``,
        ``NumericMMM.csv`` and ``FillMissingData.csv``.

    Returns
    -------
    None
    """
    numeric, categorical, unsupported = _classify_columns(data)

    if unsupported:
        print("Following are erroneous columns:", unsupported)
        data.drop(columns=unsupported, inplace=True)

    missing_numeric = [col for col in numeric if data[col].isnull().any()]
    missing_categorical = [col for col in categorical if data[col].isnull().any()]

    if not missing_numeric and not missing_categorical:
        print("No missing values in the dataset.")
        return

    # A column with no observed values has no median/mode; skipping it up
    # front keeps one bad column from aborting the run half-way through.
    all_null = [col for col in missing_numeric + missing_categorical
                if data[col].isnull().all()]
    if all_null:
        print("Skipping columns with no observed values to impute from:", all_null)
        missing_numeric = [col for col in missing_numeric if col not in all_null]
        missing_categorical = [col for col in missing_categorical if col not in all_null]
        if not missing_numeric and not missing_categorical:
            return

    numeric_rows, categorical_rows, fill_rows = [], [], []

    if missing_numeric:
        print("Numeric columns - missing values replaced by median:")
        for col in missing_numeric:
            print(f"-> {col}")
            observed = data[col]
            median = observed.median()
            # Statistics are taken BEFORE filling so the report describes the
            # observed values rather than values skewed by the imputation.
            numeric_rows.append({
                'Features': col,
                'Mean': observed.mean(),
                'Median': median,
                'Mode': observed.mode().iloc[0],
            })
            # Assign back instead of a chained `fillna(inplace=True)`, which
            # does not reliably update the parent frame (pandas Copy-on-Write).
            data[col] = observed.fillna(median)
            fill_rows.append({'Features': col, 'Replacable Values': median})

    if missing_categorical:
        print("\nCategorical columns - missing values replaced by mode:")
        for col in missing_categorical:
            print(f"-> {col}")
            mode_value = data[col].mode().iloc[0]
            data[col] = data[col].fillna(mode_value)
            categorical_rows.append({'Features': col, 'Mode': mode_value})
            fill_rows.append({'Features': col, 'Replacable Values': mode_value})

    # Passing `columns=` keeps the header row even when a report has no rows.
    cdf = pd.DataFrame(categorical_rows, columns=['Features', 'Mode'])
    ndf = pd.DataFrame(numeric_rows, columns=['Features', 'Mean', 'Median', 'Mode'])
    fdf = pd.DataFrame(fill_rows, columns=['Features', 'Replacable Values'])

    os.makedirs(report_dir, exist_ok=True)
    cdf.to_csv(os.path.join(report_dir, "CategoricalMMM.csv"), index=False)
    ndf.to_csv(os.path.join(report_dir, "NumericMMM.csv"), index=False)
    fdf.to_csv(os.path.join(report_dir, "FillMissingData.csv"), index=False)

    print("Missing values imputed successfully and reports generated.")


def handle_missing_data_interactive(data):
    """Display a button that, when clicked, imputes missing values in ``data``.

    Nothing is changed until the button is clicked. On click, ``data`` is
    modified in place (median for int64/float64 columns, mode for object/bool
    columns, other dtypes dropped) and summary CSVs are written to the
    ``Reports`` directory. Any exception raised during imputation is caught
    and reported with a printed message instead of propagating.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to impute; mutated in place by the button callback.

    Returns
    -------
    None
    """
    def impute_missing_values(b):
        # `b` is the clicked button supplied by ipywidgets; it is not used.
        try:
            _impute_and_report(data)
        except Exception as e:
            print("The missing value computation has reached a fault checkpoint. Please revisit the data and the steps taken before calling this function:", e)

    button = widgets.Button(
        description="Impute Missing Values",
        button_style='success',
        tooltip="Click to replace missing values with appropriate statistics",
        layout=widgets.Layout(width='200px')  # Wide enough to show the full label
    )
    button.on_click(impute_missing_values)
    display(widgets.HBox([button]))
    return


In [2]:
# Toy frame for the demo: 'Age' is complete, while 'Salary' (numeric) and
# 'Gender' / 'Married' (text) each contain missing entries.
haha = pd.DataFrame(
    dict(
        Age=[25, 30, 34, 35, 40],
        Salary=[50000, None, 60000, 70000, None],
        Gender=['Male', 'Female', 'Female', None, 'Male'],
        Married=[None, 'Yes', 'No', 'Yes', None],
    )
)

In [3]:
# Render the "Impute Missing Values" button for the demo frame. The imputation
# runs only when the button is clicked, and it is applied to `haha` itself.
handle_missing_data_interactive(haha)

HBox(children=(Button(button_style='success', description='Impute Missing Values', layout=Layout(width='200px'…

Numeric columns - missing values replaced by median:
-> Salary

Categorical columns - missing values replaced by mode:
-> Gender
-> Married
Missing values imputed successfully and reports generated.


In [4]:
# Show the frame; the gaps appear filled only if the button above was clicked
# before this cell was run.
haha

Unnamed: 0,Age,Salary,Gender,Married
0,25,50000.0,Male,Yes
1,30,60000.0,Female,Yes
2,34,60000.0,Female,No
3,35,70000.0,Female,Yes
4,40,60000.0,Male,Yes
