In [14]:
import pandas as pd
import numpy as np
from pyampute.ampute import MultivariateAmputation

In [12]:
def _missingness_patterns(columns):
    """Build a fresh pyampute pattern dict for each mechanism (MCAR, MAR, MNAR).

    ``columns`` is split at ``len(columns) // 2``:

    * MCAR amputes every column and uses all-zero weights.
    * MAR amputes the first half and puts weight 1 on the second half.
    * MNAR amputes the second half and puts weight 1 on that same half.

    A new dict is returned on every call so no pattern object is shared
    between amputers.
    """
    half = len(columns) // 2
    first_half, second_half = columns[:half], columns[half:]
    # Weight 0 for first-half columns, 1 for second-half columns, in column order.
    second_half_weights = [0] * len(first_half) + [1] * len(second_half)

    return {
        # NOTE(review): this pattern lists every column as incomplete, so a row
        # selected for amputation presumably loses all of its values — confirm
        # this is the intended MCAR design.
        "MCAR": {
            "incomplete_vars": list(columns),
            "weights": np.zeros(len(columns)),
            "mechanism": "MCAR",
            "score_to_probability_func": "sigmoid-right",
        },
        "MAR": {
            "incomplete_vars": first_half,
            "weights": list(second_half_weights),
            "mechanism": "MAR",
            "score_to_probability_func": "sigmoid-right",
        },
        "MNAR": {
            "incomplete_vars": second_half,
            "weights": list(second_half_weights),
            "mechanism": "MNAR",
            "score_to_probability_func": "sigmoid-right",
        },
    }


def introduce_missingness(csv_path, missingness_levels=(0.1, 0.2, 0.3, 0.4, 0.5), seed=None):
    """Ampute a complete dataset under MCAR, MAR and MNAR at several levels.

    The CSV at ``csv_path`` is read with its first column as the row index.
    For every level and every mechanism a ``MultivariateAmputation`` is fitted
    on a copy of the data and the result is written to
    ``<MECHANISM>_<percent>.csv`` (relative to the current working directory).
    The row index is not written to the output files.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the input CSV.
    missingness_levels : iterable of float, optional
        Values passed as ``prop`` to the amputer, each in the interval (0, 1].
        ``round(level * 100)`` is used as the percentage in file names and
        progress messages. Defaults to 10%..50% in steps of 10%.
    seed : int, optional
        When given, forwarded to ``MultivariateAmputation`` as ``seed`` so the
        generated files can be reproduced. When ``None`` no seed is passed.

    Returns
    -------
    None
        Results are written to disk; progress is printed.

    Raises
    ------
    ValueError
        If the data has fewer than two columns (the MAR/MNAR split needs a
        non-empty half on each side) or a level lies outside (0, 1].
    """
    # Load the dataset
    data = pd.read_csv(csv_path, index_col=0)

    print("Original Data:\n", data.head())

    columns = data.columns.tolist()
    if len(columns) < 2:
        raise ValueError(
            f"Need at least 2 columns to build the MAR/MNAR patterns, got {len(columns)}."
        )

    # Validate every level up front so nothing is written if one is invalid.
    levels = list(missingness_levels)
    for level in levels:
        if not 0 < level <= 1:
            raise ValueError(f"Missingness level must be in (0, 1], got {level!r}.")

    # Only pass `seed` when the caller asked for one, so the default call
    # constructs the amputer with exactly the arguments it always had.
    amputer_kwargs = {} if seed is None else {"seed": seed}

    # NOTE(review): the run recorded in this notebook ended with pyampute's
    # "Cannot ampute all features under MAR" AssertionError; which pattern
    # triggered it is not visible in the saved output — verify after a fresh
    # Restart & Run All.
    for level in levels:
        # round() rather than int(): 0.29 * 100 is 28.999..., which int() would label 28.
        percent = round(level * 100)
        print(f"\nApplying {percent}% missingness:")

        for mechanism, pattern in _missingness_patterns(columns).items():
            amputer = MultivariateAmputation(
                prop=level,
                patterns=[pattern],
                **amputer_kwargs,
            )
            amputed = amputer.fit_transform(data.copy())

            out_name = f"{mechanism}_{percent}.csv"
            pd.DataFrame(amputed, columns=data.columns).to_csv(out_name, index=False)
            print(f"{mechanism} dataset saved as {out_name}")

In [13]:
# Generate the MCAR / MAR / MNAR CSVs for every missingness level; the files
# are written with relative names, i.e. into the current working directory.
# NOTE(review): the output saved with this cell ends in
# "AssertionError: Cannot ampute all features under MAR, ..." — re-run from a
# fresh kernel (Restart & Run All) to confirm whether it still reproduces.
introduce_missingness("physionet_wo_missing.csv")





Original Data:
             re.admission.within.6.months  \
Unnamed: 0                                 
1                                      0   
2                                      0   
3                                      0   
4                                      1   
5                                      0   

            return.to.emergency.department.within.6.months  \
Unnamed: 0                                                   
1                                                      0.0   
2                                                      0.0   
3                                                      0.0   
4                                                      1.0   
5                                                      0.0   

            re.admission.within.3.months  re.admission.within.28.days  \
Unnamed: 0                                                              
1                                      0                            0   
2                    

AssertionError: Cannot ampute all features under MAR, since all vars will be missing.