In [None]:
# ============================================================================
# CONFIGURATION
# Every user-adjustable setting lives in this cell; edit the values below.
# ============================================================================

# --- PATHS ---
# TODO: Point this at the Excel workbook you want to analyse.
INPUT_FILE_PATH: str = "data/your_data_file.xlsx"

# --- DATA FORMAT ---
# True  -> the sheet stores samples as columns and metabolites as rows, so it
#          is transposed before testing.
# False -> the sheet already stores samples as rows and metabolites as columns.
TRANSPOSE_DATA: bool = True

# Header of the column holding the metabolite/feature identifiers
# (e.g. "Metabolite_Name"). Leave as None to use the first column.
IDENTIFIER_COLUMN_NAME = None

# --- TEST PARAMETERS ---
# Significance threshold the p-value is compared against.
ALPHA_LEVEL: float = 0.05

In [None]:
# ============================================================================
# SCRIPT LOGIC
# A user typically does not need to edit this cell.
# ============================================================================
import pandas as pd
import numpy as np
import os
import warnings

# We keep this import but acknowledge its potential instability.
from pyampute.exploration.mcar_statistical_tests import MCARTest

warnings.filterwarnings('ignore', category=UserWarning)

def _prepare_numeric_frame(df: pd.DataFrame, transpose_data: bool, identifier_col: "str | None") -> pd.DataFrame:
    """Orient *df* as samples x features and coerce every cell to a number.

    Columns and rows that contain no numeric value at all after coercion are
    removed, because they carry no information for the test and would only
    add variables/samples that are 100% missing.
    """
    if transpose_data:
        print("Transposition is enabled. Assuming samples are columns.")
        # Tell the user when the requested identifier column is absent instead
        # of silently falling back to the first column.
        if identifier_col is not None and identifier_col not in df.columns:
            print(f"Warning: Identifier column '{identifier_col}' not found. Using the first column '{df.columns[0]}' instead.")
        id_col_name = identifier_col if identifier_col in df.columns else df.columns[0]

        if df[id_col_name].nunique() == len(df):
            df = df.set_index(id_col_name)
            print(f"Set '{id_col_name}' as index before transposing.")
        else:
            print(f"Warning: Column '{id_col_name}' does not contain unique identifiers. Transposing without setting a new index.")
            # The identifier column is metadata, not a sample: remove it so it
            # does not turn into a bogus sample row after the transpose.
            df = df.drop(columns=[id_col_name])
            print(f"Excluded identifier column '{id_col_name}' from the analysis.")

        df = df.T
        print(f"DataFrame transposed. New shape (samples as rows): {df.shape}")
    else:
        print("Transposition is disabled. Assuming samples are already rows.")

    # Anything that cannot be parsed as a number becomes NaN.
    df_numeric = df.apply(pd.to_numeric, errors='coerce')

    # After coercion every column has a numeric dtype, so purely textual
    # columns can only be recognised by being entirely NaN.
    empty_cols = df_numeric.isna().all(axis=0)
    if empty_cols.any():
        print(f"Note: Dropping {int(empty_cols.sum())} column(s) containing no numeric values.")
        df_numeric = df_numeric.loc[:, ~empty_cols]

    if df_numeric.shape[1] > 0:
        empty_rows = df_numeric.isna().all(axis=1)
        if empty_rows.any():
            print(f"Note: Dropping {int(empty_rows.sum())} row(s) containing no numeric values.")
            df_numeric = df_numeric.loc[~empty_rows]

    return df_numeric


def _report_mcar_result(p_value: float, alpha: float) -> None:
    """Print the p-value of Little's test and its interpretation against *alpha*."""
    print(f"\n--- Little's MCAR Test Results ---")

    # NaN compares False with everything; without this guard an undefined
    # p-value would be reported as a rejection of the null hypothesis.
    if pd.isna(p_value):
        print("P-value: undefined (NaN)")
        print("\nRESULT: The test statistic could not be computed for this dataset, so no conclusion about MCAR can be drawn.")
        return

    print(f"P-value: {p_value:.4f}")

    print("\nInterpretation:")
    if p_value > alpha:
        print(f"Since p-value ({p_value:.4f}) > alpha ({alpha}), we FAIL TO REJECT the null hypothesis.")
        print("There is NOT enough statistical evidence to suggest that the data are NOT Missing Completely At Random (MCAR).")
    else:
        print(f"Since p-value ({p_value:.4f}) <= alpha ({alpha}), we REJECT the null hypothesis.")
        print("There is statistical evidence to suggest that the data are NOT Missing Completely At Random (MCAR).")
        print("This indicates the missingness mechanism is likely MAR or MNAR.")


def perform_littles_mcar_test(input_file: str, transpose_data: bool, identifier_col: "str | None", alpha: float):
    """
    Performs Little's MCAR test on a dataset, with flexible options for data format.

    All results and diagnostics are printed; the function always returns None.
    Errors raised while loading or testing the data are caught and printed
    rather than propagated.

    Args:
        input_file (str): The full path to the input Excel file.
        transpose_data (bool): Whether to transpose the data (samples as columns -> samples as rows).
        identifier_col (str or None): The name of the column with feature IDs.
            Only used when ``transpose_data`` is True; if None or not present
            in the file, the first column is used.
        alpha (float): The significance level for the test.
    """
    print(f"--- Performing Little's MCAR Test on: {os.path.basename(input_file)} ---")

    if not os.path.exists(input_file):
        print(f"ERROR: File not found at '{input_file}'.")
        return

    try:
        # Load the Excel file, interpreting various strings as NaN.
        df = pd.read_excel(input_file, na_values=['', ' ', '-', '#N/A', 'NULL', 'null', 'NA', 'N/A'])
        print(f"Successfully loaded data. Initial shape: {df.shape}")

        df_numeric = _prepare_numeric_frame(df, transpose_data, identifier_col)

        # Nothing left once non-numeric columns/rows have been removed.
        if df_numeric.empty:
            print("ERROR: No suitable numeric columns found for analysis after data processing.")
            return

        # The test is only defined when at least one value is missing.
        if not df_numeric.isna().to_numpy().any():
            print("\nRESULT: No missing values found. Little's MCAR test cannot be performed (data is complete).")
            return

        # Initialize and perform Little's MCAR test
        mcar_test_obj = MCARTest(method="little")
        p_value = mcar_test_obj.little_mcar_test(df_numeric)

        _report_mcar_result(p_value, alpha)

    except Exception as e:
        # Notebook boundary: report the failure instead of raising a traceback.
        print(f"\nAn unexpected error occurred: {e}")
        print(f"Error type: {type(e).__name__}")
    finally:
        print("\n--- Test Finished ---")

In [None]:
# ============================================================================
# EXECUTION
# This cell calls the main function using the variables from the Configuration cell.
# Run the Configuration and Script Logic cells first so that the constants and
# perform_littles_mcar_test are defined. Results are printed to the cell output.
# ============================================================================

# Arguments are passed by keyword so each setting maps visibly to its parameter.
perform_littles_mcar_test(
    input_file=INPUT_FILE_PATH,
    transpose_data=TRANSPOSE_DATA,
    identifier_col=IDENTIFIER_COLUMN_NAME,
    alpha=ALPHA_LEVEL
)