### Feature Selection - Using Mutual Information
**Description**: Score each feature by its mutual information with the target and keep only the features whose score exceeds a chosen threshold.

In [None]:
import pandas as pd
from sklearn.feature_selection import mutual_info_classif
from sklearn.datasets import load_iris
import numpy as np

def select_features_using_mutual_info(X, y, threshold=0.1):
    """
    Select features whose mutual information with the target exceeds a threshold.

    Scores are computed with ``mutual_info_classif`` using
    ``discrete_features='auto'`` and a fixed ``random_state=42``.

    Args:
        X (pd.DataFrame): Feature matrix, one column per feature.
        y (pd.Series or np.array): Target vector with one entry per row of X.
        threshold (float): Score a feature must strictly exceed to be kept.
            Must be non-negative.

    Returns:
        pd.DataFrame: The columns of X whose score is greater than
        ``threshold``. Contains no columns when nothing qualifies; a warning
        is printed in that case.

    Raises:
        TypeError: If X is not a pandas DataFrame.
        ValueError: If X is empty, if X and y differ in length, or if
            threshold is negative.
    """
    # Check the type first: the remaining checks use DataFrame attributes
    # (``.empty``), which would otherwise fail with an unrelated
    # AttributeError on inputs such as a list or a NumPy array.
    if not isinstance(X, pd.DataFrame):
        raise TypeError("X must be a pandas DataFrame.")
    if X.empty:
        raise ValueError("Input feature matrix X is empty.")
    if len(y) != len(X):
        raise ValueError("Feature matrix X and target y must have the same length.")
    if threshold < 0:
        raise ValueError("Threshold must be non-negative.")

    # Compute one mutual information score per column, labelled by column name.
    mi_scores = mutual_info_classif(X, y, discrete_features='auto', random_state=42)
    mi_series = pd.Series(mi_scores, index=X.columns)

    # Keep only features scoring strictly above the threshold.
    selected_features = mi_series[mi_series > threshold].index.tolist()

    if not selected_features:
        print("Warning: No features selected. Consider lowering the threshold.")

    return X[selected_features]

# Demo: when run as a script, apply the selector to the iris dataset and
# show the first rows of the retained columns.
if __name__ == "__main__":
    iris = load_iris()
    X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    y = pd.Series(data=iris.target)

    # Any failure is reported on stdout instead of aborting the script.
    try:
        X_selected = select_features_using_mutual_info(X, y, threshold=0.1)
        print("Selected features based on mutual information:")
        print(X_selected.head())
    except Exception as exc:
        print(f"Error during feature selection: {exc}")

# Simple unit test
def test_select_features():
    """
    Check that informative features are kept and a constant feature is dropped.

    The scorer is a nearest-neighbour estimator that perturbs continuous
    features with tiny random noise and clips negative estimates to 0. On a
    handful of rows its output is not meaningful: a monotone feature with
    alternating labels scores 0, and a constant column can score slightly
    above 0 purely from the noise. The fixture therefore uses enough rows per
    class and cleanly separated features, and the threshold sits well between
    the expected scores (roughly ln 2 for f1/f3, near 0 for f2).
    """
    n_per_class = 50
    n_rows = 2 * n_per_class

    y = pd.Series([0] * n_per_class + [1] * n_per_class)
    X = pd.DataFrame({
        # Ramp whose lower half belongs to class 0 and upper half to class 1.
        'f1': [float(i) for i in range(n_rows)],
        # Zero variance: carries no information about y.
        'f2': [5.0] * n_rows,
        # Two levels that coincide exactly with the class labels.
        'f3': [1.0] * n_per_class + [2.0] * n_per_class,
    })

    selected = select_features_using_mutual_info(X, y, threshold=0.3)
    assert 'f2' not in selected.columns, "Feature f2 should be excluded due to low MI."
    assert 'f1' in selected.columns, "Feature f1 should be included."
    assert 'f3' in selected.columns, "Feature f3 should be included."
    print("Unit test passed.")

# Uncomment to run the test
# test_select_features()