### Feature Selection - Using Mutual Information
**Description**: Score each feature by its mutual information with the target variable and keep the top-scoring features.

In [None]:
# write your code from here
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif
import unittest
def feature_selection_mutual_information(X, y, k=2, random_state=None):
    """
    Select the top k features of a DataFrame ranked by mutual information.

    Every column of ``X`` is scored against ``y`` with scikit-learn's
    ``mutual_info_classif`` and the ``k`` best-scoring columns are kept
    through ``SelectKBest``.

    Args:
        X (pd.DataFrame): The input DataFrame containing the features.
        y (pd.Series): The target variable, one value per row of ``X``.
        k (int or str, optional): The number of top features to select.
            Python and NumPy integers are accepted; booleans are rejected.
            If 'all' (case-insensitive), selects all features. Defaults to 2.
        random_state (int, RandomState instance or None, optional): Forwarded
            to ``mutual_info_classif``. Pass a fixed value to make the
            selection reproducible between runs. Defaults to None, which
            leaves scikit-learn's default behaviour unchanged.
    Returns:
        pd.DataFrame: A new DataFrame containing the selected columns of
            ``X``, in their original order.
    Raises:
        TypeError: if X is not a DataFrame or y is not a Series.
        ValueError: if X is empty, y is empty, their lengths differ,
            or k is invalid.
    """
    if not isinstance(X, pd.DataFrame):
        raise TypeError("Input 'X' must be a pandas DataFrame.")
    if not isinstance(y, pd.Series):
        raise TypeError("Input 'y' must be a pandas Series.")
    if X.empty:
        raise ValueError("Input DataFrame 'X' cannot be empty.")
    if y.empty:
        raise ValueError("Input Series 'y' cannot be empty.")
    if len(X) != len(y):
        raise ValueError("The number of rows in 'X' and 'y' must be the same.")

    n_features = X.shape[1]
    if isinstance(k, str):
        if k.lower() != 'all':
            raise ValueError("'k' must be an integer or 'all'.")
        k = n_features
    elif isinstance(k, bool):
        # bool is a subclass of int; True/False as a feature count is
        # almost certainly a caller mistake, so reject it explicitly.
        raise ValueError("'k' must be an integer or 'all'.")
    elif isinstance(k, (int, np.integer)):
        if not 1 <= k <= n_features:
            raise ValueError(f"'k' must be between 1 and {n_features} (inclusive).")
        # Normalise NumPy integers to a plain int before handing k to sklearn.
        k = int(k)
    else:
        raise ValueError("'k' must be an integer or 'all'.")

    def _score(features, target):
        # Thin wrapper so the caller-supplied seed reaches the estimator.
        return mutual_info_classif(features, target, random_state=random_state)

    selector = SelectKBest(score_func=_score, k=k)
    selector.fit(X, y)
    # get_support() is a boolean mask over the columns, so indexing with it
    # keeps the surviving columns in their original order.
    selected_feature_names = X.columns[selector.get_support()]
    return X[selected_feature_names]
class TestFeatureSelectionMutualInformation(unittest.TestCase):
    """Unit tests for ``feature_selection_mutual_information``."""

    def setUp(self):
        # Five-row fixture: three candidate features and a binary target.
        self.data = {'feature1': [1, 2, 3, 4, 5],
                     'feature2': [5, 4, 3, 2, 1],
                     'feature3': [1, 1, 0, 0, 1],
                     'target': [0, 1, 0, 1, 0]}
        self.df = pd.DataFrame(self.data)
        self.X = self.df[['feature1', 'feature2', 'feature3']]
        self.y = self.df['target']

    def test_valid_input_k_int(self):
        selected_df = feature_selection_mutual_information(self.X.copy(), self.y.copy(), k=2)
        self.assertIsInstance(selected_df, pd.DataFrame)
        # All rows are kept; only the column count shrinks to k.
        self.assertEqual(selected_df.shape, (len(self.X), 2))
        # Which two columns win depends on the scores, but they must come from X.
        self.assertTrue(set(selected_df.columns).issubset(set(self.X.columns)))

    def test_valid_input_k_all(self):
        selected_df = feature_selection_mutual_information(self.X.copy(), self.y.copy(), k='all')
        self.assertEqual(selected_df.shape, self.X.shape)
        # With every feature kept, the column order must match the input.
        self.assertListEqual(list(selected_df.columns), list(self.X.columns))

    def test_invalid_input_X_type(self):
        with self.assertRaises(TypeError):
            feature_selection_mutual_information(self.X.to_numpy(), self.y.copy())

    def test_invalid_input_y_type(self):
        with self.assertRaises(TypeError):
            feature_selection_mutual_information(self.X.copy(), self.y.to_list())

    def test_empty_input_X(self):
        with self.assertRaises(ValueError):
            feature_selection_mutual_information(pd.DataFrame(), self.y.copy())

    def test_empty_input_y(self):
        # An explicit dtype avoids the empty-Series default-dtype warning
        # that pandas 1.x emits for a bare pd.Series().
        with self.assertRaises(ValueError):
            feature_selection_mutual_information(self.X.copy(), pd.Series(dtype=float))

    def test_mismatched_lengths(self):
        X_diff_len = self.X.iloc[:-1].copy()
        with self.assertRaises(ValueError):
            feature_selection_mutual_information(X_diff_len, self.y.copy())

    def test_invalid_k_less_than_one(self):
        with self.assertRaises(ValueError):
            feature_selection_mutual_information(self.X.copy(), self.y.copy(), k=0)

    def test_invalid_k_greater_than_n_features(self):
        with self.assertRaises(ValueError):
            feature_selection_mutual_information(self.X.copy(), self.y.copy(), k=4)

    def test_invalid_k_type_string(self):
        with self.assertRaises(ValueError):
            feature_selection_mutual_information(self.X.copy(), self.y.copy(), k='top')
if __name__ == '__main__':
    # Demo dataset: four candidate features and a binary target column.
    data = {
        'feature1': [1, 2, 3, 4, 5, 6, 7],
        'feature2': [1, 2, 1, 2, 1, 2, 1],
        'feature3': [10, 20, 30, 40, 50, 60, 70],
        'feature4': [0, 0, 0, 0, 1, 1, 1],
        'target': [0, 1, 0, 1, 0, 1, 0],
    }
    df = pd.DataFrame(data)
    X = df.drop(columns='target')
    y = df['target']

    # Show the inputs before running any selection.
    print("Original DataFrame (Features):", X, sep="\n")
    print("\nTarget Variable:", y, sep="\n")

    # Demo 1: keep the two best-scoring features.
    try:
        X_selected = feature_selection_mutual_information(X.copy(), y.copy(), k=2)
    except TypeError as e:
        print(f"\nType error during feature selection (k=2): {e}")
    except ValueError as e:
        print(f"\nError during feature selection (k=2): {e}")
    else:
        print("\nDataFrame with selected features (k=2):", X_selected, sep="\n")

    # Demo 2: keep every feature.
    try:
        X_selected_all = feature_selection_mutual_information(X.copy(), y.copy(), k='all')
    except TypeError as e:
        print(f"\nType error during feature selection (k='all'): {e}")
    except ValueError as e:
        print(f"\nError during feature selection (k='all'): {e}")
    else:
        print("\nDataFrame with selected features (k='all'):", X_selected_all, sep="\n")

    # exit=False keeps the interpreter (or notebook kernel) alive after the
    # test run; the placeholder argv stops unittest from parsing real CLI args.
    print("\nRunning Unit Tests:")
    unittest.main(argv=['first-arg-is-ignored'], exit=False)