Checking if there are any categorical variables in our data

In [4]:
import pickle

# Load the split_data dictionary
with open("Data/split_regression_data.pkl", "rb") as f:
    split_data = pickle.load(f)

# Print variable types for X_train of each dataset
for name, data in split_data.items():
    X_train = data["X_train"]
    dtypes = X_train.dtypes.unique()
    print(f"\n{name} — Unique data types in X_train:")
    print(dtypes)


fri_c1_500_50 — Unique data types in X_train:
[dtype('float64')]

fri_c3_1000_50 — Unique data types in X_train:
[dtype('float64')]

fri_c4_500_50 — Unique data types in X_train:
[dtype('float64')]

fri_c4_1000_50 — Unique data types in X_train:
[dtype('float64')]

fri_c2_1000_25 — Unique data types in X_train:
[dtype('float64')]

fri_c1_1000_25 — Unique data types in X_train:
[dtype('float64')]

fri_c3_1000_25 — Unique data types in X_train:
[dtype('float64')]

BodyFat — Unique data types in X_train:
[dtype('float64') dtype('int64')]

Forest_Fires — Unique data types in X_train:
[dtype('int64') dtype('O') dtype('float64')]

Quakes — Unique data types in X_train:
[dtype('int64') dtype('float64')]

Servo — Unique data types in X_train:
[dtype('O') dtype('int64')]

auto93 — Unique data types in X_train:
[CategoricalDtype(categories=['Acura', 'Audi', 'BMW', 'Buick', 'Cadillac', 'Chevrolet',
                   'Chrysler', 'Dodge', 'Eagle', 'Ford', 'Geo', 'Honda',
                   'Hyund

Doing the FS

In [1]:
import pandas as pd
from feature_engine.selection import MRMR
import os
import time
import pickle

# Step 1: Load split_data.pkl
with open("Data/split_regression_data.pkl", "rb") as f:
    split_data = pickle.load(f)

# Step 2: Initialize output dictionary
selected_feature_data = {}

# Step 3: Loop through each dataset
for name, data in split_data.items():
    print(f"Processing {name}...")
    X_train = data["X_train"]
    X_val = data["X_val"]
    X_test = data["X_test"]
    y_train = data["y_train"]
    y_val = data["y_val"]
    y_test = data["y_test"]

    variables = X_train.columns.tolist()

    # Determine which features are discrete
    discrete_flags = []
    for dtype in X_train.dtypes:
        if pd.api.types.is_numeric_dtype(dtype):
            discrete_flags.append(False)  # Continuous: int or float
        else:
            discrete_flags.append(True)   # Discrete: object, category

    # Store results for this dataset
    selected_feature_data[name] = {}

    # Step 4: Loop from 1 to number of variables
    for k in range(1, len(variables) + 1):
        try:
            sel = MRMR(
                variables=variables,
                method="MIQ",
                max_features=k,
                discrete_features=discrete_flags,
                regression=True,
                random_state=42
            )
            start = time.time()
            sel.fit(X_train, y_train)
            elapsed = time.time() - start

            # Transform splits
            X_train_sel = sel.transform(X_train)
            X_val_sel = sel.transform(X_val)
            X_test_sel = sel.transform(X_test)

            # Save results under this k
            selected_feature_data[name][k] = {
                "X_train": X_train_sel,
                "X_val": X_val_sel,
                "X_test": X_test_sel,
                "y_train": y_train,
                "y_val": y_val,
                "y_test": y_test,
                "time": elapsed
            }

        except Exception as e:
            selected_feature_data[name][k] = {"error": str(e)}

# Step 5: Save to pickle file
os.makedirs("Data", exist_ok=True)
with open("Data/selected_feature_data.pkl", "wb") as f:
    pickle.dump(selected_feature_data, f)

print("✅ Feature selection completed and saved to 'Data/selected_feature_data.pkl'")


Processing fri_c1_500_50...


  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr =

Processing fri_c3_1000_50...


  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr =

Processing fri_c4_500_50...


  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr =

Processing fri_c4_1000_50...


  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr =

Processing fri_c2_1000_25...


  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr =

Processing fri_c1_1000_25...


  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr =

Processing fri_c3_1000_25...


  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr =

Processing BodyFat...
Processing Forest_Fires...
Processing Quakes...
Processing Servo...
Processing auto93...
Processing autoPrice...
Processing autoMPG...
Processing Concrete_Compressive_Strength...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Processing Airfoil_Self_Noise...
Processing pyrim...


  mrmr = relevance / redundance
  y = column_or_1d(y, warn=True)
  mrmr = relevance / redundance
  y = column_or_1d(y, warn=True)
  mrmr = relevance / redundance
  y = column_or_1d(y, warn=True)
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrmr = relevance / redundance
  mrm

Processing boston...
Processing Wine_Quality...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Processing California_Housing...
✅ Feature selection completed and saved to 'Data/selected_feature_data.pkl'
