In [None]:
# Import Libraries
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2, RFE
from sklearn.linear_model import LogisticRegression

# Read the processed dataset from disk and split it into predictors and label.
data_path = '../data/processed_data.csv'
target_column = 'Bankrupt?'
df = pd.read_csv(data_path)

# y is the target column; X is every remaining column.
y = df[target_column]
X = df.drop(target_column, axis=1)

# Correlation-Based Selection
# Rank the features by the magnitude of their correlation with the target
# (sign is ignored) and keep the ten strongest.
correlations = X.corrwith(y)
abs_correlations = correlations.abs()
ranked_correlations = abs_correlations.sort_values(ascending=False)
top_corr_features = ranked_correlations.head(10).index
print("Top Correlated Features:\n", top_corr_features)

# Chi-Squared Test
from sklearn.preprocessing import MinMaxScaler

# The chi2 scorer requires non-negative inputs, so the features are
# min-max rescaled before scoring.
min_max_scaler = MinMaxScaler()
scaled_values = min_max_scaler.fit_transform(X)
X_scaled_non_negative = pd.DataFrame(scaled_values, columns=X.columns)

# Keep the ten features with the highest chi-squared score against the target.
chi2_selector = SelectKBest(score_func=chi2, k=10)
chi2_selector.fit(X_scaled_non_negative, y)
chi2_features = X.columns[chi2_selector.get_support()]
print("Features selected by Chi-Squared Test:\n", chi2_features)

# RFE
# Recursive feature elimination wrapped around a logistic-regression
# estimator, configured to retain ten features.
rfe_model = LogisticRegression(max_iter=1000, random_state=42)
rfe_selector = RFE(rfe_model, n_features_to_select=10).fit(X, y)
# `support_` is a fitted attribute (boolean mask array), not a method;
# calling it raises "TypeError: 'numpy.ndarray' object is not callable".
rfe_features = X.columns[rfe_selector.support_]
print("Features selected by RFE:\n", rfe_features)

# Combine Features
# Union of the three selections. dict.fromkeys removes duplicates while
# keeping first-seen order (correlation picks, then chi-squared, then RFE),
# so the resulting list is identical on every run; a set-based union would
# yield an ordering that depends on string hashing and can vary between
# interpreter sessions.
final_features = list(
    dict.fromkeys([*top_corr_features, *chi2_features, *rfe_features])
)
print("Final Selected Features:\n", final_features)
