In [1]:
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import SVR
from concurrent.futures import ThreadPoolExecutor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Ridge
from statsmodels.formula.api import ols
import statsmodels.api as sm

In [2]:
df = pd.read_csv("C:\\Users\\diana\\Downloads\\Loans\\LoanApprovals\\loans_clean4.csv")

In [3]:
y=df['Loan_Status']
X = df.drop(columns=['Loan_Status'])

In [4]:
from sklearn.linear_model import Lasso

from concurrent.futures import ThreadPoolExecutor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Ridge

def fit_lasso(X, y):
    lasso = Lasso(alpha=0.01).fit(X, y)
    lasso_selected = (np.abs(lasso.coef_) > 0).astype(int)
    return lasso_selected

def fit_svm(X, y):
    svm = SVR(kernel='linear').fit(X, y)
    svm_selected = (np.abs(svm.coef_.ravel()) > 0).astype(int)
    return svm_selected

def fit_gradient_boost(X, y):
    gb = GradientBoostingRegressor().fit(X, y)
    gb_importances = gb.feature_importances_
    gb_selected = (gb_importances > 0).astype(int)
    return gb_selected

def fit_random_forest(X, y):
    rf = RandomForestRegressor().fit(X, y)
    rf_importances = rf.feature_importances_
    rf_selected = (rf_importances > 0).astype(int)
    return rf_selected

def fit_ridge(X, y):
    ridge = Ridge(alpha=1.0).fit(X, y)
    ridge_selected = (np.abs(ridge.coef_) > 0).astype(int)
    return ridge_selected

def fit_elastic_net(X, y):
    enet = ElasticNet(alpha=0.01, l1_ratio=0.5).fit(X, y)
    enet_selected = (np.abs(enet.coef_) > 0).astype(int)
    return enet_selected

def fit_rfe(X, y):
    model = LinearRegression()
    rfe = RFE(model, n_features_to_select=5).fit(X, y)
    rfe_selected = rfe.support_.astype(int)
    return rfe_selected

In [5]:
with ThreadPoolExecutor() as executor:
    lasso_future = executor.submit(fit_lasso, X, y)
    svm_future = executor.submit(fit_svm, X, y)
    gb_future = executor.submit(fit_gradient_boost, X, y)
    rf_future = executor.submit(fit_random_forest, X, y)
    en_future = executor.submit(fit_elastic_net, X, y)
    ridge_future = executor.submit(fit_ridge, X, y)
    rfe_future = executor.submit(fit_rfe, X, y)

    lasso_selected = lasso_future.result()
    svm_selected = svm_future.result()
    gb_selected = gb_future.result()
    rf_selected = rf_future.result()
    en_selected = en_future.result()
    ridge_selected = ridge_future.result()
    rfe_selected = rfe_future.result()

In [6]:
selection_df = pd.DataFrame({
    'Feature': X.columns,
    'Lasso': lasso_selected,
    'SVM': svm_selected,
    'GradientBoost': gb_selected,
    'RandomForest': rf_selected,
    'ElasticNet': en_selected,
    'Ridge': ridge_selected,
    'RFE': rfe_selected
})

selection_df['Sum'] = selection_df[['Lasso', 'SVM', 'GradientBoost', 'RandomForest', 'ElasticNet', 'Ridge','RFE']].sum(axis=1)

print(selection_df.sort_values('Sum', ascending=False))

              Feature  Lasso  SVM  GradientBoost  RandomForest  ElasticNet  \
2             Married      1    1              1             1           1   
4           Education      1    1              1             1           1   
10     Credit_History      1    1              1             1           1   
11      Property_Area      1    1              1             1           1   
0          Unnamed: 0      1    1              1             1           1   
6     ApplicantIncome      1    1              1             1           1   
7   CoapplicantIncome      1    1              1             1           1   
8          LoanAmount      1    1              1             1           1   
9    Loan_Amount_Term      1    1              1             1           1   
1              Gender      0    1              1             1           0   
3          Dependents      0    1              1             1           1   
5       Self_Employed      0    1              1             1  