In [1]:
%matplotlib inline
import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.feature_selection import SelectFromModel
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.pipeline import Pipeline

import custom_helpers as ch

# Seed NumPy's global RNG so steps that fall back on it are repeatable.
SEED = 0
np.random.seed(SEED)

# Training CSV (per its file name: month 3, target column included).
# ch.load_data reports its progress on stdout while loading and casting types.
TRAIN_CSV = '../data/train_month_3_with_target.csv'
data = ch.load_data(TRAIN_CSV)

------------------------------------------------------------
loading data...
transforming dates...
cast types into bool, object, categorical...
data loaded and casted
------------------------------------------------------------


In [2]:
# Separate the label from the candidate features.
y_train = data['target']
X_train = data.drop(columns='target')

# Only numeric columns are kept for this experiment; all other dtypes are dropped.
X_train = X_train.select_dtypes(include='number')

# Selector evaluated at fit time: numeric dtypes, with booleans explicitly excluded.
num_col = make_column_selector(dtype_include='number', dtype_exclude='bool')

# Numeric branch: replace NaNs with the column median, then standardise.
numeric_steps = [
    ('impute', SimpleImputer(missing_values=np.nan, strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Columns not matched by `num_col` are forwarded unchanged (remainder="passthrough").
preprocessor = ColumnTransformer(
    transformers=[('num', numeric_transformer, num_col)],
    remainder="passthrough",
)

In [3]:
# L1-penalised logistic regression used purely as a feature selector:
# SelectFromModel keeps the features whose fitted coefficients are not
# shrunk to (near) zero.
#
# random_state is pinned because the liblinear solver shuffles the data
# internally; without it the fit depends on whatever state NumPy's global RNG
# happens to be in when this cell runs, i.e. on cell execution order.
#
# NOTE(review): tol=0.5 is far looser than scikit-learn's default (1e-4), so the
# solver may stop well before the coefficients settle -- confirm this is intended.
lasso_estimator = LogisticRegression(
    penalty='l1',
    solver='liblinear',
    C=1,
    class_weight='balanced',
    max_iter=10000,
    tol=0.5,
    random_state=0,
)
sel_ = SelectFromModel(lasso_estimator)

pipe = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("lasso", sel_),
    ]
)

# Pipeline.fit returns the pipeline itself, so `clf` and `pipe` are the same object.
clf = pipe.fit(X_train, y_train)

In [4]:
# Boolean mask over the selector's input columns: True means the feature was kept.
support_mask = clf['lasso'].get_support()
print(support_mask)

# Translate the mask into column names. This assumes the preprocessor emits
# columns in the same order as X_train -- TODO confirm if non-numeric columns
# are ever fed through the passthrough branch.
X = X_train.copy()
print(X.columns[support_mask])

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True]
Index(['bal_insurance_21', 'bal_insurance_23', 'cap_life_insurance_fixed_cap',
       'cap_life_insurance_decreasing_cap', 'prem_fire_car_other_insurance',
       'bal_personal_loan', 'bal_mortgage_loan', 'bal_current_account',
       'bal_pension_saving', 'bal_savings_account',
       'bal_savings_account_starter', 'bal_current_account_starter',
       'visits_distinct_so', 'visits_distinct_so_areas',
       'customer_since_all_years', 'customer_since_bank_years',
       'customer_age'],
      dtype='object')


In [5]:
# Number of columns produced by the preprocessing step on its own.
# Note that fit_transform re-fits `preprocessor` on X_train.
XX = preprocessor.fit_transform(X_train)
# .shape[1] reads the column count directly; unlike len(XX[0]) it does not
# index into the first row, so it also works when the result has zero rows.
XX.shape[1]

17