In [38]:
import pandas as pd
# Read the credit dataset from disk into a DataFrame.
credit_df = pd.read_csv(filepath_or_buffer='./data/credit.csv')
# Preview the first five rows (five is also the default for head()).
credit_df.head(n=5)

Unnamed: 0,bill_amt_1,bill_amt_2,bill_amt_3,bill_amt_4,bill_amt_5,bill_amt_6,limit_balance,degree,pay_0,pay_2,...,pay_amt_1,pay_amt_2,pay_amt_3,pay_amt_4,pay_amt_5,pay_amt_6,sex,marital_status,age,default_payment_next_month
0,20867,20168,14464,3455,0,0,180000,university,1,4,...,0,0,0,0,0,0,2,1,29,0
1,9787,-1256,65064,66102,54275,51791,110000,university,1,1,...,5,68500,3114,4016,2000,2000,2,1,24,0
2,24386,23744,23090,22436,26886,30858,30000,university,1,4,...,0,0,0,5000,5000,0,1,2,26,0
3,46750,45734,44741,43562,44039,45008,50000,university,5,4,...,0,0,0,1262,1358,1275,1,2,32,0
4,19276,18691,18048,18553,19465,16523,20000,university,1,4,...,0,0,1000,1400,0,3000,1,2,26,0


In [39]:
# Columns that must NOT be treated as numeric model features:
#   * 'Unnamed: 0' - the name pandas assigns to a header-less first CSV column. In the
#     preview above it holds 0, 1, 2, ... so it appears to be a saved row index rather
#     than a customer attribute (NOTE(review): confirm against the CSV). Leaving it in
#     would feed row order to the scaler and the classifier.
#   * 'degree' - categorical; it is one-hot encoded separately.
#   * 'default_payment_next_month' - the prediction target.
_non_numeric_cols = ['Unnamed: 0', 'degree', 'default_payment_next_month']
num_col_all = [col for col in credit_df.columns if col not in _non_numeric_cols]

In [40]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in 0.23.
# Prefer the standalone `joblib` package and fall back to the vendored copy only on
# old installations where standalone joblib is not available.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code.
# Only load pickles that come from a trusted source.
xgb = joblib.load('./data/GS_obj_1_1.pkl')

In [41]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder, QuantileTransformer
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier


# Column groups consumed by the ColumnTransformer inside get_model_pipeline().
NUMERICAL_FEATURES_all = num_col_all
# bill_amt_1 .. bill_amt_6
NUMERICAL_FEATURES_bill = ['bill_amt_{}'.format(idx) for idx in range(1, 7)]
# The pay_* columns have no index 1: pay_0, then pay_2 .. pay_6.
NUMERICAL_FEATURES_pay = ['pay_0'] + ['pay_{}'.format(idx) for idx in range(2, 7)]
CATEGORICAL_FEATURES = ['degree']


def get_model_pipeline(classifier=None) -> Pipeline:
    """Return the user score prediction model pipeline.

    The pipeline has two steps:

    * ``preprocessor`` - a ColumnTransformer with four branches whose outputs are
      concatenated (columns not listed in any branch are dropped):

      - ``NUMERICAL_FEATURES_all``  -> StandardScaler
      - ``NUMERICAL_FEATURES_bill`` -> PCA keeping enough components to explain
        90% of the variance
      - ``NUMERICAL_FEATURES_pay``  -> PCA keeping enough components to explain
        85% of the variance
      - ``CATEGORICAL_FEATURES``    -> one-hot encoding

    * ``classifier`` - the final estimator.

    Parameters
    ----------
    classifier : estimator, optional
        Final estimator of the pipeline. When omitted, the notebook-level ``xgb``
        object (loaded from disk in an earlier cell) is used, which matches the
        previous behaviour of this function.

    Returns
    -------
    Pipeline
        The assembled pipeline. This function does not call ``fit`` on it.
    """
    if classifier is None:
        # Backward-compatible default: the estimator loaded earlier in the notebook.
        classifier = xgb

    num_preprocessor_all = Pipeline([
            ("std_scaler", StandardScaler())
        ])
    # NOTE(review): the PCA branches receive the raw (unscaled) columns, and those
    # columns are presumably also part of NUMERICAL_FEATURES_all, so they reach the
    # classifier twice (scaled + PCA-projected) - confirm this is intended.
    num_preprocessor_bill_amt = Pipeline([
            ("pca_bill", PCA(0.9))
        ])
    num_preprocessor_pay = Pipeline([
            ("pca_pay", PCA(0.85))
        ])

    preprocessor = ColumnTransformer(
            [
                ('num_features_all', num_preprocessor_all, NUMERICAL_FEATURES_all),
                ('num_features_bill', num_preprocessor_bill_amt, NUMERICAL_FEATURES_bill),
                ('num_features_pay', num_preprocessor_pay, NUMERICAL_FEATURES_pay),
                # handle_unknown='ignore': a category that was not present during fit
                # is encoded as all zeros instead of raising at predict time, which
                # matters once the saved model serves unseen production data.
                ('categ_features', OneHotEncoder(handle_unknown='ignore'), CATEGORICAL_FEATURES)
            ],
            remainder='drop'
        )

    model_pipeline = Pipeline([
                ('preprocessor', preprocessor),
                ('classifier', classifier)
            ])

    return model_pipeline




In [42]:
# Split the frame into the feature matrix (everything but the target) and the target series.
X_df = credit_df.drop(columns=['default_payment_next_month'])
Y_df = credit_df['default_payment_next_month']

In [43]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X_df,
    Y_df,
    test_size=0.2,
    random_state=45,
)

In [44]:
# Build the pipeline and train it on the training split
# (Pipeline.fit returns the pipeline itself, so the result can be bound directly).
model = get_model_pipeline().fit(x_train, y_train)

# Now we can save the whole model (preprocessing + classifier) to a .pkl file
# for future usage in a production web service.
joblib.dump(model, './data/credit_model.pkl')

['./data/credit_model.pkl']

In [64]:
# Smoke-test the fitted pipeline on the first row of the held-out set.
model.predict(x_test.iloc[:1])

array([1], dtype=int64)

In [31]:
# Display the pipeline's repr to inspect its steps and their parameters.
model

Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num_features_all',
                                                  Pipeline(memory=None,
                                                           steps=[('std_scaler',
                                                                   StandardScaler(copy=True,
                                                                                  with_mean=True,
                                                                                  with_std=True))],
                                                           verbose=False),
                                                  ['level_0', 'index',
                                                   'bill_amt_1', 'bill_amt_2',
                      