In [9]:
from numpy import mean
from numpy import std
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

In [10]:
# get the dataset
def get_dataset(path='C:/Users/admin/Downloads/credit_dataset.csv', resample=True, random_state=1):
    """Load the credit dataset, encode and filter it, and optionally balance it with SMOTE.

    Args:
        path: location of the CSV file. Defaults to the path this notebook
            was originally written against; pass your own location to run
            it on another machine.
        resample: when True (default) the classes are balanced with SMOTE
            before returning. NOTE: oversampling the whole dataset *before*
            cross-validation lets synthetic points derived from a fold's
            test rows end up in its training rows, so scores come out
            optimistic. For an unbiased estimate pass ``resample=False`` and
            oversample inside each training fold instead (e.g. with an
            imblearn Pipeline).
        random_state: seed handed to SMOTE so repeated runs produce the same
            synthetic samples.

    Returns:
        Tuple ``(X, y)``: the feature frame and the ``TARGET`` series,
        oversampled when ``resample`` is True.

    Raises:
        KeyError: if an expected column is missing from the CSV.
    """
    raw = pd.read_csv(path)
    raw['FAMILY SIZE'] = raw['FAMILY SIZE'].astype('int64')

    # Drop the leading column by position (presumably a saved index -- confirm
    # against the CSV), then the features that are not used for modelling.
    # Dropping first avoids encoding columns that are discarded anyway.
    unused = ['ID', 'GENDER', 'REALITY', 'NO_OF_CHILD', 'HOUSE_TYPE',
              'FLAG_MOBIL', 'WORK_PHONE', 'E_MAIL']
    data = raw.drop(columns=[raw.columns[0]]).drop(columns=unused)

    # Label-encode every text column. Checking for string dtypes as well as
    # plain object dtype keeps this working if pandas infers a dedicated
    # string dtype for text columns.
    is_object = pd.api.types.is_object_dtype
    is_string = pd.api.types.is_string_dtype
    text_cols = [col for col, dtype in data.dtypes.items()
                 if is_object(dtype) or is_string(dtype)]
    for col in text_cols:
        data[col] = LabelEncoder().fit_transform(data[col])

    # Outlier filters; applied after encoding so the integer codes are
    # derived from the full set of categories, as before.
    keep = (data['INCOME'] < 600000) & (data['YEARS_EMPLOYED'] < 20)
    data = data[keep]

    # Split into features and target.
    X = data.copy()
    y = X.pop('TARGET')
    if not resample:
        return X, y

    X_smote, y_smote = SMOTE(random_state=random_state).fit_resample(X, y)
    return X_smote, y_smote

In [11]:
# get a stacking ensemble of models
def get_stacking():
    """Build the stacking ensemble used in this notebook.

    Three base learners (logistic regression, k-nearest neighbours and a
    decision tree, all with default settings) feed a logistic-regression
    meta learner.

    Returns:
        An unfitted StackingClassifier configured with ``cv=5``.
    """
    # level-0 (base) learners, as (name, estimator) pairs
    base_learners = [
        ('lr', LogisticRegression()),
        ('knn', KNeighborsClassifier()),
        ('cart', DecisionTreeClassifier()),
    ]
    # level-1 (meta) learner that combines the base predictions
    meta_learner = LogisticRegression()
    return StackingClassifier(estimators=base_learners, final_estimator=meta_learner, cv=5)

In [12]:
# get a list of models to evaluate
def get_models():
    """Return the models to compare, keyed by a short display name.

    Returns:
        dict mapping ``'lr'``, ``'knn'`` and ``'cart'`` to default-configured
        standalone classifiers and ``'stacking'`` to the ensemble built by
        ``get_stacking()``, in that insertion order.
    """
    return {
        'lr': LogisticRegression(),
        'knn': KNeighborsClassifier(),
        'cart': DecisionTreeClassifier(),
        'stacking': get_stacking(),
    }

In [13]:
# evaluate a given model using cross-validation
def evaluate_model(model, X, y, sampler=None):
    """Score a classifier with repeated stratified k-fold cross-validation.

    Args:
        model: unfitted scikit-learn compatible classifier.
        X: feature matrix.
        y: class labels.
        sampler: optional imbalanced-learn sampler (e.g. ``SMOTE(...)``).
            When given, it is chained in front of ``model`` in an imblearn
            pipeline, so the resampling is fitted on each training split
            only and the held-out split is scored untouched. This avoids
            the optimistic bias of oversampling before splitting. Leave as
            None (default) to score ``model`` on ``X``/``y`` exactly as given.

    Returns:
        The per-split accuracy scores returned by ``cross_val_score``
        (10 splits x 3 repeats).

    Raises:
        Any exception raised while fitting or scoring a split; it is
        propagated rather than converted to NaN because
        ``error_score='raise'``.
    """
    if sampler is None:
        estimator = model
    else:
        # imblearn's pipeline applies samplers during fit only
        estimator = ImbPipeline([('sampler', sampler), ('model', model)])
    # fixed seed so every model is scored on the same splits
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    return cross_val_score(estimator, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')

In [14]:
# load features and labels
X, y = get_dataset()
# candidate models, keyed by short name
models = get_models()
# cross-validate each model, keeping its scores and name in parallel lists
results = []
names = []
for name, model in models.items():
    scores = evaluate_model(model, X, y)
    names.append(name)
    results.append(scores)
    # report mean accuracy with its standard deviation across splits
    summary = (name, mean(scores), std(scores))
    print('>%s %.3f (%.3f)' % summary)

>lr 0.611 (0.019)
>knn 0.934 (0.004)
>cart 0.972 (0.003)
>stacking 0.978 (0.002)
