#### Import Libraries

In [28]:
import os

import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
# NOTE: sklearn.externals.joblib was removed in scikit-learn 0.23; on newer
# versions this line must become `import joblib`.
from sklearn.externals import joblib

from keras.layers import Dense, Dropout
from keras.models import Model, Sequential
from keras.wrappers.scikit_learn import KerasClassifier

#### Load Data

In [29]:
df = pd.read_csv('data/bank_marketing.csv')

# Split features and target by column name rather than by position, so the
# split stays correct even if 'y' is not the last column of the CSV.
x = df.drop(columns=['y'])
# pop() also removes the target from df, so the preview below shows features only.
y = df.pop('y')

df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown


#### Create a function that will wrap my Keras Model

In [30]:
#First I define my keras model

def create_model(optimizer='adagrad', kernel_initializer='glorot_uniform', dropout=0.2):
    """Build and compile a single-hidden-layer binary classifier.

    Architecture: Dense(64, relu) -> Dropout -> Dense(1, sigmoid), compiled
    with binary cross-entropy loss and an accuracy metric. No input shape is
    declared here.

    The defaults reproduce the values that were previously hard-coded, so
    calling ``create_model()`` with no arguments builds the same model as
    before. Exposing them as parameters lets a wrapper such as
    ``KerasClassifier`` forward hyper-parameters to this builder.

    Args:
        optimizer: Optimizer name or instance passed to ``model.compile``.
        kernel_initializer: Initializer used for both Dense layers.
        dropout: Rate passed to the Dropout layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(64, activation='relu', kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    # Single sigmoid unit: the output is a probability for the positive class.
    model.add(Dense(1, activation='sigmoid', kernel_initializer=kernel_initializer))

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

#### Define Preprocessing steps per column data type

In [31]:
# Numerical columns: median imputation followed by standardisation
numeric_features = [
    'age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous',
]
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: constant-fill imputation followed by one-hot encoding
categorical_features = [
    'job', 'marital', 'education', 'default', 'housing',
    'loan', 'contact', 'month', 'poutcome',
]
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    # handle_unknown='ignore': unseen categories do not raise at transform time
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns through its own transformer
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

#### Fit the preprocessing pipeline on the data

In [32]:
# Full preprocessing pipeline (feature transformations only, no estimator)
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Fit the preprocessing steps and transform x in a single pass, instead of
# calling fit() and then transform() separately on the same data.
preprocessed = pipeline.fit_transform(x, y)

#### Train the Keras Classifier

In [33]:
# Wrap create_model in the scikit-learn estimator interface, then train it
# on the preprocessed features.
clf = KerasClassifier(verbose=0, build_fn=create_model)
clf.fit(x=preprocessed, y=y)

<tensorflow.python.keras.callbacks.History at 0x7f94ee924d68>

#### Save Keras Model and sklearn Pipeline
The Keras model and the sklearn pipeline could be saved together in a single pickle file, but saving them separately also serves as an example of how to mix and match different preprocessing methods and models.

In [34]:
# Make sure the output directory exists so saving also works on a fresh checkout.
os.makedirs('models', exist_ok=True)

# The fitted preprocessing pipeline and the trained network are stored separately.
joblib.dump(pipeline, 'models/preprocessing_pipeline_18-09-2020.pkl')
clf.model.save('models/classifier_18-09-2020.h5')