#### Import Libraries

In [None]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.externals import joblib

from keras.layers import Dense, Dropout
from keras.models import Model, Sequential
from keras.wrappers.scikit_learn import KerasClassifier

#### Load Data

In [None]:
# Read the bank-marketing dataset from disk.
df = pd.read_csv('data/bank_marketing.csv')

# Features: every column except the last one. This is selected before the
# target is popped below, so presumably the last column is 'y' -- confirm
# against the CSV layout.
feature_columns = df.columns[:-1]
x = df[feature_columns]

# Target: the 'y' column; pop() also removes it from df in place.
y = df.pop('y')

# Preview the first rows of what is left in df.
df.head()

#### Create a function that will wrap my Keras Model

In [None]:
#First I define my keras model

def create_model(optimizer='adagrad', kernel_initializer='glorot_uniform', dropout=0.2):
    """Build and compile the Keras network used as the binary classifier.

    The network is a Sequential stack: a 64-unit ReLU hidden layer, a dropout
    layer, and a single sigmoid output unit. No input shape is declared here.

    The hyperparameters are keyword arguments whose defaults match the values
    that were previously hard-coded, so calling ``create_model()`` with no
    arguments builds the same model as before. Exposing them lets a caller
    (e.g. the scikit-learn wrapper or a hyperparameter search) override them.

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        kernel_initializer: Weight initializer used by both Dense layers.
        dropout: Rate passed to the Dropout layer after the hidden layer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy
        metric).
    """
    model = Sequential()
    model.add(Dense(64, activation='relu', kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid', kernel_initializer=kernel_initializer))

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

#### Define Preprocessing steps per column data type

In [None]:
# Numeric columns: impute missing values with the median, then standardize.
numeric_features = ['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical columns: fill missing values with the constant 'missing', then
# one-hot encode; handle_unknown='ignore' is set on the encoder.
categorical_features = [
    'job', 'marital', 'education', 'default', 'housing',
    'loan', 'contact', 'month', 'poutcome',
]
categorical_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Route each group of columns through its own transformer.
column_transformers = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_transformers)

#### Fit the preprocessing pipeline on the data

In [None]:
# The preprocessing pipeline has a single step: the column preprocessor.
pipeline = Pipeline([('preprocessor', preprocessor)])

# Fit the pipeline on (x, y), then use the fitted pipeline returned by fit()
# to produce the preprocessed feature matrix for the same x.
preprocessed = pipeline.fit(x, y).transform(x)

#### Train the Keras Classifier

In [None]:
# Wrap the model-building function in the scikit-learn style Keras classifier;
# verbose=0 is passed through to the wrapper.
clf = KerasClassifier(
    build_fn=create_model,
    verbose=0,
)

# Train the classifier on the preprocessed features and the target.
clf.fit(preprocessed, y)

#### Save Keras Model and sklearn Pipeline
The Keras model and the sklearn pipeline could be bundled together, **but they need to be saved separately**. This is only an issue with Keras, since Keras models are not pickle-able right now; it should not be an issue with other models.

In [None]:
# Output locations for the two artifacts.
pipeline_path = 'artifacts/preprocessing_pipeline_18-09-2020.pkl'
model_path = 'artifacts/classifier_18-09-2020.h5'

# The sklearn pipeline is dumped with joblib; the underlying Keras model is
# written with its own save() method.
joblib.dump(pipeline, pipeline_path)
clf.model.save(model_path)