In [1]:
import numpy as np
import pandas as pd

import bentoml

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import RandomForestClassifier

  from pandas import MultiIndex, Int64Index


In [2]:
# Source of the cleaned marketing-campaign data (raw file on GitHub).
url = "https://raw.githubusercontent.com/DonAvery/customer-marketing-project/main/Data/cleaned-marketing-campaign.csv"

# To use the local CSV produced by the Data-Prep-Clean-EDA notebook instead,
# comment out the `url` assignment above and the `pd.read_csv(...)` call below,
# then uncomment this line:
# df = pd.read_csv("cleaned-marketing-campaign.csv")
df = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Peek at the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,id,year_birth,education,marital_status,income,kidhome,teenhome,dt_customer,recency,mntwines,...,numstorepurchases,numwebvisitsmonth,acceptedcmp3,acceptedcmp4,acceptedcmp5,acceptedcmp1,acceptedcmp2,complain,response,cust_age
0,5524,1957,Graduation,Single,58138,0,0,2012-09-04,58,635,...,4,7,0,0,0,0,0,0,1,58
1,2174,1954,Graduation,Single,46344,1,1,2014-03-08,38,11,...,2,5,0,0,0,0,0,0,0,61
2,4141,1965,Graduation,Together,71613,0,0,2013-08-21,26,426,...,10,4,0,0,0,0,0,0,0,50
3,6182,1984,Graduation,Together,26646,1,0,2014-02-10,26,11,...,4,6,0,0,0,0,0,0,0,31
4,5324,1981,PhD,Married,58293,1,0,2014-01-19,94,173,...,6,5,0,0,0,0,0,0,0,34


In [4]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=11)

# Discard the shuffled original index so both frames are numbered from 0.
df_train = df_train.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)

# `response` is already a 0/1 flag (see the df.head() output), so the positive
# class is `response == 1`. The previous comparison against the string 'default'
# never matched, which made every target 0 and left the models nothing to learn.
y_train = (df_train.response == 1).astype('int').values
y_test = (df_test.response == 1).astype('int').values

# Remove the target from the feature frames so it cannot leak into training.
del df_train['response']
del df_test['response']

# NOTE(review): the frames still carry the `Unnamed: 0` and `id` columns, which
# look like row identifiers rather than features - consider dropping them too.
# Left in place here so the saved model's feature set does not change.

In [5]:
# Convert each row to a plain dict (missing values replaced by 0) so the
# DictVectorizer can turn the records into a dense feature matrix.
train_dicts = df_train.fillna(0).to_dict(orient='records')
test_dicts = df_test.fillna(0).to_dict(orient='records')

dv = DictVectorizer(sparse=False)

# Fit on the training records only; the test records reuse the fitted mapping.
X_train = dv.fit_transform(train_dicts)
X_test = dv.transform(test_dicts)

In [6]:
# Random-forest classifier trained on the vectorised features; seeded for reproducibility.
rf = RandomForestClassifier(
    n_estimators=400,
    max_depth=10,
    min_samples_leaf=3,
    random_state=1,
)
rf.fit(X_train, y_train)

In [7]:
# Wrap the training matrix and its labels in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(data=X_train, label=y_train)

In [8]:
# Parameters handed to xgb.train below.
xgb_params = {
    # Boosting / tree settings
    'eta': 0.1,
    'max_depth': 15,
    'min_child_weight': 1,

    # Learning task, evaluation metric and runtime settings
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'nthread': 8,
    'seed': 1,
}

# Train the booster on the training DMatrix for 120 boosting rounds.
model = xgb.train(params=xgb_params, dtrain=dtrain, num_boost_round=120)

## Using BentoML to save our model

In [9]:
# Save the trained booster with BentoML and attach the fitted DictVectorizer
# as a custom object under the key "dictVectorizer".
saved_model = bentoml.xgboost.save_model(
    "cust_marketing_model",
    model,
    custom_objects={"dictVectorizer": dv},
)

After running the next cell you will see a line containing `Model(tag="...")`. You will need the tag that appears inside the double quotes.

In [10]:
# Show the saved model's reference, which includes its tag.
saved_message = f"model saved: {saved_model}"
print(saved_message)

model saved: Model(tag="cust_marketing_model:6kynosebvcgckpd2")
