In [1]:
# Dependencies and data.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pandas as pd
import tensorflow as tf

# Read the bank telemarketing dataset (path is relative to the notebook's
# working directory) and preview the first rows.
csv_path = 'Resources/bank_telemarketing.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Age,Job,Marital_Status,Education,Default_Credit,Housing_Loan,Personal_Loan,Subscribed
0,56,other,married,Primary_Education,no,no,no,no
1,37,services,married,Secondary_Education,no,yes,no,no
2,40,admin,married,Primary_Education,no,no,no,no
3,56,services,married,Secondary_Education,no,no,yes,no
4,59,admin,married,Professional_Education,no,no,no,no


In [2]:
# Gather the names of every object-dtype (string) column so we can judge
# whether any of them has too many levels and needs bucketing.
cats = [name for name, dtype in df.dtypes.items() if dtype == 'object']

# Distinct-value count per categorical column.
df[cats].nunique()

Job               9
Marital_Status    3
Education         4
Default_Credit    2
Housing_Loan      2
Personal_Loan     2
Subscribed        2
dtype: int64

In [3]:
# Encode the categorical variables.
# The encoder is built with default arguments and the result is densified
# explicitly: the `sparse=` keyword was renamed to `sparse_output=` in
# scikit-learn 1.2 and removed in 1.4, so passing either spelling ties the
# notebook to a specific release.
enc = OneHotEncoder()

# Fit the encoder on the categorical columns and turn the result into a
# dense array (the default output is a SciPy sparse matrix).
encoded = enc.fit_transform(df[cats])
if hasattr(encoded, 'toarray'):
    encoded = encoded.toarray()

# Resolve the generated column names. `get_feature_names` was removed in
# scikit-learn 1.2 in favour of `get_feature_names_out`; fall back to the old
# method only when the new one is unavailable.
if hasattr(enc, 'get_feature_names_out'):
    feature_names = enc.get_feature_names_out(cats)
else:
    feature_names = enc.get_feature_names(cats)

# Reuse df's index so that index-aligned joins against df line up row for row
# even if df does not carry a default 0..n-1 index.
encode_df = pd.DataFrame(encoded, columns=feature_names, index=df.index)
encode_df.head()

Unnamed: 0,Job_admin,Job_blue-collar,Job_entrepreneur,Job_management,Job_other,Job_retired,Job_self-employed,Job_services,Job_technician,Marital_Status_divorced,...,Education_Secondary_Education,Education_Tertiary_Education,Default_Credit_no,Default_Credit_yes,Housing_Loan_no,Housing_Loan_yes,Personal_Loan_no,Personal_Loan_yes,Subscribed_no,Subscribed_yes
0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0


In [4]:
# Merge encoded DF back to original DF, and drop categorical variables.
# The raw categorical columns are dropped via the `columns=` keyword: passing
# the axis positionally (`.drop(cats, 1)`) was deprecated in pandas 1.3 and
# raises a TypeError from pandas 2.0 onward.
# NOTE: this cell rebinds `df`, so it is not idempotent -- re-running it
# without re-running the load cell fails because `cats` are already gone.
df = df.merge(encode_df, left_index=True, right_index=True).drop(columns=cats)
df.head()

Unnamed: 0,Age,Job_admin,Job_blue-collar,Job_entrepreneur,Job_management,Job_other,Job_retired,Job_self-employed,Job_services,Job_technician,...,Education_Secondary_Education,Education_Tertiary_Education,Default_Credit_no,Default_Credit_yes,Housing_Loan_no,Housing_Loan_yes,Personal_Loan_no,Personal_Loan_yes,Subscribed_no,Subscribed_yes
0,56,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
1,37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
2,40,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
3,56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0
4,59,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0


In [5]:
# Separate the features from the label. Both one-hot columns derived from
# `Subscribed` are removed from the features; `Subscribed_yes` is the target.
feature_df = df.drop(columns=['Subscribed_yes', 'Subscribed_no'])
X = feature_df.to_numpy()
y = df['Subscribed_yes'].to_numpy()

# Stratified train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

# Learn the scaling parameters from the training rows only, then apply the
# same transformation to both partitions.
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [6]:
# Create a linear-kernel support vector classifier and fit it on the scaled
# training features (`fit` returns the estimator itself).
svm = SVC(kernel='linear').fit(X_train_scaled, y_train)

# Predict the held-out rows and report the fraction classified correctly.
y_pred = svm.predict(X_test_scaled)
svm_accuracy = accuracy_score(y_test, y_pred)
print(f'SVM Model Accuracy: {svm_accuracy}')

SVM Model Accuracy: 0.873490813648294


In [7]:
# Build NN model.
# Seed TensorFlow's global random generator first so that weight
# initialisation is repeatable between runs, matching the fixed
# `random_state=42` used for the train/test split. Without a seed the reported
# loss/accuracy changes on every execution. (Bit-exact equality can still
# depend on hardware and TensorFlow version.)
tf.random.set_seed(42)

# Number of input features, taken from the column count of the scaled matrix.
n_features = X_train_scaled.shape[1]

nn = tf.keras.models.Sequential()

# Declare the input shape explicitly, then add hidden layers and output layer.
# An explicit Input is used instead of `input_dim=` on the first Dense layer,
# which newer Keras releases warn about.
nn.add(tf.keras.Input(shape=(n_features,)))
nn.add(tf.keras.layers.Dense(units=10, activation='relu'))
nn.add(tf.keras.layers.Dense(units=5, activation='relu'))
# Single sigmoid unit: the output is a value in (0, 1) for the binary target.
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Compile the model with binary cross-entropy loss, the Adam optimizer, and
# the built-in accuracy metric.
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model (silently) for 50 epochs on the scaled training data.
fit_model = nn.fit(X_train_scaled, y_train, epochs=50, verbose=0)

# Evaluate with test data.
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

239/239 - 0s - loss: 0.3685 - accuracy: 0.8734
Loss: 0.3685097098350525, Accuracy: 0.8733595609664917
