 ### Marketing campaign outcome prediction with deep learning

This ➡️ dataset contains banking marketing campaign data and we can use it to optimize marketing campaigns to attract more customers to term deposit subscription.

What is a Term Deposit?

A Term deposit is a deposit that a bank or a financial institution offers with a fixed rate (often better than just opening deposit account) in which your money will be returned back at a specific maturity time.


Instructions

In this assignment you will have to take the following steps:

- Import your dataset and perform initial analysis and visualization
- Clean the data
- Build a  deep learning network to predict the marketing campaign outcome ("deposit" either Yes or No)

#### Importing necessary libraries

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

#### Loading my dataset

In [31]:
df = pd.read_csv("/content/drive/MyDrive/bank.csv")

In [32]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes


In [33]:
df.shape

(11162, 17)

In [34]:
df.isnull().sum()

Unnamed: 0,0
age,0
job,0
marital,0
education,0
default,0
balance,0
housing,0
loan,0
contact,0
day,0


In [35]:
df.duplicated().sum()

0

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        11162 non-null  int64 
 1   job        11162 non-null  object
 2   marital    11162 non-null  object
 3   education  11162 non-null  object
 4   default    11162 non-null  object
 5   balance    11162 non-null  int64 
 6   housing    11162 non-null  object
 7   loan       11162 non-null  object
 8   contact    11162 non-null  object
 9   day        11162 non-null  int64 
 10  month      11162 non-null  object
 11  duration   11162 non-null  int64 
 12  campaign   11162 non-null  int64 
 13  pdays      11162 non-null  int64 
 14  previous   11162 non-null  int64 
 15  poutcome   11162 non-null  object
 16  deposit    11162 non-null  object
dtypes: int64(7), object(10)
memory usage: 1.4+ MB


In [37]:
df["job"].value_counts()

Unnamed: 0_level_0,count
job,Unnamed: 1_level_1
management,2566
blue-collar,1944
technician,1823
admin.,1334
services,923
retired,778
self-employed,405
student,360
unemployed,357
entrepreneur,328


In [38]:
df["marital"].value_counts()

Unnamed: 0_level_0,count
marital,Unnamed: 1_level_1
married,6351
single,3518
divorced,1293


In [39]:
df["education"].value_counts()

Unnamed: 0_level_0,count
education,Unnamed: 1_level_1
secondary,5476
tertiary,3689
primary,1500
unknown,497


In [40]:
df["default"].value_counts()

Unnamed: 0_level_0,count
default,Unnamed: 1_level_1
no,10994
yes,168


In [41]:
df["housing"].value_counts()

Unnamed: 0_level_0,count
housing,Unnamed: 1_level_1
no,5881
yes,5281


In [42]:
df["loan"].value_counts()

Unnamed: 0_level_0,count
loan,Unnamed: 1_level_1
no,9702
yes,1460


In [43]:
df["contact"].value_counts()

Unnamed: 0_level_0,count
contact,Unnamed: 1_level_1
cellular,8042
unknown,2346
telephone,774


In [44]:
df["month"].value_counts()

Unnamed: 0_level_0,count
month,Unnamed: 1_level_1
may,2824
aug,1519
jul,1514
jun,1222
nov,943
apr,923
feb,776
oct,392
jan,344
sep,319


In [45]:
df["job"].value_counts()

Unnamed: 0_level_0,count
job,Unnamed: 1_level_1
management,2566
blue-collar,1944
technician,1823
admin.,1334
services,923
retired,778
self-employed,405
student,360
unemployed,357
entrepreneur,328


In [46]:
df["poutcome"].value_counts()

Unnamed: 0_level_0,count
poutcome,Unnamed: 1_level_1
unknown,8326
failure,1228
success,1071
other,537


In [47]:
df["deposit"].value_counts()

Unnamed: 0_level_0,count
deposit,Unnamed: 1_level_1
no,5873
yes,5289


#### Encoding categorical columns

In [48]:
df['deposit'] = df['deposit'].map({'yes': 1, 'no': 0})
df['contact'] = df['contact'].map( {'cellular': 1, 'telephone': 2, 'unknown': 0})
df['loan'] = df['loan'].map({'yes': 1, 'no': 0})
df['housing'] = df['housing'].map({'yes': 1, 'no': 0})
df['default'] = df['default'].map({'yes': 1, 'no': 0})
df['marital'] = df['marital'].map({'married': 1, 'single': 2, 'divorced': 3})

In [49]:
df = pd.get_dummies(df, columns=['job', 'education', 'month', 'poutcome'], drop_first=True)

In [50]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
le = LabelEncoder()

# Encode the 'deposit' column (target variable)
df['deposit'] = le.fit_transform(df['deposit'])

In [51]:
df.head()

Unnamed: 0,age,marital,default,balance,housing,loan,contact,day,duration,campaign,...,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_other,poutcome_success,poutcome_unknown
0,59,1,0,2343,1,0,0,5,1042,1,...,False,False,False,True,False,False,False,False,False,True
1,56,1,0,45,0,0,0,5,1467,1,...,False,False,False,True,False,False,False,False,False,True
2,41,1,0,1270,1,0,0,5,1389,1,...,False,False,False,True,False,False,False,False,False,True
3,55,1,0,2476,1,0,0,5,579,1,...,False,False,False,True,False,False,False,False,False,True
4,54,1,0,184,0,0,0,5,673,2,...,False,False,False,True,False,False,False,False,False,True


In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 41 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   age                  11162 non-null  int64
 1   marital              11162 non-null  int64
 2   default              11162 non-null  int64
 3   balance              11162 non-null  int64
 4   housing              11162 non-null  int64
 5   loan                 11162 non-null  int64
 6   contact              11162 non-null  int64
 7   day                  11162 non-null  int64
 8   duration             11162 non-null  int64
 9   campaign             11162 non-null  int64
 10  pdays                11162 non-null  int64
 11  previous             11162 non-null  int64
 12  deposit              11162 non-null  int64
 13  job_blue-collar      11162 non-null  bool 
 14  job_entrepreneur     11162 non-null  bool 
 15  job_housemaid        11162 non-null  bool 
 16  job_management       1

In [61]:
import tensorflow as tf  # TensorFlow library for building neural networks
from tensorflow.keras.models import Sequential  # Sequential model for building a feedforward neural network
from tensorflow.keras.layers import Dense, Dropout  # Dense layer for fully connected layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam  # Adam optimizer for gradient descent
from sklearn.model_selection import train_test_split  # Function to split data into training and test sets
from sklearn.preprocessing import StandardScaler  # Standardization of features (scaling)

#### Splitting the dataset

In [62]:
X = df.drop(columns=["deposit"])
y = df["deposit"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [63]:
X_train.shape

(8929, 40)

#### Scaling my data

In [64]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### Building my model

In [66]:
model = Sequential()

#input layer
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(0.3))

#hidden layers
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))

#output layer
model.add(Dense(1, activation='sigmoid'))

#Compiling the layers
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [67]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [68]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, callbacks=[early_stopping])

Epoch 1/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5707 - loss: 0.6884 - val_accuracy: 0.6431 - val_loss: 0.6489
Epoch 2/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6446 - loss: 0.6494 - val_accuracy: 0.7161 - val_loss: 0.5947
Epoch 3/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7120 - loss: 0.5945 - val_accuracy: 0.7600 - val_loss: 0.5352
Epoch 4/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7417 - loss: 0.5464 - val_accuracy: 0.7949 - val_loss: 0.4804
Epoch 5/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7635 - loss: 0.5046 - val_accuracy: 0.8142 - val_loss: 0.4414
Epoch 6/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7786 - loss: 0.4781 - val_accuracy: 0.8218 - val_loss: 0.4201
Epoch 7/100
[1m280/28

In [69]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)

print(f"Test Accuracy: {test_acc * 100:.2f}%")

[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8256 - loss: 0.3962
Test Accuracy: 84.10%
