In [41]:
# Import some important libraries
import pandas as pd, numpy as np

# Read the CSV file
df = pd.read_csv("bank-additional-full.csv")
df.head(10)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
5,45,services,married,basic.9y,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
6,59,admin.,married,professional.course,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
7,41,blue-collar,married,unknown,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
8,24,technician,single,professional.course,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
9,25,services,single,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [42]:
# Show the number of columns and rows of the dataset
df.shape

(41188, 21)

In [43]:
# Show some info about the dataset 
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41188 entries, 0 to 41187
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             41188 non-null  int64  
 1   job             41188 non-null  object 
 2   marital         41188 non-null  object 
 3   education       41188 non-null  object 
 4   default         41188 non-null  object 
 5   housing         41188 non-null  object 
 6   loan            41188 non-null  object 
 7   contact         41188 non-null  object 
 8   month           41188 non-null  object 
 9   day_of_week     41188 non-null  object 
 10  duration        41188 non-null  int64  
 11  campaign        41188 non-null  int64  
 12  pdays           41188 non-null  int64  
 13  previous        41188 non-null  int64  
 14  poutcome        41188 non-null  object 
 15  emp.var.rate    41188 non-null  float64
 16  cons.price.idx  41188 non-null  float64
 17  cons.conf.idx   41188 non-null 

In [44]:
# Show some important info about only the numerical columns in the dataset
df.describe()

Unnamed: 0,age,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
count,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0
mean,40.02406,258.28501,2.567593,962.475454,0.172963,0.081886,93.575664,-40.5026,3.621291,5167.035911
std,10.42125,259.279249,2.770014,186.910907,0.494901,1.57096,0.57884,4.628198,1.734447,72.251528
min,17.0,0.0,1.0,0.0,0.0,-3.4,92.201,-50.8,0.634,4963.6
25%,32.0,102.0,1.0,999.0,0.0,-1.8,93.075,-42.7,1.344,5099.1
50%,38.0,180.0,2.0,999.0,0.0,1.1,93.749,-41.8,4.857,5191.0
75%,47.0,319.0,3.0,999.0,0.0,1.4,93.994,-36.4,4.961,5228.1
max,98.0,4918.0,56.0,999.0,7.0,1.4,94.767,-26.9,5.045,5228.1


# Data Preprocessing Steps 


In [45]:
# Identify categorical and numerical columns
categorical_cols = df.select_dtypes(include = ['object']).drop('y', axis = 1).columns.tolist()
numerical_cols = df.select_dtypes(include = ['int64', 'float64']).columns.tolist()

In [46]:
# Split the target column and other features
x = df.drop('y', axis = 1)
y = df.y.map({'yes' : 1, 'no' : 0}).values

In [47]:
# Import some importan libraries in order to use them in data preprocessing 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE

In [48]:
# Splitting the dataset into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

In [49]:
# Define preprocessors for numerical and categorical data
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Create a pipeline that first preprocesses the data
preprocessor = ColumnTransformer(
    transformers = [
        ('num', numerical_transformer, numerical_cols),  
        ('cat', categorical_transformer, categorical_cols)
    ]
)
# Apply the transformer to data
x_train_preprocessed = preprocessor.fit_transform(x_train)
x_test_preprocessed = preprocessor.fit_transform(x_test)

# Apply SMOTE to training data 
smote = SMOTE()                # the syenthetic minority oversampling technique is significantly used when (imbalanced dataset) one class is less represented than the others
x_train_resampled, y_train_resampled = smote.fit_resample(x_train_preprocessed, y_train)

# Model Construction, Training and Evaluation

In [51]:
# Import some import libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Warning Filtering
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings('ignore', category=UserWarning)


# The Costruction Phase of the Model 
model = Sequential([
    Dense(256, activation = 'relu', input_dim = x_train_resampled.shape[1]),
    BatchNormalization(),
    Dropout(0.30),
    Dense(64, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.30),
    Dense(32, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.30),
    Dense(16, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.30),
    Dense(8, activation = 'relu'),
    BatchNormalization(),
    Dropout(0.30),
    Dense(1, activation = ('sigmoid'))
])

# The Compliling Phase of the Model
model.compile( optimizer = tf.keras.optimizers.Adam(learning_rate =  0.001), loss = 'binary_crossentropy', metrics = ['accuracy'])

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)

# The Training Phase of the Model 
print("Training Phase Results : ")
print()
model.fit(x_train_preprocessed, y_train_resampled, epochs = 10, batch_size = 15, validation_split = 0.3, callbacks=[early_stopping, model_checkpoint])

print()
print()

# The Evaluation Phase of the Model
model = tf.keras.models.load_model('best_model.h5')
print("Evaluation Phase Result : ")
print()
loss, accuracy = model.evaluate(x_test_preprocessed, y_test)
print(f"Loss Value = {loss} and Percentage of Accuracy = {round(accuracy * 100)} %")  

Training Phase Results : 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Evaluation Phase Result : 

Loss Value = 0.1835470050573349 and Percentage of Accuracy = 91 %
