
@Author: Suresh<br>
@Date: 11-10-2024<br>
@Last modified by: Suresh<br>
@Last modified Date: 11-10-2024<br>
@Title: Multi layer neural network to Predict Subscription to Term Deposit<br>
<br>
'''


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

### Load Dataset

In [2]:

url = 'https://drive.google.com/uc?id=1E0EURgsF3L9Bt5hnOalE0d4Tw9mIOgTn'  # Corrected link format
df = pd.read_csv(url,delimiter=';')

print(df.head())

   age          job  marital  education default  balance housing loan  \
0   30   unemployed  married    primary      no     1787      no   no   
1   33     services  married  secondary      no     4789     yes  yes   
2   35   management   single   tertiary      no     1350     yes   no   
3   30   management  married   tertiary      no     1476     yes  yes   
4   59  blue-collar  married  secondary      no        0     yes   no   

    contact  day month  duration  campaign  pdays  previous poutcome   y  
0  cellular   19   oct        79         1     -1         0  unknown  no  
1  cellular   11   may       220         1    339         4  failure  no  
2  cellular   16   apr       185         1    330         1  failure  no  
3   unknown    3   jun       199         4     -1         0  unknown  no  
4   unknown    5   may       226         1     -1         0  unknown  no  


### Handling missing values

In [3]:
df.dropna(inplace=True)
df.shape

(4521, 17)

### Handling categorical values
Encode categorical variables using LabelEncoder

In [4]:
categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']

label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le


df['y'] = df['y'].map({'yes': 1, 'no': 0})    # Map the target variable 'y' to binary

### Spliting dataset
Define features (X) and target (y)

In [5]:

X = df.drop(columns=['y'])  # Features are all columns except 'y'
y = df['y']                 # Target variable

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


### Scaling Dataset

In [6]:

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Model Building
Build the Multi-Layer Neural Network

In [7]:

model = Sequential()

# Input layer and first hidden layer with 128 neurons and ReLU activation
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dropout(0.2))     # Add dropout to prevent overfitting


# Second hidden layer with 64 neurons
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))  # Dropout

model.add(Dense(32, activation='relu'))

# Output layer with 1 neuron (binary classification) and sigmoid activation
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)  # Convert predictions to 0 or 1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8395 - loss: 0.4299 - val_accuracy: 0.8950 - val_loss: 0.2632
Epoch 2/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8868 - loss: 0.2713 - val_accuracy: 0.8923 - val_loss: 0.2545
Epoch 3/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8795 - loss: 0.2793 - val_accuracy: 0.8950 - val_loss: 0.2512
Epoch 4/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8865 - loss: 0.2752 - val_accuracy: 0.8950 - val_loss: 0.2517
Epoch 5/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9005 - loss: 0.2487 - val_accuracy: 0.8895 - val_loss: 0.2527
Epoch 6/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8951 - loss: 0.2398 - val_accuracy: 0.8936 - val_loss: 0.2504
Epoch 7/20
[1m91/91[0m [32m━━━━━━━━━━

### Model performance

In [8]:
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:\n', class_report)



Accuracy: 0.8906077348066298
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.97      0.94       807
           1       0.49      0.28      0.35        98

    accuracy                           0.89       905
   macro avg       0.70      0.62      0.65       905
weighted avg       0.87      0.89      0.88       905

