In [1]:
# Import necessary libraries
import pandas as pd
from google.colab import files
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [4]:
df = pd.read_csv('/content/bank.csv', delimiter=';')
print(df.head())

   age          job  marital  education default  balance housing loan  \
0   30   unemployed  married    primary      no     1787      no   no   
1   33     services  married  secondary      no     4789     yes  yes   
2   35   management   single   tertiary      no     1350     yes   no   
3   30   management  married   tertiary      no     1476     yes  yes   
4   59  blue-collar  married  secondary      no        0     yes   no   

    contact  day month  duration  campaign  pdays  previous poutcome   y  
0  cellular   19   oct        79         1     -1         0  unknown  no  
1  cellular   11   may       220         1    339         4  failure  no  
2  cellular   16   apr       185         1    330         1  failure  no  
3   unknown    3   jun       199         4     -1         0  unknown  no  
4   unknown    5   may       226         1     -1         0  unknown  no  


In [5]:
# Check for missing values and data types
print(df.isnull().sum())
print(df.info())
print(df.describe())


age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
y            0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4521 entries, 0 to 4520
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        4521 non-null   int64 
 1   job        4521 non-null   object
 2   marital    4521 non-null   object
 3   education  4521 non-null   object
 4   default    4521 non-null   object
 5   balance    4521 non-null   int64 
 6   housing    4521 non-null   object
 7   loan       4521 non-null   object
 8   contact    4521 non-null   object
 9   day        4521 non-null   int64 
 10  month      4521 non-null   object
 11  duration   4521 non-null   int64 
 12  campaign   4521 non-null   int64 
 13  pdays      4521 non-null   int64 
 

In [6]:
# Encode categorical variables
df_encoded = pd.get_dummies(df, drop_first=True)

# Normalize numeric columns
numeric_columns = ['age', 'balance', 'duration']  # Adjust based on dataset
scaler = StandardScaler()
df_encoded[numeric_columns] = scaler.fit_transform(df_encoded[numeric_columns])


In [7]:
# Separate features and target
X = df_encoded.drop('y_yes', axis=1)  # Replace 'y_yes' with actual target column
y = df_encoded['y_yes']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print("Training Set:", X_train.shape)
print("Test Set:", X_test.shape)


Training Set: (3164, 42)
Test Set: (1357, 42)


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize and train the Random Forest classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_rf = rf_model.predict(X_test)

# Evaluate the model
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf))


Random Forest Accuracy: 0.894620486366986

Classification Report:
               precision    recall  f1-score   support

       False       0.91      0.98      0.94      1205
        True       0.58      0.22      0.32       152

    accuracy                           0.89      1357
   macro avg       0.74      0.60      0.63      1357
weighted avg       0.87      0.89      0.87      1357



In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build the neural network
nn_model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # For binary classification
])

# Compile the model
nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = nn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7275 - loss: 1.7568 - val_accuracy: 0.8865 - val_loss: 0.3570
Epoch 2/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8708 - loss: 0.3233 - val_accuracy: 0.8865 - val_loss: 0.2821
Epoch 3/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8805 - loss: 0.2939 - val_accuracy: 0.8895 - val_loss: 0.2654
Epoch 4/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8992 - loss: 0.2514 - val_accuracy: 0.8931 - val_loss: 0.2694
Epoch 5/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9022 - loss: 0.2756 - val_accuracy: 0.8873 - val_loss: 0.2558
Epoch 6/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8920 - loss: 0.2641 - val_accuracy: 0.8880 - val_loss: 0.2512
Epoch 7/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━

In [10]:
# Evaluate the neural network on the test set
loss, accuracy = nn_model.evaluate(X_test, y_test)
print("Neural Network Accuracy:", accuracy)


[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8882 - loss: 0.2431 
Neural Network Accuracy: 0.8887251019477844


In [11]:
from sklearn.metrics import roc_auc_score

# Random Forest
rf_auc = roc_auc_score(y_test, rf_model.predict_proba(X_test)[:, 1])
print("Random Forest ROC-AUC:", rf_auc)

# Neural Network
nn_predictions = nn_model.predict(X_test).ravel()
nn_auc = roc_auc_score(y_test, nn_predictions)
print("Neural Network ROC-AUC:", nn_auc)


Random Forest ROC-AUC: 0.9110258790128849
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Neural Network ROC-AUC: 0.8869785979471501
