In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

Data processing


In [None]:
# Read the bank customer records from the CSV file (path is relative to the
# notebook's working directory).
dataset = pd.read_csv(filepath_or_buffer='BankCustomerData.csv')

In [None]:
# Encode the yes/no columns as 1/0; any value other than 'yes' or 'no' becomes NaN.
# Note: the columns are overwritten in place, so running this cell a second time on
# the already-encoded frame turns every value into NaN -- reload the data first.
yes_no_to_int = {'yes': 1, 'no': 0}
for column in ['loan', 'housing', 'default', 'term_deposit']:
    dataset[column] = dataset[column].map(yes_no_to_int)

In [None]:
# One-hot encode the remaining non-numeric columns; drop_first=True omits the first
# level of each encoded column.
dummyvar = pd.get_dummies(data=dataset, drop_first=True)

In [None]:
# Binary target: 1 where term_deposit is greater than zero, 0 otherwise.
dummyvar['subscribed'] = dummyvar['term_deposit'].gt(0).astype(int)

Feature selection

In [None]:
# Target vector: the binary 'subscribed' flag.
y = dummyvar['subscribed']

# Feature matrix: every column except the target, the column it was derived from,
# and four further columns that are excluded from the model.
excluded_columns = ['balance', 'default', 'loan', 'housing', 'term_deposit', 'subscribed']
X = dummyvar.drop(columns=excluded_columns)

Data Splitting

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Model training

In [None]:
# Standardize the features. The scaler's mean and variance are learned from the
# training split only and then reused unchanged for the test split, so both splits
# are scaled with the same parameters and no test-set statistics are used.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Fit a logistic regression classifier with default hyperparameters on the scaled
# training data.
model = LogisticRegression()
model.fit(X=X_train_scaled, y=y_train)

Model evaluation

In [None]:
# Predicted class labels for the scaled test features.
y_pred = model.predict(X=X_test_scaled)

In [None]:
# Score the test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Report the accuracy on one line, then the confusion matrix and the classification
# report, each printed beneath its own heading line.
print(f"Accuracy: {accuracy}")
print("Confusion Matrix: ", conf_matrix, sep="\n")
print("Classification Report: ", class_report, sep="\n")

Accuracy: 0.9179174484052532
Confusion Matrix: 
[[7593  135]
 [ 565  235]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.93      0.98      0.96      7728
           1       0.64      0.29      0.40       800

    accuracy                           0.92      8528
   macro avg       0.78      0.64      0.68      8528
weighted avg       0.90      0.92      0.90      8528



Conclusion

The model's accuracy is about 91.79%, meaning that in about 91.79% of test cases it correctly predicts whether a customer will subscribe to a term deposit. This overall accuracy suggests that the model has some ability to predict whether a customer will subscribe, which could help the bank target prospective customers more precisely and thereby save on marketing expenses. However, the accuracy must be read against the class imbalance: about 90.6% of the test customers (7,728 of 8,528) did not subscribe, so always predicting "no" would score almost as well, and recall for actual subscribers is only 0.29 (235 of 800 identified). Although the model performs well overall, several aspects could be improved, most notably the detection of actual subscribers, to align it better with the bank's marketing campaigns and to increase its ability to identify new kinds of deposit subscribers.