In [None]:
#@title 1. Data Preprocessing
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler

dt = pd.read_csv('BankCustomerData.csv')
dt.dropna(inplace=True)
df_dumb = pd.get_dummies(dt, drop_first=True)
df_dumb['subscribed'] = (dt['balance']>50).astype(int)

In [None]:

#@title 2. Feature Selection
X = df_dumb.drop(['subscribed'], axis=1)
y = df_dumb['subscribed']

In [None]:
#@title 3. Data Splitting
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [None]:

#@title 4. Model Training
scaler = StandardScaler()

numerical_features = ['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

model = LogisticRegression()
model.fit(X_train, y_train)
LogisticRegression()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:

#@title 5. Model Evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

precision_1, recall_1, f1_1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary', pos_label=1)

print(f"\nAccuracy: \n{accuracy}\n")
print(f"\nConfusion Matrix: \n{conf_matrix}\n")
print(f"\nClassification Report: \n{class_report}\n")
print(f"\nPrecision for class 1: \n{precision_1}\n")
print(f"\nRecall for class 1: \n{recall_1}\n")
print(f"\nF1-score for class 1: \n{f1_1}\n")


Accuracy: 
0.9824108818011257


Confusion Matrix: 
[[2037    2]
 [ 148 6341]]


Classification Report: 
              precision    recall  f1-score   support

           0       0.93      1.00      0.96      2039
           1       1.00      0.98      0.99      6489

    accuracy                           0.98      8528
   macro avg       0.97      0.99      0.98      8528
weighted avg       0.98      0.98      0.98      8528



Precision for class 1: 
0.999684691786221


Recall for class 1: 
0.9771921713669286


F1-score for class 1: 
0.9883104738154612



In [None]:
#@title 6. Conclusion
#@markdown 6. Conclusion:
#@markdown ### The efficacy of the logistic regression model in forecasting term deposit subscriptions was demonstrated by its near-perfect accuracy, precision, recall, and F1-score on the test set.
#@markdown - Insights:
#@markdown  - The model has a high precision that is nearly perfect (positive predictive value) because I did certain workarounds, such including all numerical features. This implies that when it forecasts a consumer will subscribe to a term deposit, it is always true.
#@markdown  - The recall (sensitivity) is likewise strong, suggesting that the model captures a significant percentage of actual term deposit subscriptions, even if this is something I've only noticed recently.
#@markdown - Implications for the bank's marketing strategies:
#@markdown  - The bank may be able to identify potential clients who are likely to sign up for term deposits using this technique, I believe.
#@markdown  - Additionally, targeted marketing initiatives can be focused on the identified client segments; in my opinion, doing so can significantly increase the effectiveness of marketing campaigns.
