In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
df = pd.read_csv("bank.csv")

df.head()

Unnamed: 0,age,job,marital,education,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,deposit
0,59,admin.,married,secondary,2343,yes,no,unknown,5,may,1042,1,-1,0,yes
1,56,admin.,married,secondary,45,no,no,unknown,5,may,1467,1,-1,0,yes
2,41,technician,married,secondary,1270,yes,no,unknown,5,may,1389,1,-1,0,yes
3,55,services,married,secondary,2476,yes,no,unknown,5,may,579,1,-1,0,yes
4,54,admin.,married,tertiary,184,no,no,unknown,5,may,673,2,-1,0,yes


In [3]:
X = df.drop("loan", axis=1)
y = df["loan"]

In [4]:
encoder = LabelEncoder()
for col in X.columns:
    if X[col].dtype == "object":
        X[col] = encoder.fit_transform(X[col])

In [5]:
if y.dtype == "object":
    y = encoder.fit_transform(y)

In [6]:
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average="binary"),
        "Recall": recall_score(y_test, y_pred, average="binary"),
        "F1": f1_score(y_test, y_pred, average="binary")
    }

In [7]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "KNN": KNeighborsClassifier()
}

results = []

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
for model_name, model in models.items():
    metrics = evaluate_model(model, X_train, X_test, y_train, y_test)
    results.append([model_name, "Without Normalization", metrics["Accuracy"], metrics["Precision"], metrics["Recall"], metrics["F1"]])

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=1000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [9]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
for model_name, model in models.items():
    metrics = evaluate_model(model, X_train, X_test, y_train, y_test)
    results.append([model_name, "Before Split", metrics["Accuracy"], metrics["Precision"], metrics["Recall"], metrics["F1"]])

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
for model_name, model in models.items():
    metrics = evaluate_model(model, X_train_scaled, X_test_scaled, y_train, y_test)
    results.append([model_name, "After Split", metrics["Accuracy"], metrics["Precision"], metrics["Recall"], metrics["F1"]])

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [12]:
df_results = pd.DataFrame(results, columns=["Model", "Normalization Method", "Accuracy", "Precision", "Recall", "F1"]).T
df_results

Unnamed: 0,0,1,2,3,4,5,6,7,8
Model,Logistic Regression,Decision Tree,KNN,Logistic Regression,Decision Tree,KNN,Logistic Regression,Decision Tree,KNN
Normalization Method,Without Normalization,Without Normalization,Without Normalization,Before Split,Before Split,Before Split,After Split,After Split,After Split
Accuracy,0.877743,0.791312,0.860725,0.877743,0.790416,0.860278,0.877743,0.789969,0.860725
Precision,0.0,0.21194,0.243243,0.0,0.207207,0.270588,0.0,0.211765,0.27381
Recall,0.0,0.260073,0.065934,0.0,0.252747,0.084249,0.0,0.263736,0.084249
F1,0.0,0.233553,0.103746,0.0,0.227723,0.128492,0.0,0.23491,0.128852
