In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the raw dataset; the file uses ';' as its field separator.
df = pd.read_csv(
    "bank-additional-full.csv",
    sep=';',
)

In [4]:
# Treat the literal string 'unknown' as a missing value everywhere in the frame.
df.replace(to_replace='unknown', value=np.nan, inplace=True)

# Fill the gaps in each object-dtype column with that column's most frequent
# value (mode() may return several values on ties; the first one is used).
text_columns = df.select_dtypes(include='object').columns
for name in text_columns:
    most_common = df[name].mode().iloc[0]
    df[name] = df[name].fillna(most_common)

In [5]:
# Integer-encode every object-dtype column of df (this covers the target as
# well if it is still stored as strings at this point).
#
# A fresh LabelEncoder is fitted for each column and kept in `encoders`, keyed
# by column name, so any column's integer codes can later be mapped back to
# the original labels with encoders[col].inverse_transform(...) or inspected
# via encoders[col].classes_. Re-fitting one shared instance would retain only
# the mapping of the last column processed.
#
# NOTE(review): LabelEncoder assigns codes by sorted label order, which is
# arbitrary for nominal features; distance-based models such as the KNN used
# later in this notebook will treat those codes as magnitudes. Consider
# one-hot encoding the nominal features instead.
encoders = {}
encoder = LabelEncoder()  # keeps `encoder` defined even if no column needs encoding
for col in df.select_dtypes(include='object').columns:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    encoders[col] = encoder

In [6]:
# Separate the target column 'y' from the predictor columns.
# NOTE(review): if this is the UCI Bank Marketing data, the 'duration' column
# is only known after the call has ended and leaks the outcome — confirm and
# consider excluding it from X for a realistic model.
y = df['y']
X = df.drop(columns='y')

# Hold out 25% of the rows for evaluation. The split is seeded for
# reproducibility and stratified on y so both parts keep the class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

In [7]:
# Depth-limited decision tree using the entropy criterion, fitted on the
# unscaled training features. class_weight="balanced" reweights the classes
# by inverse frequency; random_state makes the fitted tree reproducible.
# fit() returns the estimator itself, so `dt` is the trained model.
dt = DecisionTreeClassifier(
    random_state=42,
    class_weight="balanced",
    min_samples_split=20,
    max_depth=6,
    criterion="entropy",
).fit(X_train, y_train)

# Class predictions for the held-out rows.
dt_pred = dt.predict(X_test)

In [8]:
# Overall accuracy followed by per-class precision / recall / F1 for the tree.
print("Decision Tree Accuracy:", accuracy_score(y_true=y_test, y_pred=dt_pred))
print(classification_report(y_true=y_test, y_pred=dt_pred))

Decision Tree Accuracy: 0.8428668544236185
              precision    recall  f1-score   support

           0       0.99      0.83      0.90      9137
           1       0.41      0.93      0.57      1160

    accuracy                           0.84     10297
   macro avg       0.70      0.88      0.74     10297
weighted avg       0.92      0.84      0.87     10297



In [9]:
# Learn the per-feature mean and standard deviation from the training rows
# only, then apply that same transformation to both splits so no statistics
# from the test set influence the scaling.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# 9-nearest-neighbours classifier on the standardized features. Neighbours
# are found by Euclidean distance and their votes are weighted by distance
# (weights='distance').
# fit() returns the estimator itself, so `knn` is the trained model.
knn = KNeighborsClassifier(
    metric='euclidean',
    weights='distance',
    n_neighbors=9,
).fit(X_train_scaled, y_train)

# Class predictions for the standardized held-out rows.
knn_pred = knn.predict(X_test_scaled)

In [11]:
# Overall accuracy followed by per-class precision / recall / F1 for KNN.
print("KNN Accuracy:", accuracy_score(y_true=y_test, y_pred=knn_pred))
print(classification_report(y_true=y_test, y_pred=knn_pred))

KNN Accuracy: 0.9055064581917063
              precision    recall  f1-score   support

           0       0.92      0.97      0.95      9137
           1       0.64      0.37      0.47      1160

    accuracy                           0.91     10297
   macro avg       0.78      0.67      0.71     10297
weighted avg       0.89      0.91      0.89     10297

