In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix
)


In [5]:
# Read the dataset from a CSV in the current working directory (relative path)
# and preview the first rows to check that the columns parsed as expected.
df = pd.read_csv("bank.csv")
df.head()


Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes


In [8]:
# Name of the column to predict; later cells use it to separate the labels
# from the feature columns.
target = "deposit"



In [9]:
# Class balance of the target, reported as fractions of all rows
# (normalize=True) rather than raw counts.
df[target].value_counts(normalize=True)


Unnamed: 0_level_0,proportion
deposit,Unnamed: 1_level_1
no,0.52616
yes,0.47384


In [10]:
# Integer-encode every text column (features and the target alike) so the
# estimator receives purely numeric input. Each fitted encoder is stored in
# `label_encoders`, keyed by column name, so the integer codes can be mapped
# back to the original strings with `inverse_transform`.
#
# Text columns are detected with the pandas dtype helpers instead of
# `dtype == "object"`: newer pandas versions can load strings with a dedicated
# string dtype, which the equality check would skip, leaving raw strings in
# the frame.
#
# NOTE: `df` is overwritten in place. Re-running this cell after it has
# already run finds no text columns, so `label_encoders` ends up empty;
# re-run the CSV load cell first if the encoders are needed again.
label_encoders = {}

text_columns = [
    name
    for name in df.columns
    if pd.api.types.is_object_dtype(df[name])
    or pd.api.types.is_string_dtype(df[name])
]

for col in text_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


In [11]:
# Split the encoded frame into labels (`y`) and predictors (`X`, every column
# except the target).
# NOTE(review): the columns look like the UCI Bank Marketing data. If so,
# `duration` (call length) is only known after the call has taken place and
# is documented as unsuitable for a realistic predictive model — it also
# dominates the feature importances reported at the end of this notebook.
# Confirm whether it should be excluded here.
y = df[target]
X = df.drop(columns=[target])


In [12]:
# Hold out 20% of the rows for evaluation. Stratifying on `y` keeps the class
# proportions the same in both partitions, and the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


In [13]:
# Random forest with 200 trees, trained on the training partition only.
#   class_weight="balanced"  weights classes inversely to their frequency
#   n_jobs=-1                builds trees on all available CPU cores
#   random_state=42          fixes the seed so results are repeatable
rf_model = RandomForestClassifier(
    n_estimators=200,
    class_weight="balanced",
    n_jobs=-1,
    random_state=42,
)

rf_model.fit(X_train, y_train)


In [14]:
# Hard class predictions for the held-out rows; used by all metrics below.
y_pred = rf_model.predict(X_test)


In [15]:
# Fraction of held-out rows whose predicted class matches the true class.
print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.851321092700403


In [16]:
# Rows are true classes, columns are predicted classes (scikit-learn's
# convention). The class codes come from the LabelEncoder fitted on the
# target — presumably 0 = "no" and 1 = "yes", since LabelEncoder sorts its
# classes; confirm with label_encoders[target].classes_.
confusion_matrix(y_test, y_pred)


array([[975, 200],
       [132, 926]])

In [17]:
# Per-class precision / recall / F1 and support, plus the averaged rows.
# Classes are listed by their encoded integer codes, not the original strings.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.88      0.83      0.85      1175
           1       0.82      0.88      0.85      1058

    accuracy                           0.85      2233
   macro avg       0.85      0.85      0.85      2233
weighted avg       0.85      0.85      0.85      2233



In [18]:
# Pair each predictor column with the importance the fitted forest assigned
# to it, ordered from most to least important. The row index keeps each
# feature's original column position in `X`.
feature_importance = pd.DataFrame(
    {"Feature": X.columns, "Importance": rf_model.feature_importances_}
).sort_values("Importance", ascending=False)

# Show the ten highest-ranked features.
feature_importance.head(10)


Unnamed: 0,Feature,Importance
11,duration,0.3733
5,balance,0.0904
0,age,0.083807
10,month,0.07932
9,day,0.073381
8,contact,0.044885
13,pdays,0.044096
1,job,0.03888
12,campaign,0.034603
15,poutcome,0.031779
