In [None]:
import pandas
import numpy
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
# Load the ';'-separated bank dataset from the notebook's working directory.
df = pandas.read_csv("bank.csv", sep=";")

In [None]:
# Preview the first rows to check that the ';'-delimited file was parsed into separate columns.
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [None]:
# Combine month and day-of-month into one string key of the form "<month>_<day>" (e.g. "oct_19").
day_as_text = df['day'].astype(str)
df['contact_time'] = df['month'] + '_' + day_as_text

In [None]:
def _pdays_flag(pdays_value):
    """Return 0 for the -1 sentinel and 1 for any other value."""
    # NOTE(review): -1 presumably means "not previously contacted" - confirm against the dataset docs.
    return 1 if pdays_value != -1 else 0


# Collapse `pdays` into a binary flag. This must run exactly once on the raw column.
df['pdays'] = df['pdays'].apply(_pdays_flag)

In [None]:

# Income bands and the job titles assigned to each of them.
income_groups = {
    'low_income': ['student', 'unemployed', 'unknown', 'services', 'self-employed'],
    'lower_middle_income': ['housemaid', 'technician', 'blue-collar'],
    'high_income': ['management', 'admin.', 'retired', 'entrepreneur'],
}

# Invert the grouping into the job -> income band lookup used by `.map`.
job_mapping = {
    job: band
    for band, jobs in income_groups.items()
    for job in jobs
}

# Jobs missing from the lookup become NaN; the counts below would then not sum to len(df).
job_bands = df['job'].map(job_mapping)
df['job_category'] = job_bands

print(df['job_category'].value_counts())


high_income            1845
lower_middle_income    1826
low_income              850
Name: job_category, dtype: int64


In [None]:
# Frequency of each contact channel; in the saved output 'unknown' covers 1324 rows.
df["contact"].value_counts()

cellular     2896
unknown      1324
telephone     301
Name: contact, dtype: int64

In [None]:
# `pdays` was already collapsed to a 0/1 flag earlier in the notebook.
# Re-applying the "-1 -> 0, else -> 1" rule to that flag would set every row
# to 1 (no value equals -1 any more) and destroy the feature, so this cell
# only validates the column instead of transforming it a second time.
assert df['pdays'].isin([0, 1]).all(), "pdays is expected to already be a 0/1 flag"

In [None]:

# Fold the 'unknown' education level into 'primary'; the other levels map to themselves.
education_mapping = dict(
    primary='primary',
    unknown='primary',
    secondary='secondary',
    tertiary='tertiary',
)

education_levels = df['education'].map(education_mapping)
df['education_category'] = education_levels

print(df['education_category'].value_counts())


secondary    2306
tertiary     1350
primary       865
Name: education_category, dtype: int64


In [None]:
# Bucket the account balance into labelled bands.
# `right=False` makes every bin closed on the left and open on the right, so
# 'negative' holds only balances strictly below zero and a balance of exactly
# 0 falls into 'low'. With pandas' default right-closed bins a zero balance
# would be labelled 'negative'.
balance_bins = [-numpy.inf, 0, 1000, 5000, 10000, numpy.inf]
balance_labels = ['negative', 'low', 'medium', 'high', 'very_high']
df['balance_category'] = pandas.cut(
    df['balance'],
    bins=balance_bins,
    labels=balance_labels,
    right=False,
)

In [None]:
# Frequency of each previous-campaign outcome; in the saved output 'unknown' covers 3705 of 4521 rows.
df["poutcome"].value_counts()

unknown    3705
failure     490
other       197
success     129
Name: poutcome, dtype: int64

In [None]:
# Raw columns removed before modelling. Several of them (job, education,
# balance, day, month) have engineered replacements created in earlier cells.
columns_drop = [
    "age",
    'job',
    'education',
    'balance',
    'contact',
    'day',
    'month',
    "poutcome",
    "previous",
]
# Rebinding `df` makes this cell single-shot: a second run raises KeyError.
df = df.drop(columns=columns_drop)

In [None]:
# Summarise the remaining columns, their dtypes and non-null counts.
# NOTE(review): the saved output lists integer dtypes for the text columns and
# float64 for duration/campaign, which only the later encoding and scaling
# cells produce - this cell appears to have been executed out of order.
# Confirm with Restart Kernel -> Run All.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4521 entries, 0 to 4520
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   marital             4521 non-null   int32  
 1   default             4521 non-null   int32  
 2   housing             4521 non-null   int32  
 3   loan                4521 non-null   int32  
 4   duration            4521 non-null   float64
 5   campaign            4521 non-null   float64
 6   pdays               4521 non-null   int64  
 7   y                   4521 non-null   int32  
 8   contact_time        4521 non-null   int32  
 9   job_category        4521 non-null   int32  
 10  education_category  4521 non-null   int32  
 11  balance_category    4521 non-null   int32  
dtypes: float64(2), int32(9), int64(1)
memory usage: 265.0 KB


In [None]:
# Names of the columns that still hold text or pandas-categorical values.
non_numeric_frame = df.select_dtypes(include=["object", "category"])
categorical_cols = non_numeric_frame.columns

In [None]:

# One fitted LabelEncoder per categorical column, kept so the integer codes
# can be mapped back to the original labels later.
label_encoders = {col: LabelEncoder() for col in categorical_cols}

for col, encoder in label_encoders.items():
    # Replace the column in place with its integer codes.
    df[col] = encoder.fit_transform(df[col])

In [None]:
# Spot-check five random rows after encoding.
# NOTE(review): the saved output already shows standardised duration/campaign
# values although the scaling cell comes later in the notebook - this output
# appears to stem from out-of-order execution. Confirm with Restart Kernel -> Run All.
df.sample(5)

Unnamed: 0,marital,default,housing,loan,duration,campaign,pdays,y,contact_time,job_category,education_category,balance_category
2134,1,0,1,1,-0.631038,-0.576829,1,0,199,0,1,1
1148,2,0,0,0,-0.226924,0.066368,1,0,138,0,2,1
1080,1,0,1,0,-0.138404,0.066368,1,0,190,2,1,1
2111,2,0,0,0,-0.869658,4.247155,1,0,40,0,2,3
4004,2,0,0,0,0.11561,0.066368,1,0,31,2,1,1


In [None]:
# Standardise the two continuous features.
# NOTE(review): the scaler is fit on every row, before the train/test split
# further down, so test-set statistics influence the transform.
scaled_cols = ['duration', 'campaign']
scaler = StandardScaler()
df[scaled_cols] = scaler.fit_transform(df[scaled_cols])

In [None]:
# Class balance of the target; the saved output shows 4000 rows of class 0 against 521 of class 1.
df["y"].value_counts()

0    4000
1     521
Name: y, dtype: int64

In [None]:

from sklearn.model_selection import train_test_split

# Separate the feature matrix from the target column.
X = df.drop('y', axis=1)
y = df['y']

# Hold out 20% of the rows for evaluation. The target is imbalanced (4000 vs
# 521 rows in the saved class counts), so the split is stratified on `y` to
# keep the class ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:

from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree with default hyper-parameters; the fixed seed keeps tie-breaking reproducible.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt


In [None]:

from sklearn.metrics import classification_report, accuracy_score

# Predict on the held-out rows.
y_pred = dt.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Accuracy alone is misleading on an imbalanced target: always predicting the
# majority class would already score about 4000/4521 = 0.88 on the full data.
# The per-class precision/recall/F1 report shows how well the minority class
# is actually recovered.
print(classification_report(y_test, y_pred))


Accuracy: 0.87
