In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import export_text
from sklearn import tree

In [2]:
 # Load the dataset
bank_data = pd.read_csv("bank-additional-full.csv", sep=';')
bank_data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [3]:
# Data preprocessing
# Encoding categorical variables
le = LabelEncoder()
bank_data['job'] = le.fit_transform(bank_data['job'])
bank_data['marital'] = le.fit_transform(bank_data['marital'])
bank_data['education'] = le.fit_transform(bank_data['education'])
bank_data['default'] = le.fit_transform(bank_data['default'])
bank_data['housing'] = le.fit_transform(bank_data['housing'])
bank_data['loan'] = le.fit_transform(bank_data['loan'])
bank_data['contact'] = le.fit_transform(bank_data['contact'])
bank_data['month'] = le.fit_transform(bank_data['month'])
bank_data['day_of_week'] = le.fit_transform(bank_data['day_of_week'])
bank_data['poutcome'] = le.fit_transform(bank_data['poutcome'])
bank_data['y'] = le.fit_transform(bank_data['y'])

In [4]:
# Define the inputs and target
X = bank_data.drop(columns=['y']).values
y = bank_data['y'].values

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [6]:
# Train a decision tree classifier
clf = DecisionTreeClassifier(random_state=1)
clf.fit(X_train, y_train)

In [7]:
# Test the classifier
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [8]:
print(accuracy)

0.8889293517844137


In [9]:
# Visualize the decision tree
tree_rules = export_text(clf, feature_names=list(X[0]))
print("\nDecision Tree Rules:")
print(tree_rules)


Decision Tree Rules:
|--- 5191.0 <= 5087.65
|   |--- 261.0 <= 172.50
|   |   |--- 999.0 <= 6.50
|   |   |   |--- 261.0 <= 136.50
|   |   |   |   |--- -36.4 <= -30.75
|   |   |   |   |   |--- 4.857 <= 1.24
|   |   |   |   |   |   |--- 4.857 <= 0.64
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- 4.857 >  0.64
|   |   |   |   |   |   |   |--- 56.0 <= 53.50
|   |   |   |   |   |   |   |   |--- 999.0 <= 1.00
|   |   |   |   |   |   |   |   |   |--- 4.857 <= 0.89
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- 4.857 >  0.89
|   |   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |   |--- 999.0 >  1.00
|   |   |   |   |   |   |   |   |   |--- 4.857 <= 0.83
|   |   |   |   |   |   |   |   |   |   |--- 4.857 <= 0.81
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- 4.857 >  0.81
|   |   |   |   |   |   |   |   |   |   |   |--- 