In [10]:
#Block 1 – Load and Inspect Data

import pandas as pd
from sklearn.datasets import fetch_openml

# Fetch version 1 of the "BankMarketing" dataset from OpenML. With
# as_frame=True the returned bunch exposes the data as a pandas DataFrame.
bank = fetch_openml(
    name="BankMarketing",
    version=1,
    as_frame=True,
)
df = bank.frame  # single frame; it includes the "y" column used as the target later

# Report the (rows, columns) size, then preview the first rows as the cell output.
print(f"Dataset shape: {df.shape}")
df.head()



Dataset shape: (41188, 21)


Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [11]:
#Block 2 – Preprocessing

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Separate the predictors from the target column "y". drop() returns a new
# frame, so the encoding below does not modify df.
X = df.drop("y", axis=1)
y = df["y"]

# Integer-encode every category/object column. Each fitted encoder is kept
# (keyed by column name) so the codes can be mapped back to the original
# labels via encoder.classes_ / encoder.inverse_transform instead of being
# lost once the transform is done.
# NOTE(review): scikit-learn documents LabelEncoder as a *target* encoder;
# OrdinalEncoder is the feature-side equivalent. Consider switching once the
# handling of missing values in these columns has been confirmed.
feature_encoders = {}
for col in X.select_dtypes(include=["category", "object"]).columns:
    feature_encoders[col] = LabelEncoder()
    X[col] = feature_encoders[col].fit_transform(X[col])

# Keep the target encoder as well: target_encoder.classes_ lists the original
# labels in the order of their integer codes (useful for plots and reports).
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)

# 80/20 split, stratified on y so both parts keep the same class balance;
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Training and testing sets created successfully.")



Training and testing sets created successfully.


In [12]:
#Block 3 – Train Decision Tree Classifier

from sklearn.tree import DecisionTreeClassifier

# No depth or leaf limits are passed, so the tree grows with the estimator's
# defaults; only the random seed is fixed. fit() returns the estimator itself,
# so construction and training can be chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Report the size of the fitted tree.
print(f"Full tree depth: {clf.get_depth()}")
print(f"Number of leaves: {clf.get_n_leaves()}")


Full tree depth: 27
Number of leaves: 2478


In [13]:
#Block 4 – Full Tree Visualization (Save as PDF)

import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Draw on an explicit figure/axes pair rather than pyplot's implicit current
# figure, so exactly this figure is saved and closed.
fig, ax = plt.subplots(figsize=(50, 30))  # large canvas for the full tree
try:
    plot_tree(
        clf,
        # Plain list: some scikit-learn releases reject a pandas Index here.
        feature_names=list(X.columns),
        # Must follow the ascending order of the encoded target classes.
        class_names=["No", "Yes"],
        filled=True,
        ax=ax,
    )
    fig.savefig("full_decision_tree.pdf", bbox_inches="tight")
finally:
    # Release the figure even if rendering or saving fails.
    plt.close(fig)

print("Full decision tree saved as 'full_decision_tree.pdf'")



Full decision tree saved as 'full_decision_tree.pdf'
