In [None]:
# Task 3: Decision Tree Classifier on Bank Marketing Dataset

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Load the raw Bank Marketing CSV (semicolon-separated, read as latin1).
# The location is kept in one named constant so it is the only line to edit
# when the notebook runs on another machine.
DATA_PATH = r"C:\Users\jg275\Downloads\bank+marketing\bank-additional\bank-additional\bank-additional-full.csv"
df = pd.read_csv(DATA_PATH, encoding='latin1', sep=";")

print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line).
df.info()
# Class balance of the target column.
print(df['y'].value_counts())

In [None]:
# Missing-value check, part 1: real NaN cells per column.
print("Missing values per column:")
print(df.isnull().sum())

# Missing-value check, part 2: placeholder strings.
# NOTE(review): the UCI documentation for this dataset states that missing
# categorical values are stored as the label "unknown" rather than NaN, so the
# NaN count above can be all zeros while gaps still exist - confirm on the data.
# Only object (string) columns are inspected; if this cell is run after the
# label-encoding cell there are none left and the result is empty.
print("\n'unknown' entries per categorical column:")
print((df.select_dtypes(include='object') == 'unknown').sum())

In [None]:
# Data Preprocessing
# Encode every categorical (object-dtype) column of df as integer codes, in place.
#
# A separate LabelEncoder is fitted per column and stored in `label_encoders`
# (column name -> fitted encoder). Refitting one shared encoder on each column
# would throw away every mapping except the last one, leaving no way to turn
# codes back into category labels, e.g.
#     label_encoders['y'].classes_
#     label_encoders['y'].inverse_transform([0, 1])
#
# Run this cell once after loading: on an already-encoded df there are no
# object columns left, so df is unchanged and `label_encoders` is reset to {}.
le = LabelEncoder()  # stays bound even when there is nothing to encode
label_encoders = {}
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [None]:
# Define Features and Target
# NOTE(review): the UCI documentation for this dataset warns that the
# `duration` column is only known after the call has ended and should be left
# out of a realistic predictive model - confirm whether it is present in X and
# whether it should be dropped here.
X = df.drop('y', axis=1)
y = df['y']  # 0 = no, 1 = yes

# Train-Test Split
# stratify=y keeps the class proportions of y identical in the train and test
# partitions, so the held-out metrics are not skewed by an unlucky draw of the
# minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build Decision Tree Classifier
# max_depth=5 caps the tree depth; random_state fixes tie-breaking so the
# fitted tree is reproducible across runs.
dt_model = DecisionTreeClassifier(criterion='gini', max_depth=5, random_state=42)
dt_model.fit(X_train, y_train)

# Evaluate the Model on the held-out test partition

y_pred = dt_model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

In [None]:
# Visualize the Tree
# Draws the fitted dt_model with nodes coloured by majority class (filled=True).
plt.figure(figsize=(20,10))
plot_tree(
    dt_model,
    # Pass a plain list: some scikit-learn releases validate this argument
    # strictly and reject a pandas Index.
    feature_names=list(X.columns),
    # Labels follow ascending class order of the fitted model (0 -> 'No', 1 -> 'Yes').
    class_names=['No', 'Yes'],
    filled=True,
)
plt.title("Decision Tree for Bank Marketing")
plt.show()

In [None]:
# Feature Importance
# Pair each importance score from the fitted tree with its column name,
# then chart the scores from most to least important.
feat_imp = pd.Series(data=dt_model.feature_importances_, index=X.columns)
ranked_importances = feat_imp.sort_values(ascending=False)
importance_ax = ranked_importances.plot(
    kind='bar',
    title='Feature Importances',
    color='teal',
)
importance_ax.set_ylabel('Importance Score')
plt.show()