# 🎯 Bank Marketing: Predict If Customer Will Buy Product (Decision Tree Classifier)

Dataset: `bank.csv` (10% sample)

**Goal**: Predict whether a customer will buy a product (subscribe to a term deposit)


In [None]:
# ✅ Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_text
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# ✅ Step 2: Load Dataset
# Parse the CSV with ';' as the field delimiter, then preview the first rows.
df = pd.read_csv(filepath_or_buffer="bank.csv", delimiter=';')
df.head(n=5)

In [None]:
# ✅ Step 3: Simplify Target Labels for Readability
# Raw target value -> human-readable label.
target_labels = {
    'yes': 'Will Buy Product',
    'no': 'Will Not Buy Product'
}

# Series.map() would turn every value that is not a key of the dict into NaN,
# so an unexpected target value -- or simply running this cell twice -- would
# silently blank out the target column. Check the column first and fail loudly.
known_values = set(target_labels) | set(target_labels.values())
unexpected = df.loc[~df['y'].isin(known_values), 'y'].unique().tolist()
if unexpected:
    raise ValueError(
        f"Unexpected values in target column 'y': {unexpected!r}. "
        "Expected only 'yes'/'no'; reload the data (Step 2) if the column "
        "has already been encoded."
    )

# replace() leaves rows that already carry the readable label untouched,
# which makes re-running this cell a no-op instead of a data-destroying step.
df['y'] = df['y'].replace(target_labels)

In [None]:
# ✅ Step 4: Encode All Categorical Features (Including New Target)
# Encode the target first and keep its encoder: le_y.classes_ is the lookup
# from integer code back to label text.
# LabelEncoder numbers classes in sorted order, so with the labels assigned in
# Step 3 'Will Buy Product' becomes 0 and 'Will Not Buy Product' becomes 1.
le_y = LabelEncoder()
df['y'] = le_y.fit_transform(df['y'])

# 'y' now holds integer codes, so the object-typed columns selected below are
# feature columns only. Each fitted encoder is stored under its column name so
# an integer code seen in the tree plot or rules can be translated back, e.g.
# feature_encoders['job'].classes_[code] (column name is illustrative).
feature_encoders = {}
for col in df.select_dtypes(include='object').columns:
    feature_encoders[col] = LabelEncoder()
    df[col] = feature_encoders[col].fit_transform(df[col])

In [None]:
# ✅ Step 5: Train-Test Split
# Features are every column except the target.
# NOTE(review): if this is the UCI Bank Marketing data, the `duration` column
# is reportedly only known after the call outcome and would leak the target --
# confirm against the dataset description and consider dropping it.
X = df.drop(columns='y')
y = df['y']

# 80/20 split with a fixed seed. stratify=y keeps the class proportions of the
# target the same in the train and test partitions, so the held-out metrics are
# not distorted by an unlucky draw of the rarer class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# ✅ Step 6: Train Decision Tree
# Tree hyper-parameters: depth capped at 3, fixed seed for the estimator.
tree_settings = dict(max_depth=3, random_state=42)
clf = DecisionTreeClassifier(**tree_settings)
# fit() is the cell's last expression, so the fitted estimator is displayed.
clf.fit(X_train, y_train)

In [None]:
# ✅ Step 7: Evaluate Model
y_pred = clf.predict(X_test)

# Integer codes and their label text, in encoder order. Passing the codes
# explicitly via labels= pins the row/column order of the confusion matrix and
# guarantees it lines up with target_names, even if one class happens to be
# absent from y_test / y_pred.
class_codes = list(range(len(le_y.classes_)))
class_labels = [str(name) for name in le_y.classes_]

print("Accuracy:", accuracy_score(y_test, y_pred))

# A bare 2x2 array is easy to misread (which row is which class?), so wrap it
# in a DataFrame whose index/columns spell out actual vs. predicted labels.
cm = pd.DataFrame(
    confusion_matrix(y_test, y_pred, labels=class_codes),
    index=[f"Actual: {name}" for name in class_labels],
    columns=[f"Predicted: {name}" for name in class_labels],
)
print("\nConfusion Matrix:")
print(cm.to_string())

print("\nClassification Report:\n",
      classification_report(y_test, y_pred,
                            labels=class_codes,
                            target_names=class_labels))

In [None]:
# ✅ Step 8: Visualize Decision Tree
%matplotlib inline
plt.figure(figsize=(24,12))
plot_tree(clf, 
          feature_names=X.columns, 
          class_names=le_y.classes_, 
          filled=True, rounded=True, fontsize=12)
plt.title("Decision Tree: Will Customer Buy the Product?", fontsize=16)
plt.show()

In [None]:
# ✅ Step 9: Show Decision Rules (on what basis)
# Render the fitted tree as indented if/else text, labelled with column names.
feature_labels = X.columns.tolist()
rules = export_text(clf, feature_names=feature_labels, max_depth=3)
print(rules)

In [None]:
# ✅ Step 10: Show Feature Importance
# One importance score per feature column, taken from the fitted tree.
feature_importance = pd.Series(clf.feature_importances_, index=X.columns)

# Ascending sort: horizontal bars are drawn bottom-up, so the largest ends on top.
ranked_importance = feature_importance.sort_values()
ax = ranked_importance.plot.barh(figsize=(10, 6))
ax.set_title("Feature Importance: What Influences Buying Decision")
ax.set_xlabel("Importance")
plt.show()