# Reading Dataset

In [287]:
import pandas as pd

In [288]:
data = pd.read_csv("/content/bank.csv", sep=";")

In [None]:
data

# Understanding the Dataset

In [None]:
# Dataset shape (rows,columns)
print(data.shape)

In [None]:
# Total columns
print(len(data.columns))

In [None]:
# Columns name
print(data.columns.tolist())

In [None]:
# Check null values in the dataset
print(data.isnull().sum().sum())

In [None]:
# Column names, non-null count, and data types
data.info()

# Data Preprocessing

In [295]:
# Separate features and target
X = data.drop('y', axis=1)
y = data['y']

In [296]:
# One-Hot Encode categorical features
X = pd.get_dummies(X, drop_first=False, dtype=int)

In [None]:
# Target
y

In [None]:
# Target distribution
y.value_counts()

In [None]:
# Target distribution proportion
y.value_counts(normalize=True)*100

In [300]:
from imblearn.over_sampling import SMOTE

In [301]:
# Apply SMOTE to balance dataset
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [None]:
# Target distribution after SMOTE
y_resampled.value_counts()

In [None]:
# Target distribution proportion after SMOTE
y_resampled.value_counts(normalize=True)*100

In [304]:
from sklearn.model_selection import train_test_split

In [305]:
# Train/Test split
# Split the ORIGINAL data first so the test set contains only real,
# untouched observations. Oversampling before the split lets synthetic
# points interpolated from training rows end up in the test set, which
# leaks information and inflates every reported metric.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
# Balance the training portion only; the test set keeps the real class ratio.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

In [None]:
# Training sample
X_train.shape[0]

In [None]:
# Testing sample
X_test.shape[0]

# Model Training

In [308]:
from sklearn.tree import DecisionTreeClassifier

In [309]:
# Fit a depth-limited decision tree (seeded for reproducibility)
tree_params = {"max_depth": 4, "random_state": 42}
model = DecisionTreeClassifier(**tree_params)
model.fit(X=X_train, y=y_train)

# Prediction & Accuracy

In [310]:
# Predictions
y_pred = model.predict(X_test)

In [None]:
print(y_pred)

In [312]:
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
# Scale to a percentage BEFORE rounding: rounding first and multiplying
# afterwards reintroduces float noise (e.g. 81.23000000000001).
print("\nAccuracy:", round(accuracy * 100, 2))
print("\nClassification Report:")
# target_names follow the sorted label order - assumes the target values
# sort as no/yes so that 'No' maps to the first class; confirm.
print(classification_report(y_test, y_pred, target_names=['No', 'Yes']))

In [314]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Confusion matrix of actual vs. predicted labels, drawn as an annotated heatmap
cm = confusion_matrix(y_test, y_pred)
class_labels = ['No', 'Yes']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix (with SMOTE)")
plt.show()

In [316]:
from sklearn.tree import plot_tree

In [None]:
# Visualize Decision Tree
plt.figure(figsize=(20,10))
# Pass feature names as a plain list: some scikit-learn releases validate
# this parameter as a list and reject a pandas Index. The names are taken
# from the training matrix so they match the columns the model was fit on.
plot_tree(model, feature_names=list(X_train.columns), class_names=['No', 'Yes'], filled=True)
plt.show()

In [318]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

In [None]:
# Cross-Validation (Stratified K-Fold)
from imblearn.pipeline import Pipeline

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Run SMOTE inside the pipeline so it is fitted on each training fold only.
# Cross-validating on data that was oversampled up front places synthetic
# neighbours of training rows into the validation folds and overstates
# accuracy. cross_val_score clones the pipeline, so the already-fitted
# `model` is not modified.
cv_pipeline = Pipeline([
    ("smote", SMOTE(random_state=42)),
    ("tree", model),
])
scores = cross_val_score(cv_pipeline, X, y, cv=cv, scoring='accuracy')

print("\nCross-validation Scores:", scores*100)
print("Mean Cross-Validation Accuracy:", scores.mean()*100)