In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
from google.colab import files
import io

# Train and evaluate a depth-limited decision tree on an uploaded 'bank.csv'
# (semicolon-separated, with a binary 'y' column holding 'no'/'yes').
# Steps: upload -> encode -> stratified 80/20 split -> fit -> report.

uploaded = files.upload()

# Colab stores a re-uploaded file under a de-duplicated name such as
# 'bank (1).csv' and keys the returned dict by that name, so an exact-key
# check alone breaks when this cell is re-run in the same session.
# Prefer the exact name; otherwise accept the renamed form.
if 'bank.csv' in uploaded:
    csv_name = 'bank.csv'
else:
    renamed_candidates = [
        name for name in uploaded
        if name.startswith('bank (') and name.endswith(').csv')
    ]
    if not renamed_candidates:
        raise SystemExit("Error: 'bank.csv' was not found in the uploaded files.")
    csv_name = renamed_candidates[0]

FILE_CONTENT = io.BytesIO(uploaded[csv_name])
df = pd.read_csv(FILE_CONTENT, sep=';')

# Encode the target as 0/1. Series.map turns any label outside the mapping
# into NaN, which would only surface later as an opaque error in the
# stratified split or the fit, so fail here with a clear message instead.
target_encoded = df['y'].map({'no': 0, 'yes': 1})
unmapped_rows = target_encoded.isna()
if unmapped_rows.any():
    unexpected_labels = df.loc[unmapped_rows, 'y'].unique().tolist()
    raise ValueError(
        f"Column 'y' contains values other than 'no'/'yes': {unexpected_labels}"
    )
df['y'] = target_encoded

# 'y' is numeric at this point, so it is not picked up as a categorical column.
categorical_features = df.select_dtypes(include='object').columns.tolist()

# One-hot encode the categoricals; drop_first=True omits the first level of
# each column so the remaining dummies are not redundant with it.
df_encoded = pd.get_dummies(df, columns=categorical_features, drop_first=True)

X = df_encoded.drop('y', axis=1)
y = df_encoded['y']

# stratify=y keeps the yes/no ratio the same in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

dt_classifier = DecisionTreeClassifier(max_depth=5, random_state=42)
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

# Accuracy alone is not very informative when one class dominates; the
# per-class precision/recall below show how the minority class is handled.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['No Subscription', 'Subscription']))

# NOTE(review): if this is the UCI Bank Marketing data, its description says
# call `duration` is only known once the outcome is known -- confirm whether
# it should be excluded for a realistic pre-call model.
feature_importance = pd.Series(dt_classifier.feature_importances_, index=X.columns)
# nlargest(10) always returns ten rows, so features with zero importance can
# appear in the list when the tree uses fewer than ten features.
top_10_features = feature_importance.nlargest(10)

print("\n--- Top 10 Most Important Features ---")
print(top_10_features.to_markdown(floatfmt=".4f"))

Saving bank.csv to bank.csv
Model Accuracy: 0.8917

Classification Report:
                 precision    recall  f1-score   support

No Subscription       0.91      0.97      0.94       801
   Subscription       0.55      0.31      0.40       104

       accuracy                           0.89       905
      macro avg       0.73      0.64      0.67       905
   weighted avg       0.87      0.89      0.88       905


--- Top 10 Most Important Features ---
|                  |      0 |
|:-----------------|-------:|
| duration         | 0.5194 |
| poutcome_success | 0.2244 |
| month_oct        | 0.0586 |
| pdays            | 0.0571 |
| age              | 0.0444 |
| day              | 0.0436 |
| balance          | 0.0245 |
| month_mar        | 0.0145 |
| contact_unknown  | 0.0136 |
| campaign         | 0.0000 |
