In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Read the semicolon-delimited source file into the raw working frame and
# preview its first rows.
DATA_FILE = "bank-additional-full.csv"
df = pd.read_csv(DATA_FILE, sep=';')
df.head()

In [None]:
# Column dtypes / non-null counts first, then how many rows fall into each
# value of the target column 'y'.
df.info()
target_counts = df['y'].value_counts()
print("\nTarget variable distribution:\n", target_counts)


In [13]:
# Encode on a copy so the raw frame `df` is left untouched and this cell can
# be re-run safely.
df_encoded = df.copy()

# One fitted encoder per column, keyed by column name, so every integer code
# can be mapped back to its original label through
# encoders[col].classes_ or encoders[col].inverse_transform(...).
encoders = {}
le = LabelEncoder()  # keeps `le` bound even when there are no object columns
for column in df_encoded.select_dtypes(include='object').columns:
    # Use a fresh encoder for each column: refitting one shared instance
    # overwrites the mapping it learned for the previous column.
    le = LabelEncoder()
    df_encoded[column] = le.fit_transform(df_encoded[column])
    encoders[column] = le

# NOTE(review): LabelEncoder assigns codes in sorted label order, which gives
# the feature columns an arbitrary numeric ordering. scikit-learn documents it
# as a target encoder; consider OrdinalEncoder/OneHotEncoder for the features.

In [17]:
# Separate the target column 'y' from the remaining columns, which become the
# feature matrix.
y = df_encoded['y']
X = df_encoded.drop(columns=['y'])

In [18]:
# Hold out 20% of the rows for evaluation, with a fixed seed for
# reproducibility. Stratifying on the target keeps the class proportions the
# same in the train and test partitions, so the test metrics are not skewed by
# a chance over- or under-representation of either class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=5, stratify=y
)

In [None]:
# Build the decision tree with a fixed seed and fit it on the training
# partition in one step; `fit` returns the estimator itself, so the trailing
# expression shows the same estimator repr as calling `fit` last would.
model = DecisionTreeClassifier(random_state=5).fit(X_train, y_train)
model

In [None]:
# Score the fitted tree on the held-out rows.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report\n", classification_report(y_test, y_pred))
# Rows are the true classes, columns are the predicted classes.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

In [None]:
# Draw the fitted tree, truncated at depth 3 so the nodes stay readable.
fig, ax = plt.subplots(figsize=(30, 10))
plot_tree(
    model,
    # Pass a plain list: some scikit-learn releases validate `feature_names`
    # as a list and reject a pandas Index.
    feature_names=list(X.columns),
    # class_names are matched to the classes in ascending order; this assumes
    # the encoded target is 0 = "no", 1 = "yes" -- TODO confirm against the
    # encoder's classes_.
    class_names=["NO", "YES"],
    filled=True,
    max_depth=3,
    ax=ax,
)
ax.set_title("Decision tree (display truncated at depth 3)")
plt.show()

In [None]:
# Label each importance score from the fitted tree with its feature column
# name, then report the label that carries the largest score.
feature_importances = pd.Series(
    data=model.feature_importances_,
    index=X.columns,
)
top_feature = feature_importances.idxmax()
print(" Most important feature influencing the decision:", top_feature)