In [7]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import io
import requests
import zipfile



In [8]:

# Zip archive hosted on archive.ics.uci.edu; the CSV member read below is
# semicolon-delimited.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (404/5xx) right here instead of as a
# confusing BadZipFile when an error page gets parsed as a zip archive.
response = requests.get(url, timeout=60)
response.raise_for_status()

with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    # Hand the archive member to pandas directly rather than decoding the whole
    # file into an intermediate string first.
    with z.open('bank-additional/bank-additional-full.csv') as csv_file:
        df = pd.read_csv(csv_file, sep=';', encoding='utf-8')



In [9]:


# --- Encode string columns -------------------------------------------------
# Every object-dtype column (features and the target 'y') is replaced in place
# by integer codes. Each column gets its own fitted encoder, stored in
# `label_encoders`, so codes can be mapped back with
# label_encoders[col].inverse_transform(...). Reusing one encoder instance
# would keep only the mapping of the last column it was fitted on.
# NOTE: `df` is overwritten here; once encoded it has no object columns left,
# so re-running this cell on its own leaves `label_encoders` empty -- reload
# `df` first to rebuild the mappings.
label_encoder = LabelEncoder()
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns

for col in categorical_columns:
    label_encoder = LabelEncoder()
    df[col] = label_encoder.fit_transform(df[col])
    label_encoders[col] = label_encoder

# --- Features / target ------------------------------------------------------
X = df.drop('y', axis=1)  # Features (all columns except 'y')
y = df['y']  # Target variable ('y' column)

# --- Hold-out split: 70% train / 30% test, seeded for reproducibility --------
# NOTE(review): the split is not stratified, so the class proportions in the
# test set depend on the random draw; consider stratify=y if the target is
# imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# --- Model: decision tree with default hyperparameters, seeded ---------------
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# --- Evaluation ---------------------------------------------------------------
# Overall accuracy plus the per-class precision/recall/F1 breakdown; the
# per-class rows show how each label performs individually.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.89
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94     10968
           1       0.51      0.51      0.51      1389

    accuracy                           0.89     12357
   macro avg       0.72      0.73      0.72     12357
weighted avg       0.89      0.89      0.89     12357

