In [None]:
import pandas as pd
import zipfile
import requests
from io import BytesIO
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# URL of the dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip'

# Download the archive. The timeout keeps the notebook from hanging forever on
# a stalled connection, and raise_for_status() reports an HTTP error here
# instead of letting an error page fail later as an invalid zip file.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Extract the dataset
with zipfile.ZipFile(BytesIO(response.content)) as z:
    file_names = z.namelist()
    print("Files in the zip:", file_names)
    # Select the first CSV member explicitly rather than trusting that the
    # first archive entry happens to be the data file.
    csv_names = [name for name in file_names if name.lower().endswith('.csv')]
    if not csv_names:
        raise ValueError(f"No CSV file found in the archive: {file_names}")
    # Close the member handle deterministically once pandas has read it.
    with z.open(csv_names[0]) as csv_file:
        df = pd.read_csv(csv_file, sep=';')

# Display initial columns to identify the target variable
print("Available columns:")
print(df.columns.tolist())

# Set the correct target column name
target_column = 'y'
if target_column not in df.columns:
    # Same exception type a plain df[target_column] would raise, but with a
    # message that points at the likely cause.
    raise KeyError(
        f"Target column {target_column!r} not found; "
        "check the separator and the selected file"
    )
print("Columns before encoding:", df.columns.tolist())

# Isolate the target column
y = df[target_column]

# One-hot encode the rest of the dataset (the target is dropped first so it
# cannot leak into the feature matrix)
df = pd.get_dummies(df.drop(columns=[target_column]))

# Show the encoded feature columns; the target is intentionally absent here
print("Columns after preprocessing:")
print(df.columns.tolist())

# Separate features and target variable
X = df  # Features remain as they are after encoding

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the classifier. The seed makes the fitted tree
# reproducible across runs, matching the seeded split above.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(classification_report(y_test, y_pred))


In [None]:
# This cell relies on the previous cell having been run:
# X holds the features after one-hot encoding.
# y holds the target variable 'y'.

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# 1. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 2. Initialize the Decision Tree Classifier (seeded for reproducible results)
clf = DecisionTreeClassifier(random_state=42)

# 3. Train the classifier
clf.fit(X_train, y_train)

# 4. Make predictions on the test set
y_pred = clf.predict(X_test)

# 5. Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print('Classification Report:')
print(classification_report(y_test, y_pred))

print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

# 6. Plot the decision tree
# Only the top levels are drawn: the classifier is grown without a depth
# limit, and rendering every node in one figure is slow and unreadable.
# This caps the drawing only; the fitted model and the metrics above are
# unaffected.
plot_max_depth = 3
plt.figure(figsize=(20, 10))
plot_tree(
    clf,
    max_depth=plot_max_depth,
    filled=True,
    # Plain Python lists of str are accepted by every scikit-learn release;
    # a pandas Index / NumPy array is rejected by some versions' parameter
    # validation.
    feature_names=list(X.columns),
    class_names=[str(label) for label in clf.classes_],
    rounded=True,
)
plt.title('Decision Tree')
plt.show()
