In [1]:
#Task3
import pandas as pd
import requests
from zipfile import ZipFile
from io import BytesIO
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset from the ZIP file
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip"

# Specify the file path within the ZIP archive
file_path = "bank-additional/bank-additional-full.csv"

# Fetch the ZIP file.
# requests applies no timeout by default, so without one a stalled
# connection would block this cell forever.
resp = requests.get(url, timeout=60)

# Stop here on an HTTP error (4xx/5xx) instead of handing an error page
# to ZipFile below, where the failure would be much harder to diagnose.
resp.raise_for_status()

# Read the ZIP file contents from memory (nothing is written to disk)
with ZipFile(BytesIO(resp.content)) as z:
    with z.open(file_path) as f:
        # The CSV inside the archive is parsed with ';' as the field separator
        df = pd.read_csv(f, sep=';')

# Data preprocessing
# The only preprocessing done here is one-hot encoding: every non-numeric
# column is expanded into indicator columns, and drop_first=True drops the
# first level of each one. The target is encoded along with the features,
# which is why it is addressed as 'y_yes' below.
# NOTE(review): the UCI description of this dataset reportedly warns that
# `duration` is only known after the call and leaks the outcome - confirm
# and consider dropping it for a realistic model.
df = pd.get_dummies(df, drop_first=True)

# Separate the label column from the predictors
y = df['y_yes']  # Target variable
X = df.drop('y_yes', axis=1)  # Features: everything except the target

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is not stratified although the classes look
# imbalanced in the recorded output - consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the decision tree with a fixed seed and train it on the training
# split in one step; fit() returns the estimator itself, so `clf` is the
# fitted classifier.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = clf.predict(X_test)

# Evaluating the model: fraction of test rows predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1 summary
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

# Confusion matrix of true labels against predicted labels
matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", matrix)


Accuracy: 0.8868657441126487
Classification Report:
               precision    recall  f1-score   support

       False       0.94      0.93      0.94      7303
        True       0.50      0.52      0.51       935

    accuracy                           0.89      8238
   macro avg       0.72      0.73      0.72      8238
weighted avg       0.89      0.89      0.89      8238

Confusion Matrix:
 [[6817  486]
 [ 446  489]]
