In [10]:
# 1. Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import zipfile
import os

# 2. Load the dataset
# The file is read as comma-separated text with no header row, so the
# column names are supplied explicitly.
column_names = ['variance', 'skewness', 'curtosis', 'entropy', 'class']
file_path = '/content/data_banknote_authentication.txt'
data = pd.read_csv(
    file_path,
    header=None,          # treat the first line as data, not as column labels
    names=column_names,
    sep=',',
)

# 3. Encode categorical variables - not needed here, as this dataset is purely numerical.
# This step appears to be a leftover from a template written for a dataset
# with categorical columns (e.g., Bank Marketing), so it is left disabled.
# If 'object'-dtype columns were present, they could be label-encoded like this:
# le = LabelEncoder()
# for col in data.select_dtypes(include='object').columns:
#     data[col] = le.fit_transform(data[col])

# 4. Separate the target column from the feature columns
y = data['class']                 # Target: the 'class' column
X = data.drop(columns='class')    # Features: every remaining column

# 5. Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 6 & 7. Build the Decision Tree model and train it on the training split
# (fit() returns the estimator itself, so construction and training are chained)
dt_model = DecisionTreeClassifier(
    criterion='gini', max_depth=5, random_state=42
).fit(X_train, y_train)

# 8. Predict labels for the held-out test rows
y_pred = dt_model.predict(X_test)

# 9. Evaluate the predictions against the true test labels
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)

Accuracy: 0.9672727272727273

Confusion Matrix:
 [[148   0]
 [  9 118]]

Classification Report:
               precision    recall  f1-score   support

           0       0.94      1.00      0.97       148
           1       1.00      0.93      0.96       127

    accuracy                           0.97       275
   macro avg       0.97      0.96      0.97       275
weighted avg       0.97      0.97      0.97       275

