# Decision Tree Classifier

In [1]:
# Decision Tree classifier on the forest cover type dataset
import numpy as np
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, average_precision_score, log_loss
from sklearn.metrics import cohen_kappa_score, confusion_matrix, brier_score_loss
from sklearn.metrics import matthews_corrcoef, fowlkes_mallows_score, r2_score
from sklearn.preprocessing import label_binarize
from sklearn.metrics import precision_recall_curve, roc_curve, top_k_accuracy_score

# Fetch the forest cover type dataset and pull out features / targets
data = fetch_covtype()
X = data.data
# Shift the labels down by one so they fall in the range 0 to 6
y = data.target - 1
num_classes = len(np.unique(y))

# Hold out 20% of the samples for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the Decision Tree on the standardized training data
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Hard label predictions and per-class probabilities for the test split
y_prob_dt = dt_model.predict_proba(X_test)
y_pred_dt = dt_model.predict(X_test)

# Define a function to calculate and print metrics
def print_metrics(y_test, y_pred, y_prob, model_name):
    """Compute and print a suite of classification metrics for one model.

    Args:
        y_test: True class labels, encoded as integers 0..n_classes-1.
        y_pred: Predicted class labels, same encoding as ``y_test``.
        y_prob: 2-D array of predicted class probabilities with one column
            per class; column ``i`` is treated as the probability of label
            ``i``.
        model_name: Name shown in the header line of the printed report.

    Returns:
        None. All results are written to stdout.
    """
    # Take the class set from the probability matrix itself instead of a
    # module-level global, so the function works on whatever it is handed.
    n_classes = y_prob.shape[1]
    class_labels = list(range(n_classes))

    # One-hot encode the truth once; both ranking metrics below reuse it.
    # NOTE: label_binarize returns a single column for two-class problems,
    # so this report is intended for problems with three or more classes.
    y_test_onehot = label_binarize(y_test, classes=class_labels)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro', zero_division=1)
    recall = recall_score(y_test, y_pred, average='macro', zero_division=1)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=1)
    roc_auc = roc_auc_score(y_test_onehot, y_prob, multi_class='ovr')
    pr_auc = average_precision_score(y_test_onehot, y_prob, average='macro')
    # Pass the label set explicitly so the metric does not have to infer it
    # from y_test (which fails if a class is absent from the evaluated split).
    log_loss_value = log_loss(y_test, y_prob, labels=class_labels)
    kappa = cohen_kappa_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    # One-vs-rest Brier score per class, averaged with equal class weight.
    brier_scores = [
        brier_score_loss(y_test == label, y_prob[:, label])
        for label in class_labels
    ]
    brier_score_avg = sum(brier_scores) / n_classes

    mcc = matthews_corrcoef(y_test, y_pred)
    fmi = fowlkes_mallows_score(y_test, y_pred)
    # R^2 is computed on the integer label values themselves, i.e. it treats
    # the class indices as numbers.
    r2 = r2_score(y_test, y_pred)

    top_k_acc = top_k_accuracy_score(y_test, y_prob, k=3, labels=class_labels)

    print(f"Metrics for {model_name}:")
    print(f"Accuracy: {accuracy:.3f}")
    print(f"Precision: {precision:.3f}")
    print(f"Recall: {recall:.3f}")
    print(f"F1-Score: {f1:.3f}")
    print(f"AUC-ROC: {roc_auc:.3f}")
    print(f"AUC-PR: {pr_auc:.3f}")
    print(f"Log Loss: {log_loss_value:.3f}")
    print(f"Cohen's Kappa: {kappa:.3f}")
    print(f"Confusion Matrix:\n{conf_matrix}")
    print(f"Average Brier Score: {brier_score_avg:.3f}")
    print(f"Matthews Correlation Coefficient: {mcc:.3f}")
    print(f"Fowlkes-Mallows Index: {fmi:.3f}")
    print(f"Coefficient of Determination (R^2): {r2:.3f}")
    print(f"Top-3 Accuracy: {top_k_acc:.3f}")
    print("\n")

# Report the evaluation metrics for the Decision Tree model
print_metrics(
    y_test=y_test,
    y_pred=y_pred_dt,
    y_prob=y_prob_dt,
    model_name="Decision Tree",
)

Metrics for Decision Tree:
Accuracy: 0.939
Precision: 0.903
Recall: 0.900
F1-Score: 0.902
AUC-ROC: 0.943
AUC-PR: 0.824
Log Loss: 2.203
Cohen's Kappa: 0.902
Confusion Matrix:
[[39905  2421     1     0    36     4   190]
 [ 2389 53545   165     1   261   100    39]
 [    4   123  6650    52    16   274     2]
 [    0     2    71   428     0    25     0]
 [   48   252    26     0  1658    10     1]
 [    8   102   253    25     7  3094     0]
 [  164    29     0     0     1     0  3821]]
Average Brier Score: 0.017
Matthews Correlation Coefficient: 0.902
Fowlkes-Mallows Index: 0.894
Coefficient of Determination (R^2): 0.848
Top-3 Accuracy: 0.944


