# Decision Tree Model Application on Custom Dataset

## Data Loading and Preprocessing

In [3]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the semicolon-delimited dataset.
data = pd.read_csv('dataset.csv', delimiter=';')

# Features are every column except the 'Target' label column.
X = data.drop(columns=['Target'])
y = data['Target']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows influence the preprocessing (data leakage).
# With the same random_state and the same number of rows, the row partition
# should match the one obtained by splitting a pre-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn scaling statistics from the training rows only, then reuse them
# unchanged for the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any other cell that references X_scaled keeps working; it is now
# scaled with training-set statistics rather than full-dataset statistics.
X_scaled = scaler.transform(X)

## Decision Tree Model Training and Evaluation

In [4]:
def fit_and_evaluate_tree(criterion, X_train, y_train, X_test, y_test, random_state=42):
    """Fit a decision tree with the given split criterion and score it.

    Parameters
    ----------
    criterion : str
        Value passed to ``DecisionTreeClassifier(criterion=...)``
        (this notebook uses 'gini' and 'entropy').
    X_train, y_train : training features and labels used for fitting.
    X_test, y_test : held-out features and labels used for evaluation.
    random_state : int, default 42
        Seed forwarded to the classifier.

    Returns
    -------
    tuple
        ``(clf, y_pred_train, y_pred_test, metrics)`` where ``metrics`` is a
        dict with the keys "Train Accuracy", "Test Accuracy",
        "Train Confusion Matrix" and "Test Confusion Matrix".
    """
    clf = DecisionTreeClassifier(criterion=criterion, random_state=random_state)
    clf.fit(X_train, y_train)

    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)

    metrics = {
        "Train Accuracy": accuracy_score(y_train, y_pred_train),
        "Test Accuracy": accuracy_score(y_test, y_pred_test),
        "Train Confusion Matrix": confusion_matrix(y_train, y_pred_train),
        "Test Confusion Matrix": confusion_matrix(y_test, y_pred_test),
    }
    return clf, y_pred_train, y_pred_test, metrics


# NOTE(review): no depth / leaf-size limit is passed to the classifier, and the
# recorded output shows 1.0 train vs ~0.69 test accuracy, which suggests
# overfitting. Consider tuning the tree's size hyperparameters.

# Gini criterion
clf_gini, y_pred_train_gini, y_pred_test_gini, gini_metrics = fit_and_evaluate_tree(
    'gini', X_train, y_train, X_test, y_test
)
train_accuracy_gini = gini_metrics["Train Accuracy"]
test_accuracy_gini = gini_metrics["Test Accuracy"]
train_conf_matrix_gini = gini_metrics["Train Confusion Matrix"]
test_conf_matrix_gini = gini_metrics["Test Confusion Matrix"]

# Entropy criterion
clf_entropy, y_pred_train_entropy, y_pred_test_entropy, entropy_metrics = fit_and_evaluate_tree(
    'entropy', X_train, y_train, X_test, y_test
)
train_accuracy_entropy = entropy_metrics["Train Accuracy"]
test_accuracy_entropy = entropy_metrics["Test Accuracy"]
train_conf_matrix_entropy = entropy_metrics["Train Confusion Matrix"]
test_conf_matrix_entropy = entropy_metrics["Test Confusion Matrix"]

# Collecting the results
decision_tree_results = {
    "Gini Criterion": gini_metrics,
    "Entropy Criterion": entropy_metrics,
}

decision_tree_results


{'Gini Criterion': {'Train Accuracy': 1.0,
  'Test Accuracy': 0.6926553672316385,
  'Train Confusion Matrix': array([[1105,    0,    0],
         [   0,  643,    0],
         [   0,    0, 1791]], dtype=int64),
  'Test Confusion Matrix': array([[205,  60,  51],
         [ 33,  63,  55],
         [ 35,  38, 345]], dtype=int64)},
 'Entropy Criterion': {'Train Accuracy': 1.0,
  'Test Accuracy': 0.6903954802259887,
  'Train Confusion Matrix': array([[1105,    0,    0],
         [   0,  643,    0],
         [   0,    0, 1791]], dtype=int64),
  'Test Confusion Matrix': array([[220,  50,  46],
         [ 47,  54,  50],
         [ 39,  42, 337]], dtype=int64)}}