In [3]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load the full table; every column except 'Class' is used as a feature.
data = pd.read_excel('dataset.xlsx')
X = data.drop(columns='Class')
# Double brackets keep the target as a one-column DataFrame rather than a Series.
y = data[['Class']]

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


# Baseline: a decision tree with default hyperparameters.
# fit() returns the estimator itself, so construction and training are chained;
# the one-column target frame is flattened to a 1-D array first.
tree_model = DecisionTreeClassifier().fit(X_train, y_train.to_numpy().ravel())

# Predict on both splits. Only the test-set predictions feed the metrics
# below; the training-set predictions are kept for inspection.
y_test_pred = tree_model.predict(X_test)
y_train_pred = tree_model.predict(X_train)

# Hold-out metrics. 'weighted' averages the per-class precision using each
# class's support as its weight.
accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='weighted')
confusion = confusion_matrix(y_test, y_test_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)

# Heading and matrix are emitted on separate lines by a single print call.
print("Confusion matrix:", confusion, sep="\n")

# Hyperparameter search: every combination of tree depth and minimum leaf size
# listed here is scored by cross-validation on the training split only.
parameters = {'max_depth': [3, 5, 7], 'min_samples_leaf': [1, 5, 10]}
clf = GridSearchCV(DecisionTreeClassifier(), parameters)
# Pass the target as a flat 1-D array, like the other fit() calls in this
# script, instead of the one-column DataFrame produced by the split.
clf.fit(X_train, y_train.values.ravel())

print("Best parameters:")
print(clf.best_params_)

# GridSearchCV is left at its default refit=True, so it has already retrained
# a tree with the winning parameters on all of X_train. Reuse that estimator
# rather than rebuilding one by hand from individual best_params_ keys, which
# would silently ignore any hyperparameter later added to the grid.
best_model = clf.best_estimator_

# Evaluate the tuned tree on the same hold-out split as the baseline.
y_pred_best = best_model.predict(X_test)

accuracy_best = accuracy_score(y_test, y_pred_best)
precision_best = precision_score(y_test, y_pred_best, average='weighted')

print("Accuracy with best parameters:", accuracy_best)
print("Precision with best parameters:", precision_best)

Accuracy: 0.8939764936336925
Precision: 0.8940121400465778
Confusion matrix:
[[358   0  24   0   2   2   9]
 [  0 161   0   0   0   0   0]
 [ 25   0 432   0  14   3   5]
 [  1   0   0 924   4  22  92]
 [  4   0  14   5 551   0  14]
 [  6   0   1  30   0 564  18]
 [  5   0   4  87  17  25 661]]
Best parameters:
{'max_depth': 7, 'min_samples_leaf': 1}
Accuracy with best parameters: 0.9052399608227228
Precision with best parameters: 0.905214714132882
