In [None]:
import numpy as np
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch

In [None]:
def decision_tree_model(dataSet, testData_percentage, max_depth):
    """Train a decision tree on a hold-out split and print train/test metrics.

    Parameters
    ----------
    dataSet : object with ``data`` and ``target`` attributes
        ``data`` holds one feature row per sample and ``target`` the matching
        labels (this notebook passes a ``sklearn.utils.Bunch``).
    testData_percentage : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    max_depth : int or None
        Maximum depth of the fitted tree, forwarded to
        ``DecisionTreeClassifier``. ``None`` leaves the depth unbounded.

    Returns
    -------
    None
        The classification report, confusion matrix and accuracy are printed
        for the training split and then for the test split.
    """
    img_train, img_test, label_train, label_test = train_test_split(
        dataSet.data,
        dataSet.target,
        test_size=testData_percentage,
        random_state=0,
    )

    # max_depth was previously accepted but never handed to the estimator, so
    # every call grew a full-depth tree. random_state pins the tie-breaking
    # between equally good splits so repeated runs print the same numbers.
    classifier = DecisionTreeClassifier(max_depth=max_depth, random_state=0)

    # fitting classifier model
    classifier.fit(img_train, label_train)

    # classification_report outputs classification metrics
    # such as precision, recall and F1 score
    pred_result = classifier.predict(img_train)
    # zero_division=0 mirrors the test report below: a depth-limited tree may
    # never predict some class, which would otherwise raise a warning here.
    print('Classification Training Report: \n', classification_report(label_train, pred_result, zero_division=0))

    # confusion_matrix outputs how many samples are correctly or incorrectly classified
    print('Train Confusion Matrix: \n', confusion_matrix(label_train, pred_result), "\n")

    # accuracy computes classification accuracy
    print('Train Accuracy: ', accuracy_score(label_train, pred_result), '\n')

    # testing with the held-out split
    validate_result = classifier.predict(img_test)
    print('Classification Testing Report: \n', classification_report(label_test, validate_result, zero_division=0))
    # confusion_matrix outputs how many samples are correctly or incorrectly classified
    print('Test Confusion Matrix: \n', confusion_matrix(label_test, validate_result), "\n")
    # accuracy computes classification accuracy
    print('Test Accuracy: ', accuracy_score(label_test, validate_result))

In [None]:
data_number = 2910
cropped_window_size = 200
# load data from .npz file
npdata = np.load(f'{data_number}_datasetC_{cropped_window_size}x{cropped_window_size}.npz')

# Read each array out of the archive exactly once: an .npz handle loads lazily
# on every key access, so indexing npdata['data'] inside a per-sample loop
# re-reads the whole image array on each iteration.
# NOTE(review): npdata is left open and bound in case later cells use it.
images = npdata['data']
labels = npdata['label']

print(images.shape)
# Flatten every image to a 1-D feature row in a single reshape. The row count
# comes from the array itself rather than data_number, so a file with a
# different sample count is not silently truncated. float64 gives the feature
# matrix a fixed dtype regardless of how the images were stored.
img = images.reshape(len(images), -1).astype(np.float64)
print(img.shape, "\n")

# make dataset in Bunch format
dataset = Bunch(data=img, target=labels)

decision_tree_model(dataset, 0.2, 3)

(2910, 200, 200)
(2910, 40000) 

Classification Training Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       686
           1       1.00      1.00      1.00       625
           2       1.00      1.00      1.00      1017

    accuracy                           1.00      2328
   macro avg       1.00      1.00      1.00      2328
weighted avg       1.00      1.00      1.00      2328

Train Confusion Matrix: 
 [[ 686    0    0]
 [   0  625    0]
 [   0    0 1017]] 

Train Accuracy:  1.0 

Classification Testing Report: 
               precision    recall  f1-score   support

           0       0.70      0.74      0.72       184
           1       0.36      0.35      0.35       158
           2       0.62      0.60      0.61       240

    accuracy                           0.58       582
   macro avg       0.56      0.56      0.56       582
weighted avg       0.57      0.58      0.57       582

Test Confusion Matrix: 
 [[137  