Training the model

In [1]:
#Import libraries
import os

import joblib
import numpy as np
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

In [2]:
#Load the data
DATA_PATH = "../../../../../data/processed/breast_cancer"


def load_split(file_name, data_path=DATA_PATH):
    """Read one ``.npz`` split and return its arrays cast to int64.

    Parameters
    ----------
    file_name : str
        Name of the archive inside ``data_path`` (e.g. ``'train_data.npz'``).
    data_path : str
        Directory holding the archives; defaults to ``DATA_PATH``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, labels)`` read from the archive's ``'inputs'`` and
        ``'labels'`` entries, each converted with ``astype(np.int64)``.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code from an untrusted file. It is kept because it is
    # not visible here whether the stored arrays are object dtype; switch it to
    # False if the archives only hold plain numeric arrays.
    # The context manager closes the archive's file handle once both arrays are read.
    with np.load(data_path + '/' + file_name, allow_pickle=True) as npz_file:
        # NOTE(review): astype(np.int64) would truncate fractional feature
        # values -- confirm the processed features are integer-valued.
        inputs = npz_file['inputs'].astype(np.int64)
        labels = npz_file['labels'].astype(np.int64)
    return inputs, labels


#Train data
train_inputs, train_labels = load_split('train_data.npz')

#Validation data
validation_inputs, validation_labels = load_split('val_data.npz')

#Test data
test_inputs, test_labels = load_split('test_data.npz')

In [3]:
#Build a depth-limited decision tree and fit it on the training split
MAX_DEPTH = 10

# Constructor arguments gathered in one place; random_state is pinned to 42.
tree_params = dict(max_depth=MAX_DEPTH, random_state=42)
model = DecisionTreeClassifier(**tree_params)
model.fit(train_inputs, train_labels)

Evaluate model

In [4]:
#Predict labels for the train, validation, and test inputs (in that order)
y_train_pred, y_val_pred, y_test_pred = (
    model.predict(split_inputs)
    for split_inputs in (train_inputs, validation_inputs, test_inputs)
)

In [5]:
#Build one classification report per split: train, validation, test
train_model_report, val_model_report, test_model_report = (
    classification_report(true_labels, predicted_labels)
    for true_labels, predicted_labels in (
        (train_labels, y_train_pred),
        (validation_labels, y_val_pred),
        (test_labels, y_test_pred),
    )
)

In [6]:
#Train set report
# Prints the report built from train_labels and y_train_pred.
# NOTE(review): scores on the data the tree was fitted on are only a sanity
# check; compare them with the validation and test reports to judge overfitting.
print(train_model_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       270
           1       1.00      1.00      1.00       139

    accuracy                           1.00       409
   macro avg       1.00      1.00      1.00       409
weighted avg       1.00      1.00      1.00       409



In [7]:
#Validation set report
# Prints the report built from validation_labels and y_val_pred.
print(val_model_report)

              precision    recall  f1-score   support

           0       0.94      0.98      0.96        89
           1       0.95      0.88      0.91        48

    accuracy                           0.94       137
   macro avg       0.95      0.93      0.93       137
weighted avg       0.94      0.94      0.94       137



In [8]:
#Test set report
# Prints the report built from test_labels and y_test_pred.
print(test_model_report)

              precision    recall  f1-score   support

           0       0.93      0.96      0.95        85
           1       0.94      0.88      0.91        52

    accuracy                           0.93       137
   macro avg       0.94      0.92      0.93       137
weighted avg       0.93      0.93      0.93       137



In [9]:
#Save the model
SAVE_MODEL_PATH = '../../../../../models/breast_cancer/decision_tree'

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first; exist_ok=True leaves an existing folder untouched.
os.makedirs(SAVE_MODEL_PATH, exist_ok=True)

# NOTE(review): the file name says "breast_cancel" (likely meant "breast_cancer").
# It is left as-is because other code may load the model by this exact path.
# Kept as the last expression so the cell still displays the written file list.
joblib.dump(model, SAVE_MODEL_PATH + '/breast_cancel_detection.pkl')

['../../../../../models/breast_cancer/decision_tree/breast_cancel_detection.pkl']