In [1]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from src.read_data import read_file
from src.create_train_test_split import split_predictor_and_target
from src.xgboost_model import xgboost_classifier_model

# Load Data

In [2]:
# load the train and test data
df_train = read_file(folder="processed", filename="iris_train_processed.csv", delimiter=",")
df_test = read_file(folder="processed", filename="iris_test_processed.csv", delimiter=",")

# Split Data

In [3]:
# split processed training set
X_train, y_train, columns_X_train, columns_y_train = split_predictor_and_target(df=df_train)
# split processed test set
X_test, y_test, columns_X_test, columns_y_test = split_predictor_and_target(df=df_test)

# XG Boost Classifier

In [4]:
# get the best xg boost model using grid search
xgb_model = xgboost_classifier_model(X=X_train, y=y_train)

In [5]:
# predict on test set using the obtained model
y_pred_xgb = xgb_model.predict(X_test)

In [6]:
# calculate accuracy of xgb classifier
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
species_names = list(columns_y_train)
print("Accuracy:", accuracy_xgb)
print(50*'-')
# print the classification report
print("Classification Report XGBoost:")
print({0: species_names[0], 1: species_names[1], 2: species_names[2]})
print(classification_report(y_test, y_pred_xgb))

Accuracy: 0.9333333333333333
--------------------------------------------------
Classification Report XGBoost:
{0: 'species_setosa', 1: 'species_versicolor', 2: 'species_virginica'}
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.90      0.90      0.90        10

   micro avg       0.97      0.93      0.95        30
   macro avg       0.97      0.93      0.95        30
weighted avg       0.97      0.93      0.95        30
 samples avg       0.93      0.93      0.93        30



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Precision:** The precision of a class is the ratio of true positives to the sum of true positives and false positives. It measures the accuracy of positive predictions. In other words: Given we predict for example setosa, how accurate is the classifier.

**Recall:** The recall of a class is the ratio of true positives to the sum of true positives and false negatives. It measures the ability of the classifier to find all positive instances. Or in other words, did we get all the observations of a specific iris class?

**F1-score:** The F1-score is the harmonic mean of precision and recall. It provides a balance between precision and recall.

**Support:** The support is the number of actual occurrences of the class in the test set.