# Decision Tree

In [203]:
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import pandas as pd
from sklearn.model_selection import (
    cross_val_score,
    KFold,
    cross_val_predict,
    GridSearchCV,
    cross_validate
)
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    f1_score,
    recall_score,
    classification_report,
)
import matplotlib.pyplot as plt
import seaborn as sns

In [204]:
def train_test_validation_split(
    x: pd.DataFrame,
    y: pd.Series,
    train_size: float = 0.7,
    test_size: float = 0.15,
    validation_size: float = 0.15,
    random_state: int = 42,
    stratify: bool = False,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.Series]:
    """Split data into fixed train, test and validation subsets.

    The data is first divided into a training part and a hold-out part, and
    the hold-out part is then divided again into the test and validation
    subsets. Both steps use the same ``random_state``.

    Args:
        x: Feature table.
        y: Target values aligned with ``x``.
        train_size: Fraction of the samples assigned to the training subset.
        test_size: Fraction of the samples assigned to the test subset.
        validation_size: Fraction of the samples assigned to the validation
            subset.
        random_state: Seed forwarded to both ``train_test_split`` calls.
        stratify: When true, both splits are stratified on the target values;
            when false (the default) no stratification is requested.

    Returns:
        ``(x_train, x_test, x_validation, y_train, y_test, y_validation)``.

    Raises:
        ValueError: If any fraction is not strictly between 0 and 1, or if
            the three fractions do not add up to 1.
    """
    fractions = {
        "train_size": train_size,
        "test_size": test_size,
        "validation_size": validation_size,
    }
    # Validate up front so a bad fraction is reported with the argument name
    # used by this helper instead of surfacing from inside the splitter.
    for name, fraction in fractions.items():
        if not 0.0 < fraction < 1.0:
            raise ValueError(
                f"{name} must be strictly between 0 and 1, got {fraction!r}"
            )
    total = sum(fractions.values())
    # Fractions summing to less than 1 would leave samples out of all three
    # subsets without any warning; a small tolerance absorbs float rounding.
    if abs(total - 1.0) > 1e-9:
        raise ValueError(
            "train_size, test_size and validation_size must add up to 1, "
            f"got {total!r}"
        )

    holdout_size = test_size + validation_size

    # First cut: training subset versus everything that is held out.
    x_train, x_holdout, y_train, y_holdout = train_test_split(
        x,
        y,
        train_size=train_size,
        test_size=holdout_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )
    # Second cut: the hold-out part is divided in proportion to the requested
    # test and validation fractions, rescaled to the hold-out size.
    x_test, x_validation, y_test, y_validation = train_test_split(
        x_holdout,
        y_holdout,
        train_size=test_size / holdout_size,
        test_size=validation_size / holdout_size,
        random_state=random_state,
        stratify=y_holdout if stratify else None,
    )
    return x_train, x_test, x_validation, y_train, y_test, y_validation

In [205]:
# Read the feature-vector table from the CSV one directory above the notebook.
df: pd.DataFrame = pd.read_csv("../Vectores_Caracteristicos_Mariposas.csv")

# Target: the "Etiqueta" column cast to int. Features: every other column.
y: pd.Series = df["Etiqueta"].astype(int)
x: pd.DataFrame = df.drop(columns="Etiqueta")

# Hold-out partition produced with the helper's default fractions.
x_train, x_test, x_validation, y_train, y_test, y_validation = (
    train_test_validation_split(x, y)
)

# 5-Fold Cross Validation

In [206]:
# Entropy-criterion tree limited to depth 10, with a fixed seed for repeatability.
dtc = DecisionTreeClassifier(random_state=42, max_depth=10, criterion="entropy")

# 5-fold cross-validation over the full x / y, collecting macro-averaged
# precision, recall and F1 for every fold (run on 5 parallel jobs).
scores: dict = cross_validate(
    estimator=dtc,
    X=x,
    y=y,
    scoring=["precision_macro", "recall_macro", "f1_macro"],
    cv=5,
    n_jobs=5,
)

# One row per fold, one column per timing / metric entry returned above.
a = pd.DataFrame(data=scores)
print(a)

   fit_time  score_time  test_precision_macro  test_recall_macro  \
0  0.439038    0.008841              0.324597           0.304844   
1  0.459321    0.008915              0.382698           0.358748   
2  0.454387    0.008968              0.318687           0.308957   
3  0.461971    0.009209              0.399177           0.365146   
4  0.442946    0.008650              0.371769           0.366518   

   test_f1_macro  
0       0.309625  
1       0.362150  
2       0.311002  
3       0.365168  
4       0.362950  
