# Decision Tree Classifier - Árbol de Decisión

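En este notebook se entrenan y evalúan dos clasificadores de árbol de decisión de scikit-learn (criterios de entropía y Gini) sobre los conjuntos de entrenamiento, pruebas y validación.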

In [None]:
# Install the packages listed in ../requirements.txt.
# %pip (rather than !pip) targets the environment of the running kernel.
%pip install -r '../requirements.txt'

In [None]:
# Importación de librerías
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score

In [None]:
# Read the train, test and validation CSV files into DataFrames.
# The paths are consumed in order, so each name receives its own file.
dataset_entrenamiento, dataset_pruebas, dataset_validacion = (
    pd.read_csv(ruta_csv)
    for ruta_csv in (
        '../5-dataset-final/dist/dataset-train.csv',
        '../5-dataset-final/dist/dataset-test.csv',
        '../5-dataset-final/dist/dataset-validation.csv',
    )
)

In [None]:
# Drop every row that contains at least one missing value.
# axis=0 / how='any' are the dropna defaults, spelled out here for the reader.
dataset_entrenamiento = dataset_entrenamiento.dropna(axis=0, how='any')
dataset_pruebas = dataset_pruebas.dropna(axis=0, how='any')
dataset_validacion = dataset_validacion.dropna(axis=0, how='any')

In [None]:
# Show how many rows each DataFrame currently holds, one line per dataset.
print(
    f'Dataset de entrenamiento: {len(dataset_entrenamiento)} registros',
    f'Dataset de pruebas: {len(dataset_pruebas)} registros',
    f'Dataset de validación: {len(dataset_validacion)} registros',
    sep='\n',
)

In [None]:
# Split each DataFrame into predictors (X_*: every column except 'sentiment')
# and target (y_*: the 'sentiment' column).
# Training
X_train, y_train = (
    dataset_entrenamiento.drop('sentiment', axis='columns'),
    dataset_entrenamiento['sentiment'],
)
# Test
X_test, y_test = (
    dataset_pruebas.drop('sentiment', axis='columns'),
    dataset_pruebas['sentiment'],
)
# Validation
X_val, y_val = (
    dataset_validacion.drop('sentiment', axis='columns'),
    dataset_validacion['sentiment'],
)

In [None]:
# Build two unfitted decision trees that differ only in the split criterion.
# random_state is fixed to 0 for both so repeated runs use the same seed.
modelo_entropy, modelo_gini = (
    DecisionTreeClassifier(criterion=criterio, random_state=0)
    for criterio in ('entropy', 'gini')
)

In [None]:
# Fit both trees on the training predictors and labels.
# The second call is deliberately left as the cell's last expression, so the
# notebook displays the fitted Gini estimator exactly as before.
modelo_entropy.fit(X=X_train, y=y_train)
modelo_gini.fit(X=X_train, y=y_train)

In [None]:
# Predict on the training data with both fitted trees.
y_pred_entropy = modelo_entropy.predict(X_train)
y_pred_gini = modelo_gini.predict(X_train)

# Metrics
# The same report (classification report, confusion matrix, accuracy, F1,
# precision, recall) is printed for each criterion, so it is produced by one
# loop instead of two copy-pasted sequences. Printed output is unchanged.
# NOTE(review): f1_score / precision_score / recall_score run with their
# defaults (average='binary', pos_label=1); this presumes `sentiment` is a
# two-class target with 1 as the positive label — confirm against the dataset.
print('Entrenamiento')
for nombre_criterio, y_pred_criterio in (
    ('Entropy', y_pred_entropy),
    ('Gini', y_pred_gini),
):
    print(nombre_criterio)
    print(classification_report(y_train, y_pred_criterio))
    print('Matriz de confusión')
    print(confusion_matrix(y_train, y_pred_criterio))
    print(f'Accuracy: {accuracy_score(y_train, y_pred_criterio)}')
    print('F1 Score', f1_score(y_train, y_pred_criterio))
    print('Precision', precision_score(y_train, y_pred_criterio))
    print('Recall', recall_score(y_train, y_pred_criterio))


In [None]:
# Predict on the test data with both fitted trees
# (rebinds y_pred_entropy / y_pred_gini).
y_pred_entropy = modelo_entropy.predict(X_test)
y_pred_gini = modelo_gini.predict(X_test)

# Metrics
# The same report (classification report, confusion matrix, accuracy, F1,
# precision, recall) is printed for each criterion, so it is produced by one
# loop instead of two copy-pasted sequences. Printed output is unchanged.
# NOTE(review): f1_score / precision_score / recall_score run with their
# defaults (average='binary', pos_label=1); this presumes `sentiment` is a
# two-class target with 1 as the positive label — confirm against the dataset.
print('Pruebas')
for nombre_criterio, y_pred_criterio in (
    ('Entropy', y_pred_entropy),
    ('Gini', y_pred_gini),
):
    print(nombre_criterio)
    print(classification_report(y_test, y_pred_criterio))
    print('Matriz de confusión')
    print(confusion_matrix(y_test, y_pred_criterio))
    print(f'Accuracy: {accuracy_score(y_test, y_pred_criterio)}')
    print('F1 Score', f1_score(y_test, y_pred_criterio))
    print('Precision', precision_score(y_test, y_pred_criterio))
    print('Recall', recall_score(y_test, y_pred_criterio))


In [None]:
# Predict on the validation data with both fitted trees
# (rebinds y_pred_entropy / y_pred_gini, which stay available afterwards).
y_pred_entropy = modelo_entropy.predict(X_val)
y_pred_gini = modelo_gini.predict(X_val)

# Metrics
# The same report (classification report, confusion matrix, accuracy, F1,
# precision, recall) is printed for each criterion, so it is produced by one
# loop instead of two copy-pasted sequences. Printed output is unchanged.
# NOTE(review): f1_score / precision_score / recall_score run with their
# defaults (average='binary', pos_label=1); this presumes `sentiment` is a
# two-class target with 1 as the positive label — confirm against the dataset.
print('Validación')
for nombre_criterio, y_pred_criterio in (
    ('Entropy', y_pred_entropy),
    ('Gini', y_pred_gini),
):
    print(nombre_criterio)
    print(classification_report(y_val, y_pred_criterio))
    print('Matriz de confusión')
    print(confusion_matrix(y_val, y_pred_criterio))
    print(f'Accuracy: {accuracy_score(y_val, y_pred_criterio)}')
    print('F1 Score', f1_score(y_val, y_pred_criterio))
    print('Precision', precision_score(y_val, y_pred_criterio))
    print('Recall', recall_score(y_val, y_pred_criterio))