In [None]:

# 📦 Importaciones generales
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import (
    accuracy_score, f1_score, classification_report,
    ConfusionMatrixDisplay, mean_absolute_error,
    mean_squared_error, r2_score
)


## 🌳 Árbol de Decisión - Clasificación

In [None]:

# Train the decision-tree classifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Placeholder data: X and y are expected to exist already in the notebook
# namespace (swap in the real features / target for your environment).
# 30% of the rows are held out for testing; the fixed seed keeps the split repeatable.
(
    X_tree_train,
    X_tree_test,
    y_tree_train,
    y_tree_test,
) = train_test_split(X, y, test_size=0.3, random_state=42)

# fit() hands back the fitted estimator, so construction and training are chained.
modelo_tree = DecisionTreeClassifier(random_state=42).fit(X_tree_train, y_tree_train)
y_tree_pred = modelo_tree.predict(X_tree_test)


In [None]:

# Global metrics for the decision tree, drawn as one group of bars
metricas_tree = {
    "Modelo": "Árbol de Decisión",
    "Accuracy": accuracy_score(y_tree_test, y_tree_pred),
    # Both F1 variants come from the same predictions; only the averaging differs.
    **{
        etiqueta: f1_score(y_tree_test, y_tree_pred, average=promedio)
        for etiqueta, promedio in (("F1 Macro", "macro"), ("F1 Weighted", "weighted"))
    },
}
df_tree = pd.DataFrame([metricas_tree]).set_index("Modelo")

# Keep a handle on the axes returned by pandas instead of going through pyplot state.
ax_metricas_tree = df_tree.plot(kind="bar", figsize=(8, 5), ylim=(0, 1), rot=0)
ax_metricas_tree.set_title("Métricas globales - Árbol de Decisión")
ax_metricas_tree.grid(axis="y")
ax_metricas_tree.figure.tight_layout()
plt.show()


In [None]:

# Confusion matrix and per-class metrics for the decision tree.
#
# The matrix is built from the predictions already stored in y_tree_pred, so the
# model is not asked to predict the test set a second time, and the matrix and the
# report below are guaranteed to describe the same set of predictions.
# NOTE(review): display_labels are presumably applied positionally to the sorted
# class labels — confirm that this order matches the target encoding.
ConfusionMatrixDisplay.from_predictions(
    y_tree_test, y_tree_pred,
    display_labels=["Solo Daños", "Con Heridos", "Con Muertos"],
    cmap="Blues", values_format="d"
)
plt.title("Matriz de Confusión - Árbol de Decisión")
plt.show()

report_tree = classification_report(y_tree_test, y_tree_pred, output_dict=True)
# Transpose so each report entry becomes a row, then drop the last three rows
# (presumably the accuracy / macro avg / weighted avg aggregates — verify if the
# report layout changes) so only per-class rows are plotted.
df_report_tree = pd.DataFrame(report_tree).transpose().iloc[:-3]
df_report_tree[["precision", "recall", "f1-score"]].plot(kind="bar", figsize=(10, 6), ylim=(0,1))
plt.title("Métricas por clase - Árbol de Decisión")
plt.grid(axis='y')
plt.tight_layout()
plt.show()


In [None]:

# Importancia de variables
plt.figure(figsize=(10, 6))
plt.barh(X.columns, modelo_tree.feature_importances_)
plt.title("Importancia de Variables - Árbol de Decisión")
plt.xlabel("Importancia")
plt.tight_layout()
plt.show()


## 🌲 Random Forest - Clasificación

In [None]:

from sklearn.ensemble import RandomForestClassifier

# Train the random forest with the class_weight enhancement.
# It is fitted on the decision tree's training split so that both classifiers
# are compared on exactly the same data.
modelo_rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    class_weight="balanced",
    random_state=42,
).fit(X_tree_train, y_tree_train)

y_rf_pred = modelo_rf.predict(X_tree_test)


In [None]:

# Métricas globales
metricas_rf = {
    "Modelo": "Random Forest",
    "Accuracy": accuracy_score(y_tree_test, y_rf_pred),
    "F1 Macro": f1_score(y_tree_test, y_rf_pred, average="macro"),
    "F1 Weighted": f1_score(y_tree_test, y_rf_pred, average="weighted")
}
df_rf = pd.DataFrame([metricas_rf]).set_index("Modelo")
df_rf.plot(kind="bar", figsize=(8,5), ylim=(0,1))
plt.title("Métricas globales - Random Forest")
plt.xticks(rotation=0)
plt.grid(axis='y')
plt.tight_layout()
plt.show()


In [None]:

# Matriz de confusión y métricas por clase
ConfusionMatrixDisplay.from_estimator(
    modelo_rf, X_tree_test, y_tree_test,
    display_labels=["Solo Daños", "Con Heridos", "Con Muertos"],
    cmap="Blues", values_format="d"
)
plt.title("Matriz de Confusión - Random Forest")
plt.show()

report_rf = classification_report(y_tree_test, y_rf_pred, output_dict=True)
df_report_rf = pd.DataFrame(report_rf).transpose().iloc[:-3]
df_report_rf[["precision", "recall", "f1-score"]].plot(kind="bar", figsize=(10, 6), ylim=(0,1))
plt.title("Métricas por clase - Random Forest")
plt.grid(axis='y')
plt.tight_layout()
plt.show()


In [None]:

# Importancia de variables
plt.figure(figsize=(10, 6))
plt.barh(X.columns, modelo_rf.feature_importances_)
plt.title("Importancia de Variables - Random Forest")
plt.xlabel("Importancia")
plt.tight_layout()
plt.show()


## 📈 Árbol de Regresión

In [None]:

from sklearn.tree import DecisionTreeRegressor

# Placeholder data (replace with X_reg, y_reg).
# NOTE(review): as written this splits the same X and y used by the
# classification cells, with the same test fraction and seed.
(
    X_reg_train,
    X_reg_test,
    y_reg_train,
    y_reg_test,
) = train_test_split(X, y, test_size=0.3, random_state=42)

# fit() hands back the fitted estimator, so construction and training are chained.
modelo_reg = DecisionTreeRegressor(random_state=42).fit(X_reg_train, y_reg_train)
y_reg_pred = modelo_reg.predict(X_reg_test)


In [None]:

# Métricas de regresión
mae = mean_absolute_error(y_reg_test, y_reg_pred)
mse = mean_squared_error(y_reg_test, y_reg_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_reg_test, y_reg_pred)

print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R2 Score: {r2:.2f}")


In [None]:

# Gráfico de residuos
residuos = y_reg_test - y_reg_pred
plt.figure(figsize=(8,5))
sns.scatterplot(x=y_reg_pred, y=residuos)
plt.axhline(0, color='red', linestyle='--')
plt.title("Gráfico de residuos - Árbol de Regresión")
plt.xlabel("Predicción")
plt.ylabel("Error")
plt.grid()
plt.tight_layout()
plt.show()
