Instalación de dependencias

In [28]:
pip install pandas matplotlib seaborn scikit-learn openpyxl graphviz

Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import openpyxl

In [20]:
# Excel workbook and worksheet to load.
file = 'Pruebas experimentales del rotavapor RCL.xlsx'
sheet = 'Datos'

try:
    df = pd.read_excel(file, sheet_name=sheet)
except FileNotFoundError:
    print(f"Error: The file '{file}' was not found.")
    # Re-raise instead of calling exit(): inside a notebook exit() asks the
    # kernel to shut down, whereas re-raising only stops this cell and keeps
    # the traceback visible.
    raise
except Exception as e:
    print(f"An error occurred while reading the file: {e}")
    raise

# The row at position 11 supplies the column labels, the rows from position 12
# onward are the data, and the first column of the sheet is dropped.
# .copy() makes `tabla` an independent frame, so relabelling it never acts on a
# slice of `df`.
tabla = df.iloc[12:, 1:].copy()
tabla.columns = df.iloc[11, 1:]
tabla = tabla.reset_index(drop=True)

print(tabla)

11  Name  X1 X2   X3 X4    X5     X6    X7 Masa Agua Tasa de evaporación
0      1  80  3  200  8  4.26  0.052  6.54   189.814                 NaN
1      2  60  2  150  8  4.33   0.04  6.54   102.816                 NaN
2      3  60  1  200  8  4.33  0.052  6.54    78.214                 NaN
3      4  70  2  200  8  4.28  0.052  6.54   177.115                 NaN
4      5  70  1  250  8  4.28  0.076  6.54   158.054                 NaN
..   ...  .. ..  ... ..   ...    ...   ...       ...                 ...
107  108  85  2  275  8  4.25  0.082  6.54   216.333                 NaN
108  109  65  2  275  8   4.3  0.082  6.54   170.492                 NaN
109  110  75  1  175  8  4.27  0.046  6.54   186.669                 NaN
110  111  65  3  225  8   4.3  0.064  6.54   175.175                 NaN
111  112  65  2  225  6   4.3  0.064  6.56   177.439                 NaN

[112 rows x 10 columns]


In [None]:
# Column names read by the modelling functions of this notebook.
# TODO: replace the empty-string placeholders with real column names of `tabla`.
targets = [""]
features = [""]

# The modelling functions index the data with a single global `target`, which
# was never defined (only `targets` was), so calling them raised NameError.
# Default it to the first entry of `targets`.
target = targets[0]

# Árbol de decisión

In [23]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

def decisionTreeF(data):
    """Print 5-fold cross-validated scores of a decision tree classifier.

    The inputs are selected with the module-level ``features`` list and the
    label with the module-level ``target`` name; both must be defined before
    this function is called. The ``roc_auc``, ``f1`` and ``precision`` scorers
    are evaluated in that order, and for each one the per-fold scores and
    their mean are printed.

    Args:
        data: Table indexable by ``features`` and ``target`` (presumably a
            pandas DataFrame such as ``tabla`` -- confirm against the caller).

    Returns:
        None. The results are only printed.
    """
    X = data[features]
    y = data[target]

    # No fit on the full data here: cross_val_score fits its own clone of the
    # estimator on every fold, and nothing else in this function uses a
    # fitted model.
    decisionTree = DecisionTreeClassifier(random_state=42)

    # (label used in the printed text, scikit-learn scorer name)
    metrics = (
        ("AUC-ROC", "roc_auc"),
        ("f1-score", "f1"),
        ("Precisión", "precision"),
    )
    for label, scorer in metrics:
        scores = cross_val_score(decisionTree, X, y, cv=5, scoring=scorer)
        print(f"{label} en validación cruzada:", scores)
        print(f"Promedio {label}:", np.mean(scores))

    

In [26]:
import graphviz
from sklearn.tree import export_graphviz

def dibujar_arbol(data, name, deep, class_names=("No Lluvia", "Lluvia")):
    """Fit a depth-limited decision tree, print its CV scores and export it.

    The inputs are selected with the module-level ``features`` list and the
    label with the module-level ``target`` name; both must be defined before
    this function is called. The tree is fitted on all of ``data``, scored
    with 5-fold cross-validation (``roc_auc``, ``f1``, ``precision``) and
    written to ``"<name>_tree.dot"`` in Graphviz DOT format.

    Args:
        data: Table indexable by ``features`` and ``target`` (presumably a
            pandas DataFrame such as ``tabla`` -- confirm against the caller).
        name: Prefix of the output file; the file is ``f"{name}_tree.dot"``.
        deep: Value passed as ``max_depth`` to the classifier.
        class_names: Labels written into the exported tree nodes. The default
            keeps the labels that used to be hard-coded.
            NOTE(review): the default labels look like leftovers from a rain
            classification task -- confirm they match this dataset's target.

    Returns:
        None. Scores are printed and the tree is written to disk.
    """
    X = data[features]
    y = data[target]

    decisionTree = DecisionTreeClassifier(random_state=42, max_depth=deep)
    # This fit is required: the export below writes out this fitted tree.
    decisionTree.fit(X, y)

    # (label used in the printed text, scikit-learn scorer name)
    metrics = (
        ("AUC-ROC", "roc_auc"),
        ("f1-score", "f1"),
        ("Precisión", "precision"),
    )
    for label, scorer in metrics:
        scores = cross_val_score(decisionTree, X, y, cv=5, scoring=scorer)
        print(f"{label} en validación cruzada:", scores)
        print(f"Promedio {label}:", np.mean(scores))

    export_graphviz(
        decisionTree,
        out_file=f"{name}_tree.dot",
        feature_names=features,
        class_names=list(class_names),
        filled=True,
    )

# Random Forest

In [27]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve

def randomForest(data):
    """Train a random forest on a stratified split and report its results.

    The inputs are selected with the module-level ``features`` list and the
    label with the module-level ``target`` name; both must be defined before
    this function is called. The data is split with ``test_size=0.3`` and
    ``stratify=y``, a 100-tree forest with balanced class weights is fitted
    on the training part, and the test part is used to print the confusion
    matrix, the classification report and the AUC-ROC. The ROC curve and a
    bar chart of the feature importances are then shown.

    Args:
        data: Table indexable by ``features`` and ``target`` (presumably a
            pandas DataFrame such as ``tabla`` -- confirm against the caller).

    Returns:
        None. Results are printed and plotted.
    """
    X = data[features]
    y = data[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    rf = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
    rf.fit(X_train, y_train)

    y_pred = rf.predict(X_test)
    print("Matriz de Confusión:")
    print(confusion_matrix(y_test, y_pred))
    print("\nReporte de Clasificación:")
    print(classification_report(y_test, y_pred))

    # Second column of predict_proba, computed once and reused for both the
    # AUC and the ROC curve (assumes a binary target -- TODO confirm).
    y_score = rf.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_score)
    print("AUC-ROC:", auc)

    # Plot the ROC curve.
    # roc_curve returns (fpr, tpr, thresholds); unpacking it into two names
    # raised ValueError. The thresholds are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_test, y_score)
    plt.figure(figsize=(8,6))
    plt.plot(fpr, tpr, label=f'ROC curve (AUC = {auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
    plt.xlabel("Tasa de Falsos Positivos")
    plt.ylabel("Tasa de Verdaderos Positivos")
    plt.title("Curva ROC - Random Forest")
    plt.legend(loc="lower right")
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.show()

    # Rank the features by importance, highest first.
    importances = rf.feature_importances_
    feature_importance = pd.DataFrame(
        {'Feature': features, 'Importance': importances}
    ).sort_values(by='Importance', ascending=False)
    print("Importancia de Variables:")
    print(feature_importance)

    # Plot the feature importances.
    plt.figure(figsize=(8,6))
    plt.barh(feature_importance['Feature'], feature_importance['Importance'], color='skyblue')
    plt.xlabel('Importancia')
    plt.title('Importancia de Variables en el Modelo Random Forest')
    # Inverting the y axis puts the first (most important) bar at the top.
    plt.gca().invert_yaxis()
    plt.show()