In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib

In [2]:
data = pd.read_csv("data_procesada.csv")
data

Unnamed: 0,Uso_CPU,Uso_Memoria,Numero_Hilos,Tiempo_Ejecucion,Numero_Errores,Tipo_Proceso_Aplicación,Tipo_Proceso_Servicio,Tipo_Proceso_Sistema,Estado
0,37.454012,59.515562,16,8.184879,3,1.0,0.0,0.0,0
1,95.071431,36.471714,18,76.195256,8,1.0,0.0,0.0,0
2,73.199394,0.537562,21,46.991138,3,1.0,0.0,0.0,0
3,59.865848,56.108773,28,87.884325,4,1.0,0.0,0.0,0
4,15.601864,89.657041,39,4.622599,1,0.0,0.0,1.0,0
...,...,...,...,...,...,...,...,...,...
999995,88.808083,46.057947,29,27.258890,4,0.0,1.0,0.0,0
999996,43.731876,90.516047,36,5.368654,7,0.0,0.0,1.0,0
999997,41.807198,68.311082,9,63.019433,4,0.0,0.0,1.0,0
999998,42.867126,33.865907,48,4.543244,8,0.0,1.0,0.0,0


In [3]:
X = data.drop("Estado", axis = 1)
y = data["Estado"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [4]:
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
columnas_x = scaler.get_feature_names_out()
joblib.dump(scaler, "scaler.joblib")

X_train = pd.DataFrame(X_train, columns = columnas_x)
X_test = pd.DataFrame(X_test, columns = columnas_x)

smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)



In [5]:
models = {
    'Logistic Regression': LogisticRegression(),
    'LinearSVC': LinearSVC(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier()
}

param_grid = {
    'Logistic Regression': {
        'C': [0.1, 1]
    },
    'LinearSVC': {
        'C': [0.1, 1],
    },
    'Decision Tree': {
        'max_depth': [5, 10]
    },
    'Random Forest': {
        'n_estimators': [10, 20]
    }
}

best_models = {}
best_model_overall = None
best_model_name = ''
best_model_score = -1

for model_name, model in models.items():
    grid_search = GridSearchCV(model, param_grid[model_name], cv=3, scoring='accuracy', verbose=1)
    grid_search.fit(X_train_resampled, y_train_resampled)
    
    best_models[model_name] = grid_search.best_estimator_

    y_pred = best_models[model_name].predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_precision = precision_score(y_test, y_pred, average='binary')
    test_recall = recall_score(y_test, y_pred, average='binary')

    if test_accuracy > best_model_score:
        best_model_score = test_accuracy
        best_model_overall = best_models[model_name]
        best_model_name = model_name

print("Mejores modelos con sus métricas en el conjunto de test:\n")
for model_name, model in best_models.items():
    y_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_precision = precision_score(y_test, y_pred, average='binary')
    test_recall = recall_score(y_test, y_pred, average='binary')
    print(f"{model_name} - Hiperparámetros: {model.get_params()}")
    print(f"Precisión: {test_precision}")
    print(f"Recall: {test_recall}")
    print(f"Exactitud (accuracy): {test_accuracy}\n")

print(f"El mejor modelo es {best_model_name} con una precisión de {best_model_score} en el conjunto de test.")
joblib.dump(best_model_overall, 'best_model.joblib')
print("Modelo guardado exitosamente como 'best_model.joblib'")

Fitting 3 folds for each of 2 candidates, totalling 6 fits
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Mejores modelos con sus métricas en el conjunto de test:

Logistic Regression - Hiperparámetros: {'C': 0.1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
Precisión: 0.38910556848349775
Recall: 0.9256198347107438
Exactitud (accuracy): 0.902035

LinearSVC - Hiperparámetros: {'C': 0.1, 'class_weight': None, 'dual': 'auto', 'fit_intercept': True, 'intercept_scaling': 1, 'loss': 'squared_hinge', 'max_iter': 1000, 'multi_class': 'ovr', 'penalty': 'l2', 'random_state': None, 'tol': 0.0001, 'verbose': 0}
Precisión: 0.38268317721031697
Recal