<a href="https://colab.research.google.com/github/Aiadevop/training_model_wine_quality/blob/main/DS_NL_Clasificaci%C3%B3n_MODELO_XGBOOST_Predicci%C3%B3n_calidad_del_vino.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import kagglehub
import matplotlib.pyplot as plt
import seaborn as sns
url="https://raw.githubusercontent.com/Aiadevop/training_model_wine_quality/refs/heads/main/data/df_cl_final.csv"
df_cl = pd.read_csv(url)
# Divido los datos para test y entrenamiento.
from sklearn.model_selection import train_test_split

# Dividir el DataFrame completo. Test_size 20,80 y random_state es la semilla
df_train, df_test = train_test_split(
    df_cl, test_size=0.2, random_state=42, stratify=df_cl['quality']
)

# Luego extraer X e y de cada DataFrame
X_train = df_train.drop('quality', axis=1)
y_train = df_train['quality']

X_test = df_test.drop('quality', axis=1)
y_test = df_test['quality']

In [2]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Crear modelos con configuración básica
xgb = XGBClassifier(eval_metric='logloss', random_state=42)

# Entrenar y predecir - XGBoost
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
y_proba_xgb = xgb.predict_proba(X_test)[:,1]

# Evaluar
acc_xgb = accuracy_score(y_test, y_pred_xgb)
roc_xgb = roc_auc_score(y_test, y_proba_xgb)


# Imprimir resultados
print("📈 RESULTADOS BOOSTING 📈\n")
print(f"XGBoost      - Accuracy: {acc_xgb:.3f}, ROC-AUC: {roc_xgb:.3f}")


📈 RESULTADOS BOOSTING 📈

XGBoost      - Accuracy: 0.825, ROC-AUC: 0.896


In [3]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score

# Modelo base
xgb = XGBClassifier(eval_metric='logloss',  random_state=42)

# Espacio de búsqueda
param_dist = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [3, 5, 7, 10, 15],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.1, 0.3, 0.5],
    'reg_alpha': [0, 0.01, 0.1],
    'reg_lambda': [1, 1.5, 2.0]
}

# Búsqueda aleatoria
random_search_xgb = RandomizedSearchCV(
    xgb,
    param_distributions=param_dist,
    n_iter=30,
    scoring='roc_auc',
    cv=5,
    verbose=1,
    n_jobs=-1,
    random_state=42
)

# Entrenar
random_search_xgb.fit(X_train, y_train)

# Mejor modelo y evaluación
best_xgb = random_search_xgb.best_estimator_
y_pred_xgb = best_xgb.predict(X_test)
y_proba_xgb = best_xgb.predict_proba(X_test)[:, 1]

# Métricas
acc_xgb = accuracy_score(y_test, y_pred_xgb)
roc_xgb = roc_auc_score(y_test, y_proba_xgb)

# Resultados
print("\n🔧 Mejor configuración de XGBoost:")
print(random_search_xgb.best_params_)

print(f"\n✅ Resultados XGBoost optimizado:")
print(f"Accuracy: {acc_xgb:.3f}")
print(f"ROC-AUC:  {roc_xgb:.3f}")


Fitting 5 folds for each of 30 candidates, totalling 150 fits

🔧 Mejor configuración de XGBoost:
{'subsample': 1.0, 'reg_lambda': 1.5, 'reg_alpha': 0.1, 'n_estimators': 500, 'max_depth': 7, 'learning_rate': 0.05, 'gamma': 0, 'colsample_bytree': 0.6}

✅ Resultados XGBoost optimizado:
Accuracy: 0.831
ROC-AUC:  0.895


In [4]:
# Probar métodos de ensamblaje para ver si mejora
from sklearn.ensemble import VotingClassifier, StackingClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.svm import SVC

# 🧠 Modelos base
log_clf = LogisticRegression(max_iter=1000, random_state=42)
rf_clf = RandomForestClassifier(n_estimators=300, random_state=42)
xgb_clf = XGBClassifier(
    subsample=1.0,
    reg_lambda=1.5,
    reg_alpha=0.01,
    n_estimators=500,
    max_depth=10,
    learning_rate=0.1,
    gamma=0.1,
    colsample_bytree=1.0,
    random_state=42,
    eval_metric='logloss'
)

### 1️⃣ Voting Classifier (Soft)
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rf_clf), ('xgb', xgb_clf)],
    voting='soft'
)
voting_clf.fit(X_train, y_train)
y_pred_voting = voting_clf.predict(X_test)
y_proba_voting = voting_clf.predict_proba(X_test)[:, 1]
acc_voting = accuracy_score(y_test, y_pred_voting)
roc_voting = roc_auc_score(y_test, y_proba_voting)

### 2️⃣ Stacking Classifier
stack_clf = StackingClassifier(
    estimators=[('lr', log_clf), ('rf', rf_clf), ('xgb', xgb_clf)],
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5
)
stack_clf.fit(X_train, y_train)
y_pred_stack = stack_clf.predict(X_test)
y_proba_stack = stack_clf.predict_proba(X_test)[:, 1]
acc_stack = accuracy_score(y_test, y_pred_stack)
roc_stack = roc_auc_score(y_test, y_proba_stack)

### 3️⃣ Bagging Classifier (con LogisticRegression como ejemplo)
bagging_clf = BaggingClassifier(
    estimator=LogisticRegression(max_iter=1000),
    n_estimators=50,
    random_state=42
)
bagging_clf.fit(X_train, y_train)
y_pred_bag = bagging_clf.predict(X_test)
y_proba_bag = bagging_clf.predict_proba(X_test)[:, 1]
acc_bag = accuracy_score(y_test, y_pred_bag)
roc_bag = roc_auc_score(y_test, y_proba_bag)

# 🧾 Mostrar resultados
print("\n📊 RESULTADOS DE LOS MÉTODOS DE ENSAMBLE 📊")
print(f"VotingClassifier - Accuracy: {acc_voting:.3f}, ROC-AUC: {roc_voting:.3f}")
print(f"StackingClassifier - Accuracy: {acc_stack:.3f}, ROC-AUC: {roc_stack:.3f}")
print(f"BaggingClassifier - Accuracy: {acc_bag:.3f}, ROC-AUC: {roc_bag:.3f}")


📊 RESULTADOS DE LOS MÉTODOS DE ENSAMBLE 📊
VotingClassifier - Accuracy: 0.809, ROC-AUC: 0.900
StackingClassifier - Accuracy: 0.806, ROC-AUC: 0.904
BaggingClassifier - Accuracy: 0.738, ROC-AUC: 0.824


In [5]:
# No mejora me quedo con XGBoost optimizado

In [6]:
import joblib

# Guardar el mejor modelo como archivo .pkl
joblib.dump(best_xgb, 'xgb_model_optimizado.pkl')


['xgb_model_optimizado.pkl']

In [7]:
from google.colab import files

# Descargar el modelo guardado a tu PC
# files.download('xgb_model_optimizado.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>