In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.metrics import (
    classification_report, confusion_matrix,
    accuracy_score, roc_curve,
    roc_auc_score, matthews_corrcoef, log_loss
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.regularizers import l2

def _build_wine_classifier(n_features):
    """Build and compile the dense sigmoid classifier for ``n_features`` inputs."""
    model = Sequential([
        Dense(128, activation='relu', kernel_regularizer=l2(0.001),
              input_shape=(n_features,)),
        BatchNormalization(), Dropout(0.3),

        Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(), Dropout(0.3),

        Dense(32, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(), Dropout(0.2),

        Dense(1, activation='sigmoid')
    ])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy', 'Precision', 'Recall', 'AUC']
    )
    return model


def _plot_training_history(history):
    """Plot train/validation loss and accuracy per epoch from a Keras history dict."""
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

    loss_ax.plot(history['loss'], label='Train Loss')
    loss_ax.plot(history['val_loss'], label='Val Loss')
    loss_ax.set_title('Loss over epochs')
    loss_ax.legend()

    acc_ax.plot(history['accuracy'], label='Train Acc')
    acc_ax.plot(history['val_accuracy'], label='Val Acc')
    acc_ax.set_title('Accuracy over epochs')
    acc_ax.legend()

    fig.tight_layout()
    plt.show()


def classify_wine_quality_advanced(df, quality_threshold=7):
    """Train and evaluate a dense binary classifier on a wine-quality frame.

    Rows whose ``quality`` is at least ``quality_threshold`` are labelled 1.0
    and all others 0.0; every other column of ``df`` is used as a feature.
    The data is split 80/20 (stratified) into a fitting set and a test set,
    and the fitting set is split 80/20 (stratified) again into training and
    validation parts. The validation part drives early stopping, learning-rate
    reduction and the choice of decision threshold, so the test set is used
    only for the reported metrics.

    The caller's ``df`` is not modified, so the function can be re-run on the
    same frame and the original quality scores stay available.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``quality`` column plus the feature columns.
    quality_threshold : float, default 7
        Minimum quality score that counts as the positive class.

    Returns
    -------
    dict
        ``model`` (trained Keras model), ``scaler`` (StandardScaler fitted on
        the training part), ``optimal_threshold`` (validation threshold that
        maximises TPR - FPR), ``auc``, ``mcc`` and ``log_loss`` (test-set
        metrics), and ``history`` (per-epoch Keras training history dict).

    Raises
    ------
    KeyError
        If ``df`` has no ``quality`` column.
    ValueError
        Propagated from ``train_test_split`` when a class has too few rows
        for the stratified splits.
    """
    # 1. Binarise the target into a new Series; never write back into `df`.
    labels = (df['quality'] >= quality_threshold).astype(np.float32)
    features = df.drop(columns='quality')

    # 2. Stratified splits: test set first, then a validation set carved out
    #    of the remaining rows.
    X_fit, X_test, y_fit, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42, stratify=labels
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_fit, y_fit, test_size=0.2, random_state=42, stratify=y_fit
    )

    # 3. Scaling: statistics come from the training rows only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # 4. Balanced class weights, keyed by the class label itself rather than
    #    by its position in the array.
    classes = np.unique(y_train)
    balanced = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=classes,
        y=y_train
    )
    class_weights = {int(label): float(w) for label, w in zip(classes, balanced)}

    # 5. Build the network.
    model = _build_wine_classifier(X_train.shape[1])

    # 6. Callbacks, both monitoring the validation loss.
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)

    # 7. Train with class weights against the explicit validation set.
    history = model.fit(
        X_train, y_train.to_numpy(),
        validation_data=(X_val, y_val.to_numpy()),
        epochs=200,
        batch_size=32,
        callbacks=[early_stop, reduce_lr],
        verbose=1,
        class_weight=class_weights
    )

    # 8. Decision threshold: maximise TPR - FPR on the validation set, so the
    #    test metrics below are not tuned on the data they are measured on.
    val_probs = model.predict(X_val, verbose=0).ravel()
    fpr, tpr, thresholds = roc_curve(y_val, val_probs)
    optimal_threshold = thresholds[np.argmax(tpr - fpr)]

    # 9. Test-set predictions at the chosen threshold.
    y_pred_probs = model.predict(X_test).ravel()
    y_pred_opt = (y_pred_probs >= optimal_threshold).astype(int)

    # 10. Threshold-free (AUC, log-loss) and thresholded (MCC) test metrics.
    auc_value = roc_auc_score(y_test, y_pred_probs)
    mcc_value = matthews_corrcoef(y_test, y_pred_opt)
    ll_value = log_loss(y_test, y_pred_probs)

    # 11. Print the results.
    print(f"\n🔍 Optimal threshold: {optimal_threshold:.4f}")
    print(f"✅ AUC-ROC    : {auc_value:.4f}")
    print(f"✅ MCC        : {mcc_value:.4f}")
    print(f"✅ Log-Loss   : {ll_value:.4f}")

    # 12. Classification report and confusion matrix.
    print("\n📊 Classification Report:\n",
          classification_report(y_test, y_pred_opt))
    print("📌 Confusion Matrix:\n",
          confusion_matrix(y_test, y_pred_opt))

    # 13. Training curves.
    _plot_training_history(history.history)

    # 14. Return everything a caller needs to reuse or inspect the run.
    return {
        "model": model,
        "scaler": scaler,
        "optimal_threshold": optimal_threshold,
        "auc": auc_value,
        "mcc": mcc_value,
        "log_loss": ll_value,
        "history": history.history
    }


In [17]:
# Load the red (df1) and white (df2) wine-quality CSVs from ../cleanDatasets.
df1 = pd.read_csv(
    "../cleanDatasets/winequality-red_cleaned.csv",
    sep=",",
    quotechar='"',
)
df2 = pd.read_csv(
    "../cleanDatasets/winequality-white_cleaned.csv",
    sep=",",
    quotechar='"',
)

In [18]:
# Run the classifier on the red-wine frame; as the cell's last expression,
# the returned value is what gets displayed below.
# NOTE(review): the only function defined in the cells shown here is
# `classify_wine_quality_advanced` — confirm `classify_wine_quality` is still
# defined elsewhere, or that this call should use the new name.
classify_wine_quality(df1)
# Disabled white-wine run (`model_wine` is not defined in the cells shown here):
#model_wine(df2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



✅ Accuracy on test set (from model.evaluate): 0.9081
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
✅ Accuracy (manual): 0.9081

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.96      0.95       238
           1       0.66      0.56      0.60        34

    accuracy                           0.91       272
   macro avg       0.80      0.76      0.78       272
weighted avg       0.90      0.91      0.90       272

Confusion Matrix:
 [[228  10]
 [ 15  19]]


{'model': <Sequential name=sequential_8, built=True>,
 'accuracy': 0.9080882352941176,
 'classification_report': {'0': {'precision': 0.9382716049382716,
   'recall': 0.957983193277311,
   'f1-score': 0.9480249480249481,
   'support': 238.0},
  '1': {'precision': 0.6551724137931034,
   'recall': 0.5588235294117647,
   'f1-score': 0.6031746031746031,
   'support': 34.0},
  'accuracy': 0.9080882352941176,
  'macro avg': {'precision': 0.7967220093656875,
   'recall': 0.7584033613445378,
   'f1-score': 0.7755997755997757,
   'support': 272.0},
  'weighted avg': {'precision': 0.9028842060451255,
   'recall': 0.9080882352941176,
   'f1-score': 0.904918654918655,
   'support': 272.0}},
 'confusion_matrix': array([[228,  10],
        [ 15,  19]])}

In [69]:
# Restore the saved Keras model and its scaler from the working directory.
# NOTE(review): no cell shown here writes either file — confirm both come
# from the same training run so the scaling matches what the model expects.
wine_model = load_model('wine_quality_model.keras')
# joblib.load unpickles the file, which can execute arbitrary code; only load
# a scaler.pkl that this project produced itself.
scaler = joblib.load('scaler.pkl')

In [70]:
# Draw one reproducible feature row from the white-wine frame, target column removed.
sample = (
    df2
    .drop(columns='quality')
    .sample(n=1, random_state=42)
)

In [71]:
# Scale the sampled row with the loaded scaler, then run it through the loaded model.
sample_scaled = scaler.transform(sample)
predicted_quality = wine_model.predict(sample_scaled)

# Show the unscaled input row and the first value of the model's output.
# NOTE(review): the label presents that value as a quality score, which
# presumes 'wine_quality_model.keras' is a regressor rather than the sigmoid
# classifier built by classify_wine_quality_advanced — confirm which model
# was saved.
print(f"\nInput data:\n{sample}")
print(f"\nPredicted wine quality (1–10 scale): {predicted_quality[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step

Input data:
     fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
149            7.2              0.32         0.47             5.1      0.044   

     free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
149                 19.0                  65.0    0.991  3.03       0.41   

     alcohol  
149     12.6  

Predicted wine quality (1–10 scale): 6.87
