In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd
import numpy as np

def classify_wine_quality(df):
    # Convert quality to binary class: 1 = good (>=7), 0 = bad (<7)
    df['quality'] = df['quality'].apply(lambda q: 1 if q >= 7 else 0)

    X = df.drop('quality', axis=1)
    y = df['quality']

    # Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Model
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')  # sigmoid for binary output
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Train
    model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.2, verbose=0)

    # Evaluate
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"\n✅ Accuracy on test set (from model.evaluate): {accuracy:.4f}")

    # Predict
    y_pred_probs = model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype(int)

    # Accuracy manually
    manual_accuracy = accuracy_score(y_test, y_pred)
    print(f"✅ Accuracy (manual): {manual_accuracy:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

    return {
        "model": model,
        "accuracy": manual_accuracy,
        "classification_report": classification_report(y_test, y_pred, output_dict=True),
        "confusion_matrix": confusion_matrix(y_test, y_pred)
    }


In [3]:
# Load the already-cleaned red (df1) and white (df2) wine datasets from disk
df1, df2 = (
    pd.read_csv(csv_path, sep=",", quotechar='"')
    for csv_path in (
        "../cleanDatasets/winequality-red_cleaned.csv",
        "../cleanDatasets/winequality-white_cleaned.csv",
    )
)

In [4]:
# Train and evaluate the classifier on the white-wine data. The returned dict is
# bound to a name so the trained model and metrics stay available to later cells;
# the bare expression on the last line keeps the rich display of the result.
white_wine_results = classify_wine_quality(df2)
white_wine_results

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



✅ Accuracy on test set (from model.evaluate): 0.8058
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
✅ Accuracy (manual): 0.8058

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.94      0.88       618
           1       0.61      0.33      0.43       175

    accuracy                           0.81       793
   macro avg       0.72      0.63      0.65       793
weighted avg       0.78      0.81      0.78       793

Confusion Matrix:
 [[582  36]
 [118  57]]


{'model': <Sequential name=sequential, built=True>,
 'accuracy': 0.8058007566204287,
 'classification_report': {'0': {'precision': 0.8314285714285714,
   'recall': 0.941747572815534,
   'f1-score': 0.8831562974203339,
   'support': 618.0},
  '1': {'precision': 0.6129032258064516,
   'recall': 0.32571428571428573,
   'f1-score': 0.4253731343283582,
   'support': 175.0},
  'accuracy': 0.8058007566204287,
  'macro avg': {'precision': 0.7221658986175115,
   'recall': 0.6337309292649098,
   'f1-score': 0.654264715874346,
   'support': 793.0},
  'weighted avg': {'precision': 0.7832041887250771,
   'recall': 0.8058007566204287,
   'f1-score': 0.7821322702562787,
   'support': 793.0}},
 'confusion_matrix': array([[582,  36],
        [118,  57]], dtype=int64)}

In [69]:
# Restore a previously saved Keras model and its fitted scaler from the working directory.
# NOTE(review): neither `load_model` nor `joblib` is imported in the cells visible here, and
# no visible cell writes 'wine_quality_model.keras' or 'scaler.pkl' — confirm an earlier cell
# provides them, otherwise this cell fails on Restart Kernel -> Run All.
wine_model = load_model('wine_quality_model.keras')
# joblib.load unpickles the file, which can execute arbitrary code: only load trusted artifacts.
scaler = joblib.load('scaler.pkl')

In [70]:
# Draw one reproducible row of features (target column removed) to feed to the model
sample = df2.drop(columns='quality').sample(n=1, random_state=42)

In [71]:
# Apply the loaded scaler to the sampled row, then run the loaded model on the scaled features.
sample_scaled = scaler.transform(sample)
predicted_quality = wine_model.predict(sample_scaled)

print(f"\nInput data:\n{sample}")
# predicted_quality is indexed as [row][column]; with a single sampled row this prints one value.
# NOTE(review): the label says "1–10 scale", which implies the loaded model regresses the raw
# quality score — confirm, since classify_wine_quality in this notebook predicts a binary
# good/bad label instead.
print(f"\nPredicted wine quality (1–10 scale): {predicted_quality[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step

Input data:
     fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
149            7.2              0.32         0.47             5.1      0.044   

     free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
149                 19.0                  65.0    0.991  3.03       0.41   

     alcohol  
149     12.6  

Predicted wine quality (1–10 scale): 6.87
