In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential

def classify_wine_quality(df):
    """Train and evaluate a binary good/bad wine classifier.

    Rows whose ``quality`` is 7 or higher are labelled 1 (good); all other
    rows are labelled 0 (bad). Every remaining column of ``df`` is used as a
    feature. The data is split 80/20 with a fixed ``random_state``, the
    features are standardized using statistics from the training split only,
    and a small dense network is trained for 30 epochs. Test accuracy, a
    classification report and a confusion matrix are printed.

    The input frame is NOT modified, so the function can be called repeatedly
    on the same DataFrame and always sees the original quality scores.

    Args:
        df: DataFrame with a numeric ``quality`` column plus feature columns.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Build the labels as a separate Series instead of overwriting
    # df['quality']: writing back into the caller's frame would turn a second
    # call into "is 0/1 >= 7", i.e. every wine would become class 0.
    y = (df['quality'] >= 7).astype(int)
    X = df.drop(columns='quality')

    # Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize: fit on the training split only so no test statistics leak in
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Model: an explicit Input layer replaces the `input_shape=` argument on
    # the first Dense layer, which recent Keras versions warn about.
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')  # sigmoid for binary output
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Train (the last 20% of the training split is held out for validation)
    model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.2)

    # Evaluate
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Accuracy on test set: {accuracy:.4f}")

    # Predict: threshold the sigmoid output at 0.5 and flatten the (n, 1)
    # column to 1-D so it has the same shape as y_test.
    y_pred_probs = model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype(int).ravel()

    # Report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    return model


In [2]:
# Load the already-cleaned red (df1) and white (df2) wine tables.
# Nothing is cleaned or written in this cell; it only reads the CSV files.
csv_options = {"sep": ",", "quotechar": '"'}
df1 = pd.read_csv("../cleanDatasets/winequality-red_cleaned.csv", **csv_options)
df2 = pd.read_csv("../cleanDatasets/winequality-white_cleaned.csv", **csv_options)

In [4]:
# Train and evaluate the classifier on the white-wine table.
# A copy is passed so that df2 keeps its original 'quality' scores no matter
# how the function treats its argument; this keeps the cell safe to re-run.
classify_wine_quality(df2.copy())

Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7951 - loss: 0.4775 - val_accuracy: 0.8233 - val_loss: 0.3678
Epoch 2/30
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 731us/step - accuracy: 0.8178 - loss: 0.3973 - val_accuracy: 0.8265 - val_loss: 0.3595
Epoch 3/30
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 761us/step - accuracy: 0.8374 - loss: 0.3661 - val_accuracy: 0.8344 - val_loss: 0.3531
Epoch 4/30
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 724us/step - accuracy: 0.8295 - loss: 0.3826 - val_accuracy: 0.8360 - val_loss: 0.3504
Epoch 5/30
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 709us/step - accuracy: 0.8407 - loss: 0.3519 - val_accuracy: 0.8375 - val_loss: 0.3520
Epoch 6/30
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 719us/step - accuracy: 0.8332 - loss: 0.3760 - val_accuracy: 0.8360 - val_loss: 0.3506
Epoch 7/30
[1m159/159[0m [

<Sequential name=sequential_1, built=True>

In [69]:
# Restore a saved model and a saved scaler from files in the working directory.
# NOTE(review): neither 'wine_quality_model.keras' nor 'scaler.pkl' is written
# by the cells visible here, and `load_model` / `joblib` are not imported in
# the visible import cell — confirm both are provided elsewhere in the
# notebook, otherwise this cell fails on a fresh kernel.
# joblib.load unpickles its input, so only load 'scaler.pkl' from a trusted source.
wine_model = load_model('wine_quality_model.keras')
scaler = joblib.load('scaler.pkl')

In [70]:
# Draw one reproducible row of features (label column removed) from df2.
sample = (
    df2
    .drop(columns='quality')
    .sample(n=1, random_state=42)
)

In [71]:
# Apply the loaded scaler to the sampled row, then run the loaded model on it.
sample_scaled = scaler.transform(sample)
predicted_quality = wine_model.predict(sample_scaled)

# Show the unscaled input row followed by the first value of the prediction.
# NOTE(review): the message labels the value as a 1–10 quality score, which
# presumes the loaded model is a regressor rather than the sigmoid (0–1)
# classifier built by classify_wine_quality — confirm which model was saved.
print(f"\nInput data:\n{sample}")
print(f"\nPredicted wine quality (1–10 scale): {predicted_quality[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step

Input data:
     fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
149            7.2              0.32         0.47             5.1      0.044   

     free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
149                 19.0                  65.0    0.991  3.03       0.41   

     alcohol  
149     12.6  

Predicted wine quality (1–10 scale): 6.87
