In [2]:
from clean import clean_dataset 
from exploration import explore
from model import model
from tensorflow.keras.models import load_model
import joblib 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import joblib 

In [3]:
# Clean and save multiple datasets
df1=clean_dataset("data/winequality-red.csv")
df2=clean_dataset("data/winequality-white.csv")

First rows of the dataset:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8     

In [4]:
X = df2.drop('quality', axis=1)
y = df2['quality']

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalise: each column has mean 0 and standard deviation 1, so that the model can learn more efficiently and stably
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#sequential is a type of network where each layer goes directly after the previous one (ideal for simple architectures)
model = Sequential([
    #Dense creates a dense layer, it means its fully conected (each neuron is connected with all of the previous layer)
    #Dense(number of neurons, each neuron uses the activation function ReLU (Rectified Linear Unit), which introduces
    #  no-linearity, helpful for complex patterns)
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # just one output
])

# configures how model is gonna be trained
model.compile(optimizer='adam',# adam is a very famous algorithm of weights optimization
    loss='mean_squared_error',# This is the loss function(the measure the model tries to minimize),
                            # MSE isIdeal for continuos regression problems like this 
    metrics=['mae'])#set metrics used for evaluating the training, mean absolute error is easy to interpret


#train
history = model.fit(X_train, y_train, epochs=100, batch_size=16, validation_split=0.2)


#evaluate
loss, mae = model.evaluate(X_test, y_test)
print(f"MAE:{mae:.2f} This tells you, on average, the model is predicting the wine quality within {mae:.2f} points on a 1–10 scale") 
model.save('wine_quality_model.keras')
joblib.dump(scaler, 'scaler.pkl')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 16.5990 - mae: 3.5417 - val_loss: 2.8731 - val_mae: 1.2695
Epoch 2/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 707us/step - loss: 2.3982 - mae: 1.2031 - val_loss: 1.8099 - val_mae: 1.0275
Epoch 3/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 700us/step - loss: 1.6425 - mae: 1.0054 - val_loss: 1.3225 - val_mae: 0.8863
Epoch 4/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712us/step - loss: 1.1733 - mae: 0.8438 - val_loss: 1.0186 - val_mae: 0.7688
Epoch 5/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712us/step - loss: 0.8682 - mae: 0.7222 - val_loss: 0.7595 - val_mae: 0.6794
Epoch 6/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 724us/step - loss: 0.7277 - mae: 0.6476 - val_loss: 0.6289 - val_mae: 0.6150
Epoch 7/100
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 763us

['scaler.pkl']

In [5]:
wine_model = load_model('wine_quality_model.keras')
scaler = joblib.load('scaler.pkl')

In [6]:
sample = df2.drop('quality', axis=1).sample(1, random_state=42)

In [7]:
sample_scaled = scaler.transform(sample)
predicted_quality = wine_model.predict(sample_scaled)

print(f"\nInput data:\n{sample}")
print(f"\nPredicted wine quality (1–10 scale): {predicted_quality[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step

Input data:
     fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
176            7.2              0.32         0.47             5.1      0.044   

     free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
176                 19.0                  65.0    0.991  3.03       0.41   

     alcohol  
176     12.6  

Predicted wine quality (1–10 scale): 5.87
