In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from math import sqrt
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM
import warnings
warnings.filterwarnings("ignore")

In [23]:
data = pd.read_csv("wine quality.csv")

In [24]:
data = data.drop (['Unnamed: 0'],axis=1)

In [25]:
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.270,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.450000,8.8,6
1,6.3,0.300,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.490000,9.5,6
2,8.1,0.280,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.440000,10.1,6
3,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.400000,9.9,6
4,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.400000,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
6492,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.580000,10.5,5
6493,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.531215,11.2,6
6494,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.750000,11.0,6
6495,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.710000,10.2,5


In [11]:
data.dtypes

fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object

In [12]:
data.shape

(6497, 12)

In [13]:
data.isnull().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

MODEL TRAINING - CNN MODEL

In [26]:
X = data[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 
          'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 
          'pH', 'sulphates', 'alcohol']]
y = data['quality']

In [27]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [29]:
cnn_model = Sequential()

In [30]:
cnn_model.add(Conv1D(64, 3, activation='relu', input_shape=(X_train.shape[1], 1)))
cnn_model.add(MaxPooling1D(2))
cnn_model.add(Flatten())
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dense(1, activation='linear')) 

In [31]:
cnn_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [32]:
X_train_cnn = np.expand_dims(X_train, axis=2)
X_test_cnn = np.expand_dims(X_test, axis=2)

In [33]:
cnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 10.9533 - mae: 2.6226 - val_loss: 1.2954 - val_mae: 0.8871
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.3185 - mae: 0.8757 - val_loss: 0.7705 - val_mae: 0.6901
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.8658 - mae: 0.7108 - val_loss: 0.5720 - val_mae: 0.5960
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.6500 - mae: 0.6281 - val_loss: 0.5105 - val_mae: 0.5649
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.5875 - mae: 0.6018 - val_loss: 0.4848 - val_mae: 0.5551
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.5827 - mae: 0.5920 - val_loss: 0.4777 - val_mae: 0.5521
Epoch 7/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step -

<keras.src.callbacks.history.History at 0x22dde142630>

In [34]:
cnn_predictions = cnn_model.predict(X_test_cnn)

[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [35]:
cnn_mae = mean_absolute_error(y_test, cnn_predictions)
cnn_mse = mean_squared_error(y_test, cnn_predictions)

print(f"CNN Model - MAE: {cnn_mae}, MSE: {cnn_mse}")

CNN Model - MAE: 0.5439106225967407, MSE: 0.45717379450798035


RNN MODEL

In [36]:
rnn_model = Sequential()

In [37]:
rnn_model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], 1)))
rnn_model.add(Dense(64, activation='relu'))
rnn_model.add(Dense(1, activation='linear'))

In [38]:
rnn_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [39]:
rnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - loss: 25.6831 - mae: 4.3563 - val_loss: 2.3544 - val_mae: 1.1711
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.5897 - mae: 0.9563 - val_loss: 0.8967 - val_mae: 0.7078
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.8563 - mae: 0.7116 - val_loss: 0.7013 - val_mae: 0.6474
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.7432 - mae: 0.6713 - val_loss: 0.6450 - val_mae: 0.6298
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.6857 - mae: 0.6445 - val_loss: 0.5835 - val_mae: 0.6000
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.6529 - mae: 0.6339 - val_loss: 0.5759 - val_mae: 0.5994
Epoch 7/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step -

<keras.src.callbacks.history.History at 0x22de6581e20>

In [40]:
rnn_predictions = rnn_model.predict(X_test_cnn)

[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


In [41]:
rnn_mae = mean_absolute_error(y_test, cnn_predictions)
rnn_mse = mean_squared_error(y_test, cnn_predictions)

print(f"RNN Model - MAE: {rnn_mae}, MSE: {rnn_mse}")

RNN Model - MAE: 0.5439106225967407, MSE: 0.45717379450798035


SVM MODEL

In [42]:
svm_model = SVR(kernel='rbf')
svm_model.fit(X_train, y_train)

In [43]:
svm_predictions = svm_model.predict(X_test)

In [44]:
svm_mae = mean_absolute_error(y_test, svm_predictions)
svm_mse = mean_squared_error(y_test, svm_predictions)

print(f"SVM Model - MAE: {svm_mae}, MSE: {svm_mse}")

SVM Model - MAE: 0.4900248510143713, MSE: 0.41363560771603036


The SVM model appears to perform better based on both MAE and MSE, as it has lower values for both metrics compared to the CNN and RNN models. Therefore, in this case, SVM is the better model for predicting wine quality.

In [45]:
joblib.dump(svm_model, 'svm_model.pkl') 
 
print("Model saved successfully.")

Model saved successfully.


In [46]:
svm_model_loaded = joblib.load('svm_model.pkl')

print("Model loaded successfully.")

Model loaded successfully.
