# importing libraries

In [61]:
import numpy as np
import matplotlib.pyplot as plt  
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# loading dataset

In [62]:
# Read the red-wine quality table; the relative path means the CSV must sit in
# the notebook's working directory.
wine = pd.read_csv("winequality-red.csv")

# Preview the first rows to confirm the columns parsed as expected.
wine.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [63]:
# Count missing values per column; a column with a non-zero count would need
# cleaning before it can be fed to the models below.
print(wine.isna().sum())

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64


# feature selection

In [64]:
# Create the classification version of the target variable:
# 1 ("good") when quality >= 7, otherwise 0.
# The comparison is vectorized, so no Python-level loop over the rows is needed;
# the explicit int64 cast keeps the 0/1 integer dtype on every platform.
wine['goodquality'] = (wine['quality'] >= 7).astype('int64')

# Separate feature variables and target variable.
# Both the raw 'quality' score and the derived 'goodquality' label are removed
# from X so the target cannot leak into the features.
X = wine.drop(columns=['quality', 'goodquality'])
Y = wine['goodquality']

In [65]:
# See proportion of good vs bad wines.
# Checking class balance matters here: with a skewed target, accuracy alone is
# a weak metric and precision/recall need to be reported as well.
wine['goodquality'].value_counts()

0    1382
1     217
Name: goodquality, dtype: int64

In [66]:
# Display the feature matrix to confirm that 'quality' and 'goodquality' were dropped.
X

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4
...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2


In [67]:
# Print the binary target Series (1 = quality >= 7, 0 = otherwise).
print(Y)

0       0
1       0
2       0
3       0
4       0
       ..
1594    0
1595    0
1596    0
1597    0
1598    0
Name: goodquality, Length: 1599, dtype: int64


In [68]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split
# reproducible across runs. train_test_split is already imported in the imports
# cell at the top of the notebook, so it is not re-imported here.
# NOTE(review): the target is imbalanced; consider passing stratify=Y so both
# splits keep the same good/bad ratio (this would change the reported metrics).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=37)

In [69]:
# Standardize the features.
# The scaler is fitted on the training split only, so no test-set statistics
# leak into preprocessing; the learned parameters are then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [70]:
# Define base classifiers for the stacking ensemble as (name, estimator) pairs.
# The decision tree is seeded (same seed as the train/test split): an unseeded
# tree can be built differently on each run, which makes the ensemble's reported
# metrics non-reproducible under Restart & Run All.
base_classifiers = [
    ('Logistic Regression', LogisticRegression()),
    ('Support Vector Classifier', SVC()),
    ('Naïve Bayes Classifier', GaussianNB()),
    ('KNN Algorithm', KNeighborsClassifier()),
    ('Decision Tree Classifier', DecisionTreeClassifier(random_state=37))
]

In [71]:
# Define the meta-learner (Logistic Regression).
# It is passed to the stacking ensemble as its final estimator.
meta_learner = LogisticRegression()

In [72]:
# Assemble the stacking ensemble: the named base classifiers are the first
# level and the meta-learner is the final estimator on top of them.
stacking_classifier = StackingClassifier(
    estimators=base_classifiers,
    final_estimator=meta_learner,
)


In [73]:
# Train the Stacking ensemble on the standardized training features.
stacking_classifier.fit(X_train_scaled, Y_train)

In [74]:
# Make predictions for the standardized held-out test features.
y_pred_stacking = stacking_classifier.predict(X_test_scaled)

In [75]:
# Evaluate the Stacking ensemble on the held-out test split.
# Each metric function is called with the same (true labels, predictions) pair,
# in the order accuracy, precision, recall, F1.
accuracy_stacking, precision_stacking, recall_stacking, f1_stacking = (
    metric(Y_test, y_pred_stacking)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [76]:
# Report the test-set metrics, one per line, under a heading.
print(
    '\nStacking Ensemble Metrics:',
    f'Accuracy: {accuracy_stacking}',
    f'Precision: {precision_stacking}',
    f'Recall: {recall_stacking}',
    f'F1 Score: {f1_stacking}',
    sep='\n',
)


Stacking Ensemble Metrics:
Accuracy: 0.8854166666666666
Precision: 0.5952380952380952
Recall: 0.3968253968253968
F1 Score: 0.47619047619047616


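Accuracy (0.8854):
Interpretation: The Stacking Ensemble achieved an overall accuracy of 88.54% in predicting whether a wine is of high quality or not. This means that 88.54% of the wines in the test set were correctly classified. Because only about 14% of the wines are labelled high quality, accuracy alone overstates performance and should be read together with precision and recall.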

Precision (0.5952):
Interpretation: The precision of 0.5952 indicates that when the Stacking Ensemble predicted a wine to be of high quality, it was correct about 59.52% of the time. In the context of wine quality prediction, this means that approximately 59.52% of the wines predicted as high quality were indeed high quality.

Recall (0.3968):
Interpretation: The recall of 0.3968 means that the Stacking Ensemble identified 39.68% of all actual high-quality wines. In the wine quality prediction task, this means that the model captured fewer than half (roughly two in five) of the high-quality wines.

F1 Score (0.4762):
Interpretation: The F1 score of 0.4762 is the harmonic mean of precision and recall. In the context of wine quality prediction, it indicates only a moderate balance between correctly identifying high-quality wines and minimizing false positives, and it is held down mainly by the low recall.
