In [14]:
import pandas as pd

# Step 1: Load the red wine dataset
data = pd.read_csv('winequality-red.csv', delimiter=';')


In [15]:
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [16]:

X = data.drop('quality', axis=1)
y = data['quality']


y = (y >= 7).astype(int)


In [17]:
X.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


In [18]:
y.head()

Unnamed: 0,quality
0,0
1,0
2,0
3,0
4,0


In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [20]:
from sklearn.tree import DecisionTreeClassifier
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

In [21]:

y_pred = dt_model.predict(X_test)
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Decision Tree Test Accuracy: {accuracy:.4f}")


Decision Tree Test Accuracy: 87.1875


In [22]:
train_pred = dt_model.predict(X_train)
train_accuracy = accuracy_score(y_train, train_pred)*100
print(f"Training Accuracy: {train_accuracy:.4f}")


Training Accuracy: 100.0000


In [23]:
from sklearn.ensemble import BaggingClassifier
bagging_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(max_depth=5),  # small trees
    n_estimators=100,
    random_state=42
)


bagging_model.fit(X_train, y_train)


y_pred_bagging = bagging_model.predict(X_test)


bagging_accuracy = accuracy_score(y_test, y_pred_bagging)
print(f"Bagging Test Accuracy: {bagging_accuracy:.4f}")

# Training accuracy
bagging_train_pred = bagging_model.predict(X_train)
bagging_train_accuracy = accuracy_score(y_train, bagging_train_pred)
print(f"Bagging Train Accuracy: {bagging_train_accuracy:.4f}")


Bagging Test Accuracy: 0.8656
Bagging Train Accuracy: 0.9398


In [24]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score


boosting_model = GradientBoostingClassifier(n_estimators=100, random_state=42)


boosting_model.fit(X_train, y_train)


y_pred_boosting = boosting_model.predict(X_test)

#  test accury
boosting_accuracy = accuracy_score(y_test, y_pred_boosting)
print(f"Boosting Test Accuracy: {boosting_accuracy:.4f}")

#training set predict
boosting_train_pred = boosting_model.predict(X_train)
boosting_train_accuracy = accuracy_score(y_train, boosting_train_pred)
print(f"Boosting Train Accuracy: {boosting_train_accuracy:.4f}")


Boosting Test Accuracy: 0.8781
Boosting Train Accuracy: 0.9601


In [25]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# OPTIONAL: scale features for better performance (especially KNN and Logistic Regression)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Base learners
base_learners = [
    ('dt', DecisionTreeClassifier(random_state=42)),
    ('knn', KNeighborsClassifier()),
    ('lr', LogisticRegression(max_iter=1000))
]

# Meta learner
meta_learner = RandomForestClassifier(random_state=42)

# Stacking model
stacking_model = StackingClassifier(
    estimators=base_learners,
    final_estimator=meta_learner,
    passthrough=True,
    cv=5,
    n_jobs=-1
)

# Fit the model
stacking_model.fit(X_train_scaled, y_train)

# Predictions
stack_test_pred = stacking_model.predict(X_test_scaled)
stack_train_pred = stacking_model.predict(X_train_scaled)

# Accuracy
stack_test_acc = accuracy_score(y_test, stack_test_pred)
stack_train_acc = accuracy_score(y_train, stack_train_pred)

print(f"Stacking Test Accuracy: {stack_test_acc:.4f}")
print(f"Stacking Train Accuracy: {stack_train_acc:.4f}")


Stacking Test Accuracy: 0.8875
Stacking Train Accuracy: 0.9679
