In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# loading the dataset
data = pd.read_csv("C:\\Users\\prana\\OneDrive\\Desktop\\SEM-2\\IMI_PROJ\\winequality-white.csv")
# setting the data to low, medium and high based on their quality
# 0 for low quality, 1 for medium quality and 2 for high quality
bins = [3, 5, 7, 10]
labels = [0, 1, 2]
# 3 to 5 quality are considered as 0 and similarly the rremaining
data['quality'] = pd.cut(data['quality'], bins=bins, labels=labels, include_lowest=True)

In [3]:
# Seperating the data from the dataset into features and target
X = data.drop('quality', axis=1)
y = data['quality']
# Balancing the data using smote as the dataset is imbalanced
smote = SMOTE(random_state=42)
X_smote, y_smote = smote.fit_resample(X, y)
# Splitting the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.2, random_state=42)
# Standardizing the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# models rf and xg boost
rf_model = RandomForestClassifier(n_estimators=1000, max_depth=25, random_state=42)
xgb_model = XGBClassifier(n_estimators=1000, max_depth=10, objective='softmax', num_class=3, random_state=42)
# defining th stacking classifier
stacked_model = StackingClassifier(estimators=[('rf', rf_model),('xgb', xgb_model)],final_estimator=LogisticRegression(max_iter=1000, random_state=42),cv=2)
# training
stacked_model.fit(X_train_scaled, y_train)
# prediction
stacked_predictions = stacked_model.predict(X_test_scaled)


In [10]:
# Printing classification report,confusion matrix and normalized confusion matrix
print("-------------------------------------------------------")
print("Classification Report ")
print(classification_report(y_test, stacked_predictions))
print("-------------------------------------------------------")
print(" Confusion Matrix ")
cm = confusion_matrix(y_test, stacked_predictions)
print(cm)
print("-----------------------------------------")
print("Normalized Confusion Matrix ")
cm_n = confusion_matrix(y_test, stacked_predictions, normalize='true')
print(cm_n)
print("-----------------------------------------")

-------------------------------------------------------
Classification Report 
              precision    recall  f1-score   support

           0       0.88      0.84      0.86       630
           1       0.84      0.87      0.85       612
           2       0.98      0.99      0.99       605

    accuracy                           0.90      1847
   macro avg       0.90      0.90      0.90      1847
weighted avg       0.90      0.90      0.90      1847

-------------------------------------------------------
 Confusion Matrix 
[[530  99   1]
 [ 69 530  13]
 [  0   4 601]]
-----------------------------------------
Normalized Confusion Matrix 
[[0.84126984 0.15714286 0.0015873 ]
 [0.1127451  0.86601307 0.02124183]
 [0.         0.00661157 0.99338843]]
-----------------------------------------


: 