In [38]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, classification_report
import joblib
import os
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

In [39]:
# Step 2: Define the Class
class WineModel:
    """Train, evaluate and persist several classifiers on a wine-quality CSV.

    Intended call order: ``prepare_data()`` -> ``train_models()`` ->
    ``evaluate_models()`` -> ``save_models()``.

    Attributes set by the workflow:
        data: Raw DataFrame as loaded from the CSV.
        X, y: Feature frame and (encoded) target series.
        X_train, X_test, y_train, y_test: Stratified split; features are scaled.
        scaler: ``StandardScaler`` fitted on the training features (``None``
            until ``prepare_data`` runs).
        class_mapping: ``{original_label: encoded_label}`` for the multi-class
            target, or ``None`` in binary mode / before ``prepare_data``.
        models: ``{name: fitted estimator}``.
        predictions: ``{name: test-set predictions}``.
        results_df: Metrics table from the latest ``evaluate_models`` call.
    """

    # Column order of the metrics table returned by ``evaluate_models``.
    _RESULT_COLUMNS = ["Model", "Accuracy", "AUC", "Precision", "Recall", "F1 Score", "MCC"]

    def __init__(self, url):
        """Load the semicolon-separated dataset.

        Args:
            url: Anything ``pandas.read_csv`` accepts (URL, path or file-like).
        """
        self.data = pd.read_csv(url, sep=';')
        print("Dataset Loaded. Shape:", self.data.shape)
        self.X = None
        self.y = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.scaler = None
        self.class_mapping = None
        self.models = {}
        self.predictions = {}
        self.results_df = None

    def prepare_data(self, target_column="quality", test_size=0.2, random_state=42, binary=False):
        """Encode the target, make a stratified split and standardise features.

        Args:
            target_column: Name of the label column in ``self.data``.
            test_size: Fraction (or count) of rows held out for testing.
            random_state: Seed for the train/test split.
            binary: If True the target becomes 1 for values >= 6, else 0.
                If False the sorted distinct target values are mapped to
                0-based consecutive integers.
        """
        self.X = self.data.drop(target_column, axis=1)
        self.y = self.data[target_column]

        if binary:
            self.class_mapping = None
            self.y = (self.y >= 6).astype(int)
        else:
            # Multi-class: map to 0-based labels; kept on the instance so
            # predictions can be translated back to the original values.
            self.class_mapping = {int(k): i for i, k in enumerate(sorted(self.y.unique()))}
            self.y = self.y.map(self.class_mapping)
            print("Class mapping:", self.class_mapping)

        # Split before scaling so test rows never influence the scaler.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X,
            self.y,
            test_size=test_size,
            random_state=random_state,
            stratify=self.y,
        )

        # Fit the scaler on training data only; keep it for later inference.
        self.scaler = StandardScaler()
        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

        # A new split invalidates anything trained/evaluated on the old one.
        self.models = {}
        self.predictions = {}
        self.results_df = None

        print(f"Data split: Train={self.X_train.shape[0]}, Test={self.X_test.shape[0]}")

    def train_models(self, random_state=42):
        """Fit the six classifiers and store their test-set predictions.

        Args:
            random_state: Seed passed to the estimators that accept one
                (Decision Tree, Random Forest, XGBoost).

        Raises:
            RuntimeError: If ``prepare_data`` has not been called yet.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("Call prepare_data() before train_models().")

        # Insertion order here is the row order of the comparison table.
        estimators = {
            'Logistic Regression': LogisticRegression(max_iter=1000),
            'Decision Tree': DecisionTreeClassifier(random_state=random_state),
            'KNN': KNeighborsClassifier(n_neighbors=5),
            'Naive Bayes': GaussianNB(),
            'Random Forest': RandomForestClassifier(n_estimators=100, random_state=random_state),
            'XGBoost': xgb.XGBClassifier(eval_metric='mlogloss', random_state=random_state),
        }

        for name, estimator in estimators.items():
            estimator.fit(self.X_train, self.y_train)
            # Register only after a successful fit so self.models never
            # holds an unfitted estimator.
            self.models[name] = estimator
            self.predictions[name] = estimator.predict(self.X_test)

        print("All models trained successfully!")

    def evaluate_models(self):
        """Score every trained model on the test split.

        Precision, recall and F1 are weighted averages; AUC is one-vs-rest
        for a multi-class target and the positive-class AUC for a binary one.

        Returns:
            DataFrame with one row per model and the columns Model, Accuracy,
            AUC, Precision, Recall, F1 Score, MCC. Also stored in
            ``self.results_df``. Empty if no model has been trained.
        """
        results = []

        for name, model in self.models.items():
            y_pred = self.predictions[name]
            y_proba = model.predict_proba(self.X_test)

            acc = accuracy_score(self.y_test, y_pred)

            if y_proba.shape[1] == 2:
                # Binary target: roc_auc_score needs the 1-D positive-class
                # scores; the 2-D matrix is rejected.
                auc = roc_auc_score(self.y_test, y_proba[:, 1])
            else:
                auc = roc_auc_score(self.y_test, y_proba, multi_class='ovr')

            prec = precision_score(self.y_test, y_pred, average='weighted', zero_division=0)
            rec = recall_score(self.y_test, y_pred, average='weighted', zero_division=0)
            f1 = f1_score(self.y_test, y_pred, average='weighted', zero_division=0)
            mcc = matthews_corrcoef(self.y_test, y_pred)

            results.append([name, acc, auc, prec, rec, f1, mcc])

        self.results_df = pd.DataFrame(results, columns=self._RESULT_COLUMNS)
        return self.results_df

    def save_models(self, folder="model"):
        """Dump each trained model (and the fitted scaler) with joblib.

        Models are written as ``<folder>/<name_in_snake_case>_model.pkl``; the
        scaler, when available, as ``<folder>/scaler.pkl`` so the saved models
        can be applied to new, unscaled data.

        Args:
            folder: Output directory; created if it does not exist.
        """
        os.makedirs(folder, exist_ok=True)
        for name, model in self.models.items():
            filename = os.path.join(folder, f"{name.replace(' ', '_').lower()}_model.pkl")
            joblib.dump(model, filename)
        if self.scaler is not None:
            joblib.dump(self.scaler, os.path.join(folder, "scaler.pkl"))
        print(f"All models saved in '{folder}/' folder.")

In [40]:
# Step 3: Build the model wrapper from the UCI red-wine quality CSV
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-red.csv"
)
wine_model = WineModel(url)

Dataset Loaded. Shape: (1599, 12)


In [41]:
# Prepare data: encode the target, make a stratified train/test split and scale the features.
# binary=False keeps the multi-class target (quality values remapped to 0-based labels).
wine_model.prepare_data(binary=False)

Class mapping: {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5}
Data split: Train=1279, Test=320


In [42]:
# Fit all six classifiers on the training split and store their test-set predictions
wine_model.train_models()

All models trained successfully!


In [43]:
# Score every trained model on the test split and show the comparison table.
# `results_df` is left as the cell's last expression so the notebook renders it as a table.
results_df = wine_model.evaluate_models()
print("Model Comparison Table:")
results_df

Model Comparison Table:


Unnamed: 0,Model,Accuracy,AUC,Precision,Recall,F1 Score,MCC
0,Logistic Regression,0.590625,0.76399,0.569525,0.590625,0.567298,0.32502
1,Decision Tree,0.609375,0.658352,0.612092,0.609375,0.609477,0.398241
2,KNN,0.609375,0.698329,0.584116,0.609375,0.595887,0.373313
3,Naive Bayes,0.5625,0.683783,0.574461,0.5625,0.568067,0.329911
4,Random Forest,0.675,0.766131,0.650369,0.675,0.660332,0.476837
5,XGBoost,0.653125,0.798961,0.648027,0.653125,0.643372,0.445301


In [44]:
# Persist each trained model with joblib under the default "model/" folder
wine_model.save_models()

All models saved in 'model/' folder.
