In [10]:
# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, f1_score
from xgboost import XGBClassifier
import joblib




In [9]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.2-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.1/150.0 MB 2.0 MB/s eta 0:01:16
   ---------------------------------------- 0.2/150.0 MB 2.1 MB/s eta 0:01:12
   ---------------------------------------- 0.3/150.0 MB 2.4 MB/s eta 0:01:03
   ---------------------------------------- 0.4/150.0 MB 2.3 MB/s eta 0:01:06
   ---------------------------------------- 0.5/150.0 MB 2.3 MB/s eta 0:01:05
   ---------------------------------------- 0.6/150.0 MB 2.2 MB/s eta 0:01:08
   ---------------------------------------- 0.7/150.0 MB 2.3 MB/s eta 0:01:05
   ---------------------------------------- 0.8/150.0 MB 2.2 MB/s eta 0:01:08
   ---------------------------------------- 0.9/150.0 MB 2.2 MB/s eta 0:01:07
   ---------------------------------------- 1.1/150.0 MB 2.4 MB/s eta 0:01:03
 

In [11]:
# Load Data
df = pd.read_csv(r"C:/Users/mahen/Downloads/archive/WineQT.csv")
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,0
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5,1
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5,2
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6,3
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6,1592
1139,6.8,0.620,0.08,1.9,0.068,28.0,38.0,0.99651,3.42,0.82,9.5,6,1593
1140,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5,1594
1141,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6,1595


In [12]:
# Relabel Target
def relabel(q):
    if q <= 4:
        return 0
    elif q <= 6:
        return 1
    else:
        return 2

df['quality'] = df['quality'].apply(relabel)



In [13]:
# Split Data
X = df.drop('quality', axis=1)
y = df['quality']
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()
xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')

# Voting Ensemble
voting = VotingClassifier(estimators=[
    ('rf', rf), ('gb', gb), ('xgb', xgb)
], voting='soft')

voting.fit(X_train_scaled, y_train)



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [14]:

# Evaluation
preds = voting.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, preds))
print("F1 Score:", f1_score(y_test, preds, average='macro'))
print(classification_report(y_test, preds))


Accuracy: 0.9039301310043668
F1 Score: 0.5674994135585268
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.91      0.98      0.94       189
           2       0.85      0.69      0.76        32

    accuracy                           0.90       229
   macro avg       0.59      0.56      0.57       229
weighted avg       0.87      0.90      0.89       229



  labels to calculate F-beta score for.
  labels to calculate F-beta score for.
  labels to calculate F-beta score for.


In [16]:
# Save model & scaler
joblib.dump(voting, "C:/Users/mahen/Downloads/archive/final_model.pkl")
joblib.dump(scaler, "C:/Users/mahen/Downloads/archive/scaler.pkl")

['C:/Users/mahen/Downloads/archive/scaler.pkl']

In [7]:
!pip install scikit-learn==1.4.2

Collecting scikit-learn==1.4.2
  Downloading scikit_learn-1.4.2-cp311-cp311-win_amd64.whl.metadata (11 kB)
Downloading scikit_learn-1.4.2-cp311-cp311-win_amd64.whl (10.6 MB)
   ---------------------------------------- 0.0/10.6 MB ? eta -:--:--
   -- ------------------------------------- 0.6/10.6 MB 13.6 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/10.6 MB 13.3 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/10.6 MB 13.3 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/10.6 MB 13.3 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/10.6 MB 13.3 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/10.6 MB 13.3 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/10.6 MB 13.3 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/10.6 MB 13.3 MB/s eta 0:00:01
   ----- ---------------------------------- 1.4/10.6 MB 3.2 MB/s eta 0:00:03
   ------- -------------------------------- 2.1/10.6 MB