In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures
import warnings
warnings.filterwarnings("ignore")
np.random.seed(42)


In [28]:
df= pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",sep=";")

In [29]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [30]:
df.shape

(4898, 12)

In [31]:
y=df.pop('quality')

In [32]:
y.head()

0    6
1    6
2    6
3    6
4    6
Name: quality, dtype: int64

In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4898 entries, 0 to 4897
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         4898 non-null   float64
 1   volatile acidity      4898 non-null   float64
 2   citric acid           4898 non-null   float64
 3   residual sugar        4898 non-null   float64
 4   chlorides             4898 non-null   float64
 5   free sulfur dioxide   4898 non-null   float64
 6   total sulfur dioxide  4898 non-null   float64
 7   density               4898 non-null   float64
 8   pH                    4898 non-null   float64
 9   sulphates             4898 non-null   float64
 10  alcohol               4898 non-null   float64
dtypes: float64(11)
memory usage: 421.0 KB


In [34]:
df.shape

(4898, 11)

In [35]:
for i in df.columns:
    df[i]=df[i].fillna(np.mean(df[i]))

In [36]:
train,test, y_train, y_test=train_test_split(df,y,test_size=0.2,random_state=101)

In [37]:
lr=LogisticRegression()
lr.fit(train,y_train)
y_pred=lr.predict(test)
print("Accuracy Score Baseline:", accuracy_score(y_test,y_pred))

Accuracy Score Baseline: 0.47346938775510206


In [38]:
def fit_predict(train,test, y_train, y_test, scaler, max_depth, criterion="entropy",max_features=1,min_samples_split=4):
    train_scaled=scaler.fit_transform(train)
    test_scaled= scaler.transform(test)
    dt=DecisionTreeClassifier(criterion=criterion, max_depth=max_depth,random_state=42,max_features=max_features,
                             min_samples_split=min_samples_split)
    dt.fit(train_scaled,y_train)
    y_pred=dt.predict(test_scaled)
    print(accuracy_score(y_test,y_pred))

In [39]:
dt=DecisionTreeClassifier()
dt.fit(train,y_train)
y_pred=dt.predict(test)
print(accuracy_score(y_test,y_pred))

0.5989795918367347


# Max Depth Tuning

In [11]:
for i in range(1,25):
    print('Accuracy Score using max depth=',i,end=': ')
    fit_predict(train,test,y_train,y_test,StandardScaler(),i)

Accuracy Score using max depth= 1: 0.4418367346938776
Accuracy Score using max depth= 2: 0.4418367346938776
Accuracy Score using max depth= 3: 0.4326530612244898
Accuracy Score using max depth= 4: 0.4489795918367347
Accuracy Score using max depth= 5: 0.45918367346938777
Accuracy Score using max depth= 6: 0.4704081632653061
Accuracy Score using max depth= 7: 0.4806122448979592
Accuracy Score using max depth= 8: 0.47653061224489796
Accuracy Score using max depth= 9: 0.48673469387755103
Accuracy Score using max depth= 10: 0.4928571428571429
Accuracy Score using max depth= 11: 0.4857142857142857
Accuracy Score using max depth= 12: 0.5346938775510204
Accuracy Score using max depth= 13: 0.5346938775510204
Accuracy Score using max depth= 14: 0.5469387755102041
Accuracy Score using max depth= 15: 0.5448979591836735
Accuracy Score using max depth= 16: 0.5428571428571428
Accuracy Score using max depth= 17: 0.5418367346938775
Accuracy Score using max depth= 18: 0.5540816326530612
Accuracy Score u

# Max Features Tuning

In [12]:
for i in np.arange(0.1, 1.0, 0.1):
    print('Accuracy Score using Max Features:',i, end=': ')
    fit_predict(train,test,y_train,y_test,StandardScaler(),max_depth=19,max_features=i)

Accuracy Score using Max Features: 0.1: 0.5561224489795918
Accuracy Score using Max Features: 0.2: 0.573469387755102
Accuracy Score using Max Features: 0.30000000000000004: 0.5857142857142857
Accuracy Score using Max Features: 0.4: 0.5887755102040816
Accuracy Score using Max Features: 0.5: 0.5867346938775511
Accuracy Score using Max Features: 0.6: 0.5581632653061225
Accuracy Score using Max Features: 0.7000000000000001: 0.5581632653061225
Accuracy Score using Max Features: 0.8: 0.5857142857142857
Accuracy Score using Max Features: 0.9: 0.5887755102040816


# Min Samples Split Tuning

In [13]:
for i in range(2,10):
    print('Accuracy Score using min samples split=',i,end=': ')
    fit_predict(train,test,y_train,y_test, StandardScaler(),19,max_features=0.9,min_samples_split=i)

Accuracy Score using min samples split= 2: 0.6030612244897959
Accuracy Score using min samples split= 3: 0.576530612244898
Accuracy Score using min samples split= 4: 0.5887755102040816
Accuracy Score using min samples split= 5: 0.5836734693877551
Accuracy Score using min samples split= 6: 0.5714285714285714
Accuracy Score using min samples split= 7: 0.5612244897959183
Accuracy Score using min samples split= 8: 0.5438775510204081
Accuracy Score using min samples split= 9: 0.560204081632653


# Criterion Tuning

In [14]:
for i in ['gini','entropy']:
    print('Accuracy Score using Criterion=',i, end =': ')
    fit_predict(train,test,y_train,y_test, StandardScaler(), 19, max_features=0.9, min_samples_split=2,criterion =i)

Accuracy Score using Criterion= gini: 0.5877551020408164
Accuracy Score using Criterion= entropy: 0.6030612244897959


In [15]:
def create_poly(train,test,degree):
    poly=PolynomialFeatures(degree=degree)
    train_poly = poly.fit_transform(train)
    test_poly = poly.fit_transform(test)
    return train_poly,test_poly

In [16]:
for degree in [1,2,3,4,5]:
    train_poly,test_poly= create_poly(train,test,degree)
    print('polynominal degree:',degree)
    fit_predict(train_poly, test_poly,y_train,y_test, StandardScaler(),19, max_features=0.9, min_samples_split=2, criterion='entropy')
    print(10*'-')
    
train_poly,test_poly= create_poly(train,test,2)

polynominal degree: 1
0.5857142857142857
----------
polynominal degree: 2
0.5836734693877551
----------
polynominal degree: 3
0.5653061224489796
----------
polynominal degree: 4
0.5938775510204082
----------
polynominal degree: 5
0.5979591836734693
----------


In [17]:
def feat_eng(df):
    df['eng1'] = df['fixed acidity'] * df['pH']
    df['eng2']= df['total sulfur dioxide'] / df['free sulfur dioxide']
    df['eng3']= df['sulphates']/df['chlorides']
    df['eng4']= df['chlorides']/df['sulphates']
    return df

train= feat_eng(train)
test=feat_eng(test)

print('Additional feature Engineering:')

fit_predict(train,test,y_train,y_test, StandardScaler(),19,max_features=0.9,min_samples_split=2, criterion='entropy')

train_poly,test_poly=create_poly(train,test,2)

fit_predict(train_poly,test_poly,y_train,y_test, StandardScaler(),19,max_features=0.9,min_samples_split=2, criterion='entropy')

Additional feature Engineering:
0.5826530612244898
0.6051020408163266


In [18]:
train1=feat_eng(df)
train1.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'eng1', 'eng2', 'eng3', 'eng4'],
      dtype='object')

In [19]:
train.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'eng1', 'eng2', 'eng3', 'eng4'],
      dtype='object')

# RANDOM FOREST

In [20]:
from sklearn.ensemble import RandomForestClassifier

In [21]:
rf=RandomForestClassifier(criterion='gini')

In [22]:
rf.fit(train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [23]:
pred_rf=rf.predict(test)

In [24]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test,pred_rf))

0.6846938775510204


In [25]:
from sklearn.model_selection import GridSearchCV

In [26]:
params= {'n_estimators':[200,500,700],'max_depth':[10,15,18,20],
        'min_samples_leaf':[3,5,7]}

In [27]:
gs=GridSearchCV(rf,params,verbose=3)

In [28]:
gs.fit(train,y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.608, total=   1.6s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s


[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.602, total=   1.6s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.1s remaining:    0.0s


[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.625, total=   1.6s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.617, total=   1.5s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.619, total=   1.6s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=0.616, total=   3.7s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=0.602, total=   3.7s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=0.635, total=   3.7s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=

[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.647, total=   8.0s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.628, total=   7.3s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.662, total=   6.1s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.646, total=   6.0s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.653, total=   6.0s
[CV] max_depth=15, min_samples_leaf=5, n_estimators=200 ..............
[CV]  max_depth=15, min_samples_leaf=5, n_estimators=200, score=0.630, total=   1.7s
[CV] max_depth=15, min_samples_leaf=5, n_estimators=200 ..............
[CV]  max_depth=15, min_samples_leaf=5, n_estimators=200, score=

[CV]  max_depth=18, min_samples_leaf=5, n_estimators=200, score=0.632, total=   1.7s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=200 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=200, score=0.627, total=   1.7s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.633, total=   4.2s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.608, total=   4.2s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.649, total=   4.3s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.630, total=   4.3s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=

[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.605, total=   5.9s
[CV] max_depth=20, min_samples_leaf=5, n_estimators=700 ..............
[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.649, total=   6.1s
[CV] max_depth=20, min_samples_leaf=5, n_estimators=700 ..............
[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.630, total=   6.1s
[CV] max_depth=20, min_samples_leaf=5, n_estimators=700 ..............
[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.635, total=   5.8s
[CV] max_depth=20, min_samples_leaf=7, n_estimators=200 ..............
[CV]  max_depth=20, min_samples_leaf=7, n_estimators=200, score=0.619, total=   1.6s
[CV] max_depth=20, min_samples_leaf=7, n_estimators=200 ..............
[CV]  max_depth=20, min_samples_leaf=7, n_estimators=200, score=0.591, total=   1.5s
[CV] max_depth=20, min_samples_leaf=7, n_estimators=200 ..............
[CV]  max_depth=20, min_samples_leaf=7, n_estimators=200, score=

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed: 11.5min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              ra

In [30]:
from sklearn.model_selection import cross_val_score

In [32]:
cross_val_accuracy=cross_val_score(rf,df,y,scoring='accuracy',cv=10)

In [33]:
cross_val_accuracy

array([0.51632653, 0.53877551, 0.48979592, 0.49183673, 0.52040816,
       0.59387755, 0.49795918, 0.54285714, 0.56850716, 0.52556237])

In [34]:
from sklearn.metrics import confusion_matrix,classification_report

In [35]:
print(confusion_matrix(y_test,pred_rf))

[[  0   0   3   2   0   0]
 [  0   9  19  13   0   0]
 [  0   2 207  95   2   0]
 [  0   0  56 353  23   1]
 [  0   0   4  67  87   0]
 [  0   0   0  15   7  15]]


In [36]:
print(classification_report(y_test,pred_rf))

              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.82      0.22      0.35        41
           5       0.72      0.68      0.70       306
           6       0.65      0.82      0.72       433
           7       0.73      0.55      0.63       158
           8       0.94      0.41      0.57        37

    accuracy                           0.68       980
   macro avg       0.64      0.44      0.49       980
weighted avg       0.70      0.68      0.67       980



In [29]:
gs.best_params_

{'max_depth': 20, 'min_samples_leaf': 3, 'n_estimators': 700}

In [38]:
gs.best_estimator_

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=20, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=700,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

rf1 same as above

In [47]:
rf1= RandomForestClassifier(bootstrap=True,class_weight=None, criterion='gini',max_depth=18,max_features='auto',
                          max_leaf_nodes=None,min_impurity_decrease=0.0, min_impurity_split=None,min_samples_leaf=3,
                          min_samples_split=2,min_weight_fraction_leaf=0.0, n_estimators=200,n_jobs=None,
                          oob_score=True, random_state=101,verbose=0,warm_start=False)

In [None]:
gs.fit(train,y_train)  #not needed

In [48]:
rf1.fit(train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=18, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=True, random_state=101, verbose=0,
                       warm_start=False)

In [49]:
rf1.oob_score_

0.6613067891781521

In [50]:
pred_rf1=rf1.predict(test)

In [51]:
print(accuracy_score(y_test,pred_rf1))

0.6551020408163265


In [52]:
rf1.feature_importances_

array([0.04534775, 0.08304531, 0.06023127, 0.06590562, 0.05468137,
       0.06331117, 0.06638535, 0.08442805, 0.06125381, 0.0494871 ,
       0.11065323, 0.05978584, 0.07110379, 0.06098079, 0.06339955])

In [54]:
sorted(list(zip(rf1.feature_importances_,df.columns)),reverse= True)

[(0.11065323395750853, 'alcohol'),
 (0.0844280469937049, 'density'),
 (0.08304530930186114, 'volatile acidity'),
 (0.06638534621531496, 'total sulfur dioxide'),
 (0.06590562044991818, 'residual sugar'),
 (0.06331117319896117, 'free sulfur dioxide'),
 (0.06125381152623091, 'pH'),
 (0.06023127339784735, 'citric acid'),
 (0.0546813703083246, 'chlorides'),
 (0.04948709828745739, 'sulphates'),
 (0.045347745048974854, 'fixed acidity')]

In [59]:
sorted(list(zip(rf1.feature_importances_,train1.columns)),reverse= True)

[(0.11065323395750853, 'alcohol'),
 (0.0844280469937049, 'density'),
 (0.08304530930186114, 'volatile acidity'),
 (0.07110378788318274, 'eng2'),
 (0.06638534621531496, 'total sulfur dioxide'),
 (0.06590562044991818, 'residual sugar'),
 (0.06339955376246868, 'eng4'),
 (0.06331117319896117, 'free sulfur dioxide'),
 (0.06125381152623091, 'pH'),
 (0.06098078811594788, 'eng3'),
 (0.06023127339784735, 'citric acid'),
 (0.05978584155229692, 'eng1'),
 (0.0546813703083246, 'chlorides'),
 (0.04948709828745739, 'sulphates'),
 (0.045347745048974854, 'fixed acidity')]

In [56]:
df.corr()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
fixed acidity,1.0,-0.022697,0.289181,0.089021,0.023086,-0.049396,0.09107,0.265331,-0.425858,-0.017143,-0.120881
volatile acidity,-0.022697,1.0,-0.149472,0.064286,0.070512,-0.097012,0.089261,0.027114,-0.031915,-0.035728,0.067718
citric acid,0.289181,-0.149472,1.0,0.094212,0.114364,0.094077,0.121131,0.149503,-0.163748,0.062331,-0.075729
residual sugar,0.089021,0.064286,0.094212,1.0,0.088685,0.299098,0.401439,0.838966,-0.194133,-0.026664,-0.450631
chlorides,0.023086,0.070512,0.114364,0.088685,1.0,0.101392,0.19891,0.257211,-0.090439,0.016763,-0.360189
free sulfur dioxide,-0.049396,-0.097012,0.094077,0.299098,0.101392,1.0,0.615501,0.29421,-0.000618,0.059217,-0.250104
total sulfur dioxide,0.09107,0.089261,0.121131,0.401439,0.19891,0.615501,1.0,0.529881,0.002321,0.134562,-0.448892
density,0.265331,0.027114,0.149503,0.838966,0.257211,0.29421,0.529881,1.0,-0.093591,0.074493,-0.780138
pH,-0.425858,-0.031915,-0.163748,-0.194133,-0.090439,-0.000618,0.002321,-0.093591,1.0,0.155951,0.121432
sulphates,-0.017143,-0.035728,0.062331,-0.026664,0.016763,0.059217,0.134562,0.074493,0.155951,1.0,-0.017433


# Code to find the Node column and Node condition for Decision Tree:

In [40]:
from sklearn.tree import export_text

In [41]:
import sklearn as sk
sk.__version__

'0.22.1'

In [42]:
print(sklearn.__version__)

NameError: name 'sklearn' is not defined

In [43]:
r = export_text(dt, feature_names=list(train))

In [44]:
print(r)

|--- alcohol <= 10.85
|   |--- volatile acidity <= 0.24
|   |   |--- alcohol <= 8.85
|   |   |   |--- volatile acidity <= 0.16
|   |   |   |   |--- fixed acidity <= 7.10
|   |   |   |   |   |--- class: 6
|   |   |   |   |--- fixed acidity >  7.10
|   |   |   |   |   |--- class: 7
|   |   |   |--- volatile acidity >  0.16
|   |   |   |   |--- pH <= 2.95
|   |   |   |   |   |--- free sulfur dioxide <= 54.50
|   |   |   |   |   |   |--- class: 8
|   |   |   |   |   |--- free sulfur dioxide >  54.50
|   |   |   |   |   |   |--- class: 6
|   |   |   |   |--- pH >  2.95
|   |   |   |   |   |--- sulphates <= 0.42
|   |   |   |   |   |   |--- chlorides <= 0.06
|   |   |   |   |   |   |   |--- total sulfur dioxide <= 152.00
|   |   |   |   |   |   |   |   |--- class: 5
|   |   |   |   |   |   |   |--- total sulfur dioxide >  152.00
|   |   |   |   |   |   |   |   |--- residual sugar <= 15.52
|   |   |   |   |   |   |   |   |   |--- class: 5
|   |   |   |   |   |   |   |   |--- residual sugar > 

|--- alcohol <= 10.85
|   |--- volatile acidity <= 0.24
|   |   |--- alcohol <= 8.85
|   |   |   |--- pH <= 2.92
|   |   |   |   |--- class: 7
|   |   |   |--- pH >  2.92
|   |   |   |   |--- class: 5
|   |   |--- alcohol >  8.85
|   |   |   |--- free sulfur dioxide <= 13.50
|   |   |   |   |--- class: 5
|   |   |   |--- free sulfur dioxide >  13.50
|   |   |   |   |--- class: 6
|   |--- volatile acidity >  0.24
|   |   |--- alcohol <= 9.85
|   |   |   |--- citric acid <= 0.27
|   |   |   |   |--- class: 5
|   |   |   |--- citric acid >  0.27
|   |   |   |   |--- class: 5
|   |   |--- alcohol >  9.85
|   |   |   |--- free sulfur dioxide <= 20.50
|   |   |   |   |--- class: 5
|   |   |   |--- free sulfur dioxide >  20.50
|   |   |   |   |--- class: 6
|--- alcohol >  10.85
|   |--- alcohol <= 12.55
|   |   |--- free sulfur dioxide <= 11.50
|   |   |   |--- citric acid <= 0.24
|   |   |   |   |--- class: 4
|   |   |   |--- citric acid >  0.24
|   |   |   |   |--- class: 6
|   |   |--- free sulfur dioxide >  11.50
|   |   |   |--- sulphates <= 0.57
|   |   |   |   |--- class: 6
|   |   |   |--- sulphates >  0.57
|   |   |   |   |--- class: 7
|   |--- alcohol >  12.55
|   |   |--- chlorides <= 0.05
|   |   |   |--- free sulfur dioxide <= 8.50
|   |   |   |   |--- class: 6
|   |   |   |--- free sulfur dioxide >  8.50
|   |   |   |   |--- class: 7
|   |   |--- chlorides >  0.05
|   |   |   |--- citric acid <= 0.40
|   |   |   |   |--- class: 6
|   |   |   |--- citric acid >  0.40
|   |   |   |   |--- class: 5