# Voting Classifier

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split

from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report

In [6]:
# Load the white-wine dataset.
# Forward slashes are used so the relative path resolves on every operating
# system and so the literal no longer contains the invalid "\D" / "\w" escape
# sequences that the backslash-separated form had.
wine = pd.read_csv('data/Data/white_wine.csv')
wine.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6.0
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6.0
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6.0
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6.0
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6.0


In [7]:
# Impute missing alcohol values with the column mean.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the selected Series: the in-place form is a chained
# assignment, which raises a FutureWarning in pandas 2.x and does not update
# `wine` when Copy-on-Write is enabled.
wine['alcohol'] = wine['alcohol'].fillna(wine['alcohol'].mean())

In [8]:
# Binary target: 1 when the quality score is above 6, otherwise 0.
above_six = wine['quality'] > 6
wine['label'] = np.where(above_six, 1, 0)

# Two-column feature matrix and the target vector.
feature_columns = ['alcohol', 'density']
X = wine[feature_columns]
y = wine['label']

In [9]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class ratio
# the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=2020,
)
# Show the resulting shapes as the cell output.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((416, 2), (104, 2), (416,), (104,))

In [10]:
# Base estimators that are evaluated individually and then combined by voting.
lr = LogisticRegression()
# The tree is seeded (same seed as the train/test split) so that re-running the
# notebook rebuilds the same tree and reproduces the reported metrics.
dt = DecisionTreeClassifier(max_depth=5, random_state=2020)
knn = KNeighborsClassifier(n_neighbors=3)

In [11]:
# Fit every base classifier on the training split and print its test-set report.
named_models = {'LogisticRegression': lr, 'DecisionTree': dt, 'KNN': knn}
for clf_name, model in named_models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f'{clf_name}: \n {classification_report(y_test, y_pred)}')

LogisticRegression: 
               precision    recall  f1-score   support

           0       0.88      0.96      0.92        84
           1       0.75      0.45      0.56        20

    accuracy                           0.87       104
   macro avg       0.82      0.71      0.74       104
weighted avg       0.86      0.87      0.85       104

DecisionTree: 
               precision    recall  f1-score   support

           0       0.98      1.00      0.99        84
           1       1.00      0.90      0.95        20

    accuracy                           0.98       104
   macro avg       0.99      0.95      0.97       104
weighted avg       0.98      0.98      0.98       104

KNN: 
               precision    recall  f1-score   support

           0       0.94      0.94      0.94        84
           1       0.75      0.75      0.75        20

    accuracy                           0.90       104
   macro avg       0.85      0.85      0.85       104
weighted avg       0.90      

In [12]:
# Soft-voting ensemble over the three base classifiers.
base_estimators = [('clf1', lr), ('clf2', dt), ('clf3', knn)]
vc = VotingClassifier(estimators=base_estimators, voting='soft')

In [13]:
# Train the ensemble on the training split; the fitted estimator is the cell output.
vc.fit(X=X_train, y=y_train)

VotingClassifier(estimators=[('clf1',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='lbfgs', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('clf2',
                              DecisionTreeClassifier(ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gi...
                                        

In [14]:
# Ensemble predictions for the held-out rows.
y_pred_vc = vc.predict(X=X_test)

In [15]:
# Per-class precision / recall / F1 of the ensemble on the test split.
vc_report = classification_report(y_true=y_test, y_pred=y_pred_vc)
print(vc_report)

              precision    recall  f1-score   support

           0       0.95      1.00      0.98        84
           1       1.00      0.80      0.89        20

    accuracy                           0.96       104
   macro avg       0.98      0.90      0.93       104
weighted avg       0.96      0.96      0.96       104




## Modeling Improvement

In [None]:
# Transformers
# Degree-3 polynomial expansion (powers and interaction terms, no bias column).
poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
# Feature standardisation, applied in front of KNN below.
standard = StandardScaler()

# Logistic regression fed with the polynomial features.
pipeline_lr = Pipeline([
    ('preprocess', poly),
    ('clf', lr)
])

# Fresh depth-5 decision tree. It is bound to `dt`, matching the name used for
# the tree earlier in the notebook; the original cell bound it to `df`, which
# reads as a DataFrame. Seeded so the tree is reproducible.
dt = DecisionTreeClassifier(max_depth=5, random_state=2020)
# Backward-compatible alias for any cell that still refers to the old name.
df = dt

# KNN on standardised features.
pipeline_knn = Pipeline([
    ('scaler', standard),
    ('clf', knn)
])