In [1]:
import numpy
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

df = pd.read_csv('winequality-red.csv')

In [2]:
X = df.loc[:,'fixed acidity':'alcohol']

scaler = StandardScaler().fit(X)

In [3]:
bins = (2, 5.5, 8)
group_names = ['bad', 'good']
df['quality'] = pd.cut(df['quality'], bins = bins, labels = group_names)
label_quality = LabelEncoder()
df['quality'] = label_quality.fit_transform(df['quality'])
y = df['quality']

In [5]:
from sklearn.model_selection import train_test_split, GridSearchCV
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [6]:
#First we have implemented the BaggigngClassifier from sklearn without tuning the parameters.
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
b=BaggingClassifier()
b.fit(X_train,y_train)
pred = b.predict(X_test)
print('Confusion matrix:')
print(confusion_matrix(pred,y_test))
print('\nAccuracy:')
print(accuracy_score(pred,y_test))
print(classification_report(pred,y_test))

Confusion matrix:
[[114  35]
 [ 27 144]]

Accuracy:
0.80625
              precision    recall  f1-score   support

           0       0.81      0.77      0.79       149
           1       0.80      0.84      0.82       171

    accuracy                           0.81       320
   macro avg       0.81      0.80      0.80       320
weighted avg       0.81      0.81      0.81       320



In [14]:
#As the only tweakable parameters of Bagging Classifier is max_features which needs to be considere and number of estimators that is the trees so we use GridSearch to estimate these parameters. With the features being 11 so we have kept the window between 1 and 10 with the number of trees between 40 and 50.
parameter_candidates = {
    'max_features' :list(range(1,10)),
    'n_estimators': list(range(40,50))
}

grid_b = GridSearchCV(estimator = b, param_grid= parameter_candidates, scoring= 'accuracy', cv = 5)
grid_b.fit(X_train, y_train)
grid_b.best_params_

{'max_features': 8, 'n_estimators': 42}

In [15]:
b1=BaggingClassifier(max_features=8, n_estimators=42)
b1.fit(X_train,y_train)
pred = b1.predict(X_test)
print('Confusion matrix:')
print(confusion_matrix(pred,y_test))
print('\nAccuracy:')
print(accuracy_score(pred,y_test))
print(classification_report(pred,y_test))

Confusion matrix:
[[111  38]
 [ 30 141]]

Accuracy:
0.7875
              precision    recall  f1-score   support

           0       0.79      0.74      0.77       149
           1       0.79      0.82      0.81       171

    accuracy                           0.79       320
   macro avg       0.79      0.78      0.79       320
weighted avg       0.79      0.79      0.79       320

