In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
%matplotlib inline

In [2]:
#loading Datasets
wine = pd.read_csv("winequality-red.csv", sep=";")


## Preprocessing Data

In [3]:
bins = (2, 6.5 , 8)
group_names =["bad", "good"]
wine["quality"] = pd.cut(wine["quality"], bins = bins, labels = group_names)
wine["quality"].unique()

['bad', 'good']
Categories (2, object): ['bad' < 'good']

In [4]:
label_quality = LabelEncoder()

In [5]:
wine["quality"] = label_quality.fit_transform(wine["quality"])

In [6]:
#seprate feature and label
X = wine.drop("quality", axis = 1)
y = wine["quality"]

In [7]:
#train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

## Standard Scaler

In [9]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

array([[ 0.21833164,  0.88971201,  0.19209222,  0.30972563, -0.04964208,
         0.69100692,  1.04293362,  1.84669643,  1.09349989,  0.45822284,
         1.12317723],
       [-1.29016623, -1.78878251,  0.65275338, -0.80507963, -0.45521361,
         2.38847304,  3.59387025, -3.00449133, -0.40043872, -0.40119696,
         1.40827174],
       [ 1.49475291, -0.78434707,  1.01104539, -0.52637831,  0.59927236,
        -0.95796016, -0.99174203,  0.76865471, -0.07566946,  0.51551749,
        -0.58738978],
       [ 0.27635078,  0.86181102, -0.06383064, -0.66572897, -0.00908493,
         0.01202048, -0.71842739,  0.08948842,  0.05423824, -1.08873281,
        -0.96751578],
       [ 0.04427419,  2.81487994, -0.62686095,  2.39998549, -0.31326357,
        -0.47296984,  0.2229897 ,  1.1998714 ,  0.37900751, -0.9741435 ,
        -0.49235828],
       [-0.07176411, -0.78434707,  1.11341454, -0.17800167,  0.21397941,
         3.01896045,  2.62208486,  0.60694845,  0.44396136,  1.89058918,
        -0.587

## Random Forest Classifier

In [17]:
rfc = RandomForestClassifier(n_estimators=200)
rfc.fit(X_train, y_train)
Predicted = rfc.predict(X_test)

In [21]:
#performance of RFC
print(classification_report(y_test, Predicted))

              precision    recall  f1-score   support

           0       0.92      0.97      0.94       273
           1       0.73      0.51      0.60        47

    accuracy                           0.90       320
   macro avg       0.82      0.74      0.77       320
weighted avg       0.89      0.90      0.89       320



## SVC Classifier

In [22]:
clf = svm.SVC()
clf.fit(X_train, y_train)
predict_clf = clf.predict(X_test)

In [23]:
print(classification_report(y_test, predict_clf))
print(confusion_matrix(y_test, predict_clf))

              precision    recall  f1-score   support

           0       0.88      0.98      0.93       273
           1       0.71      0.26      0.37        47

    accuracy                           0.88       320
   macro avg       0.80      0.62      0.65       320
weighted avg       0.86      0.88      0.85       320

[[268   5]
 [ 35  12]]


## Neural Network

In [24]:
mlpc = MLPClassifier(hidden_layer_sizes=(11,11,11), max_iter=500)
mlpc.fit(X_train, y_train)
predict_mlpc = mlpc.predict(X_test)



In [25]:
print(classification_report(y_test, predict_mlpc))
print(confusion_matrix(y_test, predict_mlpc))

              precision    recall  f1-score   support

           0       0.92      0.93      0.92       273
           1       0.56      0.51      0.53        47

    accuracy                           0.87       320
   macro avg       0.74      0.72      0.73       320
weighted avg       0.86      0.87      0.87       320

[[254  19]
 [ 23  24]]
