In [163]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.datasets import load_wine

In [164]:
data=pd.read_csv("/content/winequality_red.csv")
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [165]:
data.tail()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1594,6.2,0.6,0.08,2.0,0.09,32.0,44.0,0.9949,3.45,0.58,10.5,5
1595,5.9,0.55,0.1,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5
1598,6.0,0.31,0.47,3.6,0.067,18.0,42.0,0.99549,3.39,0.66,11.0,6


 unique() will tell the unique values in the quality column

In [166]:
data['quality'].unique()

array([5, 6, 7, 4, 8, 3])

Here bins will have 2 bins i.e bad and good.
values greater than 6 will be good quality
values less than 6 will be bad quality

In [167]:
bins = (2, 6, 8)       
group_names = ['bad', 'good']
data['quality'] = pd.cut(data['quality'], bins = bins, labels = group_names) 
data['quality'].unique()

['bad', 'good']
Categories (2, object): ['bad' < 'good']

LabelEncoder() which encodes labels with values between 0 and 1

In [168]:
label_quality = LabelEncoder()
data['quality'] = label_quality.fit_transform(data['quality'])

Here, quality is the dependent variable and rest of others are independent variables

In [169]:
from sklearn.model_selection import train_test_split
x=data.drop('quality',axis=1)
y=data['quality']

Splitting the dataset into train and test splits

random_state controls the shuffling applied to the data before applying the split

In [170]:
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.30,random_state=1)

In [171]:
xtrain.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
126,8.2,1.33,0.0,1.7,0.081,3.0,12.0,0.9964,3.53,0.49,10.9
810,7.3,0.49,0.1,2.6,0.068,4.0,14.0,0.99562,3.3,0.47,10.5
635,8.7,0.84,0.0,1.4,0.065,24.0,33.0,0.9954,3.27,0.55,9.7
598,8.5,0.585,0.18,2.1,0.078,5.0,30.0,0.9967,3.2,0.48,9.8
880,9.2,0.56,0.18,1.6,0.078,10.0,21.0,0.99576,3.15,0.49,9.9


In [172]:
xtest.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
75,8.8,0.41,0.64,2.2,0.093,9.0,42.0,0.9986,3.54,0.66,10.5
1283,8.7,0.63,0.28,2.7,0.096,17.0,69.0,0.99734,3.26,0.63,10.2
408,10.4,0.34,0.58,3.7,0.174,6.0,16.0,0.997,3.19,0.7,11.3
1281,7.1,0.46,0.2,1.9,0.077,28.0,54.0,0.9956,3.37,0.64,10.4
1118,7.1,0.39,0.12,2.1,0.065,14.0,24.0,0.99252,3.3,0.53,13.3


We have simply print the shapes of the train and test data here

In [173]:
print(f'xtrain shape {xtrain.shape}')
print(f'ytrain shape {xtrain.shape}')
print(f'xtest shape {xtest.shape}')
print(f'ytest shape {ytest.shape}')

xtrain shape (1119, 11)
ytrain shape (1119, 11)
xtest shape (480, 11)
ytest shape (480,)


We will apply Min Max Scaler to transform our data in the range 0 to 1

In [174]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
xtrain_transform=scaler.fit_transform(xtrain)
xtest_transform=scaler.fit_transform(xtest)
xtrain_transform[0:10]

array([[0.31858407, 0.82876712, 0.        , 0.06153846, 0.11538462,
        0.02816901, 0.02205882, 0.48394495, 0.62204724, 0.09580838,
        0.38461538],
       [0.23893805, 0.25342466, 0.1       , 0.13076923, 0.09364548,
        0.04225352, 0.02941176, 0.42431193, 0.44094488, 0.08383234,
        0.32307692],
       [0.36283186, 0.49315068, 0.        , 0.03846154, 0.08862876,
        0.32394366, 0.09926471, 0.40749235, 0.41732283, 0.13173653,
        0.2       ],
       [0.34513274, 0.31849315, 0.18      , 0.09230769, 0.11036789,
        0.05633803, 0.08823529, 0.50688073, 0.36220472, 0.08982036,
        0.21538462],
       [0.40707965, 0.30136986, 0.18      , 0.05384615, 0.11036789,
        0.12676056, 0.05514706, 0.43501529, 0.32283465, 0.09580838,
        0.23076923],
       [0.32743363, 0.13013699, 0.39      , 0.11538462, 0.11036789,
        0.22535211, 0.13602941, 0.33409786, 0.4488189 , 0.26347305,
        0.63076923],
       [0.30088496, 0.32191781, 0.05      , 0.08461538, 0.

In [175]:
xtest_transform[0:10]

array([[0.35849057, 0.29145729, 0.81012658, 0.0890411 , 0.09440559,
        0.09230769, 0.12720848, 0.62628488, 0.76136364, 0.1835443 ,
        0.33962264],
       [0.3490566 , 0.51256281, 0.35443038, 0.12328767, 0.09965035,
        0.21538462, 0.22261484, 0.53377386, 0.44318182, 0.16455696,
        0.28301887],
       [0.50943396, 0.22110553, 0.73417722, 0.19178082, 0.23601399,
        0.04615385, 0.03533569, 0.50881057, 0.36363636, 0.20886076,
        0.49056604],
       [0.19811321, 0.34170854, 0.25316456, 0.06849315, 0.06643357,
        0.38461538, 0.16961131, 0.40602056, 0.56818182, 0.17088608,
        0.32075472],
       [0.19811321, 0.27135678, 0.15189873, 0.08219178, 0.04545455,
        0.16923077, 0.06360424, 0.17988253, 0.48863636, 0.10126582,
        0.86792453],
       [0.18867925, 0.10050251, 0.37974684, 0.06164384, 0.04545455,
        0.2       , 0.04946996, 0.48825257, 0.84090909, 0.28481013,
        0.24528302],
       [0.35849057, 0.15075377, 0.58227848, 0.08219178, 0.

Here, we have applied the MLP Classifier
with hidden layers, activation function, maximum iterations and optimizer

In [176]:
from sklearn.neural_network import MLPClassifier
model=MLPClassifier(hidden_layer_sizes=(150,100,45),activation='relu',max_iter=300,solver='adam')

In [177]:
model.fit(xtrain_transform,ytrain)



MLPClassifier(hidden_layer_sizes=(150, 100, 45), max_iter=300)

In [178]:
ypred=model.predict(xtest_transform)

In [179]:
from sklearn.metrics import accuracy_score,confusion_matrix
accuracy=accuracy_score(ytest,ypred)
print("accuracy is:",accuracy)

accuracy is: 0.8791666666666667


In [180]:
cm=confusion_matrix(ytest,ypred)
cm

array([[412,  13],
       [ 45,  10]])

In [181]:
from sklearn.metrics import classification_report
cr=classification_report(ytest,ypred)
print(cr)

              precision    recall  f1-score   support

           0       0.90      0.97      0.93       425
           1       0.43      0.18      0.26        55

    accuracy                           0.88       480
   macro avg       0.67      0.58      0.60       480
weighted avg       0.85      0.88      0.86       480

