# Import necessary libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Separate features and target

In [63]:
# Read the wine-quality table from the working directory.
dataset = pd.read_csv('winequality-red.csv')

# Features are every column except the first and the last; the target is the
# last column.
# NOTE(review): column 0 is skipped on purpose here — presumably a saved row
# index in the CSV; confirm against the file.
X, y = dataset.iloc[:, 1:-1], dataset.iloc[:, -1]

# Split the dataset into an 80-20 training-test set

In [64]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [65]:
# Inspect the training features as produced by the split (before any scaling).
X_train

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
493,8.7,0.690,0.31,3.0,0.086,23.0,81.0,1.00020,3.48,0.74,11.6
354,6.1,0.210,0.40,1.4,0.066,40.5,165.0,0.99120,3.25,0.59,11.9
342,10.9,0.390,0.47,1.8,0.118,6.0,14.0,0.99820,3.30,0.75,9.8
834,8.8,0.685,0.26,1.6,0.088,16.0,23.0,0.99694,3.32,0.47,9.4
705,8.4,1.035,0.15,6.0,0.073,11.0,54.0,0.99900,3.37,0.49,9.9
...,...,...,...,...,...,...,...,...,...,...,...
1130,9.1,0.600,0.00,1.9,0.058,5.0,10.0,0.99770,3.18,0.63,10.4
1294,8.2,0.635,0.10,2.1,0.073,25.0,60.0,0.99638,3.29,0.75,10.9
860,7.2,0.620,0.06,2.7,0.077,15.0,85.0,0.99746,3.51,0.54,9.5
1459,7.9,0.200,0.35,1.7,0.054,7.0,15.0,0.99458,3.32,0.80,11.9


In [66]:
# Inspect the test features as produced by the split (before any scaling).
X_test

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
803,7.7,0.560,0.08,2.50,0.114,14.0,46.0,0.99710,3.24,0.66,9.6
124,7.8,0.500,0.17,1.60,0.082,21.0,102.0,0.99600,3.39,0.48,9.5
350,10.7,0.670,0.22,2.70,0.107,17.0,34.0,1.00040,3.28,0.98,9.9
682,8.5,0.460,0.31,2.25,0.078,32.0,58.0,0.99800,3.33,0.54,9.8
1326,6.7,0.460,0.24,1.70,0.077,18.0,34.0,0.99480,3.39,0.60,10.6
...,...,...,...,...,...,...,...,...,...,...,...
1259,6.8,0.640,0.00,2.70,0.123,15.0,33.0,0.99538,3.44,0.63,11.3
1295,6.6,0.630,0.00,4.30,0.093,51.0,77.5,0.99558,3.20,0.45,9.5
1155,8.3,0.600,0.25,2.20,0.118,9.0,38.0,0.99616,3.15,0.53,9.8
963,8.8,0.270,0.39,2.00,0.100,20.0,27.0,0.99546,3.15,0.69,11.2




# Create an instance of the StandardScaler class


In [67]:
# One scaler instance, created with default arguments, shared by the training
# fit and the later test-set transform.
sc = StandardScaler()

In [68]:
# Pick the columns to standardize: any column that has at least one value whose
# magnitude exceeds SCALE_THRESHOLD in the training set. 'pH' is deliberately
# left unscaled; filtering it inside the comprehension (instead of calling
# list.remove) avoids a ValueError when 'pH' does not pass the threshold.
#
# NOTE: this must run while X_train still holds raw values — a later cell
# overwrites these columns with scaled values, after which this test would
# select a different set of columns.
SCALE_THRESHOLD = 4
features = [
    feature
    for feature in X_train.columns
    if feature != 'pH' and (X_train[feature].abs() > SCALE_THRESHOLD).any()
]
print(features)

['fixed acidity', 'residual sugar', 'free sulfur dioxide', 'total sulfur dioxide', 'alcohol']




# Fit the StandardScaler on the selected features of the training set and transform them




In [69]:
# X_train was derived from `dataset` through iloc + train_test_split, so pandas
# may flag the column assignment below with SettingWithCopyWarning. Taking an
# explicit copy makes the frame independent and the write unambiguous.
X_train = X_train.copy()

# Learn the scaling statistics from the training rows only and overwrite the
# selected columns with their standardized values.
# NOTE: this cell is not idempotent — running it a second time re-fits the
# scaler on already-scaled data. Re-run the split cell first if needed.
X_train[features] = sc.fit_transform(X_train[features])

In [70]:
# Inspect the training features after the selected columns were standardized;
# columns not in `features` keep their original values.
X_train

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
493,0.218332,0.690,0.31,0.309726,0.086,0.691007,1.042934,1.00020,3.48,0.74,1.123177
354,-1.290166,0.210,0.40,-0.805080,0.066,2.388473,3.593870,0.99120,3.25,0.59,1.408272
342,1.494753,0.390,0.47,-0.526378,0.118,-0.957960,-0.991742,0.99820,3.30,0.75,-0.587390
834,0.276351,0.685,0.26,-0.665729,0.088,0.012020,-0.718427,0.99694,3.32,0.47,-0.967516
705,0.044274,1.035,0.15,2.399985,0.073,-0.472970,0.222990,0.99900,3.37,0.49,-0.492358
...,...,...,...,...,...,...,...,...,...,...,...
1130,0.450408,0.600,0.00,-0.456703,0.058,-1.054958,-1.113215,0.99770,3.18,0.63,-0.017201
1294,-0.071764,0.635,0.10,-0.317352,0.073,0.885003,0.405199,0.99638,3.29,0.75,0.457957
860,-0.651956,0.620,0.06,0.100700,0.077,-0.084978,1.164407,0.99746,3.51,0.54,-0.872484
1459,-0.245822,0.200,0.35,-0.596054,0.054,-0.860962,-0.961374,0.99458,3.32,0.80,1.408272


# Apply the transform to the test set




In [51]:
# Sanity check: this must be the same column list the scaler was fitted on,
# i.e. the list built earlier with 'pH' excluded. If 'pH' shows up here the
# cell output is stale — restart the kernel and run all cells in order.
features

['fixed acidity',
 'residual sugar',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'pH',
 'alcohol']

In [71]:
# X_test was derived from `dataset` through iloc + train_test_split, so pandas
# may flag the column assignment below with SettingWithCopyWarning. Taking an
# explicit copy makes the frame independent and the write unambiguous.
X_test = X_test.copy()

# Apply the statistics learned from the training set (transform only, no
# re-fit) so that no information from the test rows leaks into the scaling.
# `features` must be exactly the column list the scaler was fitted on.
# NOTE: this cell is not idempotent — running it twice scales the test columns
# twice. Re-run the split cell first if needed.
X_test[features] = sc.transform(X_test[features])

In [72]:
# Inspect the test features after the selected columns were transformed with
# the training-set scaler; columns not in `features` keep their original values.
X_test

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
803,-0.361860,0.560,0.08,-0.038651,0.114,-0.181976,-0.019957,0.99710,3.24,0.66,-0.777453
124,-0.303841,0.500,0.17,-0.665729,0.082,0.497011,1.680668,0.99600,3.39,0.48,-0.872484
350,1.378715,0.670,0.22,0.100700,0.107,0.109019,-0.384376,1.00040,3.28,0.98,-0.492358
682,0.102293,0.460,0.31,-0.212839,0.078,1.563989,0.344463,0.99800,3.33,0.54,-0.587390
1326,-0.942051,0.460,0.24,-0.596054,0.077,0.206017,-0.384376,0.99480,3.39,0.60,0.172862
...,...,...,...,...,...,...,...,...,...,...,...
1259,-0.884032,0.640,0.00,0.100700,0.123,-0.084978,-0.414744,0.99538,3.44,0.63,0.838083
1295,-1.000070,0.630,0.00,1.215505,0.093,3.406953,0.936645,0.99558,3.20,0.45,-0.872484
1155,-0.013745,0.600,0.25,-0.247677,0.118,-0.666966,-0.262903,0.99616,3.15,0.53,-0.587390
963,0.276351,0.270,0.39,-0.387028,0.100,0.400013,-0.596954,0.99546,3.15,0.69,0.743051


# Print the scaled training and test datasets

In [73]:
# Final view of the training features (selected columns standardized in place).
X_train

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
493,0.218332,0.690,0.31,0.309726,0.086,0.691007,1.042934,1.00020,3.48,0.74,1.123177
354,-1.290166,0.210,0.40,-0.805080,0.066,2.388473,3.593870,0.99120,3.25,0.59,1.408272
342,1.494753,0.390,0.47,-0.526378,0.118,-0.957960,-0.991742,0.99820,3.30,0.75,-0.587390
834,0.276351,0.685,0.26,-0.665729,0.088,0.012020,-0.718427,0.99694,3.32,0.47,-0.967516
705,0.044274,1.035,0.15,2.399985,0.073,-0.472970,0.222990,0.99900,3.37,0.49,-0.492358
...,...,...,...,...,...,...,...,...,...,...,...
1130,0.450408,0.600,0.00,-0.456703,0.058,-1.054958,-1.113215,0.99770,3.18,0.63,-0.017201
1294,-0.071764,0.635,0.10,-0.317352,0.073,0.885003,0.405199,0.99638,3.29,0.75,0.457957
860,-0.651956,0.620,0.06,0.100700,0.077,-0.084978,1.164407,0.99746,3.51,0.54,-0.872484
1459,-0.245822,0.200,0.35,-0.596054,0.054,-0.860962,-0.961374,0.99458,3.32,0.80,1.408272


In [74]:
# Final view of the test features (selected columns transformed in place with
# the scaler fitted on the training set).
X_test

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
803,-0.361860,0.560,0.08,-0.038651,0.114,-0.181976,-0.019957,0.99710,3.24,0.66,-0.777453
124,-0.303841,0.500,0.17,-0.665729,0.082,0.497011,1.680668,0.99600,3.39,0.48,-0.872484
350,1.378715,0.670,0.22,0.100700,0.107,0.109019,-0.384376,1.00040,3.28,0.98,-0.492358
682,0.102293,0.460,0.31,-0.212839,0.078,1.563989,0.344463,0.99800,3.33,0.54,-0.587390
1326,-0.942051,0.460,0.24,-0.596054,0.077,0.206017,-0.384376,0.99480,3.39,0.60,0.172862
...,...,...,...,...,...,...,...,...,...,...,...
1259,-0.884032,0.640,0.00,0.100700,0.123,-0.084978,-0.414744,0.99538,3.44,0.63,0.838083
1295,-1.000070,0.630,0.00,1.215505,0.093,3.406953,0.936645,0.99558,3.20,0.45,-0.872484
1155,-0.013745,0.600,0.25,-0.247677,0.118,-0.666966,-0.262903,0.99616,3.15,0.53,-0.587390
963,0.276351,0.270,0.39,-0.387028,0.100,0.400013,-0.596954,0.99546,3.15,0.69,0.743051
