**Types of scaling**:

* MinMaxScaler - scales all features to $[a, b]$ range

* StandardScaler - subtracts the mean and divides by the standard deviation of each feature. $X^{new}_i = \frac{X_i - \mu}{\sigma}$, where $\mu$ is the mean and $\sigma$ is the standard deviation of the feature

* RobustScaler - same idea as StandardScaler, but subtracts the median and divides by the interquartile range (IQR), which makes it less sensitive to outliers


In [7]:
import pandas as pd
import numpy as np

In [8]:
# Load the white-wine quality table over HTTP; the file uses ';' as its field delimiter.
df = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",
    sep=";",
)

In [10]:
# Split off the regression target. `pop` removes the column from `df` in place,
# so `df` holds only the feature columns afterwards (and re-running this cell
# on the same kernel fails because 'quality' is already gone).
y = df.pop(item='quality')

In [11]:
# Impute missing values with each column's mean in one vectorized call:
# `df.mean()` yields one (NaN-skipping) mean per column and `fillna` aligns it
# on column labels, which replaces the explicit per-column loop.
# NOTE(review): the means are computed on the full table, before the
# train/test split — confirm that is acceptable for this comparison.
df = df.fillna(df.mean())

In [12]:
from sklearn.preprocessing import StandardScaler,MinMaxScaler,RobustScaler
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [13]:
# Keep the global seed for any later code that draws from NumPy's global RNG.
np.random.seed(42)
# Hand the seed to the splitter explicitly so the 90/10 partition does not
# depend on what has consumed the global RNG before this line runs
# (e.g. cells executed out of order or re-run).
train, test, y_train, y_test = train_test_split(
    df, y, test_size=0.1, random_state=42
)

In [14]:
def fit_predict(train,test,y_train,y_test,scaler = None):
    """Fit a default ``Ridge`` model and print its mean squared error on ``test``.

    When ``scaler`` is given, it is fitted on ``train`` only and the fitted
    transform is then applied to ``test``; with ``scaler=None`` the features
    are passed to the model unchanged.

    Parameters
    ----------
    train, test
        Feature tables used for fitting and for evaluation.
    y_train, y_test
        Targets corresponding to ``train`` and ``test``.
    scaler
        Optional object exposing ``fit_transform`` and ``transform``.
        It is fitted in place by this call.

    Returns
    -------
    None
        The score is printed as ``MSE score: <value>``.
    """
    features_fit, features_eval = train, test
    if scaler is not None:
        # Scaling statistics come from the training rows only and are reused for test.
        features_fit = scaler.fit_transform(train)
        features_eval = scaler.transform(test)

    model = Ridge()
    model.fit(features_fit, y_train)
    predictions = model.predict(features_eval)
    print('MSE score:', mean_squared_error(y_test, predictions))

In [15]:
# Baseline: Ridge on the raw, unscaled features.
fit_predict(train, test, y_train, y_test, scaler=None)

MSE score: 0.57404414001


In [16]:
# Ridge on features transformed with MinMaxScaler.
fit_predict(train, test, y_train, y_test, scaler=MinMaxScaler())

MSE score: 0.567545067343


In [17]:
# Ridge on features transformed with StandardScaler.
fit_predict(train, test, y_train, y_test, scaler=StandardScaler())

MSE score: 0.558144966334


In [18]:
# Ridge on features transformed with RobustScaler.
fit_predict(train, test, y_train, y_test, scaler=RobustScaler())

MSE score: 0.55823299573
