### 4

In [1]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
import numpy as np

In [7]:
# Load the semicolon-delimited wine-quality table. The last column is the
# regression target; every other column is used as an input feature.
df = pd.read_csv("wine+quality/winequality-red.csv", sep=';')

# Rows [0, N_TRAIN) are used for fitting; all remaining rows are held out.
N_TRAIN = 1400

X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()
X_train, X_test = X[:N_TRAIN], X[N_TRAIN:]
y_train, y_test = y[:N_TRAIN], y[N_TRAIN:]

# Standardize every feature as (x - mean(x)) / std(x).
# The mean and standard deviation are learned from the training rows only and
# then reused for the test rows, so both splits share the same scaling terms.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

def addBiasToSamples(data):
    """Return a copy of ``data`` with a trailing column of ones appended.

    Args:
        data: 2-D array with one sample per row.

    Returns:
        A new array with the same rows as ``data`` and one extra final
        column whose entries are all 1.0.
    """
    n_samples = data.shape[0]
    bias_column = np.ones((n_samples, 1))
    return np.concatenate((data, bias_column), axis=1)
    
# Append the constant bias column to both standardized splits so the linear
# model can learn an offset without a separately fitted intercept.
X_train_std, X_test_std = (
    addBiasToSamples(split) for split in (X_train_std, X_test_std)
)

# Regularization strengths to compare: 0 (no penalty), 10^-1, 10^-2, 10^-3.
lambdas = [0] + [10 ** -exponent for exponent in (1, 2, 3)]

In [3]:
def trainModel(regularization_coefficient = 0):
    """Fit a ridge regression on the standardized training split.

    Reads the module-level ``X_train_std`` and ``y_train``.

    Args:
        regularization_coefficient: L2 penalty strength, forwarded to
            ``Ridge`` as ``alpha``. Defaults to 0.

    Returns:
        The fitted ``Ridge`` estimator.
    """
    # fit_intercept=False: Ridge estimates no separate intercept. The offset
    # comes from the constant column already appended to X_train_std, so its
    # weight is penalized like every other coefficient.
    ridge = Ridge(fit_intercept=False, alpha=regularization_coefficient)
    return ridge.fit(X_train_std, y_train)

In [4]:
# Train one ridge model per regularization strength and report its mean
# squared error on the held-out rows. `model` and `y_pred` are rebound on
# every pass, so after the loop they refer to the last lambda only.
for i in lambdas:
    model = trainModel(i)
    y_pred = model.predict(X_test_std)
    residuals = y_test - y_pred
    mse = np.mean(residuals * residuals)
    print("MSE for predictions using lambda =", i, "is", mse)

MSE for predictions using lambda = 0 is 0.4868691102549516
MSE for predictions using lambda = 0.1 is 0.48679037671598435
MSE for predictions using lambda = 0.01 is 0.48686122087563066
MSE for predictions using lambda = 0.001 is 0.4868683211567339


In [5]:
# y_pred was last assigned inside the lambda loop, so (assuming the cells ran
# in the order shown) this prints the held-out predictions of the final model
# trained there, i.e. the one for the last entry of `lambdas`.
print(y_pred)

[4.96968956 4.96968956 6.48022124 6.24348    5.63512459 6.47637574
 6.46109026 6.02312003 6.89340766 6.02312003 5.46661258 5.78955934
 6.46109026 5.65754314 5.81636801 5.49213338 5.81636801 6.4858995
 5.45928528 4.99971898 5.45928528 5.42065771 6.13049355 5.73302682
 5.80205261 5.80205261 6.46573438 5.98007449 5.73193801 6.55401253
 5.75838624 5.43865946 6.38829602 5.80489126 5.12262846 5.12262846
 5.03487309 5.29849859 5.52115807 5.64123401 6.24890783 4.88163005
 5.49439433 5.9155035  5.6504892  4.89088525 5.49439433 5.39623184
 5.4979915  6.27302043 6.24890783 6.13299239 6.13077909 5.09326975
 5.97816429 5.46479226 5.57130752 5.09326975 5.94152713 6.67692155
 5.78388268 5.25063352 5.52565753 5.58974503 5.44456581 5.44456581
 5.60922918 5.20959451 5.60922918 4.92164916 5.3607052  6.07610532
 6.12068586 5.65823825 5.11819883 6.66163544 5.11819883 6.67089063
 5.14817026 5.92353078 5.41030272 5.92353078 5.43703254 6.03564813
 5.4251022  5.26327806 5.45832632 5.73961589 5.86960525 5.91860

In [6]:
# Display the held-out targets: the last CSV column for rows 1400 onward.
y_test

array([5, 5, 6, 8, 6, 7, 6, 6, 7, 6, 6, 6, 6, 5, 5, 5, 5, 7, 5, 5, 5, 5,
       6, 4, 6, 6, 6, 5, 5, 5, 5, 6, 6, 7, 6, 6, 5, 5, 5, 6, 7, 6, 5, 5,
       6, 6, 5, 5, 5, 8, 7, 7, 7, 5, 6, 6, 6, 5, 5, 7, 6, 4, 6, 6, 5, 5,
       7, 4, 7, 3, 5, 5, 6, 5, 5, 7, 5, 7, 3, 5, 4, 5, 4, 5, 4, 5, 5, 5,
       5, 6, 6, 5, 5, 5, 7, 6, 5, 6, 6, 6, 5, 5, 5, 6, 6, 3, 6, 6, 6, 5,
       6, 5, 6, 6, 6, 6, 5, 6, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 6, 6, 6, 5,
       6, 5, 7, 6, 6, 6, 5, 5, 6, 7, 6, 6, 7, 6, 5, 5, 5, 8, 5, 5, 6, 5,
       6, 7, 5, 6, 5, 5, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 6, 6, 5, 6, 6, 6,
       6, 6, 6, 5, 6, 5, 5, 5, 7, 6, 6, 6, 6, 5, 6, 6, 6, 6, 5, 6, 6, 5,
       6])