# Ridge regression

In [1]:
import numpy as np
import scipy
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.linear_model import Ridge 

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

data = datasets.load_diabetes()
X_train, y_train = data.data, data.target

In [2]:
import math

In [3]:
def fit_inverse(X, y, alpha=1.0, fit_intercept=False):
    """Direct method using the inverse"""
    I_d = np.eye(X.shape[1]) 
    if fit_intercept == True:
        one = np.array([np.ones(X.shape[0])])
        X = np.concatenate((X, one.T), axis=1)
        I_d = np.eye(X.shape[1]) 
        I_d[X.shape[1]-1][X.shape[1]-1] = 0
        
    weights = np.linalg.inv(X.T@X + alpha*I_d)@X.T@y
    return weights

In [4]:
w = fit_inverse(X_train, y_train, alpha=0.1)

sk_model = Ridge(fit_intercept=False, alpha=0.1)
sk_model.fit(X_train, y_train)

error = rel_error(sk_model.coef_, w)
assert error <= 1e-11
print("prediction error: ", error)

prediction error:  1.2296662980529577e-13


In [6]:
def fit_qr_linear_regression(X, y):
    """ QR approach for linear regression from previous assignement"""
    b = np.dot(X.T, y)
    A = np.dot(X.T, X)
    Q, R = np.linalg.qr(A)
    Q_inv = Q.T
    x1 = np.dot(Q_inv, b)
    weight = scipy.linalg.solve_triangular(R, x1)
    return weight

def fit_qr(X, y, alpha=1.0, fit_intercept=False):
    """QR approach"""
    I_d = np.eye(X.shape[1]) 
    if fit_intercept == True:
        one = np.array([np.ones(X.shape[0])])
        X = np.concatenate((X, one.T), axis=1)
        I_d = np.eye(X.shape[1]) 
        I_d[X.shape[1]-1][X.shape[1]-1] = 0
        
    X_tilde = np.concatenate((X, math.sqrt(alpha) * I_d), axis=0)
    zeros = np.zeros((X.shape[1]))
    y_tilde = np.concatenate((y, zeros), axis=0)
    print(X_tilde.shape)
    print(y_tilde.shape)
    weight = fit_qr_linear_regression(X_tilde, y_tilde)
    return weight

In [7]:
w = fit_qr(X_train, y_train, alpha=0.1)

sk_model = Ridge(fit_intercept=False, alpha=0.1)
sk_model.fit(X_train, y_train)

error = rel_error(sk_model.coef_, w)
#assert error <= 1e-11
print("prediction error: ", error)

(452, 10)
(452,)
prediction error:  1.0334971579632515e-13


In [28]:
def fit_svd(X, y, alpha=1.0, fit_intercept=False):
    """SVD approach"""
    I_d = np.eye(X.shape[1]) 
    if fit_intercept == True:
        one = np.array([np.ones(X.shape[0])])
        X = np.concatenate((X, one.T), axis=1)
        I_d = np.eye(X.shape[1]) 
        I_d[X.shape[1]-1][X.shape[1]-1] = 0
        
    X_tilde = np.concatenate((X, math.sqrt(alpha) * I_d), axis=0)
    zeros = np.zeros((X.shape[1]))
    y_tilde = np.concatenate((y, zeros), axis=0)
    
    U, Sigma_diag, V_t = np.linalg.svd(X_tilde, full_matrices=False)
    Sigma_diag_inv = 1/Sigma_diag
    Sigma_mat_inv = np.diag(Sigma_diag_inv)
    weight = V_t.T@Sigma_mat_inv@U.T@y_tilde
    print(weight.shape)
    return weight

In [29]:
w = fit_svd(X_train, y_train, alpha=0.1)

sk_model = Ridge(fit_intercept=False, alpha=0.1)
sk_model.fit(X_train, y_train)

error = rel_error(sk_model.coef_, w)
#assert error <= 1e-11
print("prediction error: ", error)

(10,)
prediction error:  4.415928694665666e-14


# Everything in a class

In [9]:
class RidgeRegression():
    def __init__(self, fit_intercept=True, method="inverse", alpha=1.0):
        self.w = 0
        self.fit_intercept = fit_intercept # bias
        self.method = method
        self.alpha = alpha
    
    def fit(self, X, y):
        I_d = np.eye(X.shape[1]) 
        if self.fit_intercept == True:
            one = np.array([np.ones(X.shape[0])])
            X = np.concatenate((X, one.T), axis=1)
            I_d = np.eye(X.shape[1]) 
            I_d[X.shape[1]-1][X.shape[1]-1] = 0
            
        if self.method == "inverse":
            weights = np.linalg.inv(X.T@X + self.alpha*I_d)@X.T@y
            
        elif self.method == "qr":
            X_tilde = np.concatenate((X, math.sqrt(self.alpha) * I_d), axis=0)
            zeros = np.zeros((X.shape[1]))
            y_tilde = np.concatenate((y, zeros), axis=0)
            weights = self.fit_qr_linear_regression(X_tilde, y_tilde)
            
        elif self.method == "svd":
            X_tilde = np.concatenate((X, math.sqrt(self.alpha) * I_d), axis=0)
            zeros = np.zeros((X.shape[1]))
            y_tilde = np.concatenate((y, zeros), axis=0)

            U, Sigma_diag, V_t = np.linalg.svd(X_tilde, full_matrices=False)
            Sigma_diag_inv = 1/Sigma_diag
            Sigma_mat_inv = np.diag(Sigma_diag_inv)
            weights = V_t.T@Sigma_mat_inv@U.T@y_tilde
            
        self.w = weights
    
    def fit_qr_linear_regression(self, X, y):
        """ QR approach for linear regression from previous assignement"""
        b = np.dot(X.T, y)
        A = np.dot(X.T, X)
        Q, R = np.linalg.qr(A)
        Q_inv = Q.T
        x1 = np.dot(Q_inv, b)
        weight = scipy.linalg.solve_triangular(R, x1)
        return weight
    
    def predict(self, X):
        if self.fit_intercept == True:
            one = np.array([np.ones(X.shape[0])])
            X = np.concatenate((X, one.T), axis=1)
        y_predict = np.dot(X, self.w) 
        return y_predict

## without bias

In [10]:
# OTHER APPROACHES
sk_model = Ridge(fit_intercept=False, alpha=0.1)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model = RidgeRegression(fit_intercept=False, method="inverse", alpha=0.1)
model.fit(X_train, y_train)
pred = model.predict(X_train)

error = rel_error(pred, sk_pred)
print(error)
assert error <= 1e-11
print("prediction error inverse: ", error)

model_qr = RidgeRegression(fit_intercept=False, method="qr", alpha=0.1)
model_qr.fit(X_train, y_train)
pred_qr = model_qr.predict(X_train)

error_qr = rel_error(pred_qr, sk_pred)
assert error_qr <= 1e-11
print("prediction error qr: ", error_qr)

model_svd = RidgeRegression(fit_intercept=False, method="svd", alpha=0.1)
model_svd.fit(X_train, y_train)
pred_svd = model_svd.predict(X_train)

error_svd = rel_error(pred_svd, sk_pred)
assert error_svd <= 1e-11
print("prediction error svd: ", error_svd)

9.592829399440487e-14
prediction error inverse:  9.592829399440487e-14
prediction error qr:  1.0828073868059346e-13
prediction error svd:  6.661687082945866e-14


In [11]:
# OTHER APPROACHES
sk_model = Ridge(fit_intercept=False, alpha=0.1)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model = RidgeRegression(fit_intercept=False, method="inverse", alpha=0.1)
model.fit(X_train, y_train)
pred = model.predict(X_train)

error = rel_error(pred, sk_pred)
assert error <= 1e-11
print("prediction error inverse: ", error)

model_qr = RidgeRegression(fit_intercept=False, method="qr", alpha=0.1)
model_qr.fit(X_train, y_train)
pred_qr = model_qr.predict(X_train)

error_qr = rel_error(pred_qr, sk_pred)
assert error_qr <= 1e-11
print("prediction error qr: ", error_qr)

model_svd = RidgeRegression(fit_intercept=False, method="svd", alpha=0.1)
model_svd.fit(X_train, y_train)
pred_svd = model_svd.predict(X_train)

error_svd = rel_error(pred_svd, sk_pred)
assert error_svd <= 1e-11
print("prediction error svd: ", error_svd)

prediction error inverse:  9.592829399440487e-14
prediction error qr:  1.0828073868059346e-13
prediction error svd:  6.661687082945866e-14


## with bias

In [12]:
# OTHER APPROACHES
sk_model = Ridge(fit_intercept=True, alpha=0.1)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model = RidgeRegression(fit_intercept=True, method="inverse", alpha=0.1)
model.fit(X_train, y_train)
pred = model.predict(X_train)

error = rel_error(pred, sk_pred)
assert error <= 1e-11
print("prediction error inverse: ", error)

model_qr = RidgeRegression(fit_intercept=True, method="qr", alpha=0.1)
model_qr.fit(X_train, y_train)
pred_qr = model_qr.predict(X_train)

error_qr = rel_error(pred_qr, sk_pred)
#assert error_qr <= 1e-11
print("prediction error qr: ", error_qr)

model_svd = RidgeRegression(fit_intercept=True, method="svd", alpha=0.1)
model_svd.fit(X_train, y_train)
pred_svd = model_svd.predict(X_train)

error_svd = rel_error(pred_svd, sk_pred)
assert error_svd <= 1e-11
print("prediction error svd: ", error_svd)

prediction error inverse:  5.939142232445097e-16
prediction error qr:  8.711170560454582e-16
prediction error svd:  3.0992106746641406e-14
