# Linear regression

In [82]:
import numpy as np
import scipy
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.linear_model import LinearRegression as LR
import cvxpy as cp
from sklearn.linear_model import Ridge, Lasso

def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

def generate_data(N=100, d=20, sigma=5):
    """ Data for Ridge """
    np.random.seed(1)
    w_star = np.random.randn(d)
    X = np.random.randn(N, d)
    y = X.dot(w_star) + np.random.normal(0, sigma, size=N)
    return X, y

def generate_data_lasso(N=100, d=20, sigma=5, density=0.2):
    """ Data for Lasso """
    np.random.seed(1)
    w_star = np.random.randn(d)
    idxs = np.random.choice(range(d), int((1-density)*d), replace=False)
    for idx in idxs:
        w_star[idx] = 0
    X = np.random.randn(N,d)
    y = X.dot(w_star) + np.random.normal(0, sigma, size=N)
    return X, y

def sigmoid(z):
    return 1/(1 + np.exp(-z))

def generate_data_log_reg(N=50, d=50):
    np.random.seed(1)
    w_star = np.array([1, 0.5, -0.5] + [0]*(d - 3))
    X = (np.random.random((N, d)) - 0.5)*10
    y = np.round(sigmoid(X @ w_star + np.random.randn(N)*0.5))
    return X, y

data = datasets.load_diabetes()
X_train, y_train = data.data, data.target
X_train2, y_train2 = generate_data()
X_train3, y_train3 = generate_data_lasso()
X_train4, y_train4 = generate_data_log_reg()

### Formulas:
The loss function is given by $$ RSS(w) = \dfrac{1}{2} \sum_{i=1}^{N}(y_{i} - f(x_{i}))^{2}$$
But minimize this function is equivalent to minimize $$ Loss(w) = \sum_{i=1}^{N}(y_{i} - f(x_{i}))^{2}$$

In [83]:
#Conde inspired by https://www.cvxpy.org/examples/basic/least_squares.html
class LinearRegression():
    def __init__(self, fit_intercept=True):
        self.w = 0
        self.fit_intercept = fit_intercept # bias
    
    def fit(self, X, y):
        # YOUR CODE HERE
        if self.fit_intercept == True:
            one = np.array([np.ones(X.shape[0])])
            X = np.concatenate((X, one.T), axis=1)
            
        self.w = cp.Variable(X.shape[1])
        cost = cp.sum_squares(y - X@self.w)
        prob = 0.5*cp.Problem(cp.Minimize(cost))
        prob.solve()
        self.w = self.w.value
        
    def predict(self, X):
        # YOUR CODE HERE
        if self.fit_intercept == True:
            one = np.array([np.ones(X.shape[0])])
            X = np.concatenate((X, one.T), axis=1)
        y_predict = X@self.w
        return y_predict

In [84]:
# Without bias
sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model = LinearRegression(fit_intercept=False)
model.fit(X_train, y_train)
pred = model.predict(X_train)

error = rel_error(pred, sk_pred)
assert error <= 1e-11
print("prediction error: ", error)

prediction error:  4.986916512007281e-14


In [85]:
# With bias
sk_model = LR(fit_intercept=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model = LinearRegression(fit_intercept=True)
model.fit(X_train, y_train)
pred = model.predict(X_train)

error = rel_error(pred, sk_pred)
assert error <= 1e-11
print("prediction error: ", error)

prediction error:  1.7719364782222394e-15


### Formulas:
The loss function is given by $$ J(w) = RSS(w) + \lambda \left\|w\right\|_{2}^{2}= \dfrac{1}{2} \sum_{i=1}^{N}(y_{i} - f(x_{i}))^{2} + \lambda \left\|w\right\|_{2}^{2}$$

# Ridge regression

In [86]:
class RidgeRegression():
    def __init__(self, fit_intercept=True, alpha=1.0):
        self.w = 0
        self.fit_intercept = fit_intercept # bias
        self.alpha = alpha
        self.b = 0 
        
    def fit(self, X, y):
        # YOUR CODE HERE
        self.w = cp.Variable(X.shape[1])
        if self.fit_intercept == True:
            self.b = cp.Variable(1)
            cost = cp.norm(y - (X@self.w + self.b), 2)**2 + self.alpha*cp.norm(self.w, 2)**2            
            problem = cp.Problem(cp.Minimize(cost))
            problem.solve()
            self.w = self.w.value
            self.b = self.b.value
            self.w = np.concatenate([self.b, self.w])
        else:
            cost = cp.norm(y - X@self.w, 2)**2 + self.alpha*cp.norm(self.w, 2)**2            
            problem = cp.Problem(cp.Minimize(cost))
            problem.solve()
            self.w = self.w.value

        
    def predict(self, X):
        # YOUR CODE HERE
        if self.fit_intercept == True:
            return X@self.w + self.b
        else:
            return X@self.w

In [87]:
# without bias
model = RidgeRegression(fit_intercept=False, alpha=0.1)
model.fit(X_train2, y_train2)

w_solution = [2.2741331962708733,-1.4638470967067754,-1.0248494680125682,-2.0920403465511344,0.19793283915844787,-1.5186692704860287,1.4772054728555917,-0.5873242037184364,0.9478891631775056,0.20512816292505345,1.251288772139991,-2.681990788073989,0.04476204682607866,-0.8659943546608414,0.6794151132231774,-0.45806886087608134,-0.11772977214105436,-1.5167038016358336,-0.7285498050097046,1.1970655855063765]
error = rel_error(model.w, w_solution)
print("prediction error: ", error)

prediction error:  1.2513225312639476e-10




In [88]:
# without bias
model = RidgeRegression(fit_intercept=False, alpha=0.1)
model.fit(X_train2, y_train2)

w_solution = [2.2741331962708733,-1.4638470967067754,-1.0248494680125682,-2.0920403465511344,0.19793283915844787,-1.5186692704860287,1.4772054728555917,-0.5873242037184364,0.9478891631775056,0.20512816292505345,1.251288772139991,-2.681990788073989,0.04476204682607866,-0.8659943546608414,0.6794151132231774,-0.45806886087608134,-0.11772977214105436,-1.5167038016358336,-0.7285498050097046,1.1970655855063765]
error = rel_error(model.w, w_solution)
assert error <= 1e-11
print("prediction error: ", error)

AssertionError: 

In [89]:
# with bias
model = RidgeRegression(fit_intercept=True, alpha=0.1)
model.fit(X_train2, y_train2)

w_solution = [-0.12421153467148652, 2.2885621086080183, -1.460016084362311, -1.0386230518778734, -2.0755554006911163, 0.16722384639912463, -1.5196366460908797, 1.490644600189988, -0.5506589908428944, 0.953560073286487, 0.20519345577354192, 1.2565834667864626, -2.6559028064874886, 0.05943949693736531, -0.8413627640000328, 0.689138089040695, -0.4717409588520616, -0.11380803855096185, -1.5157445906226719, -0.7155151711254747, 1.2094429722709097]
error = rel_error(model.w, w_solution)
print("prediction error: ", error)

prediction error:  2.0885510538440786e-07


In [90]:
# with bias
model = RidgeRegression(fit_intercept=True, alpha=0.1)
model.fit(X_train2, y_train2)

w_solution = [-0.12421153467148652, 2.2885621086080183, -1.460016084362311, -1.0386230518778734, -2.0755554006911163, 0.16722384639912463, -1.5196366460908797, 1.490644600189988, -0.5506589908428944, 0.953560073286487, 0.20519345577354192, 1.2565834667864626, -2.6559028064874886, 0.05943949693736531, -0.8413627640000328, 0.689138089040695, -0.4717409588520616, -0.11380803855096185, -1.5157445906226719, -0.7155151711254747, 1.2094429722709097]
error = rel_error(model.w, w_solution)
assert error <= 1e-11
print("prediction error: ", error)

AssertionError: 

# Lasso

In [91]:
class LassoRegression():
    def __init__(self, fit_intercept=True, alpha=1.0):
        self.w = 0
        self.fit_intercept = fit_intercept # bias
        self.alpha = alpha
        self.b = 0 
        
    def fit(self, X, y):
        # YOUR CODE HERE
        self.w = cp.Variable(X.shape[1])
        if self.fit_intercept == True:
            self.b = cp.Variable(1)
            cost = cp.norm(y - (X@self.w + self.b), 2)**2 + self.alpha*cp.norm(self.w, 1)        
            problem = cp.Problem(cp.Minimize(cost))
            problem.solve()
            self.w = self.w.value
            self.b = self.b.value
            self.w = np.concatenate([self.b, self.w])
        else:
            cost = cp.norm(y - X@self.w, 2)**2 + self.alpha*cp.norm(self.w, 1)           
            problem = cp.Problem(cp.Minimize(cost))
            problem.solve()
            self.w = self.w.value

        
    def predict(self, X):
        # YOUR CODE HERE
        if self.fit_intercept == True:
            return X@self.w + self.b
        else:
            return X@self.w

In [92]:
# without bias
model = LassoRegression(fit_intercept=False, alpha=0.1)
model.fit(X_train3, y_train3)

w_solution = [-0.8521262649671281, 0.0254836890059677, 0.7137682249492029, -0.8784736131759308, 0.26239208158878835, 0.6462086038227195, 0.6430994649127592, -0.6427109854827273, 0.8457229064959301, -0.3402411535357167, 0.33481565380057277, -2.2109314288098636, 0.22685158332884042, -0.9969899386988903, -0.486582184431374, -0.0654138939227482, 0.5269406964201837, -1.2991221762643268, -0.1472881184306273, -0.749580456217885]
error = rel_error(model.w, w_solution)
print("prediction error: ", error)

prediction error:  0.00015692226001567274


In [93]:
# without bias
model = LassoRegression(fit_intercept=False, alpha=0.1)
model.fit(X_train3, y_train3)

w_solution = [-0.8521262649671281, 0.0254836890059677, 0.7137682249492029, -0.8784736131759308, 0.26239208158878835, 0.6462086038227195, 0.6430994649127592, -0.6427109854827273, 0.8457229064959301, -0.3402411535357167, 0.33481565380057277, -2.2109314288098636, 0.22685158332884042, -0.9969899386988903, -0.486582184431374, -0.0654138939227482, 0.5269406964201837, -1.2991221762643268, -0.1472881184306273, -0.749580456217885]
error = rel_error(model.w, w_solution)
assert error <= 1e-11
print("prediction error: ", error)

AssertionError: 

In [94]:
# with bias
model = LassoRegression(fit_intercept=True, alpha=0.1)
model.fit(X_train3, y_train3)

w_solution = [0.02773625477624174, -0.8487201678165991, 0.025083418700813768, 0.7190663030638081, -0.8749520697657432, 0.26342660512442967, 0.6431043993589891, 0.6494385810352119, -0.6408605760028508, 0.8380835575881868, -0.34441398162201164, 0.3295390950817965, -2.2106797474729007, 0.22549557495145922, -0.9948431209736727, -0.48738779274910293, -0.06222135539355891, 0.52148327011433, -1.3019085858141572, -0.15286852250292499, -0.7487483354438857]
error = rel_error(model.w, w_solution)
print("prediction error: ", error)

prediction error:  5.709358101785286e-09


In [95]:
# with bias
model = LassoRegression(fit_intercept=True, alpha=0.1)
model.fit(X_train3, y_train3)

w_solution = [0.02773625477624174, -0.8487201678165991, 0.025083418700813768, 0.7190663030638081, -0.8749520697657432, 0.26342660512442967, 0.6431043993589891, 0.6494385810352119, -0.6408605760028508, 0.8380835575881868, -0.34441398162201164, 0.3295390950817965, -2.2106797474729007, 0.22549557495145922, -0.9948431209736727, -0.48738779274910293, -0.06222135539355891, 0.52148327011433, -1.3019085858141572, -0.15286852250292499, -0.7487483354438857]
error = rel_error(model.w, w_solution)
assert error <= 1e-11
print("prediction error: ", error)

AssertionError: 