# Least squares

In [1]:
import numpy as np
import scipy
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.linear_model import LinearRegression as LR

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

data = datasets.load_diabetes()
X_train, y_train = data.data, data.target

In [2]:
import pandas as pd

In [3]:
def fit_inverse(X, y):
    """Direct method using the inverse"""
    # YOUR CODE HERE
    w = (((np.linalg.inv((np.transpose(X)).dot(X))).dot(np.transpose(X)))).dot(y)
    
    return w

In [4]:
w = fit_inverse(X_train, y_train)

sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)

error = rel_error(sk_model.coef_, w)
assert error <= 1e-13
print("prediction error: ", error)

prediction error:  3.776169742623381e-14


In [5]:
def fit_cholesky(X, y):
    """ Cholesky approach """
    # YOUR CODE HERE
    L = np.linalg.cholesky(np.transpose(X).dot(X))
    
    u = (np.transpose(X)).dot(y)
    z1 = scipy.linalg.solve_triangular(L, u, lower=True)
    w = scipy.linalg.solve_triangular(np.transpose(L), z1)
    
    return w

In [6]:
w = fit_cholesky(X_train, y_train)

sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)

error = rel_error(sk_model.coef_, w)
assert error <= 1e-13
print("prediction error: ", error)

prediction error:  6.821014246451709e-14


In [7]:
def fit_qr(X, y):
    """ QR approach"""
    # YOUR CODE HERE
    (Q, R) = np.linalg.qr(X)
    
    z1 = (np.transpose(Q)).dot(y)
    w = scipy.linalg.solve_triangular(R, z1, lower=False)
    
    return w

In [8]:
w = fit_qr(X_train, y_train)

sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)

error = rel_error(sk_model.coef_, w)
assert error <= 1e-13
print("prediction error: ", error)

prediction error:  8.516125015254192e-15


In [9]:
def fit_svd(X, y):
    """ SVD approach"""
    # YOUR CODE HERE
    (U, s, V) = np.linalg.svd(X)
    z1 = np.transpose(U).dot(y)
    
    S = np.zeros((len(s), len(U)))
    np.fill_diagonal(S, 1/s)
    
    z2 = S.dot(z1)
    w = np.transpose(V).dot(z2)
    
    return w

In [10]:
w = fit_svd(X_train, y_train)

sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)

error = rel_error(sk_model.coef_, w)
assert error <= 1e-13
print("prediction error: ", error)

prediction error:  1.8008056021839425e-14


## Combine everything in a class

In [11]:
class LinearRegression():
    def __init__(self, fit_intercept=True, method="inverse"):
        self.w = 0
        self.fit_intercept = fit_intercept # bias
        self.method = method
    
    def fit(self, X, y):
        # YOUR CODE HERE
        if self.fit_intercept:
            tmp = np.ones((len(X),1))
            X = np.append(tmp, X, axis = 1)
                
        if self.method == "cholesky":
            self.w = fit_cholesky(X, y)
        elif self.method == "qr":
            self.w = fit_qr(X, y)
        elif self.method == "svd":
            self.w = fit_svd(X, y)
        # self.method == "inverse"
        else:
            self.w = fit_inverse(X, y)
            
            
    def predict(self, X):
        # YOUR CODE HERE
        if self.fit_intercept:
            tmp = np.ones((len(X),1))
            X = np.append(tmp, X, axis = 1)
            
        y = X.dot(self.w)
        
        return y

### without the bias term

In [12]:
# DIRECT INVERSE APPROACH
sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model = LinearRegression(fit_intercept=False)
model.fit(X_train, y_train)
pred = model.predict(X_train)

error = rel_error(pred, sk_pred)
assert error <= 1e-11
print("prediction error: ", error)

prediction error:  1.2016286059583595e-12


In [13]:
# OTHER APPROACHES
sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model_cholesky = LinearRegression(fit_intercept=False, method="cholesky")
model_cholesky.fit(X_train, y_train)
pred_cholesky = model_cholesky.predict(X_train)

error_cholesky = rel_error(pred_cholesky, sk_pred)
assert error_cholesky <= 1e-11
print("prediction error cholesky: ", error_cholesky)

model_qr = LinearRegression(fit_intercept=False, method="qr")
model_qr.fit(X_train, y_train)
pred_qr = model_qr.predict(X_train)

error_qr = rel_error(pred_qr, sk_pred)
assert error_qr <= 1e-11
print("prediction error qr: ", error_qr)

model_svd = LinearRegression(fit_intercept=False, method="svd")
model_svd.fit(X_train, y_train)
pred_svd = model_cholesky.predict(X_train)

error_cholesky = rel_error(pred_svd, sk_pred)
assert error_cholesky <= 1e-11
print("prediction error svd: ", error_cholesky)

prediction error cholesky:  2.640941991114797e-13
prediction error qr:  1.2823499602304161e-13
prediction error svd:  2.640941991114797e-13


### with the bias term

In [14]:
# DIRECT INVERSE APPROACH
sk_model = LR(fit_intercept=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model = LinearRegression(fit_intercept=True)
model.fit(X_train, y_train)
pred = model.predict(X_train)

error = rel_error(pred, sk_pred)
assert error <= 1e-14
print("prediction error: ", error)

prediction error:  5.051737745143608e-15


In [15]:
# OTHER APPROACHES
sk_model = LR(fit_intercept=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

model_cholesky = LinearRegression(fit_intercept=True, method="cholesky")
model_cholesky.fit(X_train, y_train)
pred_cholesky = model_cholesky.predict(X_train)

error_cholesky = rel_error(pred_cholesky, sk_pred)
assert error_cholesky <= 1e-11
print("prediction error cholesky: ", error_cholesky)

model_qr = LinearRegression(fit_intercept=True, method="qr")
model_qr.fit(X_train, y_train)
pred_qr = model_qr.predict(X_train)

error_qr = rel_error(pred_qr, sk_pred)
assert error_qr <= 1e-11
print("prediction error qr: ", error_qr)

model_svd = LinearRegression(fit_intercept=True, method="svd")
model_svd.fit(X_train, y_train)
pred_svd = model_cholesky.predict(X_train)

error_cholesky = rel_error(pred_svd, sk_pred)
assert error_cholesky <= 1e-11
print("prediction error svd: ", error_cholesky)

prediction error cholesky:  7.274265542175575e-15
prediction error qr:  1.678676663578964e-15
prediction error svd:  7.274265542175575e-15


Compare the running time of the different approaches on a large dataset

In [16]:
from datetime import datetime

In [17]:
data_url = "./data.csv"
train_dset = pd.read_csv(data_url, header=0, sep=',')
train_dset.head(len(train_dset))

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f276,f277,f278,f279,f280,f281,f282,f283,f284,target
0,0.2440,0.4690,0.32370,0.3787,0.4243,0.3362,0.5090,0.6333,0.4167,0.2544,...,1,0,0,0,0,0,1,0,0,1
1,0.2021,0.4385,0.17700,0.2214,0.4412,0.4524,0.4600,0.6655,0.4294,0.2263,...,0,0,0,0,0,0,0,0,0,1
2,0.1842,0.4695,0.02025,0.2455,0.5890,0.4610,0.4880,0.7120,0.5550,0.2220,...,0,0,0,0,0,0,1,1,0,0
3,0.5460,0.4717,0.01458,0.3400,0.4036,0.3862,0.3958,0.8115,0.4448,0.1494,...,0,0,0,1,0,0,0,0,0,0
4,0.1544,0.3667,0.25340,0.2969,0.5967,0.3667,0.5938,0.6420,0.5700,0.1913,...,1,1,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23471,0.2563,0.3262,0.34400,0.2766,0.5030,0.4434,0.4868,0.6130,0.4180,0.2627,...,0,1,1,0,0,0,0,0,0,1
23472,0.2023,0.5070,0.09674,0.2980,0.4966,0.4250,0.4170,0.5034,0.6084,0.2950,...,0,0,0,0,0,1,0,1,0,1
23473,0.2062,0.5464,0.08750,0.2607,0.5723,0.4490,0.4370,0.6533,0.6430,0.3140,...,0,0,0,0,0,1,0,1,0,0
23474,0.1799,0.5030,0.01859,0.3293,0.5320,0.4348,0.4275,0.6470,0.3640,0.1912,...,0,0,0,0,0,0,0,0,1,1


In [18]:
# inputs
X_train = train_dset.iloc[:len(train_dset), :285].to_numpy()

# labels
y_train = train_dset.iloc[:len(train_dset), 285:286].to_numpy().T[0]

### without the bias term

In [19]:
# DIRECT INVERSE APPROACH

### sklearn 
start_t0 = datetime.now()

sk_model = LR(fit_intercept=False)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

end_t0 = datetime.now()

### implemented class
start_t1 = datetime.now()

model_inverse = LinearRegression(fit_intercept=False)
model_inverse.fit(X_train, y_train)
pred = model_inverse.predict(X_train)

end_t1 = datetime.now()

print('Method (sklearn): {}'.format(end_t0 - start_t0))
print('Inverse Methot (LinearRegression): {}'.format(end_t1 - start_t1))

Method (sklearn): 0:00:00.825154
Inverse Methot (LinearRegression): 0:00:00.347915


In [20]:
# CHOLESKY APPROACH

### implemented class
start_t = datetime.now()

model_cholesky = LinearRegression(fit_intercept=False, method="cholesky")
model_cholesky.fit(X_train, y_train)
pred_cholesky = model_cholesky.predict(X_train)

end_t = datetime.now()

print('Inverse Methot (LinearRegression): {}'.format(end_t - start_t))

Inverse Methot (LinearRegression): 0:00:00.239004


In [21]:
# QR APPROACH

### implemented class
start_t = datetime.now()

model_qr = LinearRegression(fit_intercept=False, method="qr")
model_qr.fit(X_train, y_train)
pred_qr = model_qr.predict(X_train)

end_t = datetime.now()

print('Inverse Methot (LinearRegression): {}'.format(end_t - start_t))

Inverse Methot (LinearRegression): 0:00:00.597008


In [22]:
# SVD APPROACH

### implemented class
start_t = datetime.now()

model_svd = LinearRegression(fit_intercept=False, method="svd")
model_svd.fit(X_train, y_train)
pred_svd = model_svd.predict(X_train)

end_t = datetime.now()
print('Inverse Methot (LinearRegression): {}'.format(end_t - start_t))

Inverse Methot (LinearRegression): 0:01:07.516996


### with the bias term

In [23]:
# DIRECT INVERSE APPROACH

### implemented class
start_t1 = datetime.now()

model_inverse = LinearRegression(fit_intercept=True)
model_inverse.fit(X_train, y_train)
pred = model_inverse.predict(X_train)

end_t1 = datetime.now()

print('Inverse Methot (LinearRegression): {}'.format(end_t1 - start_t1))

Inverse Methot (LinearRegression): 0:00:01.418997


In [24]:
# CHOLESKY APPROACH

### implemented class
start_t = datetime.now()

model_cholesky = LinearRegression(fit_intercept=True, method="cholesky")
model_cholesky.fit(X_train, y_train)
pred_cholesky = model_cholesky.predict(X_train)

end_t = datetime.now()

print('Inverse Methot (LinearRegression): {}'.format(end_t - start_t))

Inverse Methot (LinearRegression): 0:00:00.683996


In [25]:
# QR APPROACH

### implemented class
start_t = datetime.now()

model_qr = LinearRegression(fit_intercept=True, method="qr")
model_qr.fit(X_train, y_train)
pred_qr = model_qr.predict(X_train)

end_t = datetime.now()

print('Inverse Methot (LinearRegression): {}'.format(end_t - start_t))

Inverse Methot (LinearRegression): 0:00:01.536992


In [26]:
# SVD APPROACH

### implemented class
start_t = datetime.now()

model_svd = LinearRegression(fit_intercept=True, method="svd")
model_svd.fit(X_train, y_train)
pred_svd = model_svd.predict(X_train)

end_t = datetime.now()
print('SVD Method (LinearRegression): {}'.format(end_t - start_t))

SVD Method (LinearRegression): 0:00:59.336920
