In [8]:
from sklearn.datasets import load_boston
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import KFold
from sklearn.preprocessing import PolynomialFeatures

## Avoid printing out warnings
with warnings.catch_warnings():
     warnings.filterwarnings("ignore")
     X, y = load_boston(return_X_y=True)


### Linear Regression With Closed-Form


In [9]:

## Avoid printing out warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    X, y = load_boston(return_X_y=True)


# X has 506 datapoint with 13 features each

ones = np.ones((X.shape[0],1))
X = np.concatenate((ones,X), axis=1)
k = 10
kf = KFold(n_splits=k)

mse_train, mse_test = 0,0
for train,test in kf.split(X):
    x = X[train]
    x_t = np.transpose(x)
    y_train = y[train]
    thetas = np.matmul(np.matmul((np.linalg.inv(np.matmul(x_t,x))),x_t),y_train)

    mse_train += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in train]))
    mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))

print("Test set error: ", mse_train/k)
print("Train set error: ", mse_test/k)
print("Average error: ", (mse_test+mse_train)/(2*k))


Test set error:  4.609066917948425
Train set error:  5.180845679340211
Average error:  4.894956298644319


### Ridge Regression With Linear - Finding and Applying Best Lambda (alpha)

In [10]:
## Avoid printing out warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    X, y = load_boston(return_X_y=True)


# X has 506 datapoint with 13 features each

ones = np.ones((X.shape[0],1))
X = np.concatenate((ones,X), axis=1)

k = 10
kf = KFold(n_splits=k)

Lambdas = np.logspace(1, 7, num=13)
I = np.identity(X.shape[1])
I[0,0] = 0

#stores [highest_score, best_alpha]
best = [10000,0]
for alpha in Lambdas:
    mse_test = 0
    for train,test in kf.split(X):
        x = X[train]
        x_t = np.transpose(x)
        y_train = y[train]
        thetas = np.matmul(np.matmul((np.linalg.inv(np.matmul(x_t,x)+(alpha*I))),x_t),y_train)

        mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))/k
    if mse_test < best[0]:
        best[0] = mse_test
        best[1] = alpha


alpha = best[1]
mse_train, mse_test = 0,0
for train,test in kf.split(X):
    x = X[train]
    x_t = np.transpose(x)
    y_train = y[train]
    thetas = np.matmul(np.matmul((np.linalg.inv(np.matmul(x_t,x)+(alpha*I))),x_t),y_train)

    mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))/k
    mse_train += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in train]))/k

print("best alpha is: ", best[1])
print("average error with best alpha: ", (mse_train+mse_test)/2)
print("Test set error with best alpha: ",mse_test)
print("Train set error with best alpha: ", mse_train)




best alpha is:  31.622776601683793
average error with best alpha:  4.8855840644939885
Test set error with best alpha:  5.0279365263728835
Train set error with best alpha:  4.743231602615094


### Ridge Regression With Polynomial - Finding and Applying Best Lambda(alpha)

In [11]:

## Avoid printing out warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    X, y = load_boston(return_X_y=True)


# X has 506 datapoint with 13 features each

#ones = np.ones((X.shape[0],1))
#X = np.concatenate((ones,X), axis=1)
X = PolynomialFeatures(degree=2).fit_transform(X)

k = 10
kf = KFold(n_splits=k)

Lambdas = np.logspace(1, 7, num=13)
I = np.identity(X.shape[1])
I[0,0] = 0

#stores [highest_score, best_alpha]
best = [10000,0]
for alpha in Lambdas:
    mse_test = 0
    for train,test in kf.split(X):
        x = X[train]
        x_t = np.transpose(x)
        y_train = y[train]
        thetas = np.matmul(np.matmul((np.linalg.inv(np.matmul(x_t,x)+(alpha*I))),x_t),y_train)

        mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))/k
    if mse_test < best[0]:
        best[0] = mse_test
        best[1] = alpha
    
    
alpha = best[1]
mse_train, mse_test = 0,0
for train,test in kf.split(X):
    x = X[train]
    x_t = np.transpose(x)
    y_train = y[train]
    thetas = np.matmul(np.matmul((np.linalg.inv(np.matmul(x_t,x)+(alpha*I))),x_t),y_train)

    mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))/k
    mse_train += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in train]))/k

print("best alpha is: ", best[1])
print("average error with best alpha: ", (mse_train+mse_test)/2)
print("Test set error with best alpha: " , mse_test)
print("Train set error with best alpha: ",mse_train)




best alpha is:  3162277.6601683795
average error with best alpha:  4.286747352235054
Test set error with best alpha:  4.676965803221149
Train set error with best alpha:  3.8965289012489572


### Gradient Descent 

In [12]:
# Avoid printing out warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    X, y = load_boston(return_X_y=True)


# X has 506 datapoint with 13 features each
ones = np.ones((X.shape[0],1))
X = np.concatenate((ones,X), axis=1)

thetas = np.random.random((X.shape[1],))
lr = 0.000001
k = 10
kf = KFold(n_splits=k)
epochs = 10000
mse_train, mse_test = 0,0
print(np.shape(X))
for train,test in kf.split(X):
    thetas = np.random.random((X.shape[1],))
    x = X[train]
    x_t = np.transpose(x)
    y_train = y[train]
    for i in range(epochs):
        grad = 2/len(x) * np.matmul(np.transpose(x), np.matmul(x,thetas)-y_train)
        thetas = thetas - (lr * grad)

    mse_train += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in train]))
    mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))

print("Test set error: ", mse_train/k)
print("Train set error: ", mse_test/k)
print(np.round(thetas,3))



(506, 14)
Test set error:  8.004835415084644
Train set error:  8.76630711837296
[ 0.01   0.017  0.115  0.484  0.441  0.471  0.865  0.018  0.745  0.233
 -0.034  0.657  0.023 -0.195]


### Gradiant Descent with LASSO

In [13]:
# Avoid printing out warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    X, y = load_boston(return_X_y=True)

    
# X has 506 datapoint with 13 features each
ones = np.ones((X.shape[0],1))
X = np.concatenate((ones,X), axis=1)

I = np.identity(X.shape[1])
I[0,0] = 0
X = np.matmul(X,I)

thetas = np.random.random((X.shape[1],))
lr = 0.000001
k = 10
kf = KFold(n_splits=k)
epochs = 10000
mse_train, mse_test = 0,0
alpha = 1

for train,test in kf.split(X):
    thetas = np.random.random((X.shape[1],))
    x = X[train]
    x_t = np.transpose(x)
    y_train = y[train]
    for i in range(epochs):
        grad = (2/len(x) * np.matmul(np.transpose(x), np.matmul(x,thetas)-y_train)) + (alpha * np.sign(thetas))
        thetas = thetas - (lr * grad)

    mse_train += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in train]))
    mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))
print("Test set error: ", mse_train/k)
print("Train set error: ", mse_test/k)
print(np.round(thetas,3))




Test set error:  8.245167297157625
Train set error:  9.203670681916112
[ 0.938  0.203  0.138  0.266  0.463  0.325  0.845  0.033  0.066  0.429
 -0.043  0.751  0.034 -0.289]


### Gradient Descent with Elastic Net


In [14]:
# Avoid printing out warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    X, y = load_boston(return_X_y=True)

# X has 506 datapoint with 13 features each
ones = np.ones((X.shape[0],1))
X = np.concatenate((ones,X), axis=1)

I = np.identity(X.shape[1],)
I[0,0] = 0
X = np.matmul(X,I)

thetas = np.random.random((X.shape[1],))
lr = 0.000001
k = 10
kf = KFold(n_splits=k)
epochs = 10000
mse_train, mse_test = 0,0
alpha_lasso = 0.7
alpha_ridge = 1-alpha_lasso

for train,test in kf.split(X):
    thetas = np.random.random((X.shape[1],))
    x = X[train]
    x_t = np.transpose(x)
    y_train = y[train]
    for i in range(epochs):
        grad = (2/len(x) * np.matmul(np.transpose(x), np.matmul(x,thetas)-y_train)) + (alpha_lasso * np.sign(thetas)) + (alpha_ridge * 2 * thetas) 
        thetas = thetas - (lr * grad)

    mse_train += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in train]))
    mse_test += np.sqrt(np.average([(np.matmul(thetas,X[i])-y[i])**2 for i in test]))
print("Test set error: ", mse_train/k)
print("Train set error: ", mse_test/k)
print(np.round(thetas,3))




Test set error:  7.858785943156794
Train set error:  8.721560022256124
[ 0.042 -0.098  0.132  0.232  0.855  0.054  0.602  0.049  0.255  0.351
 -0.019  0.017  0.047 -0.192]
