## 練習時間
試著使用 sklearn datasets 的其他資料集 (boston, ...)，來訓練自己的線性迴歸模型，並加上適當的正則話來觀察訓練情形。

In [1]:
import numpy as np
import matplotlib.pyplot as plot
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
def fnLinearRegression(x_train, x_test, y_train, y_test):
    linear = linear_model.LinearRegression()
    linear.fit(x_train, y_train)
    y_pred = linear.predict(x_test)
    return (mean_squared_error(y_test, y_pred))

def fnRegression(x_train, x_test, y_train, y_test, model, alpha):
    if (model == "Lasso"):
        Regression = linear_model.Lasso(alpha)
    elif (model == "Ridge"):
        Regression = linear_model.Ridge(alpha)
    else:
        print("Invail Value")
        return
    Regression.fit(x_train, y_train)
    y_pred = Regression.predict(x_test)
    return (mean_squared_error(y_test, y_pred))

In [3]:
boston = datasets.load_boston()

b_x_train, b_x_test, b_y_train, b_y_test = train_test_split(boston.data, boston.target, test_size=0.1, random_state=4)

In [4]:
Lasso_errors = []
Ridge_errors = []

print("Case:Boston\n")

lin_error = fnLinearRegression(b_x_train, b_x_test, b_y_train, b_y_test)
print("LinearRegression Mean Squared error: %.3f\n" % lin_error)

for alpha in np.arange(0, 10, 0.1):
    Lasso_errors.append(fnRegression(b_x_train, b_x_test, b_y_train, b_y_test, model="Lasso", alpha=0.1))
    Ridge_errors.append(fnRegression(b_x_train, b_x_test, b_y_train, b_y_test, model="Ridge", alpha=0.1))

print(f'Min Mean Squared error by using Lasso: {min(Lasso_errors)}')
print(f'Min Mean Squared error by using Ridge: {min(Ridge_errors)}')

Case:Boston

LinearRegression Mean Squared error: 17.032

Min Mean Squared error by using Lasso: 18.18040716055948
Min Mean Squared error by using Ridge: 17.058985804204816


In [5]:
wine = datasets.load_wine()

w_x_train, w_x_test, w_y_train, w_y_test = train_test_split(wine.data, wine.target, test_size=0.2, random_state=4)

In [6]:
Lasso_errors = []
Ridge_errors = []

print("Case:Wine\n")

lin_error = fnLinearRegression(w_x_train, w_x_test, w_y_train, w_y_test)
print("LinearRegression Mean Squared error: %.3f\n" % lin_error)

for alpha in np.arange(0, 10, 0.1):
    Lasso_errors.append(fnRegression(w_x_train, w_x_test, w_y_train, w_y_test, model="Lasso", alpha=0.1))
    Ridge_errors.append(fnRegression(w_x_train, w_x_test, w_y_train, w_y_test, model="Ridge", alpha=0.1))

print(f'Min Mean Squared error by using Lasso: {min(Lasso_errors)}')
print(f'Min Mean Squared error by using Ridge: {min(Ridge_errors)}')

Case:Wine

LinearRegression Mean Squared error: 0.067

Min Mean Squared error by using Lasso: 0.10175180481909947
Min Mean Squared error by using Ridge: 0.06709604906317543


In [7]:
linnerud = datasets.load_linnerud()

l_x_train, l_x_test, l_y_train, l_y_test = train_test_split(linnerud.data, linnerud.target, test_size=0.2, random_state=4)

In [8]:
Lasso_errors = []
Ridge_errors = []

print("Case:Linnerud\n")

lin_error = fnLinearRegression(l_x_train, l_x_test, l_y_train, l_y_test)
print("LinearRegression Mean Squared error: %.3f\n" % lin_error)

for alpha in np.arange(0, 10, 0.1):
    Lasso_errors.append(fnRegression(l_x_train, l_x_test, l_y_train, l_y_test, model="Lasso", alpha=50))
    Ridge_errors.append(fnRegression(l_x_train, l_x_test, l_y_train, l_y_test, model="Ridge", alpha=50))


fnLinearRegression(l_x_train, l_x_test, l_y_train, l_y_test)
fnRegression(l_x_train, l_x_test, l_y_train, l_y_test, model="Lasso", alpha=50)
fnRegression(l_x_train, l_x_test, l_y_train, l_y_test, model="Ridge", alpha=50)

print(f'Min Mean Squared error by using Lasso: {min(Lasso_errors)}')
print(f'Min Mean Squared error by using Ridge: {min(Ridge_errors)}')


Case:Linnerud

LinearRegression Mean Squared error: 386.152

Min Mean Squared error by using Lasso: 349.62875522033823
Min Mean Squared error by using Ridge: 377.53996796861276


# Conclusionn
* 三組對照 Datasets:1. Boston house-prices, 2. Wine, and 3. Linnerud.
* 使用alpha 為 0 - 10，間距為 0.1 進行測試，根據測試結果，只有使用 Linnerud 的 Datastes 得到比較好的結果，對照 Datastes 的說明，Boston house-prices 與 Wine 均為簡單的 regression 問題，而 Linnerud 為 multivariate regression 問題，因此驗證了一昧使用 Lasso/Ridge 並不會使結果變得更好，要事先分析是否為 overfitting/multivariate 的情況。